I keep running into this error and I couldn't find an exact solution or a general recipe for it. My inputs are 48x48 images, which don't match the expected input shape of resnet101. How can I adapt my input so it fits resnet101? My code is below; it should help clarify the problem.
if __name__ == "__main__":
    vid = cv2.VideoCapture(0)
    emotions = []
    while vid.isOpened():
        image = cv2.imread("/home/berkay/Desktop/angry_man.jpg")
        _, frame = vid.read()
        # takes in a gray coloured filter of the frame
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # initializing the haarcascade face detector
        faces = face_cascade.detectMultiScale(frame)
        for (x, y, w, h) in faces:
            # takes the region of interest of the face only in gray
            roi_gray = gray[y:y+h, x:x+w]
            resized = cv2.resize(roi_gray, (48, 48))  # resizes to 48x48 sized image
            # predict the mood
            img = img2tensor(resized)
            prediction = predict(img)
At that point, I get this error:
weight of size [64, 3, 7, 7], expected input[1, 1, 229, 229] to have 3 channels, but got 1 channels instead
How can I fix this? Thanks in advance
You can modify the input layer of the resnet so that it accepts single-channel tensor inputs, for example:
In [1]: model = resnet101()
In [2]: model.conv1 = nn.Conv2d(1, 64, kernel_size=(2, 2))
In [3]: model(torch.rand(10, 1, 48, 48))
Out[3]:
tensor([[-0.5015, 0.6124, 0.1370, ..., 1.2181, -0.4707, 0.3285],
[-0.4776, 1.1027, 0.0161, ..., 0.6363, -0.4733, 0.6218],
[-0.3935, 0.8276, -0.0316, ..., 0.6853, -0.4735, 0.6424],
...,
[-0.2986, 1.1758, 0.0158, ..., 0.7422, -0.4422, 0.4792],
[-0.2668, 0.7884, -0.1205, ..., 1.1445, -0.6249, 0.6697],
[-0.2139, 1.0412, 0.2326, ..., 0.8332, -0.8744, 0.4827]],
grad_fn=<AddmmBackward0>)
(you will probably need to modify the kernel size accordingly too)
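If you would rather stay close to the original architecture, a minimal sketch (assuming torchvision's resnet101, with everything but the first convolution left untouched) is to replace conv1 with a single-channel version that keeps the original 7x7 kernel, stride, and padding:

import torch
import torch.nn as nn
from torchvision.models import resnet101

model = resnet101()
# Replace only the first convolution: 1 input channel instead of 3,
# keeping the original 7x7 kernel, stride 2, padding 3, and no bias.
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# A grayscale 48x48 batch now passes through; ResNet's adaptive average
# pooling handles the small spatial size before the final fully connected layer.
out = model(torch.rand(10, 1, 48, 48))
print(out.shape)  # torch.Size([10, 1000])

Alternatively, you could leave the model unchanged and repeat the grayscale channel three times before feeding it in, e.g. img.repeat(1, 3, 1, 1) on an (N, 1, H, W) tensor.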
I'm trying to preprocess a set of images before passing them to a CNN model, and I had to convert them to grayscale because my model requires it. The issue is that when I save a converted image and load it back, it works correctly without any errors because the shape is (224, 224, 3), but if I pass it directly to the model the shape of the images is (None, 224, 224) and it produces this error:
ValueError: Input 0 of layer sequential_5 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (None, 224, 224)
the code is:
gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
resized_image = cv2.resize(crop_img, (224, 224))
frames.append(resized_image)
cv2.imwrite("/Users/naimahalaqeel/Documents/graduation project/toArabicText/test2/%d.jpg" % count2, resized_image)
count2 = count2 +1
success,image = vidcap.read()
count+=1
afterPrecceseFrames = np.array(frames)
predicted_classes = model.predict_classes(afterPrecceseFrames)
I think the problem is that you are applying cv2.resize() to crop_img instead of gray, so the grayscale conversion is never actually used.
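A minimal sketch of that part of the loop, assuming the model was trained on single-channel 224x224 inputs and therefore expects a 4-D batch of shape (batch, 224, 224, 1); the variable names follow the question:

gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)       # (H, W), single channel
resized_image = cv2.resize(gray, (224, 224))            # resize the grayscale image, not crop_img
frames.append(resized_image)

# ... after the loop ...
afterPrecceseFrames = np.array(frames)                                  # (N, 224, 224)
afterPrecceseFrames = np.expand_dims(afterPrecceseFrames, axis=-1)      # (N, 224, 224, 1), 4-D as the model expects
predicted_classes = model.predict_classes(afterPrecceseFrames)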
I'm new to machine learning and PyTorch. I'm using the imgaug library for image augmentation (https://github.com/aleju/imgaug).
I have this code:
class ImgAugTransform:
    def __init__(self):
        self.aug = iaa.Sequential(
            [
                # Apply the following augmenters to most images
                iaa.Fliplr(0.5),  # horizontally flip 50% of all images
                iaa.Flipud(0.2),  # vertically flip 20% of all images
                random_aug_use(iaa.CropAndPad(  # crop images by -5% to 10% of their height/width
                    percent=(-0.1, 0.2),
                    pad_mode=ia.ALL,
                    pad_cval=(0., 255)
                )),
                random_aug_use(iaa.Affine(
                    scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},  # scale images to 80-120% of their size, individually per axis
                    translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # translate by -20 to +20 percent (per axis)
                    rotate=(-45, 45),  # rotate by -45 to +45 degrees
                    shear=(-16, 16),  # shear by -16 to +16 degrees
                    order=[0, 1],  # use nearest neighbour or bilinear interpolation (fast)
                    cval=(0, 255),  # if mode is constant, use a cval between 0 and 255
                    mode=ia.ALL  # use any of scikit-image's warping modes (see 2nd image from the top for examples)
                ))
            ],
            random_order=True)

    def __call__(self, img):
        img = np.array(img)
        return self.aug.augment_image(img)
train_transforms = ImgAugTransform()
train_dataset = torchvision.datasets.ImageFolder(train_dir, train_transforms)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=batch_size)
So now I can't do this:
X_batch, y_batch = next(iter(train_dataloader))
I get this error:
ValueError: some of the strides of a given numpy array are negative. This is currently not supported, but will be added in future releases.
I came across this error as well.
The solution that worked for me was:
def __call__(self, img):
    img = np.array(img)
    return self.aug.augment_image(img).copy()
But if you're composing imgaug with torchvision.transforms, you can do something like:
def __call__(self, img):
    img = self.aug.augment_image(np.array(img))
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(self.normalization[0],
                                         self.normalization[1]),
    ])
    return transforms(img.copy())
You should make your augmented numpy arrays contiguous again.
Try modifying your augmenter code to:
def __call__(self, img):
    img = np.array(img)
    return np.ascontiguousarray(self.aug.augment_image(img))
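For context, here is a minimal demonstration of why this happens (only numpy and torch assumed): after a flip, the augmented array can come back as a view with negative strides, which torch.from_numpy refuses to wrap, and either .copy() or np.ascontiguousarray() produces a fresh, contiguous buffer:

import numpy as np
import torch

a = np.arange(12, dtype=np.float32).reshape(3, 4)
flipped = a[:, ::-1]                 # a view with a negative stride, like a horizontal flip

try:
    torch.from_numpy(flipped)        # raises ValueError: negative strides are not supported
except ValueError as e:
    print(e)

t = torch.from_numpy(np.ascontiguousarray(flipped))  # works: a contiguous copy
print(t.shape)  # torch.Size([3, 4])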
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import imread
img = imread('dog2.jpg')
#img is a shape of (360, 480, 3)
w = img.shape[0]
h = img.shape[1]
c = img.shape[2]
k = 3 # for my convenience
plt.subplot(1,2,1)
plt.imshow(img)
img = tf.cast(img, tf.float32)
img4d = tf.reshape(img,[1,w,h,c])
diag = np.array([[1,1,1],[0,0,0],[1,1,1]]*k, np.float32)
# diag = np.diag(diag)
diag4d = tf.reshape(diag,[k,k,c,1])
convolved = tf.nn.conv2d(img4d, diag4d, strides=[1,1,1,1], padding='SAME')
with tf.Session() as sess:
    result = sess.run(convolved)
    print(result.shape)
plt.subplot(1,2,2)
plt.imshow(np.squeeze(result))
plt.show()
I am just trying to use convolution to apply a simple blur effect initially. I know my kernel values aren't right, but my question is: I am giving it an input image that has 3 channels, so how can I get an output image with 3 channels? All I get is a single-channel result.
You are passing a kernel of shape [3, 3, 3, 1] to tf.nn.conv2d(). If you want to get a 3-channel image output from your convolution, the fourth dimension of your kernel (called out_channels in the official documentation) should be 3 instead of 1; [3, 3, 3, 3] for example.
You could also take a look at the conv2d documentation, this question and this question to better understand TensorFlow's conv2d method.
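A minimal sketch of that idea in the same TF1 style as the question, reusing img4d from your code (the box blur here is just a placeholder kernel, not the exact effect you were after): build a [3, 3, 3, 3] kernel where output channel i only reads input channel i, so each colour channel is filtered independently and the result keeps 3 channels.

blur = np.ones((3, 3), np.float32) / 9.0        # 3x3 box blur
kernel = np.zeros((3, 3, 3, 3), np.float32)     # [filter_height, filter_width, in_channels, out_channels]
for i in range(3):
    kernel[:, :, i, i] = blur                   # channel i -> channel i, no mixing between channels

kernel4d = tf.constant(kernel)
convolved = tf.nn.conv2d(img4d, kernel4d, strides=[1, 1, 1, 1], padding='SAME')
# result now has shape (1, w, h, 3), so np.squeeze(result) is a 3-channel image again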
I'm trying to use Keras model.fit_generator() to fit a model; below is my definition of the generator:
from sklearn.utils import shuffle
IMG_PATH_PREFIX = "./data/IMG/"
def generator(samples, batch_size=64):
    num_samples = len(samples)
    while 1:  # Loop forever so the generator never terminates
        shuffle(samples)
        for offset in range(0, num_samples, batch_size):
            batch_samples = samples[offset:offset+batch_size]
            images = []
            angles = []
            for batch_sample in batch_samples:
                name = IMG_PATH_PREFIX + batch_sample[0].split('/')[-1]
                center_image = cv2.imread(name)
                center_angle = float(batch_sample[3])
                images.append(center_image)
                angles.append(center_angle)
            X_train = np.array(images)
            y_train = np.array(angles)
            #X_train = np.expand_dims(X_train, axis=0)
            #y_train = np.expand_dims(y_train, axis=1)
            print("X_train shape: ", X_train.shape, " y_train shape:", y_train.shape)
            #print("X train: ", X_train)
            yield X_train, y_train
train_generator = generator(train_samples, batch_size = 32)
validation_generator = generator(validation_samples, batch_size = 32)
Here the output shape is:
X_train shape: (32, 160, 320, 3) y_train shape: (32,)
The model fit code is:
model = Sequential()
#cropping layer
model.add(Cropping2D(cropping=((50,20), (1,1)), input_shape=(160,320,3), dim_ordering='tf'))
model.compile(loss = "mse", optimizer="adam")
model.fit_generator(train_generator, samples_per_epoch= len(train_samples), validation_data=validation_generator, nb_val_samples=len(validation_samples), nb_epoch=3)
Then I get the error message:
ValueError: Error when checking model target: expected cropping2d_6 to have 4 dimensions, but got array with shape (32, 1)
Could someone help me figure out what the issue is?
The big question here is: do you know what you are trying to do?
1) If you read here, the input is a 4D tensor and the output is ALSO a 4D tensor. Your target is a 2D tensor of shape (batch_size, 1). So of course, when Keras tries to compute the error between the output, which is 3D (without the batch dimension), and the target, which is 1D (without the batch dimension), it cannot make sense of that. Outputs and targets must have the same dimensions.
2) Do you know what Cropping2D is actually doing? It is cropping your images, i.e. removing values at the beginning and end of the cropped dimensions. In your case you are outputting images of shape (90, 318, 3). This is not a prediction; there are no weights to train in this layer, so there is no reason to fit this "model". Your model is just cropping images. No training is needed for that.
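To actually train something, the model needs layers after the crop that reduce the output to the same shape as the target, (batch_size, 1). A minimal sketch in the same old Keras API as the question, assuming the goal is to predict a single value (e.g. a steering angle) per image; the 100-unit Dense layer is just an illustrative choice:

from keras.layers import Flatten, Dense

model = Sequential()
# crop the top/bottom rows and 1 pixel on each side, as before
model.add(Cropping2D(cropping=((50, 20), (1, 1)), input_shape=(160, 320, 3), dim_ordering='tf'))
# layers with trainable weights that map the cropped image to one value
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(1))                      # output shape (batch_size, 1) matches the target
model.compile(loss="mse", optimizer="adam")
model.fit_generator(train_generator, samples_per_epoch=len(train_samples),
                    validation_data=validation_generator,
                    nb_val_samples=len(validation_samples), nb_epoch=3)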
I am trying to use DBSCAN from scikit-learn to segment an image based on color. In the results I'm getting there are 3 clusters. My goal is to separate the buoys in the picture into different clusters, but they keep showing up as the same cluster. I've tried a wide range of eps and min_samples values, but those two always cluster together. My code is:
img = cv2.imread("buoy1.jpg")
labimg = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
n = 0
while(n<4):
    labimg = cv2.pyrDown(labimg)
    n = n+1
feature_image=np.reshape(labimg, [-1, 3])
rows, cols, chs = labimg.shape
db = DBSCAN(eps=5, min_samples=50, metric = 'euclidean',algorithm ='auto')
db.fit(feature_image)
labels = db.labels_
plt.figure(2)
plt.subplot(2, 1, 1)
plt.imshow(img)
plt.axis('off')
plt.subplot(2, 1, 2)
plt.imshow(np.reshape(labels, [rows, cols]))
plt.axis('off')
plt.show()
I assume this is taking the Euclidean distance, and since it's in Lab space the Euclidean distance would differ between different colors. If anyone can give me guidance on this I'd really appreciate it.
Update:
The answer below works. Since DBSCAN requires an array with no more than 2 dimensions, I concatenated the pixel coordinates to the image channels and reshaped to produce an n x 5 matrix, where n is the x dimension times the y dimension. This seems to work for me.
indices = np.dstack(np.indices(img.shape[:2]))
xycolors = np.concatenate((img, indices), axis=-1)
feature_image2 = np.reshape(xycolors, [-1, 5])
You need to use both color and position.
Right now, you are using colors only.
Could you please add the entire code to the answer? I'm not able to understand where to add those 3 lines that worked for you – user8306074 Sep 4 at 8:58
Let me answer for you, and here is the full version of the code:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
img= cv2.imread('your image')
labimg = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
n = 0
while(n<4):
    labimg = cv2.pyrDown(labimg)
    n = n+1
feature_image=np.reshape(labimg, [-1, 3])
rows, cols, chs = labimg.shape
db = DBSCAN(eps=5, min_samples=50, metric = 'euclidean',algorithm ='auto')
db.fit(feature_image)
labels = db.labels_
indices = np.dstack(np.indices(labimg.shape[:2]))
xycolors = np.concatenate((labimg, indices), axis=-1)
feature_image2 = np.reshape(xycolors, [-1,5])
db.fit(feature_image2)
labels2 = db.labels_
plt.figure(2)
plt.subplot(2, 1, 1)
plt.imshow(img)
plt.axis('off')
# plt.subplot(2, 1, 2)
# plt.imshow(np.reshape(labels, [rows, cols]))
# plt.axis('off')
plt.subplot(2, 1, 2)
plt.imshow(np.reshape(labels2, [rows, cols]))
plt.axis('off')
plt.show()
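One judgment call worth noting (not part of the original answer): once position and color are mixed into a single 5-column feature matrix, eps is measured in that combined space, so the coordinate columns, which can range into the hundreds of pixels, can dominate the Lab channels. A small sketch of scaling the features before fitting, using scikit-learn's StandardScaler; eps will need re-tuning on the scaled data:

from sklearn.preprocessing import StandardScaler

# Put coordinates and colors on a comparable scale before DBSCAN,
# so eps means roughly the same thing along every feature axis.
scaled = StandardScaler().fit_transform(feature_image2.astype(np.float64))
db2 = DBSCAN(eps=0.5, min_samples=50).fit(scaled)
labels_scaled = db2.labels_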