What is the difference between UpSampling2D and Conv2DTranspose functions in keras? - machine-learning

Here in this code UpSampling2D and Conv2DTranspose seem to be used interchangeably. I want to know why this is happening.
# u-net model with up-convolution or up-sampling and weighted binary-crossentropy as loss func
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose, BatchNormalization, Dropout
from keras.optimizers import Adam
from keras.utils import plot_model
from keras import backend as K
def unet_model(n_classes=5, im_sz=160, n_channels=8, n_filters_start=32, growth_factor=2, upconv=True,
class_weights=[0.2, 0.3, 0.1, 0.1, 0.3]):
droprate=0.25
n_filters = n_filters_start
inputs = Input((im_sz, im_sz, n_channels))
#inputs = BatchNormalization()(inputs)
conv1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(inputs)
conv1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
#pool1 = Dropout(droprate)(pool1)
n_filters *= growth_factor
pool1 = BatchNormalization()(pool1)
conv2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool1)
conv2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
pool2 = Dropout(droprate)(pool2)
n_filters *= growth_factor
pool2 = BatchNormalization()(pool2)
conv3 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool2)
conv3 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
pool3 = Dropout(droprate)(pool3)
n_filters *= growth_factor
pool3 = BatchNormalization()(pool3)
conv4_0 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool3)
conv4_0 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv4_0)
pool4_1 = MaxPooling2D(pool_size=(2, 2))(conv4_0)
pool4_1 = Dropout(droprate)(pool4_1)
n_filters *= growth_factor
pool4_1 = BatchNormalization()(pool4_1)
conv4_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool4_1)
conv4_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv4_1)
pool4_2 = MaxPooling2D(pool_size=(2, 2))(conv4_1)
pool4_2 = Dropout(droprate)(pool4_2)
n_filters *= growth_factor
conv5 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool4_2)
conv5 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv5)
n_filters //= growth_factor
if upconv:
up6_1 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv5), conv4_1])
else:
up6_1 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4_1])
up6_1 = BatchNormalization()(up6_1)
conv6_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up6_1)
conv6_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv6_1)
conv6_1 = Dropout(droprate)(conv6_1)
n_filters //= growth_factor
if upconv:
up6_2 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv6_1), conv4_0])
else:
up6_2 = concatenate([UpSampling2D(size=(2, 2))(conv6_1), conv4_0])
up6_2 = BatchNormalization()(up6_2)
conv6_2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up6_2)
conv6_2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv6_2)
conv6_2 = Dropout(droprate)(conv6_2)
n_filters //= growth_factor
if upconv:
up7 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv6_2), conv3])
else:
up7 = concatenate([UpSampling2D(size=(2, 2))(conv6_2), conv3])
up7 = BatchNormalization()(up7)
conv7 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up7)
conv7 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv7)
conv7 = Dropout(droprate)(conv7)
n_filters //= growth_factor
if upconv:
up8 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv7), conv2])
else:
up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2])
up8 = BatchNormalization()(up8)
conv8 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up8)
conv8 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv8)
conv8 = Dropout(droprate)(conv8)
n_filters //= growth_factor
if upconv:
up9 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv8), conv1])
else:
up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1])
conv9 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up9)
conv9 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv9)
conv10 = Conv2D(n_classes, (1, 1), activation='sigmoid')(conv9)
model = Model(inputs=inputs, outputs=conv10)
def weighted_binary_crossentropy(y_true, y_pred):
class_loglosses = K.mean(K.binary_crossentropy(y_true, y_pred), axis=[0, 1, 2])
return K.sum(class_loglosses * K.constant(class_weights))
model.compile(optimizer=Adam(), loss=weighted_binary_crossentropy)
return model

UpSampling2D is just a simple scaling up of the image by using nearest neighbour or bilinear upsampling, so nothing smart. Advantage is it's cheap.
Conv2DTranspose is a convolution operation whose kernel is learnt (just like normal conv2d operation) while training your model. Using Conv2DTranspose will also upsample its input but the key difference is the model should learn what is the best upsampling for the job.
EDIT: Link to nice visualization of transposed convolution: https://towardsdatascience.com/types-of-convolutions-in-deep-learning-717013397f4d

Related

dice coefficient and dice loss very low in UNET segmentation

I'm doing binary segmentation using UNET. My dataset is composed of images and masks. I divided the images and masks into different folders ( train_images, train_masks, val_images and val_masks). Then I performed Data Augmentation.
#Define the generator.
#We are not doing any rotation or zoom to make sure mask values are not interpolated.
#It is important to keep pixel values in mask as 0, 1, 2, 3, .....
def trainGenerator(train_img_path, train_mask_path):
img_data_gen_args = dict(horizontal_flip=True,
vertical_flip=True,
fill_mode='reflect',shear_range=0.5,
rotation_range=50,
zoom_range=0.2,
width_shift_range=0.2, height_shift_range=0.2,
rescale=1/255.)
mask_data_gen_args = dict(horizontal_flip=True,
vertical_flip=True,
fill_mode='reflect',shear_range=0.5,
rotation_range=50,
zoom_range=0.2,
width_shift_range=0.2, height_shift_range=0.2,
preprocessing_function = lambda x: np.where(x>0, 1, 0).astype(x.dtype)) #Binarize the output again.
image_datagen = ImageDataGenerator(**img_data_gen_args)
mask_datagen = ImageDataGenerator(**mask_data_gen_args)
image_generator = image_datagen.flow_from_directory(
train_img_path,
class_mode = None,
color_mode = 'grayscale',
target_size=(512,512),
batch_size = batch_size,
seed = seed)
mask_generator = mask_datagen.flow_from_directory(
train_mask_path,
class_mode = None ,
color_mode = 'grayscale',
target_size=(512,512),
batch_size = batch_size,
seed = seed)
train_generator = zip(image_generator, mask_generator)
return train_generator'''
And this produced:
Found 2942 images belonging to 1 classes.
Found 2942 images belonging to 1 classes.
Found 318 images belonging to 1 classes.
Found 318 images belonging to 1 classes.
Then I built the model
inputs = tf.keras.layers.Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
#Contraction path
c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(inputs)
c1= tf.keras.layers.BatchNormalization()(c1)
c1 = tf.keras.layers.Dropout(0.1)(c1)
c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c1)
c1= tf.keras.layers.BatchNormalization()(c1)
p1 = tf.keras.layers.MaxPooling2D((2, 2))(c1)
c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p1)
c2= tf.keras.layers.BatchNormalization()(c2)
c2 = tf.keras.layers.Dropout(0.1)(c2)
c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c2)
c2= tf.keras.layers.BatchNormalization()(c2)
p2 = tf.keras.layers.MaxPooling2D((2, 2))(c2)
c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p2)
c3= tf.keras.layers.BatchNormalization()(c3)
c3 = tf.keras.layers.Dropout(0.2)(c3)
c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c3)
c3= tf.keras.layers.BatchNormalization()(c3)
p3 = tf.keras.layers.MaxPooling2D((2, 2))(c3)
c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p3)
c4= tf.keras.layers.BatchNormalization()(c4)
c4 = tf.keras.layers.Dropout(0.2)(c4)
c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c4)
c4= tf.keras.layers.BatchNormalization()(c4)
p4 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(c4)
c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p4)
c5= tf.keras.layers.BatchNormalization()(c5)
c5 = tf.keras.layers.Dropout(0.3)(c5)
c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c5)
c5= tf.keras.layers.BatchNormalization()(c5)
#Expansive path
u6 = tf.keras.layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
u6 = tf.keras.layers.concatenate([u6, c4])
c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u6)
c6 = tf.keras.layers.Dropout(0.2)(c6)
c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c6)
u7 = tf.keras.layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
u7 = tf.keras.layers.concatenate([u7, c3])
c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u7)
c7 = tf.keras.layers.Dropout(0.2)(c7)
c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c7)
u8 = tf.keras.layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c7)
u8 = tf.keras.layers.concatenate([u8, c2])
c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u8)
c8 = tf.keras.layers.Dropout(0.1)(c8)
c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c8)
u9 = tf.keras.layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c8)
u9 = tf.keras.layers.concatenate([u9, c1], axis=3)
c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u9)
c9 = tf.keras.layers.Dropout(0.1)(c9)
c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c9)
outputs = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
from keras import backend as K
def dice_coefficient(y_true, y_pred, smooth=0.0001):
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
intersection = K.sum(y_true_f * y_pred_f)
return ((2. * intersection + smooth) / (K.sum(y_true_f) +
K.sum(y_pred_f) + smooth))
def dice_coefficient_loss(y_true, y_pred):
return 1.0-dice_coefficient(y_true, y_pred)
LR = 0.00001
optim = tf.keras.optimizers.Adam(LR)
metrics = ['Accuracy', 'Precision', 'Recall', dice_coefficient ]
model.compile(optimizer=optim, loss=dice_coefficient_loss, metrics=metrics)
history=model.fit(train_generator,
steps_per_epoch=steps_per_epoch,
epochs=10,
verbose=1,
validation_data=val_generator,
validation_steps=val_steps_per_epoch)
*However results are very bad and I can't understand why
Any ideas ?
Thanks *

Why I have good val_acc during training, but always wrong manul prediction on the same images

I trained my model of CNN net on images with good val_acc=0.97 and using model.fit_generator.
Here is the output of last epoch, proofing high validation accuracy:
199/200 [============================>.] - ETA: 1s - loss: 0.1563 - acc: 0.9563
200/200 [==============================] - 306s 2s/step - loss: 0.1556 - acc: 0.9565 - val_loss: 0.1402 - val_acc: 0.9691
Epoch 00005: val_acc improved from 0.96701 to 0.96907, saving model to /home/sergorl/cars/color_weights.hdf5
But when I use the same validation data set, which I use during training, but test only one image and for every image in my validation set I always get the wrong predicted label and the predicted probabilities looks like a uniform distribution.
I read this links:
Wrong prediction on images
Why is Keras training well but returning wrong predictions?
Keras Val_acc is good but prediction for same data is poor
But I don't find the solution!
from keras.models import Sequential,Model,load_model
from keras.optimizers import SGD
from keras.layers import BatchNormalization, Lambda, Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation
from keras.layers.merge import Concatenate
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
import os
import cv2
import numpy as np
class CarColorNet:
def __init__(self, numClasses=6, imageWidth=256, imageHeight=256):
self.classes = {}
self.numClasses = numClasses
self.imageWidth = imageWidth
self.imageHeight = imageHeight
input_image = Input(shape=(self.imageWidth, self.imageHeight, 3))
# ------------------------------------ TOP BRANCH ------------------------------------
# first top convolution layer
top_conv1 = Convolution2D(filters=48, kernel_size=(11, 11), strides=(4, 4),
input_shape=(self.imageWidth, self.imageHeight, 3), activation='relu')(input_image)
top_conv1 = BatchNormalization()(top_conv1)
top_conv1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(top_conv1)
# second top convolution layer
# split feature map by half
top_top_conv2 = Lambda(lambda x: x[:, :, :, :24])(top_conv1)
top_bot_conv2 = Lambda(lambda x: x[:, :, :, 24:])(top_conv1)
top_top_conv2 = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(top_top_conv2)
top_top_conv2 = BatchNormalization()(top_top_conv2)
top_top_conv2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(top_top_conv2)
top_bot_conv2 = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(top_bot_conv2)
top_bot_conv2 = BatchNormalization()(top_bot_conv2)
top_bot_conv2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(top_bot_conv2)
# third top convolution layer
# concat 2 feature map
top_conv3 = Concatenate()([top_top_conv2, top_bot_conv2])
top_conv3 = Convolution2D(filters=192, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(top_conv3)
# fourth top convolution layer
# split feature map by half
top_top_conv4 = Lambda(lambda x: x[:, :, :, :96])(top_conv3)
top_bot_conv4 = Lambda(lambda x: x[:, :, :, 96:])(top_conv3)
top_top_conv4 = Convolution2D(filters=96, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(top_top_conv4)
top_bot_conv4 = Convolution2D(filters=96, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(top_bot_conv4)
# fifth top convolution layer
top_top_conv5 = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(top_top_conv4)
top_top_conv5 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(top_top_conv5)
top_bot_conv5 = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(top_bot_conv4)
top_bot_conv5 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(top_bot_conv5)
# ------------------------------------ TOP BOTTOM ------------------------------------
# first bottom convolution layer
bottom_conv1 = Convolution2D(filters=48, kernel_size=(11, 11), strides=(4, 4),
input_shape=(224, 224, 3), activation='relu')(input_image)
bottom_conv1 = BatchNormalization()(bottom_conv1)
bottom_conv1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(bottom_conv1)
# second bottom convolution layer
# split feature map by half
bottom_top_conv2 = Lambda(lambda x: x[:, :, :, :24])(bottom_conv1)
bottom_bot_conv2 = Lambda(lambda x: x[:, :, :, 24:])(bottom_conv1)
bottom_top_conv2 = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(bottom_top_conv2)
bottom_top_conv2 = BatchNormalization()(bottom_top_conv2)
bottom_top_conv2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(bottom_top_conv2)
bottom_bot_conv2 = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(bottom_bot_conv2)
bottom_bot_conv2 = BatchNormalization()(bottom_bot_conv2)
bottom_bot_conv2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(bottom_bot_conv2)
# third bottom convolution layer
# concat 2 feature map
bottom_conv3 = Concatenate()([bottom_top_conv2, bottom_bot_conv2])
bottom_conv3 = Convolution2D(filters=192, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(bottom_conv3)
# fourth bottom convolution layer
# split feature map by half
bottom_top_conv4 = Lambda(lambda x: x[:, :, :, :96])(bottom_conv3)
bottom_bot_conv4 = Lambda(lambda x: x[:, :, :, 96:])(bottom_conv3)
bottom_top_conv4 = Convolution2D(filters=96, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(bottom_top_conv4)
bottom_bot_conv4 = Convolution2D(filters=96, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(bottom_bot_conv4)
# fifth bottom convolution layer
bottom_top_conv5 = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(bottom_top_conv4)
bottom_top_conv5 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(bottom_top_conv5)
bottom_bot_conv5 = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
padding='same')(bottom_bot_conv4)
bottom_bot_conv5 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(bottom_bot_conv5)
# ---------------------------------- CONCATENATE TOP AND BOTTOM BRANCH ------------------------------------
conv_output = Concatenate()([top_top_conv5, top_bot_conv5, bottom_top_conv5, bottom_bot_conv5])
# Flatten
flatten = Flatten()(conv_output)
# Fully-connected layer
FC_1 = Dense(units=4096, activation='relu')(flatten)
FC_1 = Dropout(0.6)(FC_1)
FC_2 = Dense(units=4096, activation='relu')(FC_1)
FC_2 = Dropout(0.6)(FC_2)
output = Dense(units=self.numClasses, activation='softmax')(FC_2)
self.model = Model(inputs=input_image, outputs=output)
sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
# sgd = SGD(lr=0.01, momentum=0.9, decay=0.0005, nesterov=True)
self.model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
def train(self,
pathToTrainSet,
pathToValidSet,
pathToSaveModel,
epochs=7,
batchSize=32,
stepsPerEpoch=200,
validationSteps=1000):
fileOfWeights = 'color_weights.hdf5'
checkpoint = ModelCheckpoint(os.path.join(pathToSaveModel, fileOfWeights),
monitor='val_acc', verbose=1,
save_best_only=True, mode='max')
checkpoint2 = ModelCheckpoint(os.path.join(pathToSaveModel, fileOfWeights),
monitor='val_loss', verbose=1,
save_best_only=True, mode='max')
trainDataGen = ImageDataGenerator(rescale=1.0/255, shear_range=0.2,
zoom_range=0.3, horizontal_flip=True)
validDataGen = ImageDataGenerator(rescale=1.0/255)
trainSet = trainDataGen.flow_from_directory(
pathToTrainSet,
target_size=(self.imageWidth, self.imageHeight),
batch_size=batchSize,
class_mode='categorical'
)
self.classes = {v: k for k, v in trainSet.class_indices.items()}
np.save(os.path.join(pathToSaveModel, 'class_index.npy'), self.classes)
validSet = validDataGen.flow_from_directory(
pathToValidSet,
target_size=(self.imageWidth, self.imageHeight),
batch_size=batchSize,
class_mode='categorical'
)
self.model.fit_generator(
trainSet,
steps_per_epoch=stepsPerEpoch,
epochs=epochs,
validation_data=validSet,
validation_steps=validationSteps//batchSize,
callbacks=[checkpoint, checkpoint2])
print('============================ Saving is here ============================')
self.model.save(os.path.join(pathToSaveModel, 'car_color_net.h5'))
#staticmethod
def load(pathToModel, pathToClassIndexes):
model = load_model(pathToModel)
layers = model.layers
inputShape, outputShape = layers[0].input_shape, layers[-1].output_shape,
imageWidth, imageHeight = inputShape[1], inputShape[2]
numClasses = outputShape[1]
net = CarColorNet(numClasses, imageWidth, imageHeight)
net.classes = np.load(os.path.join(pathToClassIndexes, 'class_index.npy')).item()
return net
def predictOneImage(self, pathToImage):
frame = cv2.imread(pathToImage)
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (self.imageWidth, self.imageHeight))
frame = np.expand_dims(frame, axis=0)
# cv2.imshow("boxed", frame[0, :, :, :])
# cv2.waitKey(0)
frame = np.asarray(frame, dtype='float32')
img = frame/255
probs = self.model.predict(img)
ind = probs.argmax(axis=-1)[0]
return self.classes[ind]
if __name__ == '__main__':
pathToTrainSet = '/home/sergorl/cars/train'
pathToValidSet = '/home/sergorl/cars/valid'
pathToSaveModel = '/home/sergorl/cars'
## Train net
# net = CarColorNet(numClasses=6)
# net.train(pathToTrainSet, pathToValidSet, pathToSaveModel)
# Test on all images from validSet
net = CarColorNet.load(os.path.join(pathToSaveModel, 'car_color_net.h5'), pathToSaveModel)
count, countTrueLabels = 0, 0
for dirpath, _dirnames, filenames in os.walk(pathToValidSet):
trueLabel = dirpath.split('/')[-1]
for file in filenames:
label = net.predictOneImage(os.path.join(dirpath, file))
print(trueLabel, label)
if label == trueLabel:
countTrueLabels += 1
count += 1
print('rate is {0:.2f}'.format(float(countTrueLabels) / float(count) * 100))
If I have a good val_acc=0.97, I'll expect the same result (or nearly), testing every image in validation set. But always have wrong prediction!
I ran net immediately after train was done and see that learning was good:
if __name__ == '__main__':
pathToTrainSet = '/home/sergorl/cars/train'
pathToValidSet = '/home/sergorl/cars/valid'
pathToSaveModel = '/home/sergorl/cars'
# Train net
net = CarColorNet(numClasses=6)
net.train(pathToTrainSet, pathToValidSet, pathToSaveModel)
# Test on all images from validSet
count, countTrueLabels = 0, 0
for dirpath, _dirnames, filenames in os.walk(pathToValidSet):
trueLabel = dirpath.split('/')[-1]
for file in filenames:
label = net.predictOneImage(os.path.join(dirpath, file))
print(trueLabel, label)
if label == trueLabel:
countTrueLabels += 1
count += 1
print('rate is {0:.2f}'.format(float(countTrueLabels) / float(count) * 100))
So it seems the problem is inside model.save and it looks like saving doesn't work!. I found many related issues on git, for example:
https://github.com/keras-team/keras/issues/4875
https://github.com/keras-team/keras/issues/4904
But I don't know how to fix it with Python 3.7.3 and keras 2.0.0
Can you share more about the issue like what is the output you are getting. From the code i can see that you are training for 6 classes and using categorical cross entropy so ideally you should be getting an array with 6 values with each value bw 0 and 1 and the index of highest value in that array should be the output.

Incompatible shapes in keras

I am trying to use U-net network architeture for stereo vision.
I have datasets with 3 different image sizes (1240x368, 1224x368 and 1384x1104).
Here is My whole class:
import pickle
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, UpSampling2D, Conv2DTranspose
from keras.utils import np_utils
import sys, numpy as np
import keras
import cv2
pkl_file = open('data.p', 'rb')
dict = pickle.load(pkl_file)
X_data = dict['images']
Y_data = dict['disparity']
data_num = len(X_data)
train_num = int(data_num * 0.8)
X_train = X_data[:train_num]
X_test = X_data[train_num:]
Y_train = Y_data[:train_num]
Y_test = Y_data[train_num:]
def gen(X, Y):
while True:
for x, y in zip(X, Y):
yield x, y
model = Sequential()
model.add(Convolution2D(6, (2, 2), input_shape=(None, None, 6), activation='relu', padding='same'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(256, (3, 3), activation='relu'))
model.add(Conv2DTranspose(256, (3, 3), activation='relu'))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(Conv2DTranspose(3, (3, 3), activation='relu'))
model.compile(loss=['mse'], optimizer='adam', metrics=['accuracy'])
model.fit_generator(gen(X_train, Y_train), steps_per_epoch=len(X_train), epochs=5)
scores = model.evaluate(X_test, Y_test, verbose=0)
When I try to run this code, I get an error in which it says:
Incompatible shapes: [1,370,1242,3] vs. [1,368,1240,3]
I resized the pictures to be divisible by 8 since I have 3 maxpool layers.
As input I put 2 images (I am doing stereo vision) and as an output I get disparity map for the first image. I am concatenating 2 images by putting the second one in third dimension (np.concatenate((img1,img2), axis=-1).
Can somebody tell me what I am doing wrong?
Here is my trace:
Traceback (most recent call last):
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
return fn(*args)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [1,370,1242,3] vs. [1,368,1240,3]
[[Node: loss/conv2d_transpose_9_loss/sub = Sub[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/loss/conv2d_transpose_9_loss/sub_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv2d_transpose_9/Relu-1-0-TransposeNCHWToNHWC-LayoutOptimizer, _arg_conv2d_transpose_9_target_0_2/_303)]]
[[Node: loss/mul/_521 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_2266_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I tried resizing pictures and learning works, but since as a result I get disparity maps, resizing is not a good option. Does anybody have any advice?
If the picture is too big to fit in conv2dTransponse, you can use Cropping2d layer so it crops the picture on wished size. This works if input picture has even number of pixels.

keras error in fit method : expected model_2 to have shape (None, 252, 252, 1) but got array with shape (300, 128, 128, 3)

I am building a image classifier for one-class classification in which i've used autoencoder.
While running this model I am getting this error by the line autoencoder_model.fit:
ValueError: Error when checking target: expected model_2 to have shape (None, 252, 252, 1) but got array with shape (300, 128, 128, 3)
num_of_samples = img_data.shape[0]
labels = np.ones((num_of_samples,),dtype='int64')
labels[0:376]=0
names = ['cats']
input_shape=img_data[0].shape
X_train, X_test = train_test_split(img_data, test_size=0.2, random_state=2)
inputTensor = Input(input_shape)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(inputTensor)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded_data = MaxPooling2D((2, 2), padding='same')(x)
encoder_model = Model(inputTensor,encoded_data)
# at this point the representation is (4, 4, 8) i.e. 128-dimensional
encoded_input = Input((4,4,8))
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded_input)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu',padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded_data = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
decoder_model = Model(encoded_input,decoded_data)
autoencoder_input = Input(input_shape)
encoded = encoder_model(autoencoder_input)
decoded = decoder_model(encoded)
autoencoder_model = Model(autoencoder_input, decoded)
autoencoder_model.compile(optimizer='adadelta', enter code here`loss='binary_crossentropy')
autoencoder_model.fit(X_train, X_train,
epochs=50,
batch_size=32,
validation_data=(X_test, X_test),
callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])
As the auto-encoder tries to re-create the original images, it seems you are reconstructing an image with different dimensions than the original, due to the fact to have only two MaxPool2D layers in your encoder and three UpSampling2D layers in your decoder.
When the auto-encoder tries to evaluate the loss of the reconstruction, it runs into an error due to a dimension miss-match.
Use this for your encoder and let us know if it works:
inputTensor = Input(input_shape)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(inputTensor)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded_data = MaxPooling2D((2, 2), padding='same')(x)
encoder_model = Model(inputTensor,encoded_data)

Validation accuracy does not make sense

My data set is the following:
Training set: 5589 images
Validation set: 1398 images
Test set: 1996 images
Size: 1156,256,1
The problem is a binary classification problem. I got some results (reaching ~83% accuracy in test set) using hot-encoded target arrays [0,1],[1,0]. Realizing how stupid this was I changed target arrays to binary form [0] or 1 and changing categorical_crossentropy to binary crossentropy.
With this approach the validation accuracy gets stuck at 82.05% no matter what learning rate I use and the training accuracy gets stuck at 25.80%. Of course, this has no sense, and the in the test set accuracy is around ~30%.
Why can be this happening? I checked both training data and metadata and they are correct. I post my code below.
inp = Input(shape=input_shape)
out = Conv2D(16, (5, 5),activation = 'relu', kernel_initializer='glorot_uniform', kernel_regularizer=regularizers.l2(0.01), padding='same')(inp)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(32, (3, 3),activation = 'relu',kernel_initializer='glorot_uniform',kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(32, (3, 3),activation = 'relu',kernel_initializer='glorot_uniform',kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = Dropout(0.5)(out)
out = Conv2D(64, (3, 3), activation = 'relu',kernel_initializer='glorot_uniform',kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = Conv2D(64, (3, 3),activation = 'relu', kernel_initializer='glorot_uniform',kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Conv2D(128, (3, 3), activation = 'relu',kernel_initializer='glorot_uniform',kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = Conv2D(128, (3, 3),activation = 'relu', kernel_initializer='glorot_uniform',kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Conv2D(256, (3, 3),activation = 'relu', kernel_initializer='glorot_uniform',kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = Conv2D(256, (3, 3), activation = 'relu',kernel_initializer='glorot_uniform',kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Conv2D(512, (3, 3), activation = 'relu',kernel_initializer='glorot_uniform', kernel_regularizer=regularizers.l2(0.01), padding='same')(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
out = Dropout(0.5)(out)
dense1 = Dense(1, activation="softmax")(out)
model = Model(inputs = inp, outputs = dense1)
And the epochs look like this:
Epochs
Change your last activation from softmax to sigmoid, like
dense1 = Dense(1, activation="sigmoid")(out)
and try lowering your learning rate
model.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

Resources