Related
I used Dice loss and binary_crossentropy. Whenever I train my model, it shows very high training and validation accuracy, but it always outputs blank images. My masks are black-and-white binary images, where 0 corresponds to black and 1 corresponds to white. In the output image, almost all pixels have the value 0. Please tell me where I am going wrong.
def _read_first_channel(path, size):
    """Load an image, resize it and return its first channel as an (H, W, 1) array.

    Args:
        path: File path handed to ``cv2.imread``.
        size: Target ``(width, height)`` handed to ``cv2.resize``.

    Raises:
        IOError: If ``cv2.imread`` cannot read the file (it returns ``None``
            instead of raising).
    """
    image = cv2.imread(path)
    if image is None:
        raise IOError('Could not read image: ' + path)
    image = cv2.resize(image, size, interpolation=cv2.INTER_AREA)
    return np.expand_dims(image[:, :, 0], axis=-1)


def _image_mask_batches(image_dir, mask_dir, batch_size=16, size=(512, 256)):
    """Yield ``(images, masks)`` batches forever from two parallel directories.

    Images are scaled to [0, 1]; masks are binarised (non-zero pixel -> 1.0).
    Both arrays are float32 with shape ``(n, height, width, 1)`` where ``n`` is
    ``batch_size`` except for a possibly shorter final batch of each pass.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the masks.
        batch_size: Maximum number of samples per yielded batch.
        size: ``(width, height)`` every image and mask is resized to.

    Raises:
        ValueError: If the image directory is empty or the two directories do
            not contain the same number of files.
    """
    width, height = size
    while True:
        # os.listdir returns entries in arbitrary order, so both listings are
        # sorted to keep image i paired with mask i.
        # NOTE(review): assumes image and mask file names sort into matching
        # order (e.g. identical names) -- confirm against the dataset.
        image_names = sorted(os.listdir(image_dir))
        mask_names = sorted(os.listdir(mask_dir))
        if not image_names:
            # Without this check an empty directory would spin forever
            # without ever yielding.
            raise ValueError('No images found in ' + image_dir)
        if len(image_names) != len(mask_names):
            raise ValueError(
                'Found %d images in %s but %d masks in %s'
                % (len(image_names), image_dir, len(mask_names), mask_dir))

        for start in range(0, len(image_names), batch_size):
            pairs = list(zip(image_names[start:start + batch_size],
                             mask_names[start:start + batch_size]))
            # Size the arrays to the real number of samples so a short final
            # batch never carries uninitialised np.empty rows.
            x_batch = np.empty((len(pairs), height, width, 1), dtype=np.float32)
            y_batch = np.empty((len(pairs), height, width, 1), dtype=np.float32)
            for i, (image_name, mask_name) in enumerate(pairs):
                x_batch[i] = _read_first_channel(
                    os.path.join(image_dir, image_name), size)
                # Any non-zero mask pixel counts as foreground.
                # NOTE(review): INTER_AREA blends edge pixels before this
                # test, which slightly grows the foreground -- confirm that
                # this is acceptable for the masks in use.
                y_batch[i] = _read_first_channel(
                    os.path.join(mask_dir, mask_name), size) != 0
            yield x_batch / 255.0, y_batch


def train_generator():
    """Endless generator of 16-sample (image, mask) training batches."""
    yield from _image_mask_batches('/gdrive/My Drive/Train/img/images/',
                                   '/gdrive/My Drive/Train/msk/mask/')


def val_generator():
    """Endless generator of 16-sample (image, mask) validation batches."""
    yield from _image_mask_batches('/gdrive/My Drive/Validation/img/images/',
                                   '/gdrive/My Drive/Validation/msk/mask/')


train_gen = train_generator()
val_gen = val_generator()
def unet():
    """Build the U-Net used for single-channel 256x512 segmentation.

    The encoder runs five double-convolution stages (16, 32, 64, 128, 256
    filters), each followed by 2x2 max pooling, then a 512-filter bottleneck.
    The decoder mirrors it with transposed convolutions and skip
    concatenations, ending in a 1x1 sigmoid convolution.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping (256, 512, 1) inputs to
        (256, 512, 1) sigmoid outputs.
    """
    layers = tf.keras.layers

    def conv3x3(filters):
        # Shared configuration of every 3x3 convolution in the network.
        return layers.Conv2D(filters, (3, 3),
                             activation=tf.keras.activations.elu,
                             kernel_initializer='he_normal',
                             padding='same')

    def double_conv(tensor, filters):
        # conv -> dropout(0.3) -> conv, all with the same filter count.
        tensor = conv3x3(filters)(tensor)
        tensor = layers.Dropout(0.3)(tensor)
        return conv3x3(filters)(tensor)

    def downsample(tensor):
        return layers.MaxPooling2D((2, 2))(tensor)

    def upsample(tensor, filters):
        return layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2),
                                      padding='same')(tensor)

    inputs = layers.Input((256, 512, 1))

    # Contracting path; each stage output is kept for its skip connection.
    enc16 = double_conv(inputs, 16)
    enc32 = double_conv(downsample(enc16), 32)
    enc64 = double_conv(downsample(enc32), 64)
    enc128 = double_conv(downsample(enc64), 128)
    enc256 = double_conv(downsample(enc128), 256)
    bottleneck = double_conv(downsample(enc256), 512)

    # Expanding path: upsample, concatenate with the matching encoder stage,
    # then apply another double convolution.
    dec256 = double_conv(
        layers.concatenate([upsample(bottleneck, 256), enc256]), 256)
    dec128 = double_conv(
        layers.concatenate([upsample(dec256, 128), enc128]), 128)
    dec64 = double_conv(
        layers.concatenate([upsample(dec128, 64), enc64], axis=3), 64)
    dec32 = double_conv(
        layers.concatenate([upsample(dec64, 32), enc32], axis=3), 32)
    dec16 = double_conv(
        layers.concatenate([upsample(dec32, 16), enc16], axis=3), 16)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(dec16)
    return tf.keras.Model(inputs=[inputs], outputs=[outputs])
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_4 (InputLayer) [(None, 256, 512, 1) 0
__________________________________________________________________________________________________
conv2d_69 (Conv2D) (None, 256, 512, 16) 160 input_4[0][0]
__________________________________________________________________________________________________
dropout_33 (Dropout) (None, 256, 512, 16) 0 conv2d_69[0][0]
__________________________________________________________________________________________________
conv2d_70 (Conv2D) (None, 256, 512, 16) 2320 dropout_33[0][0]
__________________________________________________________________________________________________
max_pooling2d_15 (MaxPooling2D) (None, 128, 256, 16) 0 conv2d_70[0][0]
__________________________________________________________________________________________________
conv2d_71 (Conv2D) (None, 128, 256, 32) 4640 max_pooling2d_15[0][0]
__________________________________________________________________________________________________
dropout_34 (Dropout) (None, 128, 256, 32) 0 conv2d_71[0][0]
__________________________________________________________________________________________________
conv2d_72 (Conv2D) (None, 128, 256, 32) 9248 dropout_34[0][0]
__________________________________________________________________________________________________
max_pooling2d_16 (MaxPooling2D) (None, 64, 128, 32) 0 conv2d_72[0][0]
__________________________________________________________________________________________________
conv2d_73 (Conv2D) (None, 64, 128, 64) 18496 max_pooling2d_16[0][0]
__________________________________________________________________________________________________
dropout_35 (Dropout) (None, 64, 128, 64) 0 conv2d_73[0][0]
__________________________________________________________________________________________________
conv2d_74 (Conv2D) (None, 64, 128, 64) 36928 dropout_35[0][0]
__________________________________________________________________________________________________
max_pooling2d_17 (MaxPooling2D) (None, 32, 64, 64) 0 conv2d_74[0][0]
__________________________________________________________________________________________________
conv2d_75 (Conv2D) (None, 32, 64, 128) 73856 max_pooling2d_17[0][0]
__________________________________________________________________________________________________
dropout_36 (Dropout) (None, 32, 64, 128) 0 conv2d_75[0][0]
__________________________________________________________________________________________________
conv2d_76 (Conv2D) (None, 32, 64, 128) 147584 dropout_36[0][0]
__________________________________________________________________________________________________
max_pooling2d_18 (MaxPooling2D) (None, 16, 32, 128) 0 conv2d_76[0][0]
__________________________________________________________________________________________________
conv2d_77 (Conv2D) (None, 16, 32, 256) 295168 max_pooling2d_18[0][0]
__________________________________________________________________________________________________
dropout_37 (Dropout) (None, 16, 32, 256) 0 conv2d_77[0][0]
__________________________________________________________________________________________________
conv2d_78 (Conv2D) (None, 16, 32, 256) 590080 dropout_37[0][0]
__________________________________________________________________________________________________
max_pooling2d_19 (MaxPooling2D) (None, 8, 16, 256) 0 conv2d_78[0][0]
__________________________________________________________________________________________________
conv2d_79 (Conv2D) (None, 8, 16, 512) 1180160 max_pooling2d_19[0][0]
__________________________________________________________________________________________________
dropout_38 (Dropout) (None, 8, 16, 512) 0 conv2d_79[0][0]
__________________________________________________________________________________________________
conv2d_80 (Conv2D) (None, 8, 16, 512) 2359808 dropout_38[0][0]
__________________________________________________________________________________________________
conv2d_transpose_15 (Conv2DTran (None, 16, 32, 256) 524544 conv2d_80[0][0]
__________________________________________________________________________________________________
concatenate_15 (Concatenate) (None, 16, 32, 512) 0 conv2d_transpose_15[0][0]
conv2d_78[0][0]
__________________________________________________________________________________________________
conv2d_81 (Conv2D) (None, 16, 32, 256) 1179904 concatenate_15[0][0]
__________________________________________________________________________________________________
dropout_39 (Dropout) (None, 16, 32, 256) 0 conv2d_81[0][0]
__________________________________________________________________________________________________
conv2d_82 (Conv2D) (None, 16, 32, 256) 590080 dropout_39[0][0]
__________________________________________________________________________________________________
conv2d_transpose_16 (Conv2DTran (None, 32, 64, 128) 131200 conv2d_82[0][0]
__________________________________________________________________________________________________
concatenate_16 (Concatenate) (None, 32, 64, 256) 0 conv2d_transpose_16[0][0]
conv2d_76[0][0]
__________________________________________________________________________________________________
conv2d_83 (Conv2D) (None, 32, 64, 128) 295040 concatenate_16[0][0]
__________________________________________________________________________________________________
dropout_40 (Dropout) (None, 32, 64, 128) 0 conv2d_83[0][0]
__________________________________________________________________________________________________
conv2d_84 (Conv2D) (None, 32, 64, 128) 147584 dropout_40[0][0]
__________________________________________________________________________________________________
conv2d_transpose_17 (Conv2DTran (None, 64, 128, 64) 32832 conv2d_84[0][0]
__________________________________________________________________________________________________
concatenate_17 (Concatenate) (None, 64, 128, 128) 0 conv2d_transpose_17[0][0]
conv2d_74[0][0]
__________________________________________________________________________________________________
conv2d_85 (Conv2D) (None, 64, 128, 64) 73792 concatenate_17[0][0]
__________________________________________________________________________________________________
dropout_41 (Dropout) (None, 64, 128, 64) 0 conv2d_85[0][0]
__________________________________________________________________________________________________
conv2d_86 (Conv2D) (None, 64, 128, 64) 36928 dropout_41[0][0]
__________________________________________________________________________________________________
conv2d_transpose_18 (Conv2DTran (None, 128, 256, 32) 8224 conv2d_86[0][0]
__________________________________________________________________________________________________
concatenate_18 (Concatenate) (None, 128, 256, 64) 0 conv2d_transpose_18[0][0]
conv2d_72[0][0]
__________________________________________________________________________________________________
conv2d_87 (Conv2D) (None, 128, 256, 32) 18464 concatenate_18[0][0]
__________________________________________________________________________________________________
dropout_42 (Dropout) (None, 128, 256, 32) 0 conv2d_87[0][0]
__________________________________________________________________________________________________
conv2d_88 (Conv2D) (None, 128, 256, 32) 9248 dropout_42[0][0]
__________________________________________________________________________________________________
conv2d_transpose_19 (Conv2DTran (None, 256, 512, 16) 2064 conv2d_88[0][0]
__________________________________________________________________________________________________
concatenate_19 (Concatenate) (None, 256, 512, 32) 0 conv2d_transpose_19[0][0]
conv2d_70[0][0]
__________________________________________________________________________________________________
conv2d_89 (Conv2D) (None, 256, 512, 16) 4624 concatenate_19[0][0]
__________________________________________________________________________________________________
dropout_43 (Dropout) (None, 256, 512, 16) 0 conv2d_89[0][0]
__________________________________________________________________________________________________
conv2d_90 (Conv2D) (None, 256, 512, 16) 2320 dropout_43[0][0]
__________________________________________________________________________________________________
conv2d_91 (Conv2D) (None, 256, 512, 1) 17 conv2d_90[0][0]
==================================================================================================
Total params: 7,775,313
Trainable params: 7,775,313
Non-trainable params: 0
_________________________________________________________
from keras import backend as K
def dice_coef(y_true, y_pred, smooth=1):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are flattened, so the score is computed over every element
    passed in at once.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor of the same total size.
        smooth: Constant added to numerator and denominator.

    Returns:
        ``(2 * sum(y_true * y_pred) + smooth) /
        (sum(y_true) + sum(y_pred) + smooth)`` as a scalar tensor.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(prediction) + smooth
    return numerator / denominator
def dice_coef_loss(y_true, y_pred):
    """Return the Dice loss: one minus ``dice_coef(y_true, y_pred)``."""
    score = dice_coef(y_true, y_pred)
    return 1 - score
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
# Training driver: builds the U-Net, compiles it with the Dice loss and trains
# it from the train/validation generators, checkpointing the best weights.
NO_OF_TRAINING_IMAGES = len(os.listdir('/gdrive/My Drive/Train/img/images/'))
NO_OF_VAL_IMAGES = len(os.listdir('/gdrive/My Drive/Validation/img/images/'))
NO_OF_EPOCHS = 1
# Must equal the number of samples per batch produced by train_gen / val_gen
# (their loops step through the files 16 at a time). With the previous value
# of 32 each epoch consumed only half of the data.
BATCH_SIZE = 16
# Ceiling division: the generators also emit the final, shorter batch.
TRAIN_STEPS = max(1, (NO_OF_TRAINING_IMAGES + BATCH_SIZE - 1) // BATCH_SIZE)
VAL_STEPS = max(1, (NO_OF_VAL_IMAGES + BATCH_SIZE - 1) // BATCH_SIZE)

# The placeholders are filled from the epoch logs. Only loss / dice_coef and
# their val_ counterparts are logged (no accuracy metric is compiled), so the
# file name uses val_dice_coef.
filepath = "weights-improvement-{epoch:02d}-{val_dice_coef:.2f}.hdf5"

m = unet()
opt = Adam(learning_rate=1E-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
m.compile(optimizer=opt, loss=dice_coef_loss, metrics=[dice_coef])

# `monitor` must be the name of a logged quantity, not the loss function
# object; 'val_loss' is the validation Dice loss.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss',
                             verbose=1, save_best_only=True, mode='min')
earlystopping = EarlyStopping(monitor='val_loss', verbose=1,
                              min_delta=0.01, patience=1, mode='min')
callbacks_list = [checkpoint, earlystopping]

# NOTE(review): fit_generator is kept for compatibility with the TensorFlow
# version in use; newer releases accept generators in Model.fit directly.
results = m.fit_generator(train_gen, epochs=NO_OF_EPOCHS,
                          steps_per_epoch=TRAIN_STEPS,
                          validation_data=val_gen,
                          validation_steps=VAL_STEPS,
                          callbacks=callbacks_list,  # previously built but never passed
                          use_multiprocessing=False,
                          workers=1)
m.save('Model.h5')
418/418 [==============================] - 9828s 24s/step - loss: 0.0700 - dice_coef: 0.9300 - val_loss: 0.0299 - val_dice_coef: 0.9701
But when I take the output, everything is just blank. I am scaling up the output by multiplying it by 255 before visualizing, and batch normalization is also off.
Your output is likely normalized between integer values 0-20~ which requires scaling up these values to 0-255 range prior to visualization.
Furthermore, make sure to turn off batch normalization by indicating that the model is running in inference mode.
Supposing out is your output from the model
# imshow is provided by matplotlib.pyplot; the top-level matplotlib module has
# no imshow attribute.
import matplotlib.pyplot as plt

img1 = out[0, :, :, :]  # select first element from our batch
# Move the channel axis from first to last position for display.
# NOTE(review): .permute assumes `out` is a channels-first torch tensor; a
# channels-last NumPy array (e.g. from Keras predict) needs no permute.
img1 = img1.permute(1, 2, 0)
plt.imshow(img1)
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 4 years ago.
Improve this question
I am new to Keras and I have a problem: given an image, I have to build a convolutional neural network that outputs another image based on it.
Now, all the examples I have seen on the internet are classification problems where each image is given a one-hot-encoded label. I want to give an image as the label.
A series of progressive convolutions can be followed by a series of resizing interpolations, e.g. as implemented in a layer such as this:
class Interpolation(Layer):
    """Keras layer that bilinearly resizes its input to a fixed spatial size.

    Args:
        output_dim: Target ``(height, width)`` passed to the resize op.
        num_channels: Channel count reported by ``compute_output_shape``.
            NOTE(review): the resize op itself is not given a channel count,
            so this presumably must equal the input's channel count -- confirm.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, output_dim, num_channels, **kwargs):
        self.num_channels = num_channels
        self.output_dim = output_dim
        super(Interpolation, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights to create; defer to the base class.
        super(Interpolation, self).build(input_shape)

    def call(self, x):
        return K.tf.image.resize_bilinear(x, self.output_dim)

    def compute_output_shape(self, input_shape):
        # Report the size actually requested from the resize op instead of
        # assuming the input is always exactly doubled.
        return (input_shape[0], self.output_dim[0], self.output_dim[1],
                self.num_channels)

    def get_config(self):
        # Expose the constructor arguments so a model containing this layer
        # can be saved and reloaded.
        config = super(Interpolation, self).get_config()
        config.update({'output_dim': self.output_dim,
                       'num_channels': self.num_channels})
        return config
Then you can apply a series of transformations that will result in an output shape matching your input shape. Below is an example code showcasing the use of this layer:
# Encoder/decoder built with the functional API: a 200x200x1 input is
# convolved and pooled down to 25x25, then resized back up with Interpolation
# layers. Each upscaled tensor is added to the batch-normalised encoder
# feature map of the same size before the next convolution.
# grayscale in
uncolored = Input(shape=(200,200,1,))
# first block 200x200x3
conv0 = Conv2D(3, (3,3), padding='same', activation='relu', data_format='channels_last', name='0', kernel_regularizer='l2')(uncolored)
# bn0 is consumed only by the final addition (intermediate_3)
bn0 = BatchNormalization()(conv0)
# second block 200x200x64 (takes conv0 directly, not bn0)
conv1 = Conv2D(64, (3,3), padding='same', activation='relu', data_format='channels_last', kernel_regularizer='l2')(conv0)
bn1 = BatchNormalization()(conv1) # 200x200x64, consumed only by intermediate_2
pool0 = MaxPooling2D(pool_size=2, padding='same')(conv0) # 100x100x3: pools conv0 (3 channels), not conv1
# third block 100x100x128
conv2 = Conv2D(128, (3,3), padding='same', activation='relu', data_format='channels_last', kernel_regularizer='l2')(pool0)
bn2 = BatchNormalization()(conv2) # 100 x 100 x 128, consumed only by intermediate_1
pool1 = MaxPooling2D(pool_size=2, padding='same')(conv2) # 50x50x128
# fourth block 50x50x256
conv3 = Conv2D(256, (3,3), padding='same', activation='relu', data_format='channels_last', name='2', kernel_regularizer='l2')(pool1)
bn3 = BatchNormalization()(conv3) # 50 x 50 x 256, consumed only by intermediate_0
pool2 = MaxPooling2D(pool_size=2, padding='same')(conv3) # 25 x 25 x 256
# fifth block 25 x 25 x 512
conv4 = Conv2D(512, (3,3), padding='same', activation='relu', data_format='channels_last', kernel_regularizer='l2')(pool2)
# NOTE: bn4 is not referenced by any later layer, so it is not part of the model graph
bn4 = BatchNormalization()(conv4)
# 1x1 convolution reducing 512 channels to 256 before upscaling
rconv0 = Conv2D(256, (1,1), padding='same', activation='sigmoid', data_format='channels_last', kernel_regularizer='l2')(conv4)
# first upscale
interp_layer0 = Interpolation(output_dim=(50,50),
num_channels=256) (rconv0) # resized to 50x50, 256 channels
# first addition
intermediate_0 = Add()([interp_layer0, bn3])
rconv1 = Conv2D(128, (3,3), padding='same',
activation='sigmoid', data_format='channels_last')(intermediate_0)
# second upscale
interp_layer1 = Interpolation(output_dim=(100,100),
num_channels=128)(rconv1)
# second addition
intermediate_1 = Add()([interp_layer1, bn2])
rconv2 = Conv2D(64, (3,3), padding='same',
activation='sigmoid', data_format='channels_last')(intermediate_1)
# third upscale
interp_layer2 = Interpolation(output_dim=(200,200),
num_channels= 64)(rconv2)
# third addition
intermediate_2 = Add()([interp_layer2, bn1 ])
rconv3 = Conv2D(3, (3,3), padding='same',
activation='sigmoid', data_format='channels_last')(intermediate_2)
# fourth addition
intermediate_3 = Add()([rconv3,bn0])
# output convolution: 3 filters with sigmoid activation
rconv4 = Conv2D(3, (3,3), padding='same', activation='sigmoid', data_format='channels_last')(intermediate_3)
model = Model(inputs=[uncolored], outputs=[rconv4])
And the model summary:
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_5 (InputLayer) (None, 200, 200, 1) 0
__________________________________________________________________________________________________
0 (Conv2D) (None, 200, 200, 3) 30 input_5[0][0]
__________________________________________________________________________________________________
max_pooling2d_13 (MaxPooling2D) (None, 100, 100, 3) 0 0[0][0]
__________________________________________________________________________________________________
conv2d_34 (Conv2D) (None, 100, 100, 128 3584 max_pooling2d_13[0][0]
__________________________________________________________________________________________________
max_pooling2d_14 (MaxPooling2D) (None, 50, 50, 128) 0 conv2d_34[0][0]
__________________________________________________________________________________________________
2 (Conv2D) (None, 50, 50, 256) 295168 max_pooling2d_14[0][0]
__________________________________________________________________________________________________
max_pooling2d_15 (MaxPooling2D) (None, 25, 25, 256) 0 2[0][0]
__________________________________________________________________________________________________
conv2d_35 (Conv2D) (None, 25, 25, 512) 1180160 max_pooling2d_15[0][0]
__________________________________________________________________________________________________
conv2d_36 (Conv2D) (None, 25, 25, 256) 131328 conv2d_35[0][0]
__________________________________________________________________________________________________
interpolation_13 (Interpolation (None, 50, 50, 256) 0 conv2d_36[0][0]
__________________________________________________________________________________________________
batch_normalization_24 (BatchNo (None, 50, 50, 256) 1024 2[0][0]
__________________________________________________________________________________________________
add_17 (Add) (None, 50, 50, 256) 0 interpolation_13[0][0]
batch_normalization_24[0][0]
__________________________________________________________________________________________________
conv2d_37 (Conv2D) (None, 50, 50, 128) 295040 add_17[0][0]
__________________________________________________________________________________________________
interpolation_14 (Interpolation (None, 100, 100, 128 0 conv2d_37[0][0]
__________________________________________________________________________________________________
batch_normalization_23 (BatchNo (None, 100, 100, 128 512 conv2d_34[0][0]
__________________________________________________________________________________________________
add_18 (Add) (None, 100, 100, 128 0 interpolation_14[0][0]
batch_normalization_23[0][0]
__________________________________________________________________________________________________
conv2d_38 (Conv2D) (None, 100, 100, 64) 73792 add_18[0][0]
__________________________________________________________________________________________________
conv2d_33 (Conv2D) (None, 200, 200, 64) 1792 0[0][0]
__________________________________________________________________________________________________
interpolation_15 (Interpolation (None, 200, 200, 64) 0 conv2d_38[0][0]
__________________________________________________________________________________________________
batch_normalization_22 (BatchNo (None, 200, 200, 64) 256 conv2d_33[0][0]
__________________________________________________________________________________________________
add_19 (Add) (None, 200, 200, 64) 0 interpolation_15[0][0]
batch_normalization_22[0][0]
__________________________________________________________________________________________________
conv2d_39 (Conv2D) (None, 200, 200, 3) 195 add_19[0][0]
__________________________________________________________________________________________________
batch_normalization_21 (BatchNo (None, 200, 200, 3) 12 0[0][0]
__________________________________________________________________________________________________
add_20 (Add) (None, 200, 200, 3) 0 conv2d_39[0][0]
batch_normalization_21[0][0]
__________________________________________________________________________________________________
conv2d_40 (Conv2D) (None, 200, 200, 1) 28 add_20[0][0]
==================================================================================================
Total params: 1,982,921
Trainable params: 1,982,019
Non-trainable params: 902
_____________________________
In the example, we move from a single-channel image to a multi-channel image. The same idea can be replicated for images of whatever size / number of channels. The exact network architecture of course depends on your desired functionality.
This is my code for creating an abnormal convolutional autoencoder. My problem is that the loss function does not converge to anything at all. I have tried different optimizers for minimizing the loss. Is there anything that I am doing fundamentally wrong with my autoencoder?
import keras
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dropout, UpSampling2D
from keras.layers import Activation, GlobalAveragePooling2D
from keras.layers.convolutional import Conv2D, Deconv2D
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers.advanced_activations import LeakyReLU
# Load MNIST, add a trailing channel axis and scale the pixels to [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape((60000, 28, 28, 1)).astype('float32') / 255.
x_test = x_test.reshape((10000, 28, 28, 1)).astype('float32') / 255.
def base_model():
    """Build and compile the convolutional autoencoder for 28x28x1 images.

    The encoder downsamples twice with stride-2 convolutions (28 -> 14 -> 7);
    the decoder mirrors it with transposed convolutions and 2x upsampling.
    The model summary is printed as a side effect.

    The output layer uses a sigmoid instead of the previous LeakyReLU:
    ``binary_crossentropy`` expects predictions in [0, 1], while LeakyReLU is
    unbounded, so the loss could not be optimised reliably.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, binary
        cross-entropy loss).
    """
    model = Sequential()

    # Encoder
    model.add(Conv2D(64, (3, 3), padding='same', name='encode_first',
                     input_shape=(28, 28, 1)))
    model.add(LeakyReLU())
    model.add(Conv2D(64, (3, 3), padding='same', name='encode_second'))
    model.add(LeakyReLU())
    model.add(Conv2D(64, (3, 3), padding='same',
                     name='encode_third_downsample', strides=2))
    model.add(LeakyReLU())
    model.add(Conv2D(128, (3, 3), padding='same', name='encode_fourth'))
    model.add(LeakyReLU())
    model.add(Conv2D(128, (3, 3), padding='same', name='encode_fifth'))
    model.add(LeakyReLU())
    model.add(Conv2D(128, (3, 3), padding='same',
                     name='encode_sixth_downsample', strides=2))
    model.add(LeakyReLU())

    # Decoder
    model.add(Deconv2D(128, (3, 3), padding='same', name='decode_first'))
    model.add(LeakyReLU())
    model.add(Deconv2D(128, (3, 3), padding='same', name='decode_second'))
    model.add(LeakyReLU())
    model.add(UpSampling2D((2, 2)))
    model.add(LeakyReLU())
    model.add(Deconv2D(64, (3, 3), padding='same', name='decode_fourth'))
    model.add(LeakyReLU())
    model.add(Deconv2D(64, (3, 3), padding='same', name='decode_fifth'))
    model.add(LeakyReLU())
    model.add(UpSampling2D((2, 2)))
    model.add(LeakyReLU())

    # Reconstruction head: squash to [0, 1] so the output matches the
    # normalised pixel targets and the cross-entropy loss.
    model.add(Conv2D(1, (28, 28), padding='same', name='final_layer'))
    model.add(Activation('sigmoid'))

    model.summary()
    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model
# Train the autoencoder to reproduce its own input; the test split serves as
# validation data.
encoder = base_model()
encoder_hist = encoder.fit(
    x=x_train,
    y=x_train,
    batch_size=256,
    epochs=50,
    shuffle=True,
    validation_data=(x_test, x_test),
)
This is my output
runfile('D:/Datasets/Siddhesh/Fraud Face/Face-Disguise-Dataset-master/autencoder.py', wdir='D:/Datasets/Siddhesh/Fraud Face/Face-Disguise-Dataset-master')
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
encode_first (Conv2D) (None, 28, 28, 64) 640
_________________________________________________________________
leaky_re_lu_14 (LeakyReLU) (None, 28, 28, 64) 0
_________________________________________________________________
encode_second (Conv2D) (None, 28, 28, 64) 36928
_________________________________________________________________
leaky_re_lu_15 (LeakyReLU) (None, 28, 28, 64) 0
_________________________________________________________________
encode_third_downsample (Con (None, 14, 14, 64) 36928
_________________________________________________________________
leaky_re_lu_16 (LeakyReLU) (None, 14, 14, 64) 0
_________________________________________________________________
encode_fourth (Conv2D) (None, 14, 14, 128) 73856
_________________________________________________________________
leaky_re_lu_17 (LeakyReLU) (None, 14, 14, 128) 0
_________________________________________________________________
encode_fifth (Conv2D) (None, 14, 14, 128) 147584
_________________________________________________________________
leaky_re_lu_18 (LeakyReLU) (None, 14, 14, 128) 0
_________________________________________________________________
encode_sixth_downsample (Con (None, 7, 7, 128) 147584
_________________________________________________________________
leaky_re_lu_19 (LeakyReLU) (None, 7, 7, 128) 0
_________________________________________________________________
decode_first (Conv2DTranspos (None, 7, 7, 128) 147584
_________________________________________________________________
leaky_re_lu_20 (LeakyReLU) (None, 7, 7, 128) 0
_________________________________________________________________
decode_second (Conv2DTranspo (None, 7, 7, 128) 147584
_________________________________________________________________
leaky_re_lu_21 (LeakyReLU) (None, 7, 7, 128) 0
_________________________________________________________________
up_sampling2d_3 (UpSampling2 (None, 14, 14, 128) 0
_________________________________________________________________
leaky_re_lu_22 (LeakyReLU) (None, 14, 14, 128) 0
_________________________________________________________________
decode_fourth (Conv2DTranspo (None, 14, 14, 64) 73792
_________________________________________________________________
leaky_re_lu_23 (LeakyReLU) (None, 14, 14, 64) 0
_________________________________________________________________
decode_fifth (Conv2DTranspos (None, 14, 14, 64) 36928
_________________________________________________________________
leaky_re_lu_24 (LeakyReLU) (None, 14, 14, 64) 0
_________________________________________________________________
up_sampling2d_4 (UpSampling2 (None, 28, 28, 64) 0
_________________________________________________________________
leaky_re_lu_25 (LeakyReLU) (None, 28, 28, 64) 0
_________________________________________________________________
final_layer (Conv2D) (None, 28, 28, 1) 50177
_________________________________________________________________
leaky_re_lu_26 (LeakyReLU) (None, 28, 28, 1) 0
=================================================================
Total params: 899,585
Trainable params: 899,585
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
60000/60000 [==============================] - 456s - loss: 0.3532 - val_loss: 0.2613
Epoch 2/50
60000/60000 [==============================] - 438s - loss: 0.1636 - val_loss: 0.1176
Epoch 3/50
60000/60000 [==============================] - 436s - loss: 1.9351 - val_loss: 2.1359
Epoch 4/50
60000/60000 [==============================] - 435s - loss: 2.1060 - val_loss: 2.1359
Epoch 5/50
16640/60000 [=======>......................] - ETA: 297s - loss: 2.1002Traceback (most recent call last):
Your loss function is increasing. The reason could be that you are using a very large learning-rate.
I'm trying to adapt the 2D convolutional autoencoder example from the Keras website: https://blog.keras.io/building-autoencoders-in-keras.html
to my own case, where I use 1D inputs:
from keras.layers import Input, Dense, Conv1D, MaxPooling1D, UpSampling1D
from keras.models import Model
from keras import backend as K
import scipy as scipy
import numpy as np
# Load the MATLAB file and pull out the 'edata' variable.
mat = scipy.io.loadmat('edata.mat')
emat = mat['edata']
# 1-D convolutional autoencoder over inputs of length 64 with one channel.
input_img = Input(shape=(64,1)) # adapt this if using `channels_first` image data format
# Encoder: three conv + max-pool stages. Per the model summary, each pool
# divides the length by 4 (64 -> 16 -> 4 -> 1).
x = Conv1D(32, (9), activation='relu', padding='same')(input_img)
x = MaxPooling1D((4), padding='same')(x)
x = Conv1D(16, (9), activation='relu', padding='same')(x)
x = MaxPooling1D((4), padding='same')(x)
x = Conv1D(8, (9), activation='relu', padding='same')(x)
encoded = MaxPooling1D(4, padding='same')(x)
# Decoder: conv + upsample stages. Per the model summary, the first two
# upsamples take the length 1 -> 4 -> 16.
x = Conv1D(8, (9), activation='relu', padding='same')(encoded)
x = UpSampling1D((4))(x)
x = Conv1D(16, (9), activation='relu', padding='same')(x)
x = UpSampling1D((4))(x)
# This layer has no padding='same'; the summary shows the length dropping
# from 16 to 8 here.
x = Conv1D(32, (9), activation='relu')(x)
# 8 * 4 = 32, so the output length ends up 32 instead of the input's 64.
x = UpSampling1D((4))(x)
decoded = Conv1D(1, (9), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
# Columns 0..79999 become the training set, columns 80000..119999 the test set.
x_train = emat[:,0:80000]
# NOTE(review): np.reshape keeps element order and does not transpose. If each
# column of emat is one 64-value sample, this mixes values from different
# samples -- confirm whether a transpose is needed before reshaping.
x_train = np.reshape(x_train, (x_train.shape[1], 64, 1))
x_test = emat[:,80000:120000]
x_test = np.reshape(x_test, (x_test.shape[1], 64, 1))
from keras.callbacks import TensorBoard
# The input doubles as the target, so the model output must have the same
# shape as the input for fit() to accept it.
autoencoder.fit(x_train, x_train,
epochs=50,
batch_size=128,
shuffle=True,
validation_data=(x_test, x_test),
callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])
However, I receive this error when I try to run autoencoder.fit():
ValueError: Error when checking target: expected conv1d_165 to have
shape (None, 32, 1) but got array with shape (80000, 64, 1)
I know I'm probably doing something wrong when I set up my layers; I just changed the MaxPooling and Conv2D sizes to their 1D form. I have very little experience with Keras or autoencoders. Can anyone see what I'm doing wrong?
thanks
EDIT:
the error when i run it on a fresh console:
ValueError: Error when checking target: expected conv1d_7 to have
shape (None, 32, 1) but got array with shape (80000, 64, 1)
here is the output of autoencoder.summary()
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 64, 1) 0
_________________________________________________________________
conv1d_1 (Conv1D) (None, 64, 32) 320
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 16, 32) 0
_________________________________________________________________
conv1d_2 (Conv1D) (None, 16, 16) 4624
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 4, 16) 0
_________________________________________________________________
conv1d_3 (Conv1D) (None, 4, 8) 1160
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 1, 8) 0
_________________________________________________________________
conv1d_4 (Conv1D) (None, 1, 8) 584
_________________________________________________________________
up_sampling1d_1 (UpSampling1 (None, 4, 8) 0
_________________________________________________________________
conv1d_5 (Conv1D) (None, 4, 16) 1168
_________________________________________________________________
up_sampling1d_2 (UpSampling1 (None, 16, 16) 0
_________________________________________________________________
conv1d_6 (Conv1D) (None, 8, 32) 4640
_________________________________________________________________
up_sampling1d_3 (UpSampling1 (None, 32, 32) 0
_________________________________________________________________
conv1d_7 (Conv1D) (None, 32, 1) 289
=================================================================
Total params: 12,785
Trainable params: 12,785
Non-trainable params: 0
_________________________________________________________________
Since the autoencoder output should reconstruct the input, a minimum requirement is that their dimensions should match, right?
Looking at your autoencoder.summary(), it is easy to confirm that this is not the case: your input is of shape (64,1), while the output of your last convolutional layer conv1d_7 is (32,1) (we ignore the None in the first dimension, since it refers to the batch size).
Let's have a look at the example in the Keras blog you link to (it is a 2D autoencoder, but the idea is the same):
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K
# 2-D convolutional autoencoder from the Keras blog, for 28x28 single-channel images.
input_img = Input(shape=(28, 28, 1)) # adapt this if using `channels_first` image data format
# Encoder: three conv + max-pool stages. Per the summary, the spatial size
# goes 28 -> 14 -> 7 -> 4.
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
# at this point the representation is (4, 4, 8) i.e. 128-dimensional
# Decoder: conv + upsample stages. Per the summary, the first two upsamples
# take the spatial size 4 -> 8 -> 16.
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
# This layer has no padding='same'; the summary shows 16x16 shrinking to
# 14x14, which the final 2x upsample then turns into 28x28.
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
Here is the result of autoencoder.summary() in this case:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 28, 28, 1) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 28, 28, 16) 160
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 16) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 14, 14, 8) 1160
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 8) 0
_________________________________________________________________
conv2d_3 (Conv2D) (None, 7, 7, 8) 584
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 4, 4, 8) 0
_________________________________________________________________
conv2d_4 (Conv2D) (None, 4, 4, 8) 584
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 8, 8, 8) 0
_________________________________________________________________
conv2d_5 (Conv2D) (None, 8, 8, 8) 584
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 16, 16, 8) 0
_________________________________________________________________
conv2d_6 (Conv2D) (None, 14, 14, 16) 1168
_________________________________________________________________
up_sampling2d_3 (UpSampling2 (None, 28, 28, 16) 0
_________________________________________________________________
conv2d_7 (Conv2D) (None, 28, 28, 1) 145
=================================================================
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
It is easy to confirm that here the dimensions of the input and the output (last convolutional layer conv2d_7) are indeed both (28, 28, 1).
So, the summary() method is your friend when building autoencoders; you should experiment with the parameters until you are sure that you produce an output of the same dimensionality as your input. I managed to do so with your autoencoder simply by changing the size argument of the last UpSampling1D layer from 4 to 8:
# Corrected 1-D autoencoder: same layers as the question's model, with the
# last upsampling factor changed so the output length matches the input.
input_img = Input(shape=(64,1))
# Encoder: per the summary, the length goes 64 -> 16 -> 4 -> 1.
x = Conv1D(32, (9), activation='relu', padding='same')(input_img)
x = MaxPooling1D((4), padding='same')(x)
x = Conv1D(16, (9), activation='relu', padding='same')(x)
x = MaxPooling1D((4), padding='same')(x)
x = Conv1D(8, (9), activation='relu', padding='same')(x)
encoded = MaxPooling1D(4, padding='same')(x)
# Decoder: per the summary, the first two upsamples take the length 1 -> 4 -> 16.
x = Conv1D(8, (9), activation='relu', padding='same')(encoded)
x = UpSampling1D((4))(x)
x = Conv1D(16, (9), activation='relu', padding='same')(x)
x = UpSampling1D((4))(x)
# This layer has no padding='same'; the summary shows the length dropping
# from 16 to 8, so a factor of 8 is needed to get back to 64.
x = Conv1D(32, (9), activation='relu')(x)
x = UpSampling1D((8))(x) ## <-- change here (was 4)
decoded = Conv1D(1, (9), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
In which case, the autoencoder.summary() becomes:
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 64, 1) 0
_________________________________________________________________
conv1d_1 (Conv1D) (None, 64, 32) 320
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 16, 32) 0
_________________________________________________________________
conv1d_2 (Conv1D) (None, 16, 16) 4624
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 4, 16) 0
_________________________________________________________________
conv1d_3 (Conv1D) (None, 4, 8) 1160
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 1, 8) 0
_________________________________________________________________
conv1d_4 (Conv1D) (None, 1, 8) 584
_________________________________________________________________
up_sampling1d_1 (UpSampling1 (None, 4, 8) 0
_________________________________________________________________
conv1d_5 (Conv1D) (None, 4, 16) 1168
_________________________________________________________________
up_sampling1d_2 (UpSampling1 (None, 16, 16) 0
_________________________________________________________________
conv1d_6 (Conv1D) (None, 8, 32) 4640
_________________________________________________________________
up_sampling1d_3 (UpSampling1 (None, 64, 32) 0
_________________________________________________________________
conv1d_7 (Conv1D) (None, 64, 1) 289
=================================================================
Total params: 12,785
Trainable params: 12,785
Non-trainable params: 0
with the dimensionality of your input and output matched, as it should be...
I'm trying to implement a convolutional neural network within Keras using a TF backend for image segmentation of 111 images of size 141 x 166. When I run the code below, I get the error message:
Error when checking target: expected dense_36 to have 2 dimensions, but got array with shape (88, 141, 166, 1)
Both my X_train and y_train variables have shape (88, 141, 166, 1). Both my X_test and y_test variables have shape (23, 141, 166, 1), as split by the train_test_split function from sklearn.
I'm not sure what the error message means regarding dense_36. I have tried using the Flatten() function before fitting the model, but it reports that the input has ndim = 2 and cannot be flattened.
# Script: build, compile and fit a small CNN on 141 x 166 single-channel images.
# set input
batch_size = 111
num_epochs = 50
img_rows = 141
img_cols = 166
input_shape = (img_rows, img_cols, 1)
# One output unit per pixel of the image.
num_classes = img_rows*img_cols
# split training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 4)
X_train = X_train.astype('float32')
# Cast the test inputs from X_test itself, so the validation inputs stay
# paired with y_test instead of being replaced by the training inputs.
X_test = X_test.astype('float32')
# CNN itself
model = Sequential()
model.add(Conv2D(32, kernel_size=(3,3), activation='relu',
input_shape=input_shape))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# NOTE: this Dense head produces a 2-D output (batch, num_classes), while the
# targets passed to fit() are 4-D (88, 141, 166, 1). That mismatch is what the
# reported "expected dense_36 to have 2 dimensions" error is about.
model.add(Dense(num_classes, activation='softmax'))
# compile CNN
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
# fit CNN
model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epochs,
verbose=1, validation_data=(X_test, y_test))
My model summary is:
Layer (type) Output Shape Param #
=================================================================
conv2d_35 (Conv2D) (None, 139, 164, 32) 320
_________________________________________________________________
conv2d_36 (Conv2D) (None, 137, 162, 64) 18496
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 68, 81, 64) 0
_________________________________________________________________
dropout_35 (Dropout) (None, 68, 81, 64) 0
_________________________________________________________________
flatten_28 (Flatten) (None, 352512) 0
_________________________________________________________________
dense_33 (Dense) (None, 128) 45121664
_________________________________________________________________
dropout_36 (Dropout) (None, 128) 0
_________________________________________________________________
dense_34 (Dense) (None, 2) 258
_________________________________________________________________
Total params: 45,140,738
Trainable params: 45,140,738
Non-trainable params: 0
_________________________________________________________________
None