I am trying to use U-net network architeture for stereo vision.
I have datasets with 3 different image sizes (1240x368, 1224x368 and 1384x1104).
Here is My whole class:
import pickle
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, UpSampling2D, Conv2DTranspose
from keras.utils import np_utils
import sys, numpy as np
import keras
import cv2
pkl_file = open('data.p', 'rb')
dict = pickle.load(pkl_file)
X_data = dict['images']
Y_data = dict['disparity']
data_num = len(X_data)
train_num = int(data_num * 0.8)
X_train = X_data[:train_num]
X_test = X_data[train_num:]
Y_train = Y_data[:train_num]
Y_test = Y_data[train_num:]
def gen(X, Y):
while True:
for x, y in zip(X, Y):
yield x, y
model = Sequential()
model.add(Convolution2D(6, (2, 2), input_shape=(None, None, 6), activation='relu', padding='same'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(256, (3, 3), activation='relu'))
model.add(Conv2DTranspose(256, (3, 3), activation='relu'))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(Conv2DTranspose(3, (3, 3), activation='relu'))
model.compile(loss=['mse'], optimizer='adam', metrics=['accuracy'])
model.fit_generator(gen(X_train, Y_train), steps_per_epoch=len(X_train), epochs=5)
scores = model.evaluate(X_test, Y_test, verbose=0)
When I try to run this code, I get an error in which it says:
Incompatible shapes: [1,370,1242,3] vs. [1,368,1240,3]
I resized the pictures to be divisible by 8 since I have 3 maxpool layers.
As input I put 2 images (I am doing stereo vision) and as an output I get disparity map for the first image. I am concatenating 2 images by putting the second one in third dimension (np.concatenate((img1,img2), axis=-1).
Can somebody tell me what I am doing wrong?
Here is my trace:
Traceback (most recent call last):
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
return fn(*args)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [1,370,1242,3] vs. [1,368,1240,3]
[[Node: loss/conv2d_transpose_9_loss/sub = Sub[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/loss/conv2d_transpose_9_loss/sub_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv2d_transpose_9/Relu-1-0-TransposeNCHWToNHWC-LayoutOptimizer, _arg_conv2d_transpose_9_target_0_2/_303)]]
[[Node: loss/mul/_521 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_2266_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I tried resizing pictures and learning works, but since as a result I get disparity maps, resizing is not a good option. Does anybody have any advice?
If the picture is too big to fit in conv2dTransponse, you can use Cropping2d layer so it crops the picture on wished size. This works if input picture has even number of pixels.
Related
I am new to deep learning and neural network so I need help understanding why this is happening and how i can fix it.
I have a training size of 7500 images
This is my model
img_size = 50
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
input_shape=(img_size, img_size, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy',
optimizer=optimizers.RMSprop(learning_rate=2*1e-4),
metrics=['acc'])
# Date processing
# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,)
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
# This is the target directory
train_dir,
target_size=(img_size, img_size),
batch_size=20,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_dir,
target_size=(img_size, img_size),
batch_size=20,
class_mode='binary')
# Train the Model
history = model.fit(
train_generator,
steps_per_epoch=375, #train_sample_size/data_batch_size
epochs=100,
validation_data=validation_generator,
validation_steps=50)
I have tried changing the parameters, such as adding dropout, changing batch size etc.. but still get a really high loss. The loss would be in the negative 20million and just keep increases.
I am running a VAE in Keras. the model compiles, and its summary is :
however, when I try to train the model I get the following error:
ValueError: Dimensions must be equal, but are 32 and 16 for '{{node vae_mlp/tf_op_layer_AddV2_14/AddV2_14}} = AddV2[T=DT_FLOAT, _cloned=true](vae_mlp/tf_op_layer_Mul_10/Mul_10, vae_mlp/tf_op_layer_Mul_11/Mul_11)' with input shapes: [16,32,32], [16].
The 16 is the batch size. I know because if I change to any number greater than 1, I get the same error that mentions the batch size (and it works for a batch size of 1). I suspect that the problem is that stimuli have 3 channels and that for some reason, it treats it as if it is greyscaled. But I am not sure.
I am attaching the full code as well:
"""### VAE Cifar 10"""
from keras import layers
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from keras.layers import Dropout
from keras import regularizers
from keras.datasets import cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
input_shape=(x_train.shape[1], x_train.shape[2], x_train.shape[3])
original_dim=x_train.shape[1]*x_train.shape[2]
latent_dim = 12
import keras
#encoder architecture
encoder_input = keras.Input(shape=input_shape)
cx=layers.Conv2D(filters=64,
kernel_size=(3, 3),
activation='relu',
padding='same')(encoder_input)
cx=layers.Conv2D(filters=64,
kernel_size=(3, 3),
activation='relu',
input_shape=(32, 32, 3),padding='same')(cx)
cx=layers.MaxPool2D(2,2)(cx)
cx=layers.Dropout(0.2)(cx)
cx=layers.Conv2D(filters=64,
kernel_size=(3, 3),
activation='relu',padding='same')(cx)
cx=layers.Conv2D(filters=64,
kernel_size=(3, 3),
activation='relu',padding='same')(cx)
cx=layers.MaxPool2D(2,2)(cx)
cx=layers.Dropout(0.2)(cx)
cx=layers.Conv2D(filters=128,
kernel_size=(3, 3),
activation='relu',padding='same')(cx)
cx=layers.Conv2D(filters=128,
kernel_size=(3, 3),
activation='relu',padding='same')(cx)
cx=layers.MaxPool2D(2,2)(cx)
cx=layers.Dropout(0.2)(cx)
x=layers.Flatten()(cx)
z_mean=layers.Dense(latent_dim, activation='relu', name = 'z_mean')(x) #I removed the softmax layer
z_log_sigma=layers.Dense(latent_dim, activation='relu',name = 'z_sd' )(x)
from keras import backend as K #what is that...
def sampling(args):
z_mean, z_log_sigma = args
epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
mean=0., stddev=0.1)
return z_mean + K.exp(z_log_sigma) * epsilon
z = layers.Lambda(sampling)([z_mean, z_log_sigma])
# Create encoder
encoder = keras.Model(encoder_input, [z_mean, z_log_sigma, z], name='encoder')
encoder.summary()
# Get Conv2D shape for Conv2DTranspose operation in decoder
conv_shape = K.int_shape(cx)
# Create decoder
#look at : https://www.machinecurve.com/index.php/2019/12/30/how-to-create-a-variational-autoencoder-with-keras/
from keras.layers import Conv2DTranspose, Reshape
latent_inputs = keras.Input(shape=(latent_dim, ), name='z_sampling') #shape=(latent_dim,) or shape=late_dim?
d0 = layers.Dense(conv_shape[1] * conv_shape[2] * conv_shape[3], activation='relu')(latent_inputs)
d05 = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(d0)
d1=layers.Conv2DTranspose(filters=128,
kernel_size=(3, 3),
strides=2,
activation='relu',padding='same')(d05)#(latent_inputs)
d2=layers.Conv2DTranspose(filters=128,
kernel_size=(3, 3),
strides=2,
activation='relu',padding='same')(d1)
d3=layers.Conv2DTranspose(filters=64,
kernel_size=(3, 3),
strides=2,
activation='relu',padding='same')(d2)
d4=layers.Conv2DTranspose(filters=64,
kernel_size=(3, 3),
activation='relu',padding='same')(d3)
d5=layers.Conv2DTranspose(filters=64,
kernel_size=(3, 3),
activation='relu',
padding='same')(d4)
d6=layers.Conv2DTranspose(filters=64,
kernel_size=(3, 3),
activation='relu',
input_shape=input_shape,padding='same')(d5)
outputs = layers.Conv2D(filters=3, kernel_size=3, activation='sigmoid', padding='same', name='decoder_output')(d6) #Dense(128, activation='relu')
from keras import Model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
# instantiate VAE model
outputs = decoder(encoder(encoder_input)[2])
vae = keras.Model(encoder_input, outputs, name='vae_mlp')
vae.summary()
#loss
reconstruction_loss = keras.losses.binary_crossentropy(encoder_input, outputs)
reconstruction_loss *= original_dim
kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
#batch size = 1 doens't break after one epoch
print('you use x_train_t')
vae.fit(x_train, x_train,
epochs=20,
batch_size=16,
validation_data=(x_test, x_test))
There are two things that required to solve the issue:
First, the way to attach the loss function to the model should be by:
vae.compile(optimizer='adam', loss=val_loss_func)
Second, before training one should run:
import tensorflow as tf
tf.config.run_functions_eagerly(True)
I am not sure what this does..
I am writing a code for running autoencoder on CIFAR10 dataset and see the reconstructed images.
The requirement is to create
Encoder with First Layer
Input shape: (32,32,3)
Conv2D Layer with 64 Filters of (3,3)
BatchNormalization layer
ReLu activation
2D MaxpoolingLayer with (2,2) filter
Encoder with Second Layer
Conv2D layer with 16 filters (3,3)
BatchNormalization layer
ReLu activation
2D MaxpoolingLayer with (2,2) filter
Final Encoded as MaxPool with (2,2) with all previous layers
Decoder with First Layer
Input shape: encoder output
Conv2D Layer with 16 Filters of (3,3)
BatchNormalization layer
ReLu activation
UpSampling2D with (2,2) filter
Decoder with Second Layer
Conv2D Layer with 32 Filters of (3,3)
BatchNormalization layer
ReLu activation
UpSampling2D with (2,2) filter
Final Decoded as Sigmoid with all previous layers
I understand that
When we are creating Convolutional Autoencoder (or any AE), we need to pass the output of the previous layer to the next layer.
So, when I create the first Conv2D layer with ReLu and then perform BatchNormalization .. in which I pass the Conv2D layer .. right?
But when I do MaxPooling2D .. what should I pass .. BatchNormalization output or Conv2D layer output?
Also, is there any order in which I should be performing these operations?
Conv2D --> BatchNormalization --> MaxPooling2D
OR
Conv2D --> MaxPooling2D --> BatchNormalization
I am attaching my code below ... I have attempted it to two different ways and hence getting different outputs (in terms of model summary and also model training graph)
Can someone please help me by explaining which is the correct method (Method-1 or Method-2)? Also, how do I understand which graph shows better model performance?
Method - 1
input_image = Input(shape=(32, 32, 3))
### Encoder
conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_image)
bnorm1_1 = BatchNormalization()(conv1_1)
mpool1_1 = MaxPooling2D((2, 2), padding='same')(conv1_1)
conv1_2 = Conv2D(16, (3, 3), activation='relu', padding='same')(mpool1_1)
borm1_2 = BatchNormalization()(conv1_2)
encoder = MaxPooling2D((2, 2), padding='same')(conv1_2)
### Decoder
conv2_1 = Conv2D(16, (3, 3), activation='relu', padding='same')(encoder)
bnorm2_1 = BatchNormalization()(conv2_1)
up1_1 = UpSampling2D((2, 2))(conv2_1)
conv2_2 = Conv2D(32, (3, 3), activation='relu', padding='same')(up1_1)
bnorm2_2 = BatchNormalization()(conv2_2)
up2_1 = UpSampling2D((2, 2))(conv2_2)
decoder = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(up2_1)
model = Model(input_image, decoder)
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()
history = model.fit(trainX, trainX,
epochs=50,
batch_size=1000,
shuffle=True,
verbose=2,
validation_data=(testX, testX)
)
As an output of the model summary, I get this
Total params: 18,851
Trainable params: 18,851
Non-trainable params: 0
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()
Method - 2
input_image = Input(shape=(32, 32, 3))
### Encoder
x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_image)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
encoder = MaxPooling2D((2, 2), padding='same')(x)
### Decoder
x = Conv2D(16, (3, 3), activation='relu', padding='same')(encoder)
x = BatchNormalization()(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = UpSampling2D((2, 2))(x)
decoder = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)
model = Model(input_image, decoder)
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()
history = model.fit(trainX, trainX,
epochs=50,
batch_size=1000,
shuffle=True,
verbose=2,
validation_data=(testX, testX)
)
As an output of the model summary, I get this
Total params: 19,363
Trainable params: 19,107
Non-trainable params: 256
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()
In method 1, BatchNormalization layers does not exist in the compiled model, as the output of these layers are not used anywhere. You can check this by running model1.summary()
Method 2 is perfectly alright.
Order of the operations :
Conv2D --> BatchNormalization --> MaxPooling2D is usually the common approach.
Though either order would work since, since BatchNorm is just mean and variance normalization.
Edit:
For Conv2D --> BatchNormalization --> MaxPooling2D :
conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_image)
bnorm1_1 = BatchNormalization()(conv1_1)
mpool1_1 = MaxPooling2D((2, 2), padding='same')(bnorm1_1)
and then use mpool1_1 as input for next layer.
For Conv2D --> MaxPooling2D --> BatchNormalization:
conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_image)
mpool1_1 = MaxPooling2D((2, 2), padding='same')(conv1_1)
bnorm1_1 = BatchNormalization()(mpool1_1)
and then use bnorm1_1 as input for next layer.
To effectively use BatchNormalization layer, you should always use it before activation.
Instead of:
conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_image)
bnorm1_1 = BatchNormalization()(conv1_1)
mpool1_1 = MaxPooling2D((2, 2), padding='same')(bnorm1_1)
Use it like this:
conv1_1 = Conv2D(64, (3, 3), padding='same')(input_image)
bnorm1_1 = BatchNormalization()(conv1_1)
act_1 = Activation('relu')(bnorm1_1)
mpool1_1 = MaxPooling2D((2, 2), padding='same')(act_1)
For more details, check here:
Where do I call the BatchNormalization function in Keras?
I am doing image classification but i got the error for calculate the accuracy ,please help me how to do it.
this is my model :
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(6))
model.add(Activation('softmax'))
I want classifly the image like this :
label_dict={'0':'buildings',
this is my classification labels :
'1':'forest',
'2':'glacier',
'3':'mountain',
'4':'sea' ,
'5':'street' }
I am using categorical_crossentropy:
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adam(),
metrics=['accuracy'])
I am predicting classes :
pred=model.predict_classes(test)
I am calculate the test accuracy , but i got some errors:
print('Test loss:', pred[0])
print('Test accuracy:',pred[1])
Test loss: 5
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-28-b74afa5e2da9> in <module>
1 print('Test loss:', pred[0])
----> 2 print('Test accuracy:',pred[1])
IndexError: index 1 is out of bounds for axis 0 with size 1
If the size of array is n , max index value is n-1 .
So you can access to pred[0] only
I am trying to rewrite a Sequential model of Network In Network CNN using Functional API. I use it with CIFAR-10 dataset. The Sequential model trains without a problem, but Functional API model gets stuck. I probably missed something when rewriting the model.
Here's a reproducible example:
Dependencies:
from keras.models import Model, Input, Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dropout, Activation
from keras.utils import to_categorical
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from keras.datasets import cifar10
Loading the dataset:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train / 255.
x_test = x_test / 255.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)
input_shape = x_train[0,:,:,:].shape
Here's the working Sequential model:
model = Sequential()
#mlpconv block1
model.add(Conv2D(32, (5, 5), activation='relu',padding='valid',input_shape=input_shape))
model.add(Conv2D(32, (1, 1), activation='relu'))
model.add(Conv2D(32, (1, 1), activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Dropout(0.5))
#mlpconv block2
model.add(Conv2D(64, (3, 3), activation='relu',padding='valid'))
model.add(Conv2D(64, (1, 1), activation='relu'))
model.add(Conv2D(64, (1, 1), activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Dropout(0.5))
#mlpconv block3
model.add(Conv2D(128, (3, 3), activation='relu',padding='valid'))
model.add(Conv2D(32, (1, 1), activation='relu'))
model.add(Conv2D(10, (1, 1), activation='relu'))
model.add(GlobalAveragePooling2D())
model.add(Activation('softmax'))
Compile and train:
model.compile(loss=categorical_crossentropy, optimizer=Adam(), metrics=['acc'])
_ = model.fit(x=x_train, y=y_train, batch_size=32,
epochs=200, verbose=1,validation_split=0.2)
In three epochs the model gets close to 50% validation accuracy.
Here's the same model rewritten using Functional API:
model_input = Input(shape=input_shape)
#mlpconv block1
x = Conv2D(32, (5, 5), activation='relu',padding='valid')(model_input)
x = Conv2D(32, (1, 1), activation='relu')(x)
x = Conv2D(32, (1, 1), activation='relu')(x)
x = MaxPooling2D((2,2))(x)
x = Dropout(0.5)(x)
#mlpconv block2
x = Conv2D(64, (3, 3), activation='relu',padding='valid')(x)
x = Conv2D(64, (1, 1), activation='relu')(x)
x = Conv2D(64, (1, 1), activation='relu')(x)
x = MaxPooling2D((2,2))(x)
x = Dropout(0.5)(x)
#mlpconv block3
x = Conv2D(128, (3, 3), activation='relu',padding='valid')(x)
x = Conv2D(32, (1, 1), activation='relu')(x)
x = Conv2D(10, (1, 1), activation='relu')(x)
x = GlobalAveragePooling2D()(x)
x = Activation(activation='softmax')(x)
model = Model(model_input, x, name='nin_cnn')
This model is then compiled using the same parameters as the Sequential model. When trained, the training accuracy gets stuck at 0.10, meaning the model doesn't get better and randomly chooses one of 10 classes.
What did I miss when rewriting the model? When calling model.summary() the models look identical except for the explicit Input layer in the Functional API model.
Removing activation in the final conv layer solves the problem:
x = Conv2D(10, (1, 1))(x)
Still not sure why the Sequential model works fine with activation in that layer.