Keras - Chars74k Character Recognition - CNN - machine-learning

I followed this blog post for character recognition using a CNN:
http://ankivil.com/kaggle-first-steps-with-julia-chars74k-first-place-using-convolutional-neural-networks
The only change I made was adding dim_ordering="th" for compatibility with the latest Keras.
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense, Dropout

model = Sequential()
# Block 1
model.add(Convolution2D(128, 3, 3, border_mode='same', init='he_normal', activation='relu', input_shape=(1, img_rows, img_cols)))
print model.output_shape
model.add(Convolution2D(128, 3, 3, border_mode='same', init='he_normal', activation='relu'))
print model.output_shape
model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering="th"))
print model.output_shape
# Block 2
model.add(Convolution2D(256, 3, 3, border_mode='same', init='he_normal', activation='relu'))
print model.output_shape
model.add(Convolution2D(256, 3, 3, border_mode='same', init='he_normal', activation='relu'))
print model.output_shape
model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering="th"))
print model.output_shape
# Block 3
model.add(Convolution2D(512, 3, 3, border_mode='same', init='he_normal', activation='relu'))
print model.output_shape
model.add(Convolution2D(512, 3, 3, border_mode='same', init='he_normal', activation='relu'))
print model.output_shape
model.add(Convolution2D(512, 3, 3, border_mode='same', init='he_normal', activation='relu'))
print model.output_shape
model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering="th"))
print model.output_shape
# Classifier head
model.add(Flatten())
print model.output_shape
model.add(Dense(4096, init='he_normal', activation='relu'))
print model.output_shape
model.add(Dropout(0.5))
print model.output_shape
model.add(Dense(4096, init='he_normal', activation='relu'))
print model.output_shape
model.add(Dropout(0.5))
print model.output_shape
model.add(Dense(nb_classes, init='he_normal', activation='softmax'))
print model.output_shape
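The compile/fit step is not shown above; for reference, a minimal sketch in the same legacy Keras 1 API (the optimizer, batch size, and epoch count are illustrative assumptions, not values from the post or the blog):
# A sketch, assuming X_train is scaled to [0, 1] and y_train is one-hot encoded.
model.compile(loss='categorical_crossentropy', optimizer='adamax', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=128, nb_epoch=50, validation_split=0.1)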
I am getting very poor accuracy of about 0.07 after 50-60 iterations, and it gets stuck there.
Can you please suggest some pointers? I am also open to other CNN models for OCR.
Thanks,
Siva

Related

3D CNN model error in output shape. What is wrong exactly?

I created the following 3D CNN model, where the input shape is (20, 64, 64, 3):
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, ZeroPadding3D, Flatten, Dense, Dropout

def model_2():
    global model
    model = Sequential()
    # 1st layer group
    model.add(Conv3D(64, (3, 3, 3), activation="relu", name="conv1",
                     input_shape=(SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                     strides=(1, 1, 1), padding="same"))
    model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name="pool1", padding="valid"))
    # 2nd layer group
    model.add(Conv3D(128, (3, 3, 3), activation="relu", name="conv2",
                     strides=(1, 1, 1), padding="same"))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), name="pool2", padding="valid"))
    # 3rd layer group
    model.add(Conv3D(256, (3, 3, 3), activation="relu", name="conv3a",
                     strides=(1, 1, 1), padding="same"))
    model.add(Conv3D(256, (3, 3, 3), activation="relu", name="conv3b",
                     strides=(1, 1, 1), padding="same"))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), name="pool3", padding="valid"))
    # 4th layer group
    model.add(Conv3D(512, (3, 3, 3), activation="relu", name="conv4a",
                     strides=(1, 1, 1), padding="same"))
    model.add(Conv3D(512, (3, 3, 3), activation="relu", name="conv4b",
                     strides=(1, 1, 1), padding="same"))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), name="pool4", padding="valid"))
    # 5th layer group
    model.add(Conv3D(512, (3, 3, 3), activation="relu", name="conv5a",
                     strides=(1, 1, 1), padding="same"))
    model.add(Conv3D(512, (3, 3, 3), activation="relu", name="conv5b",
                     strides=(1, 1, 1), padding="same"))
    model.add(ZeroPadding3D(padding=(0, 1, 1)))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), name="pool5", padding="valid"))
    model.add(Flatten())
    # FC layers group
    model.add(Dense(4096, activation='relu', name='fc6'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu', name='fc7'))
    model.add(Dropout(0.5))
    model.add(Dense(487, activation='softmax', name='fc8'))
    # Display the model's summary.
    model.summary()
    # Return the constructed model.
    return model
Create an instance of the early stopping callback:
early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 10, mode = 'min', restore_best_weights = True)
Compile the model, specifying the loss function, optimizer, and metrics:
model2.compile(loss = 'categorical_crossentropy', optimizer = 'Adam', metrics = ["accuracy"])
Start training the model:
model2_training_history = model2.fit(x=features_train, y=labels_train, epochs=50, batch_size=4,
                                     shuffle=True, validation_split=0.2,
                                     callbacks=[early_stopping_callback])
I got the following error:
Epoch 1/50
ValueError: in user code:
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1051, in train_function *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1040, in step_function **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1030, in run_step **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 890, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 948, in compute_loss
        return self.compiled_loss(
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/usr/local/lib/python3.8/dist-packages/keras/losses.py", line 139, in __call__
        losses = call_fn(y_true, y_pred)
    File "/usr/local/lib/python3.8/dist-packages/keras/losses.py", line 243, in call **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/losses.py", line 1787, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/usr/local/lib/python3.8/dist-packages/keras/backend.py", line 5119, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

ValueError: Shapes (4, 4) and (4, 487) are incompatible
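The final line pinpoints the mismatch: with batch_size = 4, the targets arrive with shape (4, 4) (4 samples, 4 label columns), while the model's softmax head outputs (4, 487). The label encoding and the last Dense layer have to agree on the class count. A minimal sketch of one way to line them up, assuming the dataset really has 4 classes and labels_train is already one-hot encoded:
# Assumption: labels_train has shape (num_samples, 4), i.e. 4 classes.
NUM_CLASSES = labels_train.shape[-1]  # 4 here
# ...then, inside model_2(), make the output layer match:
# model.add(Dense(NUM_CLASSES, activation='softmax', name='fc8'))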

Why is my loss so high and why does accuracy stay at 0.1?

I am new to deep learning and neural networks, so I need help understanding why this is happening and how I can fix it.
I have a training set of 7,500 images.
This is my model:
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = 50
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(img_size, img_size, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(learning_rate=2e-4),
              metrics=['acc'])
# Data processing
# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    # This is the target directory
    train_dir,
    target_size=(img_size, img_size),
    batch_size=20,
    class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(img_size, img_size),
    batch_size=20,
    class_mode='binary')
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=375,  # train_sample_size / batch_size
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50)
I have tried changing the parameters, such as adding dropout and changing the batch size, but I still get a really high loss. The loss reaches around negative 20 million and just keeps growing in magnitude.
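With a sigmoid output and binary_crossentropy, the loss cannot go negative when the targets are in [0, 1], so a loss in the negative millions usually means the labels (or inputs) are not in the range the loss expects. A quick sanity check on one batch, as a sketch assuming the generators above are already built:
x_batch, y_batch = next(train_generator)
print(x_batch.min(), x_batch.max())  # expect roughly [0, 1] after the 1./255 rescale
print(y_batch.min(), y_batch.max())  # expect exactly 0.0 and/or 1.0 for class_mode='binary'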

Input 0 of layer conv1d_2 is incompatible with the layer: expected min_ndim=3, found ndim=2. Full shape received: (None, 128) - while developing an LSTM-CNN

I am trying to incorporate a CNN layer into the LSTM network as shown.
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2]), activation='relu'))
model.add(Dropout(0.1))
model.add(LSTM(128, activation='relu'))
model.add(Conv1D(32, kernel_size=3, activation='relu'))
model.add(Flatten())
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
But it is giving the error above about the input shape. Please help to resolve the issue.
Try the following. The second LSTM has to return sequences (return_sequences=True) so that Conv1D receives a 3-D (batch, timesteps, features) input rather than the 2-D (None, 128) tensor reported in the error:
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2]), activation='relu'))
model.add(Dropout(0.1))
model.add(LSTM(128, activation='relu', return_sequences=True))
model.add(Conv1D(32, kernel_size=1, activation='relu'))  # no input_shape needed here; Keras infers it
model.add(Flatten())
model.add(Dense(1))
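To confirm the shapes line up before training, a quick check (a sketch, assuming X_train is already defined):
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()  # Conv1D should now report a 3-D input of shape (None, timesteps, 128)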

Determine the batch input shape

This is part of my code:
def lstm_model_structure(training_data, batch_size, num_neurons):
    train, test = training_data[:, :8], training_data[:, 8:14]
    train = train.reshape(train.shape[0], 1, train.shape[1])
    model = Sequential()
    model.add(LSTM(units=num_neurons, activation="tanh", kernel_regularizer=l2(0.01),
                   recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01), name='input',
                   return_sequences=True, batch_input_shape=(batch_size, X.shape[1], X.shape[2])))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, name='lstm1', return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, name='lstm2'))
    model.add(Dropout(0.2))
    model.add(Dense(6))
    # Compile the model
    adaDelta = optimizers.Adadelta(learning_rate=0.001, decay=1e-6)
    model.compile(loss='mean_squared_error', optimizer=adaDelta)  # pass the configured instance, not the string "adaDelta"
    model.fit(trainX, trainy, validation_data=(testX, testy), epochs=350, batch_size=7, verbose=1, shuffle=False)
    return model
Here the shape of training_data is (9, 16), batch_size is 9, and num_neurons is 52.
The previous model works fine (without validation data), but when I split the data in order to have validation data, as follows:
def lstm_model_structure(training_data, batch_size, num_neurons):
    train, test = training_data[:, :8], training_data[:, 8:14]
    trainX, testX = train[:7, :], train[7:, :]
    trainy, testy = test[:7], test[7:]
    trainX = trainX.reshape(trainX.shape[0], 1, trainX.shape[1])
    testX = testX.reshape(testX.shape[0], 1, testX.shape[1])
    model = Sequential()
    model.add(LSTM(units=num_neurons, activation="tanh", kernel_regularizer=l2(0.01),
                   recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01), name='input',
                   return_sequences=True, batch_input_shape=(batch_size, trainX.shape[1], trainX.shape[2])))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, name='lstm1', return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, name='lstm2'))
    model.add(Dropout(0.2))
    model.add(Dense(6))
    # Compile the model
    adaDelta = optimizers.Adadelta(learning_rate=0.001, decay=1e-6)
    model.compile(loss='mean_squared_error', optimizer=adaDelta)
    model.fit(trainX, trainy, validation_data=(testX, testy), epochs=350, batch_size=7, verbose=1, shuffle=False)
    return model
Where:
print(trainX.shape) >> (7, 1, 8)
print(trainy.shape) >> (7, 6)
print(testX.shape) >> (2, 1, 8)
print(testy.shape) >> (2, 6)
I got the following error when I fit the model (incompatible shapes):
InvalidArgumentError: Incompatible shapes: [7] vs. [9]
[[{{node training_2/Adadelta/gradients/loss_2/dense_3_loss/mean_squared_error/weighted_loss/mul_grad/Mul_1}}]]
How can I determine the correct batch input shape?
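Note what the two settings disagree on: batch_input_shape=(batch_size, ...) with batch_size = 9 pins the batch dimension to 9, while model.fit(..., batch_size=7) feeds batches of 7 (and only 7 training samples exist after the split), hence the [7] vs. [9] mismatch. A sketch of the more flexible alternative, assuming a fixed batch size is not actually required (e.g. the LSTM is not stateful):
model.add(LSTM(units=num_neurons, activation="tanh",
               kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01),
               name='input', return_sequences=True,
               input_shape=(trainX.shape[1], trainX.shape[2])))  # leave the batch dimension unspecified
# fit() may then use any batch_size, e.g. 7 for the 7 training samples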

Incompatible shapes in Keras

I am trying to use the U-Net architecture for stereo vision.
I have datasets with 3 different image sizes (1240x368, 1224x368, and 1384x1104).
Here is my whole script:
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, UpSampling2D, Conv2DTranspose

# Load the image pairs and their ground-truth disparity maps.
pkl_file = open('data.p', 'rb')
data = pickle.load(pkl_file)
X_data = data['images']
Y_data = data['disparity']
data_num = len(X_data)
train_num = int(data_num * 0.8)
X_train = X_data[:train_num]
X_test = X_data[train_num:]
Y_train = Y_data[:train_num]
Y_test = Y_data[train_num:]

def gen(X, Y):
    while True:
        for x, y in zip(X, Y):
            yield x, y

model = Sequential()
model.add(Convolution2D(6, (2, 2), input_shape=(None, None, 6), activation='relu', padding='same'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(256, (3, 3), activation='relu'))
model.add(Conv2DTranspose(256, (3, 3), activation='relu'))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(Conv2DTranspose(128, (3, 3), activation='relu'))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(UpSampling2D(size=(2, 2)))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(Conv2DTranspose(64, (3, 3), activation='relu'))
model.add(Conv2DTranspose(3, (3, 3), activation='relu'))
model.compile(loss=['mse'], optimizer='adam', metrics=['accuracy'])
model.fit_generator(gen(X_train, Y_train), steps_per_epoch=len(X_train), epochs=5)
scores = model.evaluate(X_test, Y_test, verbose=0)
When I try to run this code, I get an error which says:
Incompatible shapes: [1,370,1242,3] vs. [1,368,1240,3]
I resized the pictures to be divisible by 8 since I have 3 max-pooling layers.
As input I feed 2 images (I am doing stereo vision), and as output I get the disparity map for the first image. I concatenate the 2 images by stacking the second one along the channel dimension: np.concatenate((img1, img2), axis=-1).
Can somebody tell me what I am doing wrong?
Here is my trace:
Traceback (most recent call last):
  File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
    return fn(*args)
  File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "C:\Users\Ivan\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [1,370,1242,3] vs. [1,368,1240,3]
  [[Node: loss/conv2d_transpose_9_loss/sub = Sub[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/loss/conv2d_transpose_9_loss/sub_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv2d_transpose_9/Relu-1-0-TransposeNCHWToNHWC-LayoutOptimizer, _arg_conv2d_transpose_9_target_0_2/_303)]]
  [[Node: loss/mul/_521 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_2266_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I tried resizing the pictures and training works, but since the outputs are disparity maps, resizing is not a good option. Does anybody have any advice?
If the output of the Conv2DTranspose stack is larger than the target, you can use a Cropping2D layer to crop it to the desired size, as sketched below. This works if the input picture has an even number of pixels.
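A minimal sketch of that fix, assuming the network output is 2 pixels taller and 2 pixels wider than the target, as in the [1,370,1242,3] vs. [1,368,1240,3] error above:
from keras.layers import Cropping2D

# Crop 1 row from top and bottom, and 1 column from left and right:
# cropping=((top, bottom), (left, right)), turning 370x1242 into 368x1240.
model.add(Cropping2D(cropping=((1, 1), (1, 1))))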
