CNN learning stagnation - machine-learning

CNN learning stagnation - machine-learning

I have created a simulation of the CNN I am trying to use on video data set.
I set the test data to all one single image on all frames for positive examples and 0 for negative examples. I thought this would learn very quickly. But it does not move at all.
Using current versions of Keras & Tensorflow on Windows 10 64bit.
First question, is my logic wrong? Should I expect the learning of this test data to quickly reach high accuracy?
Is there something wrong with my model or parameters? I have been trying a number of changes but still get the same problem.
Is the sample size (56) too small?
# testing feature extraction model.
import time
import numpy as np, cv2
import sys
import os
import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import Conv3D, MaxPooling3D
from keras.optimizers import SGD,rmsprop, adam
from keras import regularizers
from keras.initializers import Constant
from keras.models import Model
#set gpu options
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=.99, allocator_type = 'BFC')
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True, gpu_options=gpu_options))
config = tf.ConfigProto()
batch_size = 5
num_classes = 1
epochs = 50
nvideos = 56
nframes = 55
nchan = 3
nrows = 480
ncols = 640
#load any single image, resize if needed
img = cv2.imread('C:\\Users\\david\\Documents\\AutonomousSS\\single frame.jpg',cv2.IMREAD_COLOR)
img = cv2.resize(img,(640,480))
x_learn = np.random.randint(0,255,(nvideos,nframes,nrows,ncols,nchan),dtype=np.uint8)
y_learn = np.array([[1],[1],[1],[0],[1],[0],[1],[0],[1],[0],
[1],[0],[0],[1],[0],[0],[1],[0],[1],[0],
[1],[0],[1],[1],[0],[1],[0],[0],[1],[1],
[1],[0],[1],[0],[1],[0],[1],[0],[1],[0],
[0],[1],[0],[0],[1],[0],[1],[0],[1],[0],
[1],[1],[0],[1],[0],[0]],np.uint8)
#each sample, each frame is either the single image for postive examples or 0 for negative examples.
for i in range (nvideos):
if y_learn[i] == 0 :
x_learn[i]=0
else:
x_learn[i,:nframes]=img
#build model
m_loss = 'mean_squared_error'
m_opt = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
m_met = 'acc'
model = Sequential()
# 1st layer group
model.add(Conv3D(32, (3, 3,3), activation='relu',padding="same", name="conv1a", strides=(3, 3, 3),
kernel_initializer = 'glorot_normal',
trainable=False,
input_shape=(nframes,nrows,ncols,nchan)))
#model.add(BatchNormalization(axis=1))
model.add(Conv3D(32, (3, 3, 3), trainable=False, strides=(1, 1, 1), padding="same", name="conv1b", activation="relu"))
#model.add(BatchNormalization(axis=1))
model.add(MaxPooling3D(padding="valid", trainable=False, pool_size=(1, 5, 5), name="pool1", strides=(2, 2, 2)))
# 2nd layer group
model.add(Conv3D(128, (3, 3, 3), trainable=False, strides=(1, 1, 1), padding="same", name="conv2a", activation="relu"))
model.add(Conv3D(128, (3, 3, 3), trainable=False, strides=(1, 1, 1), padding="same", name="conv2b", activation="relu"))
#model.add(BatchNormalization(axis=1))
model.add(MaxPooling3D(padding="valid", trainable=False, pool_size=(1, 5, 5), name="pool2", strides=(2, 2, 2)))
# 3rd layer group
model.add(Conv3D(256, (3, 3, 3), trainable=False, strides=(1, 1, 1), padding="same", name="conv3a", activation="relu"))
model.add(Conv3D(256, (3, 3, 3), trainable=False, strides=(1, 1, 1), padding="same", name="conv3b", activation="relu"))
#model.add(BatchNormalization(axis=1))
model.add(MaxPooling3D(padding="valid", trainable=False, pool_size=(1, 5, 5), name="pool3", strides=(2, 2, 2)))
# 4th layer group
model.add(Conv3D(512, (3, 3, 3), trainable=False, strides=(1, 1, 1), padding="same", name="conv4a", activation="relu"))
model.add(Conv3D(512, (3, 3, 3), trainable=False, strides=(1, 1, 1), padding="same", name="conv4b", activation="relu"))
#model.add(BatchNormalization(axis=1))
model.add(MaxPooling3D(padding="valid", trainable=False, pool_size=(1, 5, 5), name="pool4", strides=(2, 2, 2)))
model.add(Flatten(name='flatten',trainable=False))
model.add(Dense(512,activation='relu', trainable=True,name='den0'))
model.add(Dense(num_classes,activation='softmax',name='den1'))
print (model.summary())
#compile model
model.compile(loss=m_loss,
optimizer=m_opt,
metrics=[m_met])
print ('compiled')
#set callbacks
from keras import backend as K
K.set_learning_phase(0) #set learning phase
tb = keras.callbacks.TensorBoard(log_dir=sample_root_path+'logs', histogram_freq=0,
write_graph=True, write_images=False)
tb.set_model(model)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.2,verbose=1,
patience=2, min_lr=0.000001)
reduce_lr.set_model(model)
ear_stop = keras.callbacks.EarlyStopping(monitor='loss', min_delta=0, patience=4, verbose=1, mode='auto')
ear_stop.set_model(model)
#fit
history = model.fit(x_learn, y_learn,
batch_size=batch_size,
callbacks=[reduce_lr,tb, ear_stop],
verbose=1,
validation_split=0.1,
shuffle = True,
epochs=epochs)
score = model.evaluate(x_learn, y_learn, batch_size=batch_size)
print(str(model.metrics_names) + ": " + str(score))
As usual, thanks for any and all help.
added output...
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1a (Conv3D) (None, 19, 160, 214, 32) 2624
_________________________________________________________________
conv1b (Conv3D) (None, 19, 160, 214, 32) 27680
_________________________________________________________________
pool1 (MaxPooling3D) (None, 10, 78, 105, 32) 0
_________________________________________________________________
conv2a (Conv3D) (None, 10, 78, 105, 128) 110720
_________________________________________________________________
conv2b (Conv3D) (None, 10, 78, 105, 128) 442496
_________________________________________________________________
pool2 (MaxPooling3D) (None, 5, 37, 51, 128) 0
_________________________________________________________________
conv3a (Conv3D) (None, 5, 37, 51, 256) 884992
_________________________________________________________________
conv3b (Conv3D) (None, 5, 37, 51, 256) 1769728
_________________________________________________________________
pool3 (MaxPooling3D) (None, 3, 17, 24, 256) 0
_________________________________________________________________
conv4a (Conv3D) (None, 3, 17, 24, 512) 3539456
_________________________________________________________________
conv4b (Conv3D) (None, 3, 17, 24, 512) 7078400
_________________________________________________________________
pool4 (MaxPooling3D) (None, 2, 7, 10, 512) 0
_________________________________________________________________
flatten (Flatten) (None, 71680) 0
_________________________________________________________________
den0 (Dense) (None, 512) 36700672
_________________________________________________________________
den1 (Dense) (None, 1) 513
=================================================================
Total params: 50,557,281
Trainable params: 36,701,185
Non-trainable params: 13,856,096
_________________________________________________________________
None
compiled
Train on 50 samples, validate on 6 samples
Epoch 1/50
50/50 [==============================] - 20s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 2/50
50/50 [==============================] - 16s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 3/50
50/50 [==============================] - 16s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 4/50
45/50 [==========================>...] - ETA: 1s - loss: 0.5111 - acc: 0.4889
Epoch 00003: reducing learning rate to 0.00020000000949949026.
50/50 [==============================] - 16s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 5/50
50/50 [==============================] - 16s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 6/50
45/50 [==========================>...] - ETA: 1s - loss: 0.5111 - acc: 0.4889
Epoch 00005: reducing learning rate to 4.0000001899898055e-05.
50/50 [==============================] - 16s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 7/50
50/50 [==============================] - 16s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 8/50
45/50 [==========================>...] - ETA: 1s - loss: 0.4889 - acc: 0.5111
Epoch 00007: reducing learning rate to 8.000000525498762e-06.
50/50 [==============================] - 16s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 9/50
50/50 [==============================] - 16s - loss: 0.5000 - acc: 0.5000 - val_loss: 0.5000 - val_acc: 0.5000
Epoch 00008: early stopping
56/56 [==============================] - 12s
['loss', 'acc']: [0.50000001516725334, 0.5000000127724239]

Your layers are set to trainable=False(apart from the last dense layer). Therefore your CNN cannot learn. In addition you won´t be able to train just on a single sample.
If you run into performance issues on your GPU switch to CPU or AWS or reduce your image size.

Related

How to store the gradients of Alexnet as numpy array (in each iteration) in Python?

I want to store the final gradient vector of a model as a numpy array. Is there an easy and intuitive way to do that using Tensorflow?
I want to store the gradient vectors of Alexnet (in a numpy array) for each iteration,, until convergence.

We can do it as shown below code -
import tensorflow as tf
import numpy as np
print(tf.__version__)
#Define the input tensor
x = tf.constant([3.0,6.0,9.0])
#Define the Gradient Function
with tf.GradientTape() as g:
g.watch(x)
y = x * x
dy_dx = g.gradient(y, x)
#Output Gradient Tensor
print("Output Gradient Tensor:",dy_dx)
#Convert to array
a = np.asarray(dy_dx)
print("Gradient array:",a)
print("Array shape:",a.shape)
print("Output type:",type(a))
The Output of the code is -
2.1.0
Output Gradient Tensor: tf.Tensor([ 6. 12. 18.], shape=(3,), dtype=float32)
Gradient array: [ 6. 12. 18.]
Array shape: (3,)
Output type: <class 'numpy.ndarray'>

Below is the model that resembles Alexnet architecture and capturing gradient for every epoch.
# (1) Importing dependency
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import numpy as np
np.random.seed(1000)
# (2) Get Data
import tflearn.datasets.oxflower17 as oxflower17
x, y = oxflower17.load_data(one_hot=True)
# (3) Create a sequential model
model = Sequential()
# 1st Convolutional Layer
model.add(Conv2D(filters=96, input_shape=(224,224,3), kernel_size=(11,11), strides=(4,4), padding='valid'))
model.add(Activation('relu'))
# Pooling
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
# Batch Normalisation before passing it to the next layer
model.add(BatchNormalization())
# 2nd Convolutional Layer
model.add(Conv2D(filters=256, kernel_size=(11,11), strides=(1,1), padding='valid'))
model.add(Activation('relu'))
# Pooling
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
# Batch Normalisation
model.add(BatchNormalization())
# 3rd Convolutional Layer
model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='valid'))
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())
# 4th Convolutional Layer
model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='valid'))
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())
# 5th Convolutional Layer
model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='valid'))
model.add(Activation('relu'))
# Pooling
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
# Batch Normalisation
model.add(BatchNormalization())
# Passing it to a dense layer
model.add(Flatten())
# 1st Dense Layer
model.add(Dense(4096, input_shape=(224*224*3,)))
model.add(Activation('relu'))
# Add Dropout to prevent overfitting
model.add(Dropout(0.4))
# Batch Normalisation
model.add(BatchNormalization())
# 2nd Dense Layer
model.add(Dense(4096))
model.add(Activation('relu'))
# Add Dropout
model.add(Dropout(0.4))
# Batch Normalisation
model.add(BatchNormalization())
# 3rd Dense Layer
model.add(Dense(1000))
model.add(Activation('relu'))
# Add Dropout
model.add(Dropout(0.4))
# Batch Normalisation
model.add(BatchNormalization())
# Output Layer
model.add(Dense(17))
model.add(Activation('softmax'))
model.summary()
# (4) Compile
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# (5) Define Gradient Function
def get_gradient_func(model):
grads = K.gradients(model.total_loss, model.trainable_weights)
inputs = model.model._feed_inputs + model.model._feed_targets + model.model._feed_sample_weights
func = K.function(inputs, grads)
return func
# (6) Train the model such that gradients are captured for every epoch
epoch_gradient = []
for epoch in range(1,5):
model.fit(x, y, batch_size=64, epochs= epoch, initial_epoch = (epoch-1), verbose=1, validation_split=0.2, shuffle=True)
get_gradient = get_gradient_func(model)
grads = get_gradient([x, y, np.ones(len(y))])
epoch_gradient.append(grads)
# (7) Convert to a 2 dimensiaonal array of (epoch, gradients) type
gradient = np.asarray(epoch_gradient)
print("Total number of epochs run:", epoch)
print("Gradient Array has the shape:",gradient.shape)
Output: gradient is the 2 dimensional array that has gradient captured for every epoch that retains the structure of gradient as per the network layers.
Model: "sequential_34"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_115 (Conv2D) (None, 54, 54, 96) 34944
_________________________________________________________________
activation_213 (Activation) (None, 54, 54, 96) 0
_________________________________________________________________
max_pooling2d_83 (MaxPooling (None, 27, 27, 96) 0
_________________________________________________________________
batch_normalization_180 (Bat (None, 27, 27, 96) 384
_________________________________________________________________
conv2d_116 (Conv2D) (None, 17, 17, 256) 2973952
_________________________________________________________________
activation_214 (Activation) (None, 17, 17, 256) 0
_________________________________________________________________
max_pooling2d_84 (MaxPooling (None, 8, 8, 256) 0
_________________________________________________________________
batch_normalization_181 (Bat (None, 8, 8, 256) 1024
_________________________________________________________________
conv2d_117 (Conv2D) (None, 6, 6, 384) 885120
_________________________________________________________________
activation_215 (Activation) (None, 6, 6, 384) 0
_________________________________________________________________
batch_normalization_182 (Bat (None, 6, 6, 384) 1536
_________________________________________________________________
conv2d_118 (Conv2D) (None, 4, 4, 384) 1327488
_________________________________________________________________
activation_216 (Activation) (None, 4, 4, 384) 0
_________________________________________________________________
batch_normalization_183 (Bat (None, 4, 4, 384) 1536
_________________________________________________________________
conv2d_119 (Conv2D) (None, 2, 2, 256) 884992
_________________________________________________________________
activation_217 (Activation) (None, 2, 2, 256) 0
_________________________________________________________________
max_pooling2d_85 (MaxPooling (None, 1, 1, 256) 0
_________________________________________________________________
batch_normalization_184 (Bat (None, 1, 1, 256) 1024
_________________________________________________________________
flatten_34 (Flatten) (None, 256) 0
_________________________________________________________________
dense_99 (Dense) (None, 4096) 1052672
_________________________________________________________________
activation_218 (Activation) (None, 4096) 0
_________________________________________________________________
dropout_66 (Dropout) (None, 4096) 0
_________________________________________________________________
batch_normalization_185 (Bat (None, 4096) 16384
_________________________________________________________________
dense_100 (Dense) (None, 4096) 16781312
_________________________________________________________________
activation_219 (Activation) (None, 4096) 0
_________________________________________________________________
dropout_67 (Dropout) (None, 4096) 0
_________________________________________________________________
batch_normalization_186 (Bat (None, 4096) 16384
_________________________________________________________________
dense_101 (Dense) (None, 1000) 4097000
_________________________________________________________________
activation_220 (Activation) (None, 1000) 0
_________________________________________________________________
dropout_68 (Dropout) (None, 1000) 0
_________________________________________________________________
batch_normalization_187 (Bat (None, 1000) 4000
_________________________________________________________________
dense_102 (Dense) (None, 17) 17017
_________________________________________________________________
activation_221 (Activation) (None, 17) 0
=================================================================
Total params: 28,096,769
Trainable params: 28,075,633
Non-trainable params: 21,136
_________________________________________________________________
Train on 1088 samples, validate on 272 samples
Epoch 1/1
1088/1088 [==============================] - 22s 20ms/step - loss: 3.1251 - acc: 0.2178 - val_loss: 13.0005 - val_acc: 0.1140
Train on 1088 samples, validate on 272 samples
Epoch 2/2
128/1088 [==>...........................] - ETA: 1s - loss: 2.3913 - acc: 0.2656/usr/local/lib/python3.6/dist-packages/keras/engine/sequential.py:111: UserWarning: `Sequential.model` is deprecated. `Sequential` is a subclass of `Model`, you can just use your `Sequential` instance directly.
warnings.warn('`Sequential.model` is deprecated. '
1088/1088 [==============================] - 2s 2ms/step - loss: 2.2318 - acc: 0.3465 - val_loss: 9.6171 - val_acc: 0.1912
Train on 1088 samples, validate on 272 samples
Epoch 3/3
64/1088 [>.............................] - ETA: 1s - loss: 1.5143 - acc: 0.5000/usr/local/lib/python3.6/dist-packages/keras/engine/sequential.py:111: UserWarning: `Sequential.model` is deprecated. `Sequential` is a subclass of `Model`, you can just use your `Sequential` instance directly.
warnings.warn('`Sequential.model` is deprecated. '
1088/1088 [==============================] - 2s 2ms/step - loss: 1.8109 - acc: 0.4320 - val_loss: 4.3375 - val_acc: 0.3162
Train on 1088 samples, validate on 272 samples
Epoch 4/4
64/1088 [>.............................] - ETA: 1s - loss: 1.7827 - acc: 0.4688/usr/local/lib/python3.6/dist-packages/keras/engine/sequential.py:111: UserWarning: `Sequential.model` is deprecated. `Sequential` is a subclass of `Model`, you can just use your `Sequential` instance directly.
warnings.warn('`Sequential.model` is deprecated. '
1088/1088 [==============================] - 2s 2ms/step - loss: 1.5861 - acc: 0.4871 - val_loss: 3.4091 - val_acc: 0.3787
Total number of epochs run: 4
Gradient Array has the shape: (4, 34)
/usr/local/lib/python3.6/dist-packages/keras/engine/sequential.py:111: UserWarning: `Sequential.model` is deprecated. `Sequential` is a subclass of `Model`, you can just use your `Sequential` instance directly.
warnings.warn('`Sequential.model` is deprecated. '

Keras with data augmentation does (almost) not converge

I'm currently playing around with data augmentation in Keras. My model looks as follows:
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 1024, 1280, 3)] 0
_________________________________________________________________
lambda_1 (Lambda) (None, 128, 128, 3) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 126, 126, 32) 896
_________________________________________________________________
conv2d_3 (Conv2D) (None, 42, 42, 32) 9248
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 21, 21, 32) 0
_________________________________________________________________
dropout_2 (Dropout) (None, 21, 21, 32) 0
_________________________________________________________________
flatten_1 (Flatten) (None, 14112) 0
_________________________________________________________________
dense_2 (Dense) (None, 128) 1806464
_________________________________________________________________
dropout_3 (Dropout) (None, 128) 0
_________________________________________________________________
dense_3 (Dense) (None, 6) 774
=================================================================
Total params: 1,817,382
Trainable params: 1,817,382
Non-trainable params: 0
_________________________________________________________________
The lambda layer basically scales the image.
Training worked ok-ish, however, I do not really have enough data, hence, generalisation was lousy.
Hence, I tried data augmentation.
image_gen = ImageDataGenerator(
rotation_range=20,
zoom_range=0.15,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.15,
horizontal_flip=True,
fill_mode="nearest")
image_gen.fit(x_train, augment=True)
summary = model.fit_generator(image_gen.flow(x_train, y_train, batch_size=64), epochs=30, validation_data=(x_valid, y_valid) )
But now the fitting is not converging anymore
Epoch 1/30
3/3 [==============================] - 37s 12s/step - loss: 6.5018 - categorical_accuracy: 0.6000 - val_loss: 10.0990 - val_categorical_accuracy: 0.5217
Epoch 2/30
3/3 [==============================] - 34s 11s/step - loss: 6.5018 - categorical_accuracy: 0.6000 - val_loss: 10.0990 - val_categorical_accuracy: 0.5217
Epoch 3/30
3/3 [==============================] - 32s 11s/step - loss: 6.2290 - categorical_accuracy: 0.6000 - val_loss: 7.7606 - val_categorical_accuracy: 0.5217
Epoch 4/30
3/3 [==============================] - 36s 12s/step - loss: 6.7746 - categorical_accuracy: 0.6000 - val_loss: 10.0990 - val_categorical_accuracy: 0.5217
Epoch 5/30
3/3 [==============================] - 35s 12s/step - loss: 6.7746 - categorical_accuracy: 0.6000 - val_loss: 7.7606 - val_categorical_accuracy: 0.5217
Epoch 6/30
3/3 [==============================] - 35s 12s/step - loss: 7.3203 - categorical_accuracy: 0.6000 - val_loss: 10.0990 - val_categorical_accuracy: 0.5217
Epoch 7/30
1/3 [=========>....................] - ETA: 4s - loss: 2.6863 - categorical_accuracy: 0.8333
I inspected the generated images manually and they look good

Multiclass classification using sequence data with LSTM Keras not working

I'm trying to do a multi-class classification on sequential data to learn what is the source of certain events based on the cumulative reading of the sources.
I'm using a simple LSTM layer with 64 units and a Dense layer with the same number of units as targets. The model does not seems to be learning anything as the accuracy remains about 1% all thought.
def create_model():
model = Sequential()
model.add(LSTM(64, return_sequences=False))
model.add(Dense(8))
model.add(Activation("softmax"))
model.compile(
loss="categorical_crossentropy",
optimizer=Adam(lr=0.00001),
metrics=["accuracy"],
)
return model
I have tried changing learning rate to very small values (0.001, 0.0001, 1e-5) and training for larger epochs but no change in accuracy observed. Am I missing something here? Is my data preprocessing not correct or the model creation is faulty?
Thanks in advance for your help.
Dataset
Accumulated- Source-1 Source-2 Source-3
Reading
217 0 0 0
205 0 0 0
206 0 0 0
231 0 0 0
308 0 0 1
1548 0 0 1
1547 0 0 1
1530 0 0 1
1545 0 0 1
1544 0 0 1
1527 0 0 1
1533 0 0 1
1527 0 0 1
1527 0 0 1
1534 0 0 1
1520 0 0 1
1524 0 0 1
1523 0 0 1
205 0 0 0
209 0 0 0
.
.
.
I created a rolling window dataset having SEQ_LEN=5 to be fed to an LSTM network:
rolling_window labels
[205, 206, 217, 205, 206] [0, 0, 0]
[206, 217, 205, 206, 231] [0, 0, 0]
[217, 205, 206, 231, 308] [0, 0, 1]
[205, 206, 231, 308, 1548] [0, 0, 1]
[206, 231, 308, 1548, 1547] [0, 0, 1]
[231, 308, 1548, 1547, 1530] [0, 0, 1]
[308, 1548, 1547, 1530, 1545] [0, 0, 1]
[1548, 1547, 1530, 1545, 1544] [0, 0, 1]
[1547, 1530, 1545, 1544, 1527] [0, 0, 1]
[1530, 1545, 1544, 1527, 1533] [0, 0, 1]
[1545, 1544, 1527, 1533, 1527] [0, 0, 1]
[1544, 1527, 1533, 1527, 1527] [0, 0, 1]
[1527, 1533, 1527, 1527, 1534] [0, 0, 1]
[1533, 1527, 1527, 1534, 1520] [0, 0, 1]
[1527, 1527, 1534, 1520, 1524] [0, 0, 1]
[1527, 1534, 1520, 1524, 1523] [0, 0, 1]
[1534, 1520, 1524, 1523, 1520] [0, 0, 1]
[1520, 1524, 1523, 1520, 205] [0, 0, 0]
.
.
.
Reshaped dataset
X_train = train_df.rolling_window.values
X_train = X_train.reshape(X_train.shape[0], 1, SEQ_LEN)
Y_train = train_df.labels.values
Y_train = Y_train.reshape(Y_train.shape[0], 3)
Model
def create_model():
model = Sequential()
model.add(LSTM(64, input_shape=(1, SEQ_LEN), return_sequences=True))
model.add(Activation("relu"))
model.add(Flatten())
model.add(Dense(3))
model.add(Activation("softmax"))
model.compile(
loss="categorical_crossentropy", optimizer=Adam(lr=0.01), metrics=["accuracy"]
)
return model
Training
model = create_model()
model.fit(X_train, Y_train, batch_size=512, epochs=5)
Training Output
Epoch 1/5
878396/878396 [==============================] - 37s 42us/step - loss: 0.2586 - accuracy: 0.0173
Epoch 2/5
878396/878396 [==============================] - 36s 41us/step - loss: 0.2538 - accuracy: 0.0175
Epoch 3/5
878396/878396 [==============================] - 36s 41us/step - loss: 0.2538 - accuracy: 0.0176
Epoch 4/5
878396/878396 [==============================] - 37s 42us/step - loss: 0.2537 - accuracy: 0.0177
Epoch 5/5
878396/878396 [==============================] - 38s 43us/step - loss: 0.2995 - accuracy: 0.0174
[EDIT-1]
After trying Max's suggestions, here are the results (loss and accuracy are still not changing)
Suggested model
def create_model():
model = Sequential()
model.add(LSTM(64, return_sequences=False))
model.add(Dense(8))
model.add(Activation("softmax"))
model.compile(
loss="categorical_crossentropy",
optimizer=Adam(lr=0.001),
metrics=["accuracy"],
)
return model
X_train
array([[[205],
[217],
[209],
[215],
[206]],
[[217],
[209],
[215],
[206],
[206]],
[[209],
[215],
[206],
[206],
[211]],
...,
[[175],
[175],
[173],
[176],
[174]],
[[175],
[173],
[176],
[174],
[176]],
[[173],
[176],
[174],
[176],
[173]]])
Y_train (P.S: There are 8 target classes actually. The above example was a simplification of the real problem)
array([[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]])
Training-output
Epoch 1/5
878396/878396 [==============================] - 15s 17us/step - loss: 0.1329 - accuracy: 0.0190
Epoch 2/5
878396/878396 [==============================] - 15s 17us/step - loss: 0.1313 - accuracy: 0.0190
Epoch 3/5
878396/878396 [==============================] - 16s 18us/step - loss: 0.1293 - accuracy: 0.0190
Epoch 4/5
878396/878396 [==============================] - 16s 18us/step - loss: 0.1355 - accuracy: 0.0195
Epoch 5/5
878396/878396 [==============================] - 15s 18us/step - loss: 0.1315 - accuracy: 0.0236
[EDIT-2]
Based on Max and Marcin's suggestions below the accuracy is mostly remaining below 3%. Although 1 out of 10 times it hits 95% accuracy. It all depends on what the accuracy is at the beginning of the first epoch. If it doesn't start the gradient descent in the right place, it doesn't reach good accuracy. Do I need to use a different initializer? Changing the learning rate doesn't bring repeatable results.
Suggestions:
1. Scale/Normalize the X_train (done)
2. Not reshaping Y_train (done)
3. Having lesser units in LSTM layer (reduced from 64 to 16)
4. Have smaller batch_size (reduced from 512 to 64)
Scaled X_train
array([[[ 0.01060734],
[ 0.03920736],
[ 0.02014085],
[ 0.03444091],
[ 0.01299107]],
[[ 0.03920728],
[ 0.02014073],
[ 0.03444082],
[ 0.01299095],
[ 0.01299107]],
[[ 0.02014065],
[ 0.0344407 ],
[ 0.01299086],
[ 0.01299095],
[ 0.02490771]],
...,
[[-0.06089251],
[-0.06089243],
[-0.06565897],
[-0.05850889],
[-0.06327543]],
[[-0.06089251],
[-0.06565908],
[-0.05850898],
[-0.06327555],
[-0.05850878]],
[[-0.06565916],
[-0.0585091 ],
[-0.06327564],
[-0.05850889],
[-0.06565876]]])
Non reshaped Y_train
array([[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]])
Model with lesser LSTM units
def create_model():
model = Sequential()
model.add(LSTM(16, return_sequences=False))
model.add(Dense(8))
model.add(Activation("softmax"))
model.compile(
loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=["accuracy"]
)
return model
Training output
Epoch 1/5
878396/878396 [==============================] - 26s 30us/step - loss: 0.1325 - accuracy: 0.0190
Epoch 2/5
878396/878396 [==============================] - 26s 29us/step - loss: 0.1352 - accuracy: 0.0189
Epoch 3/5
878396/878396 [==============================] - 26s 30us/step - loss: 0.1353 - accuracy: 0.0192
Epoch 4/5
878396/878396 [==============================] - 26s 29us/step - loss: 0.1365 - accuracy: 0.0197
Epoch 5/5
878396/878396 [==============================] - 27s 31us/step - loss: 0.1378 - accuracy: 0.0201

The sequence should be the first dimension of the LSTM (2nd of the input array), i.e.:
Reshaped dataset
X_train = train_df.rolling_window.values
X_train = X_train.reshape(X_train.shape[0], SEQ_LEN, 1)
Y_train = train_df.labels.values
Y_train = Y_train.reshape(Y_train.shape[0], 3)
The input shape is not required for LSTM.
LSTM has 'tanh' activation by default, which is usually a good option.
Model
def create_model():
model = Sequential()
model.add(LSTM(64, return_sequences=True))
model.add(Flatten())
model.add(Dense(3))
model.add(Activation("softmax"))
model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.01), metrics=["accuracy"])
return model
Maybe it would be a better choice not to use a Flatten() layer but to use return_sequences=False for the LSTM. Just try.
Edit
Also try pre-processing in terms of feature scaling of the data. The data values seem to be quite large.

How to apply classification on series data in Keras?

The structure of my input data is:
print(df.col)
0 [262, 330, 392, 522, 784, 0, 0]
1 [262, 290, 330, 392, 522, 784, 0]
2 [262, 330, 392, 522, 784, 0, 0]
3 [250, 262, 330, 392, 522, 784, 0]
4 [262, 290, 306, 330, 392, 784, 0]
.
.
.
I had variable sized data so I've added a padding of 0's in the end to fix the input data shape.
The output column is:
print(df.predict)
array([[0., 0., 0., 1.],
[1., 0., 0., 0.],
[0., 0., 1., 0.],
[0., 0., 1., 0.],
[0., 0., 1., 0.],
[0., 1., 0., 0.],...])
Output is one hot encoded.
Following is my model:
model = Sequential()
model.add(Dense(7, activation='relu', input_dim = 7))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(4))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Fit the model
model.fit(X_train, y_train, epochs=500, batch_size=10, verbose=2)
The accuracy and loss become constant after 2-3 epochs.
Epoch 1/500
0s - loss: 5.8413 - acc: 0.1754
Epoch 2/500
0s - loss: 5.7398 - acc: 0.1754
Epoch 3/500
0s - loss: 5.7190 - acc: 0.1754
Epoch 4/500
0s - loss: 5.6885 - acc: 0.1754
Epoch 5/500
0s - loss: 5.6650 - acc: 0.1754
Epoch 6/500
0s - loss: 5.6403 - acc: 0.1754
Epoch 7/500
0s - loss: 5.6164 - acc: 0.2456
Epoch 8/500
0s - loss: 5.5900 - acc: 0.2456
Epoch 9/500
0s - loss: 5.5730 - acc: 0.2456
...
0s - loss: 5.3727 - acc: 0.1754
Epoch 499/500
0s - loss: 5.3727 - acc: 0.1754
Epoch 500/500
0s - loss: 5.3727 - acc: 0.1754
I have 72 data points and 4 classes (about 18 samples for each class)
The data is fairly simple. Why is the accuracy so low?
Is the model designed right?
I'm new to ML and Keras. Any help is appreciated.

Try this model.add(layers.Dense(4, activation = 'softmax')) as you last layer.
If you have more than 2 classes for classification you will need a softmax layer in the end. This is a function, that output the probabilities for the 4 different classes (all add to 1) and the one with the highest probability will be your class. This way your network will be able to learn all the 4 different classes instead of only two.

Keras accuracy never exceeds 19%

I am taking the images from the SVHN (street view house number dataset, stanford) and I could really use some help in figuring out why my accuracy does not increase past 19%... This is essentially an MNIST tutorial with more difficult images (other numbers could be off center, blurs, shadows etc..)
I essentially take each image and subtract that image's mean then I normalize to 0-1 (divide by 255.)
The pipeline is simple enough:
2 Convolution 2d Layers (32 filters, 3x3)
MaxPool (2x2)
Dropout (.25)
2 Convolution 2d layers (64 filters, 3x3)
Max Pool (2x2)
Dropout(.25)
Flatten
Dense Relu
Dropout(.5)
Dense Softmax (10)
1792/73257 [..............................] - ETA: 3:17 - loss: 2.3241 - acc: 0.1602
1920/73257 [..............................] - ETA: 3:16 - loss: 2.3203 - acc: 0.1625
2048/73257 [..............................] - ETA: 3:14 - loss: 2.3177 - acc: 0.1621
2176/73257 [..............................] - ETA: 3:13 - loss: 2.3104 - acc: 0.1682
...
...
...
53376/73257 [====================>.........] - ETA: 51s - loss: 2.2439 - acc: 0.1879
53504/73257 [====================>.........] - ETA: 51s - loss: 2.2439 - acc: 0.1879
53632/73257 [====================>.........] - ETA: 50s - loss: 2.2439 - acc: 0.1878
53760/73257 [=====================>........] - ETA: 50s - loss: 2.2439 - acc: 0.1879
Can anyone help me figure out what I'm doing wrong? Are there any tips to figuring out why it would increase in the beginning as normal then taper off so quickly?
I am using categorical cross entropy with an rmsprop optimizer
epochs: 20
batch_size: 128
image_size: 32x32
model = Sequential()
model.add(Convolution2D(32, (3, 3),
strides=1,
activation='relu',
padding='same',
input_shape=input_shape,
data_format='channels_last'))
model.add(Convolution2D(32, (3, 3), padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format='channels_last'))
model.add(Dropout(0.25))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(model.output_shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
#METHOD1
# print('compiling model...')
# model.compile(loss='mean_squared_error',
# optimizer='sgd',
# metrics=['accuracy'])
# print('fitting model...')
#
# model.fit(X_train, y_train, batch_size=64, epochs=1, verbose=1)
# METHOD2
sgd = SGD(lr=0.05)
model.compile(loss='categorical_crossentropy',
optimizer=sgd,
metrics=['accuracy'])
model.fit(X_train, y_train,
epochs=20,
batch_size=128)
score = model.evaluate(X_test, y_test, batch_size=128)

Develop Reference

ios ruby-on-rails asp.net-mvc docker delphi jenkins grails google-sheets machine-learning dart

CNN learning stagnation - machine-learning

Your layers are set to trainable=False(apart from the last dense layer). Therefore your CNN cannot learn. In addition you won´t be able to train just on a single sample. If you run into performance issues on your GPU switch to CPU or AWS or reduce your image size.

Related

How to store the gradients of Alexnet as numpy array (in each iteration) in Python?

Keras with data augmentation does (almost) not converge

Multiclass classification using sequence data with LSTM Keras not working

How to apply classification on series data in Keras?

Keras accuracy never exceeds 19%

Categories

Resources