LSTM autoencoder - batch size larger than 1 causes ValueError - machine-learning

I am attempting to train an LSTM autoencoder on some data with shape (21785, 160, 8). If I run the following code
import numpy as np
from keras.layers import Input, Dense, LSTM, TimeDistributed
from keras import regularizers, models, optimizers
from keras.optimizers import Adam
data = np.random.random(size=(21785,160,8))
timesteps = 160
input_dim = 8
latent_dim = 1
learning_rate = 1e-4
regularization = 5e-4
epochs=3
inputs = keras.Input(shape=(timesteps, input_dim))
encoded = layers.LSTM(latent_dim, activation='relu',
kernel_regularizer=regularizers.l2(regularization))(inputs)
decoded = layers.RepeatVector(timesteps)(encoded)
decoded = layers.LSTM(input_dim, return_sequences=True, activation='relu',
kernel_regularizer=regularizers.l2(regularization))(decoded)
sequence_autoencoder = keras.Model(inputs, decoded)
encoder = keras.Model(inputs, encoded)
encoder.compile(optimizer=Adam(lr=learning_rate), loss='mean_squared_error')
encoder.fit(data, data, epochs=epochs, batch_size=5, shuffle=False, verbose=True)
I get an error
ValueError: Dimensions must be equal, but are 5 and 160 for '{{node
mean_squared_error/SquaredDifference}} =
SquaredDifference[T=DT_FLOAT](model_3/lstm_2/strided_slice_3,
IteratorGetNext:1)' with input shapes: [5,1], [5,160,8].
However, if I reduce the batch_size = 1 then the code runs. What am I doing wrong?

Related

Unable to save model using call backs in keras

I was working on an image recognition problem. I am training my model for 200 epochs. And I want to save the model after every epoch if it has the best validation accuracy so far. This is my code,
from keras.models import Sequential
from keras.models import Model
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras import optimizers, losses, activations, models
from keras.layers import Convolution2D, Dense, Input, Flatten, Dropout, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D, Concatenate
from keras import applications
from keras import backend as K
from keras import callbacks
from keras.preprocessing.image import ImageDataGenerator
ROWS,COLS = 669,1026
input_shape = (ROWS, COLS, 3)
base_model = applications.VGG19(weights='imagenet',
include_top=False,
input_shape=(ROWS, COLS,3))
l = 0
for layer in base_model.layers:
layer.trainable = False
l += 1
c = 0
for layer in base_model.layers:
c += 1
if c > l-5:
layer.trainable = True
for layer in base_model.layers:
print(layer,layer.trainable)
base_model.summary()
add_model = Sequential()
add_model.add(base_model)
add_model.add(GlobalAveragePooling2D())
add_model.add(Dense(514, activation='relu'))
add_model.add(Dense(128, activation='relu'))
add_model.add(Dense(64, activation='relu'))
add_model.add(Dropout(0.5))
add_model.add(Dense(8, activation='relu'))
add_model.add(Dropout(0.5))
add_model.add(Dense(1, activation='sigmoid'))
model = add_model
# model.compile(loss='binary_crossentropy',
# optimizer=optimizers.SGD(lr=1e-,
# momentum=0.9),
# metrics=['accuracy'])
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.summary()
train_data_dir = '/home/spectrograms/train'
validation_data_dir = '/home/spectrograms/test'
nb_train_samples = 791
nb_validation_samples = 198
epochs = 200
batch_size = 3
if K.image_data_format() == 'channels_first':
input_shape = (3, ROWS, COLS)
else:
input_shape = (ROWS, COLS,3)
# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0,
zoom_range=0,
horizontal_flip=False)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(ROWS, COLS),
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(ROWS, COLS),
batch_size=batch_size,
class_mode='binary')
checkpoint_filepath = '/home/CNN/saved_model/checkpoints/checkpoint-{epoch:02d}-{val_loss:.2f}.h5'
model_checkpoint_callback = callbacks.ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=False,
monitor='val_accuracy',
mode='max',
save_best_only=True)
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
callbacks = [model_checkpoint_callback],
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
But I am getting the error OSError: Unable to create file (unable to open file: name = '/home/CNN/saved_model/checkpoints/checkpoint-01-0.69.h5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)
But the file directory actually exit. I don't understand what the issue is here.
Add the following in your code
save_dir = '/home/CNN/saved_model/checkpoints/'
if not os.path.exists(save_dir):
os.makedirs(save_dir)
checkpoint_filepath = os.path.join(save_dir, "checkpoint-{epoch:02d}-{val_loss:.2f}.h5")
model_checkpoint_callback = callbacks.ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=False,
monitor='val_accuracy',
mode='max',
save_best_only=True)

Plot Confusion Matrix from CNN Model

This original work is presented here
How to go about plotting the confusion matrix based of a CNN model?
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics
##Need to put this block of code in for cuDNN to initialize properly
import tensorflow as tf
config = tf.compat.v1.ConfigProto(gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.8)
# device_count = {'GPU': 1}
)
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(session)
#------------------------------------------------------------------------------------------------------------------
num_rows = 40
num_columns = 174
num_channels = 1
x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)
num_labels = yy.shape[1]
filter_size = 2
# Construct model
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, input_shape=(num_rows, num_columns, num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=128, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())
model.add(Dense(num_labels, activation='softmax'))
then trained as:
from keras.callbacks import ModelCheckpoint
from datetime import datetime
#num_epochs = 12
#num_batch_size = 128
num_epochs = 72
num_batch_size = 256
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_cnn.hdf5',
verbose=1, save_best_only=True)
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)
I have been trying a few things, one of which is:
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 4))
plot_confusion_matrix=(model(),x_test, y_test)
plt.plot(plot_confusion_matrix)
but I cannot get the confusion matrix to plot.
I also looked at tf.math.confusion_matrix(), but what is the labels and predictions as defined from the CNN model above??
The confusion matrix is a multi-classification.
Is
y_true = np.argmax(y_test, 1)??
and
y_pred = model.predict_classes(x_test)??
labels: 1-D Tensor of real labels for the classification task.
predictions: 1-D Tensor of predictions for a given classification.
As they say in official documentation , labels are the names of Output classes and predictions, However as they say everything has to be 1D tensor it means labels will be Ground truth for one instance and the corresponding indexed value in the Predictions will hold its predicted value.
So what you can do is, get the predictions and labels for each instances,in your code,you have passed the x_test and y_test which arent the supposed to be passed elements.
instead use model.predict to get the output labels.
y_predict=model.predict(x_test)
y_true=y_test
res = tf.math.confusion_matrix(y_true,y_predict)
This res is a 2D matrix now to print it you need to
plot_confusion_matrix(classifier, X_test, y_test,
display_labels=class_names,
cmap=plt.cm.Blues,
normalize=normalize)
Here put classifer = "model",not functional model().
Hope this helps,here are some more resources.
Here You can see the multiclass classification Confusion matrix technique.
Multiclass plot github function
Another custom plot function

Problems with PyTorch MLP when training the MNIST dataset retrieved from Keras

I have finished a PyTorch MLP model for the MNIST dataset, but got two different results: 0.90+ accuracy when using MNIST dataset from PyTorch, but ~0.10 accuracy when using MNIST dataset from Keras.
Below is my code with dependency: PyTorch 0.3.0.post4, keras 2.1.3, tensorflow backend 1.4.1 gpu version.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch as pt
import torchvision as ptv
from keras.datasets import mnist
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
# training data from PyTorch
train_set = ptv.datasets.MNIST("./data/mnist/train", train=True, transform=ptv.transforms.ToTensor(), download=True)
test_set = ptv.datasets.MNIST("./data/mnist/test", train=False, transform=ptv.transforms.ToTensor(), download=True)
train_dataset = DataLoader(train_set, batch_size=100, shuffle=True)
test_dataset = DataLoader(test_set, batch_size=10000, shuffle=True)
class MLP(pt.nn.Module):
"""The Multi-layer perceptron"""
def __init__(self):
super(MLP, self).__init__()
self.fc1 = pt.nn.Linear(784, 512)
self.fc2 = pt.nn.Linear(512, 128)
self.fc3 = pt.nn.Linear(128, 10)
self.use_gpu = True
def forward(self, din):
din = din.view(-1, 28 * 28)
dout = F.relu(self.fc1(din))
dout = F.relu(self.fc2(dout))
# return F.softmax(self.fc3(dout))
return self.fc3(dout)
model = MLP().cuda()
print(model)
# loss func and optim
optimizer = pt.optim.SGD(model.parameters(), lr=1)
criterion = pt.nn.CrossEntropyLoss().cuda()
def evaluate_acc(pred, label):
pred = pred.cpu().data.numpy()
label = label.cpu().data.numpy()
test_np = (np.argmax(pred, 1) == label)
test_np = np.float32(test_np)
return np.mean(test_np)
def evaluate_loader(loader):
print("evaluating ...")
accurarcy_list = []
for i, (inputs, labels) in enumerate(loader):
inputs = pt.autograd.Variable(inputs).cuda()
labels = pt.autograd.Variable(labels).cuda()
outputs = model(inputs)
accurarcy_list.append(evaluate_acc(outputs, labels))
print(sum(accurarcy_list) / len(accurarcy_list))
def training(d, epochs):
for x in range(epochs):
for i, data in enumerate(d):
optimizer.zero_grad()
(inputs, labels) = data
inputs = pt.autograd.Variable(inputs).cuda()
labels = pt.autograd.Variable(labels).cuda()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
if i % 200 == 0:
print(i, ":", evaluate_acc(outputs, labels))
# Training MLP for 4 epochs with MNIST dataset from PyTorch
training(train_dataset, 4)
# The accuracy is ~0.96.
evaluate_loader(test_dataset)
print("###########################################################")
def load_mnist():
(x, y), (x_test, y_test) = mnist.load_data()
x = x.reshape((-1, 1, 28, 28)).astype(np.float32)
x_test = x_test.reshape((-1, 1, 28, 28)).astype(np.float32)
y = y.astype(np.int64)
y_test = y_test.astype(np.int64)
print("x.shape", x.shape, "y.shape", y.shape,
"\nx_test.shape", x_test.shape, "y_test.shape", y_test.shape,
)
return x, y, x_test, y_test
class TMPDataset(Dataset):
"""Dateset for loading Keras MNIST dataset."""
def __init__(self, a, b):
self.x = a
self.y = b
def __getitem__(self, item):
return self.x[item], self.y[item]
def __len__(self):
return len(self.y)
x_train, y_train, x_test, y_test = load_mnist()
# Create dataloader for MNIST dataset from Keras.
test_loader = DataLoader(TMPDataset(x_test, y_test), num_workers=1, batch_size=10000)
train_loader = DataLoader(TMPDataset(x_train, y_train), shuffle=True, batch_size=100)
# Evaluate the performance of MLP trained on PyTorch dataset and the accurach is ~0.96.
evaluate_loader(test_loader)
evaluate_loader(train_loader)
model = MLP().cuda()
print(model)
optimizer = pt.optim.SGD(model.parameters(), lr=1)
criterion = pt.nn.CrossEntropyLoss().cuda()
# Train now on MNIST dataset from Keras.
training(train_loader, 4)
# Evaluate the trianed model on MNIST dataset from Keras and result in performance ~0.10...
evaluate_loader(test_loader)
evaluate_loader(train_loader)
I had checked some samples from Keras MNIST dataset and found no error.
I am wondering what is wrong with the datasets?
The code can run without error, run it to see the results.
The MNIST data coming from Keras are not normalized; following the Keras MNIST MLP example, you should do it manually, i.e. you should include the following in your load_data() function:
x /= 255
x_test /= 255
Not sure about PyTorch, but it would seem that the MNIST data from their own utility functions come already normalized (as is the case with Tensorflow - see the third point in my answer here).
A 10% accuracy (i.e. equivalent to random guessing) in case of not-normalized input data is perfectly consistent.

Why do I get a small values with InceptionV3 ? how to set a threshold to detect the present classes in the image?

I am using InceptionV3 after fine tuning it to my own data set for a multi class multi label classification problem I made the most important changes like softmax to sigmoid and I am using this loss function
model.compile(loss='binary_crossentropy',optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
but when I predict using the generated model I get a small values like this [ 2.74303748e-04 7.97736086e-03 2.44359515e-04 7.09630767e-05
5.43296163e-04 4.08404367e-03 3.28547925e-01 1.05091414e-04
1.80469989e-03 2.85170972e-03 1.44978316e-04 7.78235449e-03
1.72435939e-02 1.55413849e-02 3.82270187e-01 1.06311939e-03
2.70067930e-01 6.08937175e-04 7.47230020e-04 1.07850268e-04]
the source code is this :(can it be my validation set ?)
import keras
import os
import sys
import glob
import argparse
import matplotlib.pyplot as plt
from keras import __version__
from keras.applications.inception_v3 import InceptionV3,
preprocess_input
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
IM_WIDTH, IM_HEIGHT = 299, 299 #fixed size for InceptionV3
NB_EPOCHS = 3
BAT_SIZE = 32
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 172
def get_nb_files(directory):
"""Get number of files by searching directory recursively"""
if not os.path.exists(directory):
return 0
cnt = 0
for r, dirs, files in os.walk(directory):
for dr in dirs:
cnt += len(glob.glob(os.path.join(r, dr + "/*")))
return cnt
def setup_to_transfer_learn(model, base_model):
"""Freeze all layers and compile the model"""
for layer in base_model.layers:
layer.trainable = False
#model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
#metrics=['accuracy'])
model.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
def add_new_last_layer(base_model, nb_classes):
"""Add last layer to the convnet
Args:
base_model: keras model excluding top
nb_classes: # of classes
Returns:
new keras model with last layer
"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(FC_SIZE, activation='relu')(x) #new FC layer, random init
predictions = Dense(nb_classes, activation='sigmoid')(x)
model = Model(input=base_model.input, output=predictions)
return model
def setup_to_finetune(model):
"""Freeze the bottom NB_IV3_LAYERS and retrain the remaining top
layers.
note: NB_IV3_LAYERS corresponds to the top 2 inception blocks in the
inceptionv3 arch
Args:
model: keras model
"""
for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
layer.trainable = False
for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
layer.trainable = True
model.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
def train(args):
"""Use transfer learning and fine-tuning to train a network on a new
dataset"""
nb_train_samples = get_nb_files(args.train_dir)
nb_classes = len(glob.glob(args.train_dir + "/*"))
nb_val_samples = get_nb_files(args.val_dir)
nb_epoch = int(args.nb_epoch)
batch_size = int(args.batch_size)
# data prep
train_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
test_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
train_generator = train_datagen.flow_from_directory(
args.train_dir,
target_size=(IM_WIDTH, IM_HEIGHT),
batch_size=batch_size,
)
validation_generator = test_datagen.flow_from_directory(
args.val_dir,
target_size=(IM_WIDTH, IM_HEIGHT),
batch_size=batch_size,
)
# setup model
base_model = InceptionV3(weights='imagenet', include_top=False)
#include_top=False excludes final FC layer
model = add_new_last_layer(base_model, nb_classes)
# transfer learning
setup_to_transfer_learn(model, base_model)
history_tl = model.fit_generator(
train_generator,
nb_epoch=nb_epoch,
samples_per_epoch=nb_train_samples,
validation_data=validation_generator,
nb_val_samples=nb_val_samples,
class_weight='auto')
# fine-tuning
setup_to_finetune(model)
history_ft = model.fit_generator(
train_generator,
samples_per_epoch=nb_train_samples,
nb_epoch=nb_epoch,
validation_data=validation_generator,
nb_val_samples=nb_val_samples,
class_weight='auto')
model.save(args.output_model_file)
if args.plot:
plot_training(history_ft)
def plot_training(history):
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r.')
plt.plot(epochs, val_acc, 'r')
plt.title('Training and validation accuracy')
plt.figure()
plt.plot(epochs, loss, 'r.')
plt.plot(epochs, val_loss, 'r-')
plt.title('Training and validation loss')
plt.show()
if __name__=="__main__":
a = argparse.ArgumentParser()
a.add_argument("--train_dir")
a.add_argument("--val_dir")
a.add_argument("--nb_epoch", default=NB_EPOCHS)
a.add_argument("--batch_size", default=BAT_SIZE)
a.add_argument("--output_model_file", default="inceptionv3-ft.model")
a.add_argument("--plot", action="store_true")
args = a.parse_args()
if args.train_dir is None or args.val_dir is None:
a.print_help()
sys.exit(1)
if (not os.path.exists(args.train_dir)) or (not
os.path.exists(args.val_dir)):
print("directories do not exist")
sys.exit(1)
train(args)
You answer your own question. This is the reason.
I made the most important changes like softmax to sigmoid
Sigmoid function maps the input to the range [0, 1]. Hence the output values of the network are just the outputs of this function and are not class probabilities. Maximum value from the nodes will give you the neuron that has the maximum output and hence the present class.
When you say
but some values should be >0.5(the present classes ) and some others should be <0.5( the non present classes )
in the comments, you are referring to the probability of >0.5 of the present class. What you require for class probabilities is the softmax function.
Therefore replacing your last sigmoid layer with a softmax layer will get you what you think you should be observing.

Train Keras Stateful LSTM return_seq=true not learning

Consider this minimal runnable example:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import numpy as np
import matplotlib.pyplot as plt
max = 30
step = 0.5
n_steps = int(30/0.5)
x = np.arange(0,max,step)
x = np.cos(x)*(max-x)/max
y = np.roll(x,-1)
y[-1] = x[-1]
shape = (n_steps,1,1)
batch_shape = (1,1,1)
x = x.reshape(shape)
y = y.reshape(shape)
model = Sequential()
model.add(LSTM(50, return_sequences=True, stateful=True, batch_input_shape=batch_shape))
model.add(LSTM(50, return_sequences=True, stateful=True))
model.add(Dense(1))
model.compile(loss='mse', optimizer='rmsprop')
for i in range(1000):
model.reset_states()
model.fit(x,y,nb_epoch=1, batch_size=1)
p = model.predict(x, batch_size=1)
plt.clf()
plt.axis([-1,31, -1.1, 1.1])
plt.plot(x[:, 0, 0], '*')
plt.plot(y[:,0,0],'o')
plt.plot(p[:,0,0],'.')
plt.draw()
plt.pause(0.001)
As stated in the keras API https://keras.io/layers/recurrent/
the last state for each sample at index i in a batch will be used as
initial state for the sample of index i in the following batch
So I'm using batch_size = 1 and I'm trying to predict the next value in the decaying cos-function for each timestep. The prediction, or the red dots in the picture below should go into the green circles for the script to predict it correctly, however it doesn't converge... Have any idea to make it learn?
The problem lied in a calling model.fit for each epoch separately. In this case optimizer parameters are reset what was harmful for a training process. Other thing is calling reset_states also before prediction - as if it wasn't called - the states from fit are starting states for prediction what also might be harmful. The final code is following:
for epoch in range(1000):
model.reset_states()
tot_loss = 0
for batch in range(n_steps):
batch_loss = model.train_on_batch(x[batch:batch+1], y[batch:batch+1])
tot_loss+=batch_loss
print "Loss: " + str(tot_loss/float(n_steps))
model.reset_states()
p = model.predict(x, batch_size=1)

Resources