I am training a deep learning model for meme image classification.
I am trying to cascade VGG model and LSTM text classification, model trains infinitely and never stops after mentioned epochs. I tried all possible ways to terminate using early stopping, still it runs infinitely.
My data folder has 2 folders one with meme image and another one with tag text files
Below is the code.
label_df_clean = t
num_of_samples = label_df_clean.shape[0]
# ## Glove
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
maxlen = 100
training_samples = num_of_samples
tag_vocabulary_size = 10000
max_words = tag_vocabulary_size
glove_dir = 'glove/glove.6B/'
embeddings_index = {}
f = open(os.path.join(glove_dir, 'glove.6B.100d.txt'), encoding = "UTF-8")
for line in f:
values = line.split()
word = values[0]
coefs = np.asarray(values[1:], dtype='float32')
embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))
tokenizer = Tokenizer(num_words=max_words)
texts = []
for tag_list in label_df_clean['word_tags']:
texts.append(' '.join(tag_list))
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
print('Found {} unique tokens'.format(len(word_index)))
tag_data = pad_sequences(sequences, maxlen=maxlen)
embedding_dim = 100
embedding_matrix = np.zeros((max_words, embedding_dim))
for word, i in word_index.items():
embedding_vector = embeddings_index.get(word)
if i < max_words:
if embedding_vector is not None:
# Words not found in embedding index will be all-zeros.
embedding_matrix[i] = embedding_vector
tag_input = Input(shape=(None,), dtype='int32', name='tag')
embedded_tag = layers.Embedding(max_words, embedding_dim)(tag_input)
encoded_tag = layers.LSTM(512)(embedded_tag)
# ## CONV2D
from keras.applications import VGG16
image_input = Input(shape=(150, 150, 3), name='image')
vgg16 = VGG16(weights='imagenet',
input_shape=(150, 150, 3))(image_input)
x = layers.Flatten()(vgg16)
x = layers.Dense(256, activation='relu')(x)
import tensorflow
concatenated = layers.concatenate([x, encoded_tag], axis=-1)
output = layers.Dense(1, activation='sigmoid')(concatenated)
model = Model([image_input, tag_input], output)
# model.layers[1].trainable = False # freeze VGG16 convolutional base
# model.layers[4].trainable = False # freeze GloVe word embedding
class new_callback(tensorflow.keras.callbacks.Callback):
def epoch_end(self, epoch, logs={}):
if(logs.get('accuracy')> 0.65): # select the accuracy
print("\n !!! 65% accuracy, no further training !!!")
self.model.stop_training = True
callbacks = new_callback()
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
# # model.layers[1].trainable = False # freeze VGG16 convolutional base
# model.layers[4].set_weights([embedding_matrix])
# model.layers[4].trainable = False # freeze GloVe word embedding
dim = (150, 150)
X_image_train = []
X_tag_train = tag_data
y_train = label_df_clean["label"]
for fname in listdir(small_image_path):
fpath = os.path.join(small_image_path, fname)
im = cv2.imread(fpath)
im_resized = cv2.resize(im, dim, interpolation = cv2.INTER_AREA)
# y_train.append(1)
# # add wrong tag samples
# num_negative_samples = len(y_train)
# for i in range(num_negative_samples):
# image = X_image_train[i]
# X_image_train.append(image)
# j = (i + 1) % num_negative_samples # get a different tag
# tag = X_tag_train[j]
# X_tag_train = np.append(X_tag_train, tag)
# y_train
# from sklearn import preprocessing
# le = preprocessing.LabelEncoder()
# le.fit(y_train)
# y_train = le.transform(y_train)
X_image_train = np.array(X_image_train)
X_tag_train = np.array(tag_data)
y_train = np.array(y_train)
perm = np.arange(y_train.shape[0])
X_image_train = X_image_train[perm]
X_tag_train = X_tag_train[perm]
y_train = y_train[perm]
X_image_train.shape, X_tag_train.shape, y_train.shape
model.fit([X_image_train, X_tag_train],y_train, batch_size = 64, epochs = 10, callbacks =[callbacks])')
Model continuously trains and never stops. Note code is from Jupyter notebook, please ignore indentation and print issues.
Please do let me know if u find logic issues in model train.
Note: I have not used label encoder since my train is binary data.
I currently use Keras 2.3.1 and TensorFlow 2.1.0.
I am currently trying train a CNN using PyTorch to predict a subject's age. The age group ranges from 0 to 116. I used the same model to train it on gender classification with two options: male or female.
I ported the same code for the age classification, I was getting errors. The error was due to our last fully connected layer not return a large enough output (in terms of matrix size, it was initially returning a 50 x 2 matrix due to our gender classifier but I switched it to 50 x 117 for the age classification based on the total age options.)
My issue now is that the training loop prints epochs with a huge loss (~3.5 while before, when training the gender classification, it was sub zero.)
Below is my code:
DataLoader class:
class MyDataset(Dataset):
def __init__(self, root_directory, csv_file, image_path, transform = None):
annotated_path = os.path.relpath(csv_file) # Path to UTKFace Dataset and Annotations
self.read_in_csv = pd.read_csv(annotated_path, index_col=False)
self.image_path = os.path.join(root_directory, image_path)
self.transform = transform
self.labels = np.asarray(self.read_in_csv.loc[:,'age'])
def __getitem__(self, index):
attr = self.labels[index]
image_name = str(self.read_in_csv.loc[index, 'file'])
image = Image.open(image_name)
if self.transform:
image = self.transform(image)
dict = {'image':image, 'label':attr}
return dict
def __len__(self):
return len(self.read_in_csv.index)
CNN Architecture:
class ConvolutionalNN(nn.Module):
def __init__(self):
self.layer1 = nn.Sequential(
nn.BatchNorm2d(96), # Number of Features
self.layer2 = nn.Sequential(
nn.ReLU(), # Default = False
self.layer3 = nn.Sequential(
self.fc1 = nn.Linear(384*6*6,512)
self.fc2 = nn.Linear(512,512)
self.fc3 = nn.Linear(512,117)
def forward(self,x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = out.view(out.size(0),-1)
#print out.size()
out = F.dropout(F.relu(self.fc1(out)))
out = F.dropout(F.relu(self.fc2(out)))
out = self.fc3(out)
return out
Training Loop:
def training_loop(checkpoint = None, best=False):
current_epoch = 1
num_epochs = 50
train_acc_history = []
val_acc_history = []
epoch_history = []
learning_rate = 0.001
best_val_acc = 0.0
is_best = False
criterion = nn.CrossEntropyLoss()
## Predict the Age and Gender of the Human in the Image
optimizer = torch.optim.SGD(cnn.parameters(),lr=0.001,momentum=0.9)
if checkpoint is not None:
is_best = best
current_epoch = checkpoint['epoch']
train_acc_history = checkpoint['train_acc_history']
val_acc_history = checkpoint['val_acc_history']
best_val_acc = checkpoint['best_val_acc']
epoch_history = checkpoint['epoch_history']
print('Uploading our images now...')
for epoch in range(current_epoch, num_epochs + current_epoch):
print('Starting epoch %d / %d' % (epoch + 1, num_epochs + current_epoch))
print('Learning Rate for this epoch: {}'.format(learning_rate))
for i, batch in enumerate(train_loader):
images, labels = batch['image'], batch['label']
images = images.clone().detach()
labels = labels.clone().detach()
if use_gpu:
images = images.cuda()
labels = labels.cuda()
pred_labels = cnn(images)
loss = criterion(pred_labels,labels)
So this is my code. It does not seem to be training well.
Please let me know on what could be done to fix this.
I built a two layered LSTM model(keras model) for a movie review dataset from kaggle : Dataset
While training the model, every epoch was giving the same accuracy of 0.5098.
Then I thought it might not be learning the long distance dependencies.Then instead of LSTM I used bidirectional LSTM. But, still model's accuracy while training was 0.5098 for every epoch. I trained the model for 8 hours/35 epochs on CPU. Then I stopped training.
import pandas as pd
from sentiment_utils import *
import keras
import keras.backend as k
import numpy as np
train_data = pd.read_table('train.tsv')
X_train = train_data.iloc[:,2]
Y_train = train_data.iloc[:,3]
from sklearn.preprocessing import OneHotEncoder
Y_train = Y_train.reshape(Y_train.shape[0],1)
ohe = OneHotEncoder(categorical_features=[0])
Y_train = ohe.fit_transform(Y_train).toarray()
maxLen = len(max(X_train, key=len).split())
words_to_index, index_to_words, word_to_vec_map = read_glove_vectors("glove/glove.6B.50d.txt")
m = X_train.shape[0]
def read_glove_vectors(path):
with open(path, encoding='utf8') as f:
words = set()
word_to_vec_map = {}
for line in f:
line = line.strip().split()
cur_word = line[0]
word_to_vec_map[cur_word] = np.array(line[1:], dtype=np.float64)
i = 1
words_to_index = {}
index_to_words = {}
for w in sorted(words):
words_to_index[w] = i
index_to_words[i] = w
i = i + 1
return words_to_index, index_to_words, word_to_vec_map
def sentance_to_indices(X_train, words_to_index, maxLen, dash_index_list, keys):
m = X_train.shape[0]
X_indices = np.zeros((m, maxLen))
for i in range(m):
if i in dash_index_list:
sentance_words = X_train[i].lower().strip().split()
j = 0
for word in sentance_words:
if word in keys:
X_indices[i, j] = words_to_index[word]
j += 1
return X_indices
def pretrained_embedding_layer(word_to_vec_map, words_to_index):
emb_dim = word_to_vec_map['pen'].shape[0]
vocab_size = len(words_to_index) + 1
emb_matrix = np.zeros((vocab_size, emb_dim))
for word, index in words_to_index.items():
emb_matrix[index, :] = word_to_vec_map[word]
emb_layer= keras.layers.embeddings.Embedding(vocab_size, emb_dim, trainable= False)
return emb_layer
def get_model(input_shape, word_to_vec_map, words_to_index):
sentance_indices = keras.layers.Input(shape = input_shape, dtype='int32')
embedding_layer = pretrained_embedding_layer(word_to_vec_map, words_to_index)
embeddings = embedding_layer(sentance_indices)
X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(embeddings)
X = keras.layers.Dropout(0.5)(X)
X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(X)
X = keras.layers.Dropout(0.5)(X)
X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=False))(X)
X = keras.layers.Dropout(0.5)(X)
X = keras.layers.Dense(5)(X)
X = keras.layers.Activation('softmax')(X)
model = keras.models.Model(sentance_indices, X)
return model
model = get_model((maxLen,), word_to_vec_map,words_to_index)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
dash_index_list = []
for i in range(m):
if '-' in X_train[i]:
keys = []
for key in word_to_vec_map.keys():
X_train_indices = sentance_to_indices(X_train, words_to_index, maxLen, dash_index_list, keys)
model.fit(X_train_indices, Y_train, epochs = 50, batch_size = 32, shuffle=True)
I think the way you defined the model architecture doesn't make sense! Try looking at this example on IMDB movie reviews with LSTM on Keras github repo: Trains an LSTM model on the IMDB sentiment classification task.
I am currently learning deep learning especially GAN.
I found a simple code of GAN from a web site below.
However, in the code, I don't understand why we always need to give true label to Generator as below.
for g_index in range(g_steps):
# 2. Train G on D's response (but DO NOT train D on these labels)
gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
g_fake_data = G(gen_input)
dg_fake_decision = D(preprocess(g_fake_data.t()))
g_error = criterion(dg_fake_decision, Variable(torch.ones(1))) # we want to fool, so pretend it's all genuine
g_optimizer.step() # Only optimizes G's parameters
Specifically, on this line.
g_error = criterion(dg_fake_decision, Variable(torch.ones(1))) # we want to fool, so pretend it's all genuine
Input data for Generator is fake data(includes noise), so if we assign True labels on those input data, I think Generator ends up creating data which is similar to fake data(doesn't look like genuine). Is my understanding wrong? Sorry for the silly question, but if you have knowledge, plz help me out.
I'll put a whole code below.
#!/usr/bin/env python
# Generative Adversarial Networks (GAN) example in PyTorch.
# See related blog post at https://medium.com/#devnag/generative-adversarial-networks-gans-in-50-lines-of-code-pytorch-e81b79659e3f#.sch4xgsa9
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
# Data params
data_mean = 4
data_stddev = 1.25
# Model params
g_input_size = 1 # Random noise dimension coming into generator, per output vector
g_hidden_size = 50 # Generator complexity
g_output_size = 1 # size of generated output vector
d_input_size = 100 # Minibatch size - cardinality of distributions
d_hidden_size = 50 # Discriminator complexity
d_output_size = 1 # Single dimension for 'real' vs. 'fake'
minibatch_size = d_input_size
d_learning_rate = 2e-4 # 2e-4
g_learning_rate = 2e-4
optim_betas = (0.9, 0.999)
num_epochs = 30000
print_interval = 200
d_steps = 1 # 'k' steps in the original GAN paper. Can put the discriminator on higher training freq than generator
g_steps = 1
# ### Uncomment only one of these
#(name, preprocess, d_input_func) = ("Raw data", lambda data: data, lambda x: x)
(name, preprocess, d_input_func) = ("Data and variances", lambda data: decorate_with_diffs(data, 2.0), lambda x: x * 2)
print("Using data [%s]" % (name))
# ##### DATA: Target data and generator input data
def get_distribution_sampler(mu, sigma):
return lambda n: torch.Tensor(np.random.normal(mu, sigma, (1, n))) # Gaussian
def get_generator_input_sampler():
return lambda m, n: torch.rand(m, n) # Uniform-dist data into generator, _NOT_ Gaussian
# ##### MODELS: Generator model and discriminator model
class Generator(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(Generator, self).__init__()
self.map1 = nn.Linear(input_size, hidden_size)
self.map2 = nn.Linear(hidden_size, hidden_size)
self.map3 = nn.Linear(hidden_size, output_size)
def forward(self, x):
x = F.elu(self.map1(x))
x = F.sigmoid(self.map2(x))
return self.map3(x)
class Discriminator(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(Discriminator, self).__init__()
self.map1 = nn.Linear(input_size, hidden_size)
self.map2 = nn.Linear(hidden_size, hidden_size)
self.map3 = nn.Linear(hidden_size, output_size)
def forward(self, x):
x = F.elu(self.map1(x))
x = F.elu(self.map2(x))
return F.sigmoid(self.map3(x))
def extract(v):
return v.data.storage().tolist()
def stats(d):
return [np.mean(d), np.std(d)]
def decorate_with_diffs(data, exponent):
mean = torch.mean(data.data, 1, keepdim=True)
mean_broadcast = torch.mul(torch.ones(data.size()), mean.tolist()[0][0])
diffs = torch.pow(data - Variable(mean_broadcast), exponent)
return torch.cat([data, diffs], 1)
d_sampler = get_distribution_sampler(data_mean, data_stddev)
gi_sampler = get_generator_input_sampler()
G = Generator(input_size=g_input_size, hidden_size=g_hidden_size, output_size=g_output_size)
D = Discriminator(input_size=d_input_func(d_input_size), hidden_size=d_hidden_size, output_size=d_output_size)
criterion = nn.BCELoss() # Binary cross entropy: http://pytorch.org/docs/nn.html#bceloss
d_optimizer = optim.Adam(D.parameters(), lr=d_learning_rate, betas=optim_betas)
g_optimizer = optim.Adam(G.parameters(), lr=g_learning_rate, betas=optim_betas)
for epoch in range(num_epochs):
for d_index in range(d_steps):
# 1. Train D on real+fake
# 1A: Train D on real
d_real_data = Variable(d_sampler(d_input_size))
d_real_decision = D(preprocess(d_real_data))
d_real_error = criterion(d_real_decision, Variable(torch.ones(1))) # ones = true
d_real_error.backward() # compute/store gradients, but don't change params
# 1B: Train D on fake
d_gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
d_fake_data = G(d_gen_input).detach() # detach to avoid training G on these labels
d_fake_decision = D(preprocess(d_fake_data.t()))
d_fake_error = criterion(d_fake_decision, Variable(torch.zeros(1))) # zeros = fake
d_optimizer.step() # Only optimizes D's parameters; changes based on stored gradients from backward()
for g_index in range(g_steps):
# 2. Train G on D's response (but DO NOT train D on these labels)
gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
g_fake_data = G(gen_input)
dg_fake_decision = D(preprocess(g_fake_data.t()))
g_error = criterion(dg_fake_decision, Variable(torch.ones(1))) # we want to fool, so pretend it's all genuine
g_optimizer.step() # Only optimizes G's parameters
if epoch % print_interval == 0:
print("%s: D: %s/%s G: %s (Real: %s, Fake: %s) " % (epoch,
In this part of the code you are training G to fool D, so G generates fake data and asks D whether it thinks it's real (true labels), D's gradients are then propogated all the way to G (this is possible as D's input was G's output) so that it will learn to better fool D in the next iteration.
The inputs of G are not trainable and G only tries to transform them into real data (data similar to what d_sampler generates)
I'm new to the PyTorch framework (coming from Theano and Tensorflow mainly):
I've followed the introduction tutorial and read the Classifying Names with a Character-Level RNN one.
I now try to adapt it to a char level LSTM model in order to gain some practical experience with the framework.
Basically I feed in the model sequences of char indices and give as target to the model the same sequence but shifted by one in the future.
However I can't overfit a simple training example and I don't see what I did wrong.
If someone can spot my mistake it would be very helpful.
Here is my code:
class LSTMTxtGen(nn.Module):
def __init__(self, hidden_dim, n_layer, vocab_size):
super(LSTMTxtGen, self).__init__()
self.n_layer = n_layer
self.hidden_dim = hidden_dim
self.vocab_size = vocab_size
self.lstm = nn.LSTM(vocab_size, hidden_dim, n_layer, batch_first=True)
# The linear layer that maps from hidden state space to tag space
#self.hidden = self.init_hidden()
def init_hidden(self, batch_size):
# Before we've done anything, we dont have any hidden state.
# Refer to the Pytorch documentation to see exactly
# why they have this dimensionality.
# The axes semantics are (num_layers, minibatch_size, hidden_dim)
return (autograd.Variable(torch.zeros(self.n_layer, batch_size,
autograd.Variable(torch.zeros(self.n_layer, batch_size,
def forward(self, seqs):
self.hidden = self.init_hidden(seqs.size()[0])
lstm_out, self.hidden = self.lstm(seqs, self.hidden)
lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
lstm_out = nn.Linear(lstm_out.size(1), self.vocab_size)(lstm_out)
return lstm_out
model = LSTMTxtGen (
hidden_dim = 50,
n_layer = 3,
vocab_size = 44,
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adamax(model.parameters())
G = Data.batch_generator(5,100)
batch_per_epoch, to_idx, to_char = next(G)
X, Y = next(G)
for epoch in range(10):
losses = []
for batch_count in range(batch_per_epoch):
#mode.hidden = model.init_hidden()
#X, Y = next(G)
X = autograd.Variable(torch.from_numpy(X))
Y = autograd.Variable(torch.from_numpy(Y))
preds = model(X)
loss = criterion(preds.view(-1, model.vocab_size), Y.view(-1))
if (batch_count % 20 == 0):
print('Loss: ', losses[-1])
I'm having a difficult time visualizing what this Tensorflow class creates. I want to implement a LSTM RNN that handles 3D data.
class Grid3LSTMCell(GridRNNCell):
"""3D BasicLSTM cell
This creates a 2D cell which receives input and gives output in the first dimension.
The first dimension can optionally be non-recurrent if `non_recurrent_fn` is specified.
The second and third dimensions are LSTM.
def __init__(self, num_units, tied=False, non_recurrent_fn=None,
use_peepholes=False, forget_bias=1.0):
super(Grid3LSTMCell, self).__init__(num_units=num_units, num_dims=3,
input_dims=0, output_dims=0, priority_dims=0, tied=tied,
non_recurrent_dims=None if non_recurrent_fn is None else 0,
cell_fn=lambda n, i: rnn_cell.LSTMCell(
num_units=n, input_size=i, forget_bias=forget_bias,
The class is found in `from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell`.
This is difficult to explain, so I've provided a drawing. Here is what I want it to do...
However the comment sounds like it isn't doing this. The comment makes it sound like the RNN is still a flat RNN, where the first dimension is outputting to, what is commonly called, the outputs variable (see below). The second dimension is outputting to the next step in the RNN, and the third dimension is outputting to the next hidden layer.
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
If this is the case, what is the point in having the first and second dimensions? Aren't they essentially the same thing? The BasicLSTMCell sends the output to the next step into outputs -- in other words they are one in the same.
For reference, here is my example code...
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
import numpy as np
#define parameters
learning_rate = 0.01
batch_size = 2
n_input_x = 10
n_input_y = 10
n_input_z = 10
n_hidden = 128
n_classes = 2
n_output = n_input_x * n_classes
x = tf.placeholder("float", [n_input_x, n_input_y, n_input_z])
y = tf.placeholder("float", [n_input_x, n_input_y, n_input_z, n_classes])
weights = {}
biases = {}
for i in xrange(n_input_y * n_input_z):
weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
biases[i] = tf.Variable(tf.random_normal([n_output]))
#generate random data
input_data = np.random.rand(n_input_x, n_input_y, n_input_z)
ground_truth = np.random.rand(n_input_x, n_input_y, n_input_z, n_classes)
#build GridLSTM
def GridLSTM_network(x):
x = tf.reshape(x, [-1,n_input_x])
x = tf.split(0, n_input_y * n_input_z, x)
lstm_cell = grid_rnn_cell.Grid3LSTMCell(n_hidden)
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
output = []
for i in xrange(n_input_y * n_input_z):
output.append(tf.matmul(outputs[i], weights[i]) + biases[i])
return output
#initialize network, cost, optimizer and all variables
pred = GridLSTM_network(x)
# import pdb
# pdb.set_trace()
pred = tf.pack(pred)
pred = tf.transpose(pred,[1,0,2])
pred= tf.reshape(pred, [-1, n_input_x, n_input_y, n_input_z, n_classes])
temp_pred = tf.reshape(pred, [-1,n_classes])
temp_y = tf.reshape(y,[-1, n_classes])
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(0,tf.cast(tf.sub(tf.nn.sigmoid(temp_pred),temp_y), tf.int32))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
step = 0
while 1:
print step
step = step + 1
# pdb.set_trace
sess.run(optimizer, feed_dict={x: input_data, y: ground_truth})