Classifying sequences with different lengths with error batching - machine-learning

I'm using Keras with the TensorFlow backend. I've just figured out how to train and classify sequences of different lengths without masking, because I can't get masking to work. In the toy example I'm working with, I'm trying to train an LSTM to detect whether a sequence of arbitrary length starts with a 1 or not.
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
def gen_sig(num_samples, seq_len):
one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)
x_val = np.zeros((num_samples, seq_len), dtype=np.bool)
x_val[one_indices, 0] = 1
y_val = np.zeros(num_samples, dtype=np.bool)
y_val[one_indices] = 1
return x_val, y_val
N_train = 100
N_test = 10
recall_len = 20
X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_train, recall_len)
print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Train...')
for epoch in range(15):
mean_tr_acc = []
mean_tr_loss = []
for seq_idx in range(X_train.shape[0]):
start_val = X_train[seq_idx, 0]
assert y_train[seq_idx] == start_val
assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_train[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
x_in = np.array([[[X_train[seq_idx][j]]]])
tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
mean_tr_acc.append(tr_acc)
mean_tr_loss.append(tr_loss)
model.reset_states()
print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
print('loss training = {}'.format(np.mean(mean_tr_loss)))
print('___________________________________')
mean_te_acc = []
mean_te_loss = []
for seq_idx in range(X_test.shape[0]):
start_val = X_test[seq_idx, 0]
assert y_test[seq_idx] == start_val
assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_test[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=np.bool), y_in)
mean_te_acc.append(te_acc)
mean_te_loss.append(te_loss)
model.reset_states()
print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
print('loss testing = {}'.format(np.mean(mean_te_loss)))
print('___________________________________')
As seen in the code, my error is being batched over each time-step. This is bad for multiple reasons. How do I train the network in two steps? For example:
Run a bunch of values through the network to accumulate the error
Adjust the weights of the network given this accumulated error

To do what is described in the original question, the easiest way is to train the original network with masking, but then test with a stateful network so any length input can be classified:
import numpy as np
np.random.seed(1)
import tensorflow as tf
tf.set_random_seed(1)
from keras import models
from keras.layers import Dense, Masking, LSTM
import matplotlib.pyplot as plt
def stateful_model():
hidden_units = 256
model = models.Sequential()
model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='relu', name='output'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
return model
def train_rnn(x_train, y_train, max_len, mask):
epochs = 10
batch_size = 200
vec_dims = 1
hidden_units = 256
in_shape = (max_len, vec_dims)
model = models.Sequential()
model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
model.add(LSTM(hidden_units, return_sequences=False))
model.add(Dense(1, activation='relu', name='output'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
validation_split=0.05)
return model
def gen_train_sig_cls_pair(t_stops, num_examples, mask):
x = []
y = []
max_t = int(np.max(t_stops))
for t_stop in t_stops:
one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
sig = np.zeros((num_examples, max_t), dtype=np.int8)
sig[one_indices, 0] = 1
sig[:, t_stop:] = mask
x.append(sig)
cls = np.zeros(num_examples, dtype=np.bool)
cls[one_indices] = 1
y.append(cls)
return np.concatenate(x, axis=0), np.concatenate(y, axis=0)
def gen_test_sig_cls_pair(t_stops, num_examples):
x = []
y = []
for t_stop in t_stops:
one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
sig = np.zeros((num_examples, t_stop), dtype=np.bool)
sig[one_indices, 0] = 1
x.extend(list(sig))
cls = np.zeros((num_examples, t_stop), dtype=np.bool)
cls[one_indices] = 1
y.extend(list(cls))
return x, y
if __name__ == '__main__':
noise_mag = 0.01
mask_val = -10
signal_lengths = (10, 15, 20)
x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)
mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val)
testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)
state_mod = stateful_model()
state_mod.set_weights(mod.get_weights())
res = []
for s_i in range(len(testing_dat)):
seq_in = list(testing_dat[s_i])
seq_len = len(seq_in)
for t_i in range(seq_len):
res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]])))
state_mod.reset_states()
fig, axes = plt.subplots(2)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(res, "ro", label="result", alpha=0.2)
axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
plt.show()

Related

Multi GPU training in Pytorch causes my system to freeze

When I wrap my model in nn.DataParallel(model) and start training my screen freezes and I have to manually restart the computer every time.
I've tried a few variations, like not adding .to(device) to every x and y, but whenever nn.DataParallel is used I seem to cause the computer to freeze.
import random
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import models, datasets, transforms
import torch.utils.data
DataLoader = torch.utils.data.DataLoader
random_split = torch.utils.data.random_split
global_rank = 0
MNIST = datasets.MNIST
class MLPClassifier(nn.Module):
def __init__(self):
super(MLPClassifier, self).__init__()
self.layer_1 = torch.nn.Linear(28 * 28, 128)
self.layer_2 = torch.nn.Linear(128, 444)
self.layer_3 = torch.nn.Linear(444, 333)
self.layer_4 = torch.nn.Linear(333, 10)
def forward(self, x):
x = x.view(x.size(0), -1)
x = self.layer_1(x)
x = F.relu(x)
x = self.layer_2(x)
x = F.relu(x)
x = self.layer_3(x)
x = F.relu(x)
x = self.layer_4(x)
return x
# Download data
if global_rank == 0:
mnist_train = MNIST(os.getcwd(), train=True, download=True)
mnist_test = MNIST(os.getcwd(), train=False, download=True)
# dist.barrier()
#transforms
transform=transforms.Compose([transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))])
mnist_train = MNIST(os.getcwd(), train=True, transform=transform)
# Split dataset
mnist_train, mnist_val = random_split(mnist_train, [55000, 5000])
mnist_test = MNIST(os.getcwd(), train=False, download=True)
# Build dataloaders
mnist_train = DataLoader(mnist_train, batch_size=256)
mnist_val = DataLoader(mnist_val, batch_size=256)
mnist_test = DataLoader(mnist_test, batch_size=256)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MLPClassifier()
model = nn.DataParallel(model)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Train loop
model.train()
num_epochs = 1
for epoch in range(num_epochs):
for train_batch in mnist_train:
x, y = train_batch
logits = model(x.to(device))
loss = F.cross_entropy(logits, y.to(device))
print('rain loss: ', loss.item())
loss.backward()
optimizer.step()
optimizer.zero_grad()
# EVAL LOOP
model.eval()
with torch.no_grad():
val_loss_a = []
for val_batch in mnist_val:
x, y = val_batch
logits = model(x.to(device))
val_loss = F.cross_entropy(logits, y.to(device))
val_loss_a.append(val_loss)
avg_val_loss = torch.stack(val_loss_a).mean()
model.train()

How to fix this loss is NaN problem in PyTorch of this RNN with GRU?

I'm completely new to PyTorch and tried out some models. I wanted to make an easy prediction rnn of stock market prices and found the following code:
I load the data set with pandas then split it into training and test data and load it into a pytorch DataLoader for later usage in training process. The model is defined in the GRU class. But the actual problem seems to be the optimisation. I think the problem could be gradient explosion. I thought about adding gradient clipping but the GRU design should actually prevent gradient explosion or am I wrong? What could cause the loss to be instantly NaN (already in the first epoch)
from sklearn.preprocessing import MinMaxScaler
import time
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
batch_size = 200
input_dim = 1
hidden_dim = 32
num_layers = 2
output_dim = 1
num_epochs = 10
nvda = pd.read_csv('dataset/stocks/NVDA.csv')
price = nvda[['Close']]
scaler = MinMaxScaler(feature_range=(-1, 1))
price['Close'] = scaler.fit_transform(price['Close'].values.reshape(-1, 1))
def split_data(stock, lookback):
data_raw = stock.to_numpy() # convert to numpy array
data = []
# create all possible sequences of length seq_len
for index in range(len(data_raw) - lookback):
data.append(data_raw[index: index + lookback])
data = np.array(data)
test_set_size = int(np.round(0.2 * data.shape[0]))
train_set_size = data.shape[0] - (test_set_size)
x_train = data[:train_set_size, :-1, :]
y_train = data[:train_set_size, -1, :]
x_test = data[train_set_size:, :-1]
y_test = data[train_set_size:, -1, :]
return [x_train, y_train, x_test, y_test]
lookback = 20 # choose sequence length
x_train, y_train, x_test, y_test = split_data(price, lookback)
train_data = TensorDataset(torch.from_numpy(x_train).float(), torch.from_numpy(y_train).float())
train_data = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)
test_data = TensorDataset(torch.from_numpy(x_test).float(), torch.from_numpy(y_test).float())
test_data = DataLoader(test_data, shuffle=True, batch_size=batch_size, drop_last=True)
class GRU(nn.Module):
def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
super(GRU, self).__init__()
self.hidden_dim = hidden_dim
self.num_layers = num_layers
self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
self.fc = nn.Linear(hidden_dim, output_dim)
self.relu = nn.ReLU()
def forward(self, x, h):
out, h = self.gru(x, h)
out = self.fc(self.relu(out[:, -1]))
return out, h
def init_hidden(self, batch_size):
weight = next(self.parameters()).data
hidden = weight.new(self.num_layers, batch_size, self.hidden_dim).zero_()
return hidden
model = GRU(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0000000001)
model.train()
start_time = time.time()
h = model.init_hidden(batch_size)
for epoch in range(1, num_epochs+1):
for x, y in train_data:
h = h.data
model.zero_grad()
y_train_pred, h = model(x, h)
loss = criterion(y_train_pred, y)
print("Epoch ", epoch, "MSE: ", loss.item())
loss.backward()
optimizer.step()
training_time = time.time() - start_time
print("Training time: {}".format(training_time))
This is the dataset which I used.
Not sure if it is the case, but did you preprocess and cleaned the data? I do not know it but maybe there are some values missing or it's something strange about it. I checked it here
https://ca.finance.yahoo.com/quote/NVDA/history?p=NVDA and it seems that every couple of rows there is some inconsistency. Like I said, I do not know if it's the case but it may be.

Accuracy per epoch in PyTorch

I have made a chatbot using pytorch and would like to display accuracy on every epoch. I am not quite understanding how to do that. I can display loss but cant figure out how to display my accuracy
Here is my code :-
from nltk_utils import tokenize, stem, bag_of_words
import json
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from model import NeuralNet
from torch.autograd import Variable
all_words=[]
tags=[]
xy=[]
questionsP1=[]
questionsP2=[]
questionsP3=[]
questionsP4=[]
questionTag={}
with open('new.json', encoding="utf8") as file:
data = json.load(file)
for intent in data["intents"]:
for proficiency in intent["proficiency"]:
for questions in proficiency["questions"]:
for responses in questions["responses"]:
wrds = tokenize(responses)
all_words.extend(wrds)
xy.append((wrds, questions["tag"]))
if questions["tag"] in tags:
print(questions["tag"])
if questions["tag"] not in tags:
tags.append(questions["tag"])
if proficiency["level"] == "P1":
questionsP1.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P2":
questionsP2.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P3":
questionsP3.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P4":
questionsP4.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
ignore_words = ['?', '!', '.', ',']
all_words = [stem(x) for x in all_words if x not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))
X_train = []
y_train = []
for tokenized_response, tag in xy:
bag = bag_of_words(tokenized_response, all_words)
print(bag)
X_train.append( bag )
label = tags.index( tag )
y_train.append( label )
print(y_train)
X_train = np.array( X_train )
y_train = np.array( y_train )
class ChatDataset(Dataset):
def __init__(self):
self.n_samples = len(X_train)
self.x_data = X_train
self.y_data = y_train
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.n_samples
#HyperParameters
batch_size = 8
hidden_size = 8
output_size = len(tags)
input_size = len(X_train[0])
learning_rate = 0.001
num_epochs = 994
dataset = ChatDataset()
train_loader = DataLoader(dataset = dataset, batch_size=batch_size, shuffle = True, num_workers = 2)
device = 'cpu'
model = NeuralNet(input_size, hidden_size, output_size).to(device)
#loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
for epoch in range( num_epochs ):
for (words, labels) in train_loader:
words = words.to(device)
labels = labels.to(device)
#Forward
outputs = model(words)
loss = criterion(outputs, labels)
#backward and optimizer step
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'epoch {epoch + 1}/ {num_epochs}, loss={loss.item(): .4f}')
print(f'final loss, loss={loss.item(): .4f}')
data = {
"model_state": model.state_dict(),
"input_size": input_size,
"output_size": output_size,
"hidden_size": hidden_size,
"all_words": all_words,
"tags": tags,
}
FILE = "data.pth"
torch.save(data, FILE)
with open('new.json', 'r') as f:
intents = json.load(f)
bot_name = "Sam"
while True:
sentence = input("You: ")
if sentence == 'quit':
break
sentence = tokenize(sentence)
X = bag_of_words(sentence, all_words)
X = X.reshape( 1, X.shape[0])
X = torch.from_numpy( X )
output = model( X )
_, predicted = torch.max(output, dim=1)
tag = tags[predicted.item()]
print(tag)
probs = torch.softmax(output, dim=1)
probs = probs[0][predicted.item()]
print( probs.item() )
if probs.item() > 0.75:
for intent in intents["intents"]:
for proficiency in intent["proficiency"]:
for questions in proficiency["questions"]:
if questions["tag"] == tag:
print(f'{bot_name}: {questions["question"]}')
else:
print(f'{bot_name}: Probability Too Low')
print(f'Training Complete. File saved to {FILE}')
My chatbot is working inverselt... i am trying to map the answer to the right question.
Any help would be appreciated.
According to your code labels contains the indices that should have the highest values in outputs in order for the samples to be counted as correct predictions.
So to calculate the validation accuracy:
correct = 0
total = 0
model.eval()
with torch.no_grad():
for (words, labels) in validation_loader:
words = words.to(device)
labels = labels.to(device)
total += labels.shape[0]
outputs = model(words)
correct += torch.sum(labels == outputs.argmax(dim=-1))
accuracy = correct / total

Error when checking input: expected embedding_1_input to have shape (4,) but got array with shape (1,)

I use pretrained embedding vectors for my keras model. Before I did it everything worked and now I get this error:
ValueError: Error when checking input: expected embedding_1_input to
have shape (4,) but got array with shape (1,)
Maybe somebody can help me, what I do wrong here. I am not sure if I did correct model.fit and model.evaluate. Maybe there is a problem?
import csv
import numpy as np
np.random.seed(42)
from keras.models import Sequential, Model
from keras.layers import *
from random import shuffle
from sklearn.model_selection import train_test_split
from keras import optimizers
from keras.callbacks import EarlyStopping
from itertools import groupby
from numpy import asarray
from numpy import zeros
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
#function makes a list of antonyms and synonyms from the files
def preprocessing(filename):
list_words = []
with open(filename) as tsv:
for line in csv.reader(tsv, dialect="excel-tab"):
list_words.append([line[0], line[1]])
return list_words
#function make a list of not relevant pairs by mixing synonyms and
antonyms
def notrelevant(filename, filename2):
list_words = []
with open(filename) as tsv:
with open(filename2) as tsv2:
for lines in zip(csv.reader(tsv, dialect="excel-tab"),csv.reader(tsv2, dialect="excel-tab")):
list_words.append([lines[0][0], lines[1][1]])
return list_words
antonyms_list = preprocessing("antonyms.tsv")
synonyms_list = preprocessing("synonyms.tsv")
notrelevant_list = notrelevant("antonyms.tsv", "synonyms.tsv")
# function combines all antonyms, synonyms in one list with labels,
shuffle them
def data_prepare(ant,syn,nrel):
data = []
for elem1,elem2 in ant:
data.append([[elem1,elem2], "Antonyms"])
for elem1, elem2 in syn:
data.append([[elem1, elem2], "Synonyms"])
for elem1, elem2 in nrel:
data.append([[elem1, elem2], "Not relevant"])
shuffle(data)
return data
data_with_labels_shuffled =
data_prepare(antonyms_list,synonyms_list,notrelevant_list)
def label_to_onehot(labels):
mapping = {label: i for i, label in enumerate(set(labels))}
one_hot = np.empty((len(labels), 3))
for i, label in enumerate(labels):
entry = [0] * len(mapping)
entry[mapping[label]] = 1
one_hot[i] = entry
return (one_hot)
def words_to_ids(labels):
vocabulary = []
word_to_id = {}
ids = []
for word1,word2 in labels:
vocabulary.append(word1)
vocabulary.append(word2)
counter = 0
for word in vocabulary:
if word not in word_to_id:
word_to_id[word] = counter
counter += 1
for word1,word2 in labels:
ids.append([word_to_id [word1], word_to_id [word2]])
return (ids)
def split_data(datas):
data = np.array(datas)
X, y = data[:, 0], data[:, 1]
# split the data to get 60% train and 40% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
y_train = label_to_onehot(y_train)
X_dev, X_test, y_dev, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)
y_dev = label_to_onehot(y_dev)
y_test = label_to_onehot(y_test)
return X_train, y_train, X_dev, y_dev, X_test, y_test
X_train, y_train, X_dev, y_dev, X_test, y_test = split_data(data_with_labels_shuffled)
# prepare tokenizer
t = Tokenizer()
t.fit_on_texts(X_train)
vocab_size = len(t.word_index) + 1
# integer encode the documents
encoded_docs = t.texts_to_sequences(X_train)
# load the whole embedding into memory
embeddings_index = dict()
f = open('glove.6B.50d.txt')
for line in f:
values = line.split()
word = values[0]
coefs = asarray(values[1:], dtype='float32')
embeddings_index[word] = coefs
f.close()
# create a weight matrix for words in training docs
embedding_matrix = zeros((vocab_size, 50))
for word, i in t.word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
embedding_matrix[i] = embedding_vector
VOCABSIZE = len(data_with_labels_shuffled)
EMBSIZE = 50
HIDDENSIZE = 50
KERNELSIZE = 5
MAXEPOCHS = 5
model = Sequential()
model.add(Embedding(vocab_size, 50, weights=[embedding_matrix],
input_length=4, trainable=False))
model.add(Dropout(0.25))
model.add(Bidirectional(GRU(units = HIDDENSIZE // 2)))
#model.add(Flatten())
model.add(Dense(units = 3, activation = "softmax"))
model.compile(loss='categorical_crossentropy', optimizer="adam",
metrics=['accuracy'])
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='min')
model.fit (X_train, y_train,
batch_size=64,
callbacks = [earlystop],
epochs=100,
validation_data=(X_dev, y_dev),
verbose=1)
scores = model.evaluate(X_test, y_testbatch_size=64)
print("Accuracy is: %.2f%%" %(scores[1] * 100))
I think the problem is that you should pass encoded_docs to your model.fit() function instead of X_train since encoded_docs contains the tokenization of your training data and X_train still only contains a list of words. Moreover, you have to make sure that the input_length parameter of your Embedding layer matches the length of these tokenized training examples that you have created in encoded_docs.

NLP model's accuracy stuck on 0.5098 while training

I built a two layered LSTM model(keras model) for a movie review dataset from kaggle : Dataset
While training the model, every epoch was giving the same accuracy of 0.5098.
Then I thought it might not be learning the long distance dependencies.Then instead of LSTM I used bidirectional LSTM. But, still model's accuracy while training was 0.5098 for every epoch. I trained the model for 8 hours/35 epochs on CPU. Then I stopped training.
Code:
import pandas as pd
from sentiment_utils import *
import keras
import keras.backend as k
import numpy as np
train_data = pd.read_table('train.tsv')
X_train = train_data.iloc[:,2]
Y_train = train_data.iloc[:,3]
from sklearn.preprocessing import OneHotEncoder
Y_train = Y_train.reshape(Y_train.shape[0],1)
ohe = OneHotEncoder(categorical_features=[0])
Y_train = ohe.fit_transform(Y_train).toarray()
maxLen = len(max(X_train, key=len).split())
words_to_index, index_to_words, word_to_vec_map = read_glove_vectors("glove/glove.6B.50d.txt")
m = X_train.shape[0]
def read_glove_vectors(path):
with open(path, encoding='utf8') as f:
words = set()
word_to_vec_map = {}
for line in f:
line = line.strip().split()
cur_word = line[0]
words.add(cur_word)
word_to_vec_map[cur_word] = np.array(line[1:], dtype=np.float64)
i = 1
words_to_index = {}
index_to_words = {}
for w in sorted(words):
words_to_index[w] = i
index_to_words[i] = w
i = i + 1
return words_to_index, index_to_words, word_to_vec_map
def sentance_to_indices(X_train, words_to_index, maxLen, dash_index_list, keys):
m = X_train.shape[0]
X_indices = np.zeros((m, maxLen))
for i in range(m):
if i in dash_index_list:
continue
sentance_words = X_train[i].lower().strip().split()
j = 0
for word in sentance_words:
if word in keys:
X_indices[i, j] = words_to_index[word]
j += 1
return X_indices
def pretrained_embedding_layer(word_to_vec_map, words_to_index):
emb_dim = word_to_vec_map['pen'].shape[0]
vocab_size = len(words_to_index) + 1
emb_matrix = np.zeros((vocab_size, emb_dim))
for word, index in words_to_index.items():
emb_matrix[index, :] = word_to_vec_map[word]
emb_layer= keras.layers.embeddings.Embedding(vocab_size, emb_dim, trainable= False)
emb_layer.build((None,))
emb_layer.set_weights([emb_matrix])
return emb_layer
def get_model(input_shape, word_to_vec_map, words_to_index):
sentance_indices = keras.layers.Input(shape = input_shape, dtype='int32')
embedding_layer = pretrained_embedding_layer(word_to_vec_map, words_to_index)
embeddings = embedding_layer(sentance_indices)
X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(embeddings)
X = keras.layers.Dropout(0.5)(X)
X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(X)
X = keras.layers.Dropout(0.5)(X)
X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=False))(X)
X = keras.layers.Dropout(0.5)(X)
X = keras.layers.Dense(5)(X)
X = keras.layers.Activation('softmax')(X)
model = keras.models.Model(sentance_indices, X)
return model
model = get_model((maxLen,), word_to_vec_map,words_to_index)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
dash_index_list = []
for i in range(m):
if '-' in X_train[i]:
dash_index_list.append(i)
keys = []
for key in word_to_vec_map.keys():
keys.append(key)
X_train_indices = sentance_to_indices(X_train, words_to_index, maxLen, dash_index_list, keys)
model.fit(X_train_indices, Y_train, epochs = 50, batch_size = 32, shuffle=True)
I think the way you defined the model architecture doesn't make sense! Try looking at this example on IMDB movie reviews with LSTM on Keras github repo: Trains an LSTM model on the IMDB sentiment classification task.

Resources