NLP model's accuracy stuck at 0.5098 while training - machine-learning

I built a two-layer LSTM model (Keras) for a movie review dataset from Kaggle: Dataset
While training the model, every epoch gave the same accuracy of 0.5098.
I then thought it might not be learning the long-distance dependencies, so instead of LSTM I used bidirectional LSTM. But the model's training accuracy was still 0.5098 for every epoch. I trained the model for 8 hours / 35 epochs on CPU, then stopped training.
Code:
import pandas as pd
from sentiment_utils import *
import keras
import keras.backend as k
import numpy as np

train_data = pd.read_table('train.tsv')
X_train = train_data.iloc[:,2]
Y_train = train_data.iloc[:,3]

from sklearn.preprocessing import OneHotEncoder
Y_train = Y_train.reshape(Y_train.shape[0],1)
ohe = OneHotEncoder(categorical_features=[0])
Y_train = ohe.fit_transform(Y_train).toarray()

maxLen = len(max(X_train, key=len).split())

def read_glove_vectors(path):
    with open(path, encoding='utf8') as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            line = line.strip().split()
            cur_word = line[0]
            words.add(cur_word)
            word_to_vec_map[cur_word] = np.array(line[1:], dtype=np.float64)
        i = 1
        words_to_index = {}
        index_to_words = {}
        for w in sorted(words):
            words_to_index[w] = i
            index_to_words[i] = w
            i = i + 1
    return words_to_index, index_to_words, word_to_vec_map

words_to_index, index_to_words, word_to_vec_map = read_glove_vectors("glove/glove.6B.50d.txt")
m = X_train.shape[0]

def sentance_to_indices(X_train, words_to_index, maxLen, dash_index_list, keys):
    m = X_train.shape[0]
    X_indices = np.zeros((m, maxLen))
    for i in range(m):
        if i in dash_index_list:
            continue
        sentance_words = X_train[i].lower().strip().split()
        j = 0
        for word in sentance_words:
            if word in keys:
                X_indices[i, j] = words_to_index[word]
                j += 1
    return X_indices

def pretrained_embedding_layer(word_to_vec_map, words_to_index):
    emb_dim = word_to_vec_map['pen'].shape[0]
    vocab_size = len(words_to_index) + 1
    emb_matrix = np.zeros((vocab_size, emb_dim))
    for word, index in words_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    emb_layer = keras.layers.embeddings.Embedding(vocab_size, emb_dim, trainable=False)
    emb_layer.build((None,))
    emb_layer.set_weights([emb_matrix])
    return emb_layer

def get_model(input_shape, word_to_vec_map, words_to_index):
    sentance_indices = keras.layers.Input(shape=input_shape, dtype='int32')
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, words_to_index)
    embeddings = embedding_layer(sentance_indices)
    X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(embeddings)
    X = keras.layers.Dropout(0.5)(X)
    X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(X)
    X = keras.layers.Dropout(0.5)(X)
    X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=False))(X)
    X = keras.layers.Dropout(0.5)(X)
    X = keras.layers.Dense(5)(X)
    X = keras.layers.Activation('softmax')(X)
    model = keras.models.Model(sentance_indices, X)
    return model

model = get_model((maxLen,), word_to_vec_map, words_to_index)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

dash_index_list = []
for i in range(m):
    if '-' in X_train[i]:
        dash_index_list.append(i)

keys = []
for key in word_to_vec_map.keys():
    keys.append(key)

X_train_indices = sentance_to_indices(X_train, words_to_index, maxLen, dash_index_list, keys)
model.fit(X_train_indices, Y_train, epochs=50, batch_size=32, shuffle=True)

I think the way you have defined the model architecture doesn't make sense. Try looking at this example of an LSTM trained on IMDB movie reviews in the Keras GitHub repo: Trains an LSTM model on the IMDB sentiment classification task.
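For reference, here is a minimal sketch along the lines of that Keras IMDB example, adapted to the 5-class labels of the Kaggle dataset. The vocabulary size, sequence length, and epoch count below are illustrative assumptions, not values taken from your data:

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.sequence import pad_sequences

max_features = 20000   # assumed vocabulary size
maxlen = 50            # assumed maximum phrase length

model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(5, activation='softmax'))   # 5 sentiment classes (0-4) in the Kaggle dataset

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# X_train_indices: padded index sequences, Y_train: one-hot labels, as in your code
# X_train_indices = pad_sequences(X_train_indices, maxlen=maxlen)
# model.fit(X_train_indices, Y_train, batch_size=32, epochs=5, validation_split=0.1)

Starting from a single, smaller recurrent layer like this makes it easier to check whether the data pipeline (indices and one-hot labels) is the problem before stacking more layers.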

Related

How to deal with a big dataset when using PyG?

I am a beginner learning to use torch_geometric to build my GNN models. I referred to the PyG example of node classification and built my own dataset; however, when I tried to run the code on my GPU it told me that it ran out of memory. Maybe my dataset is too large to fit in GPU memory? I don't know. I share a machine with 8 A100s with my classmates. Could you please give me some suggestions? Thank you!
from torch_geometric.nn import GATConv, GCNConv
from torch_geometric.data import Dataset, DataLoader, HeteroData, Data
import torch.nn as nn
from torch_geometric.nn import DataParallel
from torch_geometric.loader import DataListLoader
import torch.nn.functional as F
import torch
import pandas as pd
from transformers import BertTokenizer, BertModel
import pickle
import time
from tqdm import tqdm
from numba import jit
import json
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

plt.grid(True)
plt.grid(color='gray',
         linestyle='--',
         linewidth=1,
         alpha=0.3)

begin = time.time()

punctuation = "!#$%&'\(\)-*+,-./:;<=>?#\\\[\]^_`{|}~():;,。【】·、“”‘’《》\"%……——·"

def dataCleanifier(s):
    for i in punctuation:
        s.replace(i, " ")
    s = s.replace(" ", " ")
    s = s.replace("\n", "")
    return s

class BertClassifier(nn.Module):
    def __init__(self, bertType: str, max_length, tag_size):
        super(BertClassifier, self).__init__()
        self.bertType = bertType
        self.tokenizer = BertTokenizer.from_pretrained(self.bertType)
        self.encoder = BertModel.from_pretrained(self.bertType)
        self.outputDim = self.encoder.pooler.dense.out_features
        self.max_length = max_length
        self.tag_size = tag_size
        self.dropout = nn.Dropout(0.1)
        self.activation = nn.LeakyReLU(0.1)
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, 512, (k, self.outputDim)) for k in (2, 3, 4)])
        self.fc_cnn = nn.Linear(512 * len((2, 3, 4)), self.tag_size)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x):
        x = self.tokenizer.batch_encode_plus(x, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        attention = x["attention_mask"]
        x = x["input_ids"]
        x = x.cuda(2)
        x = self.encoder(x, attention_mask=attention.cuda(2))['last_hidden_state'][:]
        x = x.unsqueeze(1)
        encoded = torch.cat([self.conv_and_pool(x, conv) for conv in self.convs], 1)
        x = self.fc_cnn(encoded)
        x = self.activation(x)
        # x = F.softmax(x, dim=1)
        return x, encoded

class ContrastiveLoss(nn.Module):
    def __init__(self):
        super(ContrastiveLoss, self).__init__()

    def forward(self, representations, label, y_hat):
        n = label.shape[0]
        T = 0.5
        similarity_matrix = F.cosine_similarity(representations.unsqueeze(1), representations.unsqueeze(0), dim=2)
        mask = torch.ones_like(similarity_matrix) * (label.expand(n, n).eq(label.expand(n, n).t()))
        mask_no_sim = torch.ones_like(mask) - mask
        mask_dui_jiao_0 = torch.ones(n, n) - torch.eye(n, n)
        similarity_matrix = torch.exp(similarity_matrix / T)
        similarity_matrix = similarity_matrix * mask_dui_jiao_0
        sim = mask * similarity_matrix
        no_sim = similarity_matrix - sim
        no_sim_sum = torch.sum(no_sim, dim=1)
        no_sim_sum_expend = no_sim_sum.repeat(n, 1).T
        sim_sum = sim + no_sim_sum_expend
        loss = torch.div(sim, sim_sum)
        loss = mask_no_sim + loss + torch.eye(n, n)
        # next, compute the loss over the batch
        loss = -torch.log(loss)  # take -log
        loss = torch.sum(torch.sum(loss, dim=1)) / (2 * n) + nn.CrossEntropyLoss()(y_hat, label)
        return loss

class GAT(nn.Module):
    def __init__(self, hidden_channels) -> None:
        super().__init__()
        self.conv1 = GATConv(data.num_features, hidden_channels)
        self.conv2 = GATConv(hidden_channels, 9)
        self.activation = nn.ReLU()

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = self.activation(x)
        # print(x)
        # x = F.dropout(x, p=0.2)
        x = self.conv2(x, edge_index)
        return x

x = None
y = None
edge_index = None
train_mask = None

with open("X.pkl", "rb") as f1:
    x = pickle.load(f1)
with open("Y.pkl", "rb") as f2:
    y = pickle.load(f2)
    y = y.long()
with open("edge_index.pkl", "rb") as f3:
    edge_index = pickle.load(f3)
    # print(edge_index.shape)
with open("train_mask.pkl", "rb") as f4:
    train_mask = pickle.load(f4)

data = Data(x=x, y=y, edge_index=edge_index)
data.train_mask = train_mask

model = GAT(hidden_channels=32)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, 100, 0.8)
criterion = ContrastiveLoss()

def train():
    model.train()
    optimizer.zero_grad()  # Clear gradients.
    out = model(data.x, data.edge_index)  # Perform a single forward pass.
    loss = criterion(data.x[data.train_mask], data.y[data.train_mask], out[data.train_mask])  # Compute the loss solely based on the training nodes.
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    return loss

def test():
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with highest probability.
    test_correct = pred[data.train_mask] == data.y[data.train_mask]  # Check against ground-truth labels.
    test_acc = int(test_correct.sum()) / int(data.train_mask.sum())  # Derive ratio of correct predictions.
    return test_acc

accs = []
for epoch in range(1, 1025):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}', end=" ")
    acc = test()
    print("acc:", acc)
    accs.append(acc)
    scheduler.step()

plt.plot(range(len(accs)), accs)
print(time.time() - begin)
with open("./accs_gat_GCL.pkl", "wb") as f1:
    pickle.dump(accs, f1)
plt.savefig("./res_GAT_GCL.png", dpi=600)
I have tried to use DataParallel to load my model and dataset across multiple GPUs, but that failed.
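One direction worth trying (a sketch, not a tested fix for your exact setup) is to mini-batch the graph with torch_geometric.loader.NeighborLoader instead of running a full-graph forward pass, so that only a sampled subgraph has to sit in GPU memory at any time. It keeps the call pattern of your criterion, and assumes data, model, optimizer, criterion and device are defined as in your code:

from torch_geometric.loader import NeighborLoader

# Sample a 2-hop neighbourhood around 1024 seed (training) nodes per batch.
train_loader = NeighborLoader(
    data,
    num_neighbors=[15, 10],     # neighbours sampled per hop
    batch_size=1024,
    input_nodes=data.train_mask,
)

def train():
    model.train()
    total_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        # Only the first batch.batch_size nodes of each mini-batch are the seed nodes.
        loss = criterion(batch.x[:batch.batch_size],
                         batch.y[:batch.batch_size],
                         out[:batch.batch_size])
        loss.backward()
        optimizer.step()
        total_loss += float(loss)
    return total_loss / len(train_loader)

Because each batch is a small subgraph, this usually fits on a single GPU even for large graphs, which is often simpler than getting DataParallel to work.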

How to use variable learning rate that decreases with loss using pytorch-geometric?

I have the following code snippet from a PyTorch Geometric example. I want to use a learning rate that decreases as the loss value decreases during training. I tried using a scheduler, but that didn't work for me.
A clean code snippet is below. Can anyone provide valuable suggestions or help on this matter?
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import AGNNConv

dataset = 'Cora'
path = 'Cora'
dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
data = dataset[0]

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.lin1 = torch.nn.Linear(dataset.num_features, 16)
        self.prop1 = AGNNConv(requires_grad=False)
        self.prop2 = AGNNConv(requires_grad=True)
        self.lin2 = torch.nn.Linear(16, dataset.num_classes)

    def forward(self):
        x = F.dropout(data.x, training=self.training)
        x = F.relu(self.lin1(x))
        x = self.prop1(x, data.edge_index)
        x = self.prop2(x, data.edge_index)
        x = F.dropout(x, training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

def train():
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()

def test():
    model.eval()
    logits, accs = model(), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs

best_val_acc = test_acc = 0
for epoch in range(1, 201):
    train()
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
    print(log.format(epoch, train_acc, best_val_acc, test_acc))
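One option (a sketch, not taken from the original example) is torch.optim.lr_scheduler.ReduceLROnPlateau, which lowers the learning rate whenever the monitored loss stops improving. It assumes train() is modified to return the loss value:

from torch.optim.lr_scheduler import ReduceLROnPlateau

# Halve the learning rate if the training loss has not improved for 10 epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

def train():
    model.train()
    optimizer.zero_grad()
    loss = F.nll_loss(model()[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

for epoch in range(1, 201):
    loss = train()
    scheduler.step(loss)   # ReduceLROnPlateau takes the metric it should monitor
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, LR: {optimizer.param_groups[0]["lr"]:.5f}')

If you would rather drive the schedule from the validation loss, pass that value to scheduler.step() instead.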

Accuracy per epoch in PyTorch

I have made a chatbot using PyTorch and would like to display the accuracy on every epoch. I am not quite understanding how to do that. I can display the loss but can't figure out how to display my accuracy.
Here is my code:
from nltk_utils import tokenize, stem, bag_of_words
import json
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from model import NeuralNet
from torch.autograd import Variable

all_words = []
tags = []
xy = []
questionsP1 = []
questionsP2 = []
questionsP3 = []
questionsP4 = []
questionTag = {}

with open('new.json', encoding="utf8") as file:
    data = json.load(file)

for intent in data["intents"]:
    for proficiency in intent["proficiency"]:
        for questions in proficiency["questions"]:
            for responses in questions["responses"]:
                wrds = tokenize(responses)
                all_words.extend(wrds)
                xy.append((wrds, questions["tag"]))

            if questions["tag"] in tags:
                print(questions["tag"])

            if questions["tag"] not in tags:
                tags.append(questions["tag"])

            if proficiency["level"] == "P1":
                questionsP1.append(questions["question"])
                questionTag[questions["question"]] = questions["tag"]

            if proficiency["level"] == "P2":
                questionsP2.append(questions["question"])
                questionTag[questions["question"]] = questions["tag"]

            if proficiency["level"] == "P3":
                questionsP3.append(questions["question"])
                questionTag[questions["question"]] = questions["tag"]

            if proficiency["level"] == "P4":
                questionsP4.append(questions["question"])
                questionTag[questions["question"]] = questions["tag"]

ignore_words = ['?', '!', '.', ',']
all_words = [stem(x) for x in all_words if x not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))

X_train = []
y_train = []
for tokenized_response, tag in xy:
    bag = bag_of_words(tokenized_response, all_words)
    print(bag)
    X_train.append(bag)
    label = tags.index(tag)
    y_train.append(label)

print(y_train)

X_train = np.array(X_train)
y_train = np.array(y_train)

class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples

# Hyperparameters
batch_size = 8
hidden_size = 8
output_size = len(tags)
input_size = len(X_train[0])
learning_rate = 0.001
num_epochs = 994

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)

device = 'cpu'
model = NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward and optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'epoch {epoch + 1}/ {num_epochs}, loss={loss.item(): .4f}')

print(f'final loss, loss={loss.item(): .4f}')

data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "output_size": output_size,
    "hidden_size": hidden_size,
    "all_words": all_words,
    "tags": tags,
}

FILE = "data.pth"
torch.save(data, FILE)

with open('new.json', 'r') as f:
    intents = json.load(f)

bot_name = "Sam"

while True:
    sentence = input("You: ")
    if sentence == 'quit':
        break

    sentence = tokenize(sentence)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X)

    output = model(X)
    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]
    print(tag)

    probs = torch.softmax(output, dim=1)
    probs = probs[0][predicted.item()]
    print(probs.item())

    if probs.item() > 0.75:
        for intent in intents["intents"]:
            for proficiency in intent["proficiency"]:
                for questions in proficiency["questions"]:
                    if questions["tag"] == tag:
                        print(f'{bot_name}: {questions["question"]}')
    else:
        print(f'{bot_name}: Probability Too Low')

print(f'Training Complete. File saved to {FILE}')
My chatbot is working inversely... I am trying to map the answer to the right question.
Any help would be appreciated.
According to your code, labels contains the indices that should have the highest values in outputs in order for the samples to be counted as correct predictions.
So, to calculate the validation accuracy:
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for (words, labels) in validation_loader:
        words = words.to(device)
        labels = labels.to(device)
        total += labels.shape[0]
        outputs = model(words)
        correct += torch.sum(labels == outputs.argmax(dim=-1))
accuracy = correct / total
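If what you want is the training accuracy printed every epoch (as asked), the same counting can be done inside your existing training loop; a sketch, reusing the model, train_loader, criterion and optimizer from your code:

for epoch in range(num_epochs):
    correct = 0
    total = 0
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)

        outputs = model(words)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate running counts over the epoch.
        total += labels.shape[0]
        correct += (outputs.argmax(dim=-1) == labels).sum().item()

    print(f'epoch {epoch + 1}/{num_epochs}, loss={loss.item():.4f}, acc={correct / total:.4f}')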

Classifying sequences with different lengths with error batching

I'm using Keras with the TensorFlow backend. I've just figured out how to train and classify sequences of different lengths without masking, because I can't get masking to work. In the toy example I'm working with, I'm trying to train an LSTM to detect whether a sequence of arbitrary length starts with a 1 or not.
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np

def gen_sig(num_samples, seq_len):
    one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)

    x_val = np.zeros((num_samples, seq_len), dtype=np.bool)
    x_val[one_indices, 0] = 1

    y_val = np.zeros(num_samples, dtype=np.bool)
    y_val[one_indices] = 1

    return x_val, y_val

N_train = 100
N_test = 10
recall_len = 20

X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_train, recall_len)

print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print('Train...')
for epoch in range(15):
    mean_tr_acc = []
    mean_tr_loss = []
    for seq_idx in range(X_train.shape[0]):
        start_val = X_train[seq_idx, 0]
        assert y_train[seq_idx] == start_val
        assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val

        y_in = np.array([y_train[seq_idx]], dtype=np.bool)

        for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
            x_in = np.array([[[X_train[seq_idx][j]]]])
            tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
            mean_tr_acc.append(tr_acc)
            mean_tr_loss.append(tr_loss)
        model.reset_states()

    print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
    print('loss training = {}'.format(np.mean(mean_tr_loss)))
    print('___________________________________')

    mean_te_acc = []
    mean_te_loss = []
    for seq_idx in range(X_test.shape[0]):
        start_val = X_test[seq_idx, 0]
        assert y_test[seq_idx] == start_val
        assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val

        y_in = np.array([y_test[seq_idx]], dtype=np.bool)

        for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
            te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=np.bool), y_in)
            mean_te_acc.append(te_acc)
            mean_te_loss.append(te_loss)
        model.reset_states()

    print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
    print('loss testing = {}'.format(np.mean(mean_te_loss)))
    print('___________________________________')
As seen in the code, my error is being batched over each time-step. This is bad for multiple reasons. How do I train the network in two steps? For example:
Run a bunch of values through the network to accumulate the error
Adjust the weights of the network given this accumulated error
To do what is described in the original question, the easiest way is to train the original network with masking, but then test with a stateful network so any length input can be classified:
import numpy as np
np.random.seed(1)
import tensorflow as tf
tf.set_random_seed(1)
from keras import models
from keras.layers import Dense, Masking, LSTM
import matplotlib.pyplot as plt

def stateful_model():
    hidden_units = 256

    model = models.Sequential()
    model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
    model.add(Dense(1, activation='relu', name='output'))

    model.compile(loss='binary_crossentropy', optimizer='rmsprop')

    return model

def train_rnn(x_train, y_train, max_len, mask):
    epochs = 10
    batch_size = 200
    vec_dims = 1
    hidden_units = 256
    in_shape = (max_len, vec_dims)

    model = models.Sequential()
    model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
    model.add(LSTM(hidden_units, return_sequences=False))
    model.add(Dense(1, activation='relu', name='output'))

    model.compile(loss='binary_crossentropy', optimizer='rmsprop')

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_split=0.05)

    return model

def gen_train_sig_cls_pair(t_stops, num_examples, mask):
    x = []
    y = []
    max_t = int(np.max(t_stops))

    for t_stop in t_stops:
        one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)

        sig = np.zeros((num_examples, max_t), dtype=np.int8)
        sig[one_indices, 0] = 1
        sig[:, t_stop:] = mask
        x.append(sig)

        cls = np.zeros(num_examples, dtype=np.bool)
        cls[one_indices] = 1
        y.append(cls)

    return np.concatenate(x, axis=0), np.concatenate(y, axis=0)

def gen_test_sig_cls_pair(t_stops, num_examples):
    x = []
    y = []

    for t_stop in t_stops:
        one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)

        sig = np.zeros((num_examples, t_stop), dtype=np.bool)
        sig[one_indices, 0] = 1
        x.extend(list(sig))

        cls = np.zeros((num_examples, t_stop), dtype=np.bool)
        cls[one_indices] = 1
        y.extend(list(cls))

    return x, y

if __name__ == '__main__':
    noise_mag = 0.01
    mask_val = -10
    signal_lengths = (10, 15, 20)

    x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)
    mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val)

    testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)

    state_mod = stateful_model()
    state_mod.set_weights(mod.get_weights())

    res = []
    for s_i in range(len(testing_dat)):
        seq_in = list(testing_dat[s_i])
        seq_len = len(seq_in)
        for t_i in range(seq_len):
            res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]])))
        state_mod.reset_states()

    fig, axes = plt.subplots(2)
    axes[0].plot(np.concatenate(testing_dat), label="input")
    axes[1].plot(res, "ro", label="result", alpha=0.2)
    axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
    axes[1].legend(bbox_to_anchor=(1.1, 1))
    plt.show()
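As a side note that is not part of the original answer: if a test sequence is no longer than the training max_len, you can skip the stateful copy entirely by padding it with the same mask value and calling the masked model directly; a sketch under that assumption:

def classify_padded(mod, seq, max_len, mask_val):
    # Pad a single variable-length sequence to max_len with mask_val and classify it
    # with the masked (non-stateful) model trained above.
    padded = np.full((1, max_len, 1), mask_val, dtype=np.float32)
    padded[0, :len(seq), 0] = seq      # real timesteps first, mask value after
    return mod.predict(padded)[0, 0]

The Masking layer skips the padded timesteps, so the prediction only depends on the real part of the sequence; the stateful approach is still needed for sequences longer than max_len.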

value prediction with tensorflow and python

I have a data set which contains a list of stock prices. I need to use TensorFlow and Python to predict the close price.
Q1: I have the following code, which takes the first 2000 records as training and records 2001 to 20000 as test, but I don't know how to change the code to predict the close price of today and one day later. Please advise!
#!/usr/bin/env python2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

def feature_scaling(input_pd, scaling_meathod):
    if scaling_meathod == 'z-score':
        scaled_pd = (input_pd - input_pd.mean()) / input_pd.std()
    elif scaling_meathod == 'min-max':
        scaled_pd = (input_pd - input_pd.min()) / (input_pd.max() -
                                                   input_pd.min())
    return scaled_pd

def input_reshape(input_pd, start, end, batch_size, batch_shift, n_features):
    temp_pd = input_pd[start-1: end+batch_size-1]
    output_pd = map(lambda y: temp_pd[y:y+batch_size], xrange(0, end-start+1, batch_shift))
    output_temp = map(lambda x: np.array(output_pd[x]).reshape([-1]), xrange(len(output_pd)))
    output = np.reshape(output_temp, [-1, batch_size, n_features])
    return output

def target_reshape(input_pd, start, end, batch_size, batch_shift, n_step_ahead, m_steps_pred):
    temp_pd = input_pd[start+batch_size+n_step_ahead-2: end+batch_size+n_step_ahead+m_steps_pred-2]
    print temp_pd
    output_pd = map(lambda y: temp_pd[y:y+m_steps_pred], xrange(0, end-start+1, batch_shift))
    output_temp = map(lambda x: np.array(output_pd[x]).reshape([-1]), xrange(len(output_pd)))
    output = np.reshape(output_temp, [-1, 1])
    return output

def lstm(input, n_inputs, n_steps, n_of_layers, scope_name):
    num_layers = n_of_layers
    input = tf.transpose(input, [1, 0, 2])
    input = tf.reshape(input, [-1, n_inputs])
    input = tf.split(0, n_steps, input)
    with tf.variable_scope(scope_name):
        cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_inputs)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell]*num_layers)
        output, state = tf.nn.rnn(cell, input, dtype=tf.float32)
    output = output[-1]
    return output

feature_to_input = ['open price', 'highest price', 'lowest price', 'close price', 'turnover', 'volume', 'mean price']
feature_to_predict = ['close price']
feature_to_scale = ['volume']
sacling_meathod = 'min-max'

train_start = 1
train_end = 1000
test_start = 1001
test_end = 20000

batch_size = 100
batch_shift = 1
n_step_ahead = 1
m_steps_pred = 1
n_features = len(feature_to_input)

lstm_scope_name = 'lstm_prediction'
n_lstm_layers = 1
n_pred_class = 1
learning_rate = 0.1
EPOCHS = 1000
PRINT_STEP = 100

read_data_pd = pd.read_csv('./stock_price.csv')
temp_pd = feature_scaling(input_pd[feature_to_scale], sacling_meathod)
input_pd[feature_to_scale] = temp_pd

train_input_temp_pd = input_pd[feature_to_input]
train_input_nparr = input_reshape(train_input_temp_pd,
                                  train_start, train_end, batch_size, batch_shift, n_features)

train_target_temp_pd = input_pd[feature_to_predict]
train_target_nparr = target_reshape(train_target_temp_pd, train_start, train_end, batch_size, batch_shift, n_step_ahead, m_steps_pred)

test_input_temp_pd = input_pd[feature_to_input]
test_input_nparr = input_reshape(test_input_temp_pd, test_start, test_end, batch_size, batch_shift, n_features)

test_target_temp_pd = input_pd[feature_to_predict]
test_target_nparr = target_reshape(test_target_temp_pd, test_start, test_end, batch_size, batch_shift, n_step_ahead, m_steps_pred)

tf.reset_default_graph()

x_ = tf.placeholder(tf.float32, [None, batch_size, n_features])
y_ = tf.placeholder(tf.float32, [None, 1])

lstm_output = lstm(x_, n_features, batch_size, n_lstm_layers, lstm_scope_name)

W = tf.Variable(tf.random_normal([n_features, n_pred_class]))
b = tf.Variable(tf.random_normal([n_pred_class]))
y = tf.matmul(lstm_output, W) + b

cost_func = tf.reduce_mean(tf.square(y - y_))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_func)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    for ii in range(EPOCHS):
        sess.run(train_op, feed_dict={x_: train_input_nparr, y_: train_target_nparr})
        if ii % PRINT_STEP == 0:
            cost = sess.run(cost_func, feed_dict={x_: train_input_nparr, y_: train_target_nparr})
            print 'iteration =', ii, 'training cost:', cost
Very simply, prediction (a.k.a. scoring or inference) comes from running the input through only the forward pass and collecting the score for each input vector. It's the same process flow as testing. The difference lies in the four stages of model use:
Train: learn from the training data set; adjust weights as needed.
Test: evaluate the model's performance; if accuracy has converged, stop training.
Validate: evaluate the accuracy of the trained model. If it doesn't meet acceptance criteria, change something and start over with the training.
Predict: you've passed validation -- release the model for use by the intended application.
All four steps follow the same forward logic flow; training includes back-propagation, the others do not. Simply follow the forward-only process and you'll get the results you need.
I worry about your data partition: only 10% for training, 90% for testing, and none for validation. A more typical split is 50-30-20, or something in that general area.
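For completeness, a minimal sketch of that forward-only prediction step, using the session and tensor names from the question's code. The name new_input_nparr is hypothetical: it stands for a window of the most recent prices, reshaped the same way as train_input_nparr:

# Inside the same tf.Session, after training: run only the forward pass.
# new_input_nparr is assumed to have shape [1, batch_size, n_features].
predicted_close = sess.run(y, feed_dict={x_: new_input_nparr})
print 'predicted close price:', predicted_close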
Q-1: You should change your LSTM parameters to return a sequence of size two, which will be the prediction for that day and the day after.
Q-2: It is clear that your model is underfitting the data, which is unsurprising with your 10% train / 90% test split! You should use a more balanced ratio, as suggested in the previous answer.
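To illustrate Q-1, here is a sketch (reusing the names from the question's code, not a drop-in fix) of widening the output head so the model predicts the close price for today and one day later:

m_steps_pred = 2   # predict the close price for today and one day later

# Targets now hold two consecutive close prices per window, so target_reshape's
# final np.reshape would also need to use [-1, m_steps_pred] instead of [-1, 1].
y_ = tf.placeholder(tf.float32, [None, m_steps_pred])

# Output head maps the LSTM features to two predicted values.
W = tf.Variable(tf.random_normal([n_features, m_steps_pred]))
b = tf.Variable(tf.random_normal([m_steps_pred]))
y = tf.matmul(lstm_output, W) + b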
