Is there any difference between these two pieces of code? - machine-learning

I am still relatively new to TensorFlow, and I am having some trouble with these two pieces of code.
Code A:
self.h1_layer = tf.layers.dense(self.x, self.n_nodes_hl1, activation=tf.nn.relu, name="h1")
self.h2_layer = tf.layers.dense(self.h1_layer, self.n_nodes_hl2, activation=tf.nn.relu, name="h2")
self.h3_layer = tf.layers.dense(self.h2_layer, self.n_nodes_hl3, activation=tf.nn.relu, name="h3")
self.logits = tf.layers.dense(self.h3_layer, self.num_of_classes, name="output")
Code B:
self.hidden_1_layer = {
    'weights': tf.Variable(tf.random_normal([self.num_of_words, self.h1])),
    'biases' : tf.Variable(tf.random_normal([self.h1]))
}
self.hidden_2_layer = {
    'weights': tf.Variable(tf.random_normal([self.h1, self.h2])),
    'biases' : tf.Variable(tf.random_normal([self.h2]))
}
self.hidden_3_layer = {
    'weights': tf.Variable(tf.random_normal([self.h2, self.h3])),
    'biases' : tf.Variable(tf.random_normal([self.h3]))
}
self.final_output_layer = {
    'weights': tf.Variable(tf.random_normal([self.h3, self.num_of_classes])),
    'biases' : tf.Variable(tf.random_normal([self.num_of_classes]))
}
layer1 = tf.add(tf.matmul(data, self.hidden_1_layer['weights']), self.hidden_1_layer['biases'])
layer1 = tf.nn.relu(layer1)
layer2 = tf.add(tf.matmul(layer1, self.hidden_2_layer['weights']), self.hidden_2_layer['biases'])
layer2 = tf.nn.relu(layer2)
layer3 = tf.add(tf.matmul(layer2, self.hidden_3_layer['weights']), self.hidden_3_layer['biases'])
layer3 = tf.nn.relu(layer3)
output = tf.matmul(layer3, self.final_output_layer['weights']) + self.final_output_layer['biases']
Are they the same thing? Can the weights and biases of both Code A and Code B be saved with tf.train.Saver()?
Thanks
Edit:
I am facing issues using Code A to generate predictions. It seems that the logits of Code A are always changing.
The full code:
import tensorflow as tf
import os
from utils import Utils as utils

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

class Neural_Network:
    # Neural Network Setup
    num_of_epoch = 50

    n_nodes_hl1 = 500
    n_nodes_hl2 = 500
    n_nodes_hl3 = 500

    def __init__(self):
        self.num_of_classes = utils.get_num_of_classes()
        self.num_of_words = utils.get_num_of_words()

        # placeholders
        self.x = tf.placeholder(tf.float32, [None, self.num_of_words])
        self.y = tf.placeholder(tf.int32, [None, self.num_of_classes])

        with tf.name_scope("model"):
            self.h1_layer = tf.layers.dense(self.x, self.n_nodes_hl1, activation=tf.nn.relu, name="h1")
            self.h2_layer = tf.layers.dense(self.h1_layer, self.n_nodes_hl2, activation=tf.nn.relu, name="h2")
            self.h3_layer = tf.layers.dense(self.h2_layer, self.n_nodes_hl3, activation=tf.nn.relu, name="h3")
            self.logits = tf.layers.dense(self.h3_layer, self.num_of_classes, name="output")

    def predict(self):
        return self.logits

    def make_prediction(self, query):
        result = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.import_meta_graph('saved_models/testing.meta')
            saver.restore(sess, 'saved_models/testing')

            # for variable in tf.trainable_variables():
            #     print sess.run(variable)

            prediction = self.predict()
            pre, prediction = sess.run([self.logits, prediction], feed_dict={self.x: query})
            print pre
            prediction = prediction.tolist()
            prediction = tf.nn.softmax(prediction)
            prediction = sess.run(prediction)
            print prediction

            return utils.get_label_from_encoding(prediction[0])

    def train(self, data):
        prediction = self.predict()
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=self.y))
        optimizer = tf.train.AdamOptimizer().minimize(cost)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter("mygraph/logs", tf.get_default_graph())

            for epoch in range(self.num_of_epoch):
                optimised, loss = sess.run([optimizer, cost],
                                           feed_dict={self.x: data['values'], self.y: data['labels']})

                if epoch % 1 == 0:
                    print("Completed Training Cycle: " + str(epoch) + " out of " + str(self.num_of_epoch))
                    print("Current Loss: " + str(loss))

                    saver = tf.train.Saver()
                    saver.save(sess, 'saved_models/testing')
                    print("Model saved")

TL;DR: The operations are essentially the same, but the variable creation and initialization methods differ.
If you trace the code from here, you will eventually get to a stage where the code calls tf.get_variable to initialize the variables. In your example above, since kernel_initializer and bias_initializer are not set, they default to None and tf.zeros_initializer() respectively (see the Dense API). When None is passed to tf.get_variable as an initializer, a glorot_uniform_initializer will be used:
If initializer is None (the default), the default initializer passed
in the variable scope will be used. If that one is None too, a
glorot_uniform_initializer will be used. The initializer can also be a
Tensor, in which case the variable is initialized to this value and
shape.
More on tf.get_variable can be found here.
In one case (Code B), you used a tf.random_normal initializer for both the kernel weights and the bias weights; in the other (Code A), you used tf.layers.dense, which results in a glorot_uniform_initializer for the kernel weights and a zeros_initializer for the bias weights, since no initializer arguments were passed to tf.layers.dense.
To your second question on whether they can be saved: yes, both can be saved with tf.train.Saver().
As a last note, you have to be careful when using tf.Variable as it might complicate things when the scopes are not properly set.
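For illustration, here is a minimal sketch (TF 1.x; the layer sizes and input width are placeholders chosen for the example) of how Code A could be made to use the same initialization as Code B by passing the initializers explicitly, and of how either variant is picked up by tf.train.Saver():
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 100])  # 100 input features, chosen for illustration

# Match Code B's tf.random_normal initialization while keeping the tf.layers.dense API.
h1 = tf.layers.dense(
    x, 500,
    activation=tf.nn.relu,
    kernel_initializer=tf.random_normal_initializer(),
    bias_initializer=tf.random_normal_initializer(),
    name="h1")

# Defaults apply here: glorot_uniform for the kernel, zeros for the bias.
logits = tf.layers.dense(h1, 10, name="output")

# Variables created by tf.layers.dense and by tf.Variable are both saveable.
saver = tf.train.Saver()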

Related

I have a trained TensorFlow model; how do I make predictions with it?

I have trained my model by calling the train_neural_network function, which trains the model and then stores it; the accuracy comes to around 83%. The problem I'm facing is: how do I make predictions using my saved model? Which variables do I restore, and how do I pass the input (in batches or all at once)?
def make_model(data, train_x):
    n_nodes_hl1 = 2000
    n_nodes_hl2 = 2000
    n_nodes_hl3 = 2000
    n_classes = 2  # No of classification

    hidden_1_layer = {'weights': tf.Variable(tf.truncated_normal([len(train_x[0]), n_nodes_hl1], stddev=0.1), name='weights'),
                      'biases': tf.Variable(tf.constant(0.1, shape=[n_nodes_hl1]), name='biases')}
    hidden_2_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_hl1, n_nodes_hl2], stddev=0.1), name='weights'),
                      'biases': tf.Variable(tf.constant(0.1, shape=[n_nodes_hl2]), name='biases')}
    hidden_3_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_hl2, n_nodes_hl3], stddev=0.1), name='weights'),
                      'biases': tf.Variable(tf.constant(0.1, shape=[n_nodes_hl3]), name='biases')}
    output_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_hl3, n_classes], stddev=0.1), name='weights'),
                    'biases': tf.Variable(tf.constant(0.1, shape=[n_classes]), name='biases')}

    layer_1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    # now goes through an activation function - sigmoid function
    layer_1 = tf.nn.relu(layer_1)
    print ("Layer 1 done!!")
    # input for layer 2 = result of activ_func for layer 1
    layer_2 = tf.add(tf.matmul(layer_1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    layer_2 = tf.nn.relu(layer_2)
    print ("Layer 2 done!!")
    layer_3 = tf.add(tf.matmul(layer_2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    layer_3 = tf.nn.relu(layer_3)
    print ("Layer 3 done!!")
    output = tf.matmul(layer_3, output_layer['weights'], name="output") + output_layer['biases']
    return output
def train_neural_network(train_x, train_y, test_x, test_y):
    tf.reset_default_graph()
    with tf.name_scope('input'):
        x = tf.placeholder('float', [None, len(train_x[0])], name='x_input')
        y = tf.placeholder('float', name='y-input')
    # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
    prediction = make_model(x, train_x)
    print ('model ready!!')
    with tf.name_scope('pred'):
        pred = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)
    with tf.name_scope('cost'):
        cost = tf.reduce_mean(pred)
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer().minimize(cost, name='optimizer')
    tf.summary.scalar("cost", cost)

    n_epochs = 10
    batch_size = 100
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  # initializes our variables. Session has now begun.
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter('train/2/', sess.graph)
        test_writer = tf.summary.FileWriter('test/')

        for epoch in range(n_epochs):
            epoch_loss = 0  # we'll calculate the loss as we go
            i = 0
            while i < len(train_x):
                # we want to take batches (chunks); take a slice, then another slice
                start = i
                end = i + batch_size
                batch_x = np.array(train_x[start:end])
                batch_y = np.array(train_y[start:end])
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
                if i % 200 == 0:
                    train_writer.add_summary(_, i)
                epoch_loss += c
                i += batch_size
            print('Epoch', epoch, 'completed out of', n_epochs, 'loss:', epoch_loss)

        with tf.name_scope('accuracy'):
            correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
            tf.summary.scalar("accuracy", accuracy)
        print('Accuracy:', accuracy.eval({x: test_x, y: test_y}))

        saver = tf.train.Saver()
        tf_log = 'tf.log'
        saver.save(sess, "model3.ckpt")
        return accuracy
This is how I am making predictions, but it fails every time:
def test_neural_network(test_x):
    batch_size = 100
    i = 0
    batch_x = np.array(test_x[i:i + batch_size])
    tf.reset_default_graph()
    x = tf.placeholder('float', [len(batch_x), len(test_x[0])])
    y = tf.placeholder('float', [2])
    prediction = make_model(x, batch_x)
    # pred1 = tf.nn.softmax(logits=prediction)
    # weight = tf.get_variable("weights_3", shape=[len(batch_x), 2], initializer=tf.zeros_initializer)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "model3.ckpt")
        p = tf.argmax(prediction, 1)
        print (p.eval({x: batch_x}))
This gives an array of shape (batch_size, 2); I expected values like [0, 1] or [1, 0], but I am getting decimal values.
You have a problem because you are running the session on your variable "weight", but in your case you want the output of your network. Try running the session on your last layer instead 😉
How do I make predictions using my saved model? Which variable to
restore and how to pass the input (in batches or whole at once)?
Several comments regarding your design. You don't have to rebuild the graph at test time, because it's saved right next to the session checkpoint. Take a look at this question.
With this, your code will be simplified a lot, because you don't have to keep the placeholders and cross-entropy loss function separately. Add the name to the softmax layer like this:
with tf.name_scope('pred'):
    pred = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y, name='softmax')
After you've restored the graph, you can find the target operation by:
graph = sess.graph
pred = graph.get_operation_by_name("pred/softmax")
If your test data is not big, you can freely feed all of it at once, but if it's significantly larger than your batch size, you can easily get out-of-memory. In this case, you should use mini-batches for testing as well.
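For illustration, a minimal sketch of both points: restoring the saved graph instead of rebuilding it, and predicting in mini-batches. The checkpoint path matches the question; the tensor names input/x_input:0 and output:0 are assumptions based on the names used in make_model and train_neural_network (output:0 refers to the named final matmul, since the bias add in the question's code is unnamed).
import numpy as np
import tensorflow as tf

def predict_in_batches(test_x, batch_size=100):
    tf.reset_default_graph()
    with tf.Session() as sess:
        # Rebuild the graph from the checkpoint's meta file instead of calling make_model again.
        saver = tf.train.import_meta_graph('model3.ckpt.meta')
        saver.restore(sess, 'model3.ckpt')

        graph = sess.graph
        x = graph.get_tensor_by_name('input/x_input:0')   # placeholder created under name_scope('input')
        logits = graph.get_tensor_by_name('output:0')     # assumed name of the final layer
        predicted_class = tf.argmax(logits, 1)

        results = []
        for start in range(0, len(test_x), batch_size):
            batch = np.array(test_x[start:start + batch_size])
            results.append(sess.run(predicted_class, feed_dict={x: batch}))
        return np.concatenate(results)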
As for your test accuracy, there can be plenty of reasons for this, for instance overfitting. Update the question with the full code so that it can be reproduced.

Value prediction with TensorFlow and Python

I have a data set which contains a list of stock prices. I need to use TensorFlow and Python to predict the close price.
Q1: I have the following code, which takes the first 2000 records as training data and records 2001 to 20000 as test data, but I don't know how to change the code to predict today's close price and the price one day later. Please advise!
#!/usr/bin/env python2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

def feature_scaling(input_pd, scaling_meathod):
    if scaling_meathod == 'z-score':
        scaled_pd = (input_pd - input_pd.mean()) / input_pd.std()
    elif scaling_meathod == 'min-max':
        scaled_pd = (input_pd - input_pd.min()) / (input_pd.max() - input_pd.min())
    return scaled_pd

def input_reshape(input_pd, start, end, batch_size, batch_shift, n_features):
    temp_pd = input_pd[start-1: end+batch_size-1]
    output_pd = map(lambda y: temp_pd[y:y+batch_size], xrange(0, end-start+1, batch_shift))
    output_temp = map(lambda x: np.array(output_pd[x]).reshape([-1]), xrange(len(output_pd)))
    output = np.reshape(output_temp, [-1, batch_size, n_features])
    return output

def target_reshape(input_pd, start, end, batch_size, batch_shift, n_step_ahead, m_steps_pred):
    temp_pd = input_pd[start+batch_size+n_step_ahead-2: end+batch_size+n_step_ahead+m_steps_pred-2]
    print temp_pd
    output_pd = map(lambda y: temp_pd[y:y+m_steps_pred], xrange(0, end-start+1, batch_shift))
    output_temp = map(lambda x: np.array(output_pd[x]).reshape([-1]), xrange(len(output_pd)))
    output = np.reshape(output_temp, [-1, 1])
    return output

def lstm(input, n_inputs, n_steps, n_of_layers, scope_name):
    num_layers = n_of_layers
    input = tf.transpose(input, [1, 0, 2])
    input = tf.reshape(input, [-1, n_inputs])
    input = tf.split(0, n_steps, input)
    with tf.variable_scope(scope_name):
        cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_inputs)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)
        output, state = tf.nn.rnn(cell, input, dtype=tf.float32)
    output = output[-1]
    return output

feature_to_input = ['open price', 'highest price', 'lowest price', 'close price', 'turnover', 'volume', 'mean price']
feature_to_predict = ['close price']
feature_to_scale = ['volume']
sacling_meathod = 'min-max'

train_start = 1
train_end = 1000
test_start = 1001
test_end = 20000

batch_size = 100
batch_shift = 1
n_step_ahead = 1
m_steps_pred = 1
n_features = len(feature_to_input)

lstm_scope_name = 'lstm_prediction'
n_lstm_layers = 1
n_pred_class = 1
learning_rate = 0.1
EPOCHS = 1000
PRINT_STEP = 100

read_data_pd = pd.read_csv('./stock_price.csv')
temp_pd = feature_scaling(input_pd[feature_to_scale], sacling_meathod)
input_pd[feature_to_scale] = temp_pd
train_input_temp_pd = input_pd[feature_to_input]
train_input_nparr = input_reshape(train_input_temp_pd,
                                  train_start, train_end, batch_size, batch_shift, n_features)
train_target_temp_pd = input_pd[feature_to_predict]
train_target_nparr = target_reshape(train_target_temp_pd, train_start, train_end, batch_size, batch_shift, n_step_ahead, m_steps_pred)
test_input_temp_pd = input_pd[feature_to_input]
test_input_nparr = input_reshape(test_input_temp_pd, test_start, test_end, batch_size, batch_shift, n_features)
test_target_temp_pd = input_pd[feature_to_predict]
test_target_nparr = target_reshape(test_target_temp_pd, test_start, test_end, batch_size, batch_shift, n_step_ahead, m_steps_pred)

tf.reset_default_graph()
x_ = tf.placeholder(tf.float32, [None, batch_size, n_features])
y_ = tf.placeholder(tf.float32, [None, 1])
lstm_output = lstm(x_, n_features, batch_size, n_lstm_layers, lstm_scope_name)
W = tf.Variable(tf.random_normal([n_features, n_pred_class]))
b = tf.Variable(tf.random_normal([n_pred_class]))
y = tf.matmul(lstm_output, W) + b
cost_func = tf.reduce_mean(tf.square(y - y_))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_func)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    for ii in range(EPOCHS):
        sess.run(train_op, feed_dict={x_: train_input_nparr, y_: train_target_nparr})
        if ii % PRINT_STEP == 0:
            cost = sess.run(cost_func, feed_dict={x_: train_input_nparr, y_: train_target_nparr})
            print 'iteration =', ii, 'training cost:', cost
Very simply, prediction (a.k.a. scoring or inference) comes from running the input through only the forward pass, and collecting the score for each input vector. It's the same process flow as testing. The difference is the four stages of model use:
Train: learn from the training data set; adjust weights as needed.
Test: evaluate the model's performance; if accuracy has converged, stop training.
Validate: evaluate the accuracy of the trained model. If it doesn't meet acceptance criteria, change something and start over with the training.
Predict: you've passed validation -- release the model for use by the intended application.
All four stages follow the same forward logic flow; training adds back-propagation, the others do not. Simply follow the forward-only process, and you'll get the results in the form you need.
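For example, a minimal sketch of that forward-only pass using the placeholders from the code above (new_input_nparr is a hypothetical array shaped like the training batches, and this runs inside the same training session):
# Forward pass only: no train_op in the sess.run call, so no weights are updated.
# new_input_nparr is a hypothetical array of shape [1, batch_size, n_features],
# e.g. built with input_reshape() from the most recent rows of the CSV.
predicted_close = sess.run(y, feed_dict={x_: new_input_nparr})
print 'predicted close price:', predicted_close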
I worry about your data partition: only 10% for training, 90% for testing, and none for validation. A more typical split is 50-30-20, or something in that general area.
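As a rough sketch of such a partition for this data set (the 50-30-20 proportions are illustrative, and the split is kept chronological since this is a time series):
import numpy as np

n_rows = 20000                      # total records, as in the question
idx = np.arange(n_rows)
train_end = int(0.5 * n_rows)       # first 50% for training
val_end = int(0.8 * n_rows)         # next 30% for validation
train_idx, val_idx, test_idx = idx[:train_end], idx[train_end:val_end], idx[val_end:]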
Q1: You should change your LSTM setup to return a sequence of length two, which will be the prediction for that day and the day after.
Q2: Your model is clearly underfitting the data, which is unsurprising with a 10% train / 90% test split. You should use a more balanced ratio, as suggested in the previous answer.
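For Q1, a rough sketch of what returning two predicted steps could look like with the variables from the question (illustrative only; target_reshape and the reshaping logic would also need to be adjusted so each target row contains two values):
m_steps_pred = 2                     # predict today's close and the next day's close
n_pred_class = 2

y_ = tf.placeholder(tf.float32, [None, m_steps_pred])
W = tf.Variable(tf.random_normal([n_features, n_pred_class]))
b = tf.Variable(tf.random_normal([n_pred_class]))
y = tf.matmul(lstm_output, W) + b    # now shape [batch, 2]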

When introducing the Optimizer, why do variables under variable_scope get recreated twice?

As the title suggests, when looking at the graph in TensorBoard, the variables I've created inside a variable_scope appear to be created twice. Why is that? What am I doing wrong?
def weights_biases(weights_shape, biases_shape):
    weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer())
    biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer())
    return weights, biases

def hl_relu(input_tensor, weights_shape, biases_shape):
    weights, biases = weights_biases(weights_shape, biases_shape)
    regression = tf.matmul(input_tensor, weights) + biases
    return tf.nn.relu(regression)

def neural_network_model(x):
    # W = tf.Variable(
    #     tf.truncated_normal([vocab_size, embedding_size], stddev=1 / math.sqrt(vocab_size)),
    #     name="W")
    # embedded = tf.nn.embedding_lookup(W, x)
    # embedding_aggregated = tf.reduce_sum(embedded, [1])

    with tf.variable_scope("hidden_layer_1"):
        relu1 = hl_relu(x, [max_words_len, n_nodes_hl1], [n_nodes_hl1])
    with tf.variable_scope("hidden_layer_2"):
        relu2 = hl_relu(relu1, [n_nodes_hl1, n_nodes_hl2], [n_nodes_hl2])
    with tf.variable_scope("hidden_layer_3"):
        relu3 = hl_relu(relu2, [n_nodes_hl2, n_nodes_hl3], [n_nodes_hl3])
    with tf.variable_scope("output_layer"):
        weights, biases = weights_biases([n_nodes_hl3, n_classes], [n_classes])
        output_regression = tf.matmul(relu3, weights) + biases
    return output_regression

def train_neural_network(test_x, test_y):
    with tf.device("/cpu:0"):
        custom_runner = CustomRunner()
        x_batch, y_batch = custom_runner.get_inputs()

        with tf.variable_scope("test"):
            testX = tf.constant(test_x, name="testX")
            testX = tf.cast(testX, tf.float32)
            testY = tf.constant(test_y, name="testY")
            testY = tf.cast(testY, tf.float32)

        with tf.variable_scope("nn") as scope:
            global_step = tf.Variable(0, trainable=False, name='global_step')
            logits = neural_network_model(x_batch)
            scope.reuse_variables()
            test_logits = neural_network_model(testX)

        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y_batch), name="cost")
        tf.scalar_summary('cost', cost)
        optimizer = tf.train.AdagradOptimizer(0.01).minimize(cost, global_step=global_step)
Produces the following abnormality:
You can see that the 'nn' scope appears twice, with hidden layers that are not connected to any inputs but still affect the Adagrad optimizer by constantly feeding in their randomly initialized weights. I suspect this is also slowing down training.
What have I done wrong in my code?
I believe this is what is causing them to be created twice.
logits = neural_network_model(x_batch)
scope.reuse_variables()
test_logits = neural_network_model(testX)
Can you change that to this:
logits = neural_network_model(x_batch)
without the test_logits and see if you still get the same issue?
Are you sure the variables are re-created? I suspect what you see are just the Adagrad variables created by the optimizer so that it can store what it needs for its computation. Can you try the simplest GradientDescentOptimizer and see if it still happens?
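One way to check, as a minimal sketch (the scope names follow the code in the question): list every variable in the graph and inspect the names. Genuinely duplicated model variables would appear twice under the nn scope, while Adagrad's slot variables show up once with /Adagrad appended to the original variable name.
# Print every variable in the graph to see whether model variables are truly
# duplicated or whether the extra nodes are just optimizer slot variables,
# e.g. "nn/hidden_layer_1/weights/Adagrad:0".
for v in tf.all_variables():          # tf.global_variables() in newer TF versions
    print(v.name)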

DNN with embedding layer returning sine-wave cost/accuracy

I am a total newbie to TensorFlow and machine learning, but I'm trying to model a DNN with an embedding layer in front of it. For some reason I keep getting a sine-wave pattern in the cost results as well as the accuracy. I imagine there is something wrong with my code, so here goes:
This is my model and training routines:
def neural_network_model(x):
    W = tf.Variable(
        tf.truncated_normal([vocab_size, embedding_size], stddev=1 / math.sqrt(vocab_size)),
        name="W")
    embedded = tf.nn.embedding_lookup(W, x)
    embedding_aggregated = tf.reduce_sum(embedded, [1])

    hidden_1_layer = {
        'weights': tf.Variable(tf.random_normal([embedding_size, n_nodes_hl1])),
        'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))
    }
    hidden_2_layer = {
        'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
        'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))
    }
    hidden_3_layer = {
        'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
        'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))
    }
    output = {
        'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }

    l1 = tf.matmul(embedding_aggregated, hidden_1_layer['weights']) + hidden_1_layer['biases']
    l1 = tf.nn.relu(l1)
    l2 = tf.matmul(l1, hidden_2_layer['weights']) + hidden_2_layer['biases']
    l2 = tf.nn.relu(l2)
    l3 = tf.matmul(l2, hidden_3_layer['weights']) + hidden_3_layer['biases']
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output['weights']) + output['biases']
    return output

def train_neural_network(x_batch, y_batch, test_x, test_y):
    global_step = tf.Variable(0, trainable=False, name='global_step')
    logits = neural_network_model(x_batch)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y_batch))
    tf.scalar_summary('cost', cost)
    optimizer = tf.train.AdagradOptimizer(0.01).minimize(cost, global_step=global_step)

    test_logits = neural_network_model(test_x)
    prediction = tf.nn.softmax(test_logits)
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(test_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    tf.scalar_summary('accuracy', accuracy)

    merged = tf.merge_all_summaries()
    saver = tf.train.Saver()
    model_dir = "model_embedding"
    latest_checkpoint = tf.train.latest_checkpoint(model_dir)

    with tf.Session() as sess:
        train_writer = tf.train.SummaryWriter(model_dir + "/eval", sess.graph)
        if (latest_checkpoint != None):
            print("Restoring: ", latest_checkpoint)
            saver.restore(sess, latest_checkpoint)
        else:
            print("Nothing to restore")
            sess.run(tf.initialize_all_variables())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            epoch = 1
            while not coord.should_stop():
                epoch_loss = 0
                _, c, summary = sess.run([optimizer, cost, merged])
                # embd = sess.run(emb)
                # for idx in range(xb.size):
                #     print(xb[idx])
                #     print(yb[idx])
                train_writer.add_summary(summary, global_step=global_step.eval())
                epoch_loss += c
                print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
                print("Global step: ", global_step.eval())
                print('Accuracy:', accuracy.eval())
                # saver.save(sess, model_dir + '/model.ckpt', global_step=global_step)  # default to last 5 checkpoint saves
                epoch += 1
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
        sess.close()
My data is a bunch of word integer IDs padded uniformly to a length of 2056, with the padding token added at the end, so many of my tensors end with a long run of the vocab_size integer value used for padding.
Is there something glaringly obvious about my code thats wrong?
For whoever runs into the same issue:
My error was calling the neural_network_model() function a second time and thus creating a new set of variables. The answer lies in learning how to share variables; TF has a good page describing that at Sharing Variables.
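As an illustrative sketch of that fix (the scope and function names mirror the question's code; note that sharing via reuse_variables() only applies to variables created with tf.get_variable, so the tf.Variable calls inside the model would need to be converted first):
with tf.variable_scope("model") as scope:
    logits = neural_network_model(x_batch)        # first call creates the variables
    scope.reuse_variables()                       # reuse them instead of creating a second set
    test_logits = neural_network_model(test_x)    # same weights, different input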

Tensorflow Grid3LSTMCell visualization

I'm having a difficult time visualizing what this TensorFlow class creates. I want to implement an LSTM RNN that handles 3D data.
class Grid3LSTMCell(GridRNNCell):
    """3D BasicLSTM cell
    This creates a 2D cell which receives input and gives output in the first dimension.
    The first dimension can optionally be non-recurrent if `non_recurrent_fn` is specified.
    The second and third dimensions are LSTM.
    """
    def __init__(self, num_units, tied=False, non_recurrent_fn=None,
                 use_peepholes=False, forget_bias=1.0):
        super(Grid3LSTMCell, self).__init__(
            num_units=num_units, num_dims=3,
            input_dims=0, output_dims=0, priority_dims=0, tied=tied,
            non_recurrent_dims=None if non_recurrent_fn is None else 0,
            cell_fn=lambda n, i: rnn_cell.LSTMCell(
                num_units=n, input_size=i, forget_bias=forget_bias,
                use_peepholes=use_peepholes),
            non_recurrent_fn=non_recurrent_fn)
The class is found in `from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell`.
This is difficult to explain, so I've provided a drawing. Here is what I want it to do...
However, the comment sounds like it isn't doing this. It makes it sound like the RNN is still a flat RNN, where the first dimension outputs to what is commonly called the outputs variable (see below), the second dimension outputs to the next step in the RNN, and the third dimension outputs to the next hidden layer.
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
If this is the case, what is the point in having the first and second dimensions? Aren't they essentially the same thing? The BasicLSTMCell sends its output to the next step and into outputs -- in other words, they are one and the same.
Can anyone provide some clarity?
For reference, here is my example code...
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
import numpy as np

# define parameters
learning_rate = 0.01
batch_size = 2
n_input_x = 10
n_input_y = 10
n_input_z = 10
n_hidden = 128
n_classes = 2
n_output = n_input_x * n_classes

x = tf.placeholder("float", [n_input_x, n_input_y, n_input_z])
y = tf.placeholder("float", [n_input_x, n_input_y, n_input_z, n_classes])

weights = {}
biases = {}
for i in xrange(n_input_y * n_input_z):
    weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
    biases[i] = tf.Variable(tf.random_normal([n_output]))

# generate random data
input_data = np.random.rand(n_input_x, n_input_y, n_input_z)
ground_truth = np.random.rand(n_input_x, n_input_y, n_input_z, n_classes)

# build GridLSTM
def GridLSTM_network(x):
    x = tf.reshape(x, [-1, n_input_x])
    x = tf.split(0, n_input_y * n_input_z, x)
    lstm_cell = grid_rnn_cell.Grid3LSTMCell(n_hidden)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    output = []
    for i in xrange(n_input_y * n_input_z):
        output.append(tf.matmul(outputs[i], weights[i]) + biases[i])
    return output

# initialize network, cost, optimizer and all variables
pred = GridLSTM_network(x)
# import pdb
# pdb.set_trace()
pred = tf.pack(pred)
pred = tf.transpose(pred, [1, 0, 2])
pred = tf.reshape(pred, [-1, n_input_x, n_input_y, n_input_z, n_classes])
temp_pred = tf.reshape(pred, [-1, n_classes])
temp_y = tf.reshape(y, [-1, n_classes])

cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(0, tf.cast(tf.sub(tf.nn.sigmoid(temp_pred), temp_y), tf.int32))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 0
    while 1:
        print step
        step = step + 1
        # pdb.set_trace
        sess.run(optimizer, feed_dict={x: input_data, y: ground_truth})
