Unable to restore variables of Adam Optimizer while using tf.train.Saver - machine-learning

I get the following errors when I try to restore a saved model in TensorFlow:
W tensorflow/core/framework/op_kernel.cc:1192] Not found: Key out_w/Adam_5 not found in checkpoint
W tensorflow/core/framework/op_kernel.cc:1192] Not found: Key b1/Adam not found in checkpoint
W tensorflow/core/framework/op_kernel.cc:1192] Not found: Key b1/Adam_4 not found in checkpoint
It seems I am unable to save the variables of the Adam optimizer.
Any fix?

Consider this small experiment:
import tensorflow as tf

def simple_model(X):
    with tf.variable_scope('Layer1'):
        w1 = tf.get_variable('w1', initializer=tf.truncated_normal((5, 2)))
        b1 = tf.get_variable('b1', initializer=tf.ones((2)))
        layer1 = tf.matmul(X, w1) + b1
    return layer1

def simple_model2(X):
    with tf.variable_scope('Layer1'):
        w1 = tf.get_variable('w1_x', initializer=tf.truncated_normal((5, 2)))
        b1 = tf.get_variable('b1_x', initializer=tf.ones((2)))
        layer1 = tf.matmul(X, w1) + b1
    return layer1

tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, 5))
model = simple_model(X)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, './Checkpoint', global_step=0)

tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, 5))
model = simple_model(X)    # Case 1
# model = simple_model2(X) # Case 2
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(sess, tf.train.latest_checkpoint('.'))
In Case 1, everything works fine. But in Case 2, you will get errors like Key Layer1/b1_x not found in checkpoint, because the variable names in the model are different (even though the shapes and datatypes of both variables are the same). Make sure the variables have the same names in the model into which you are restoring.
To check the names of the variables present in a checkpoint, see this answer.
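For example, here is a minimal sketch that lists what a checkpoint actually contains (assuming a TF 1.x version where tf.train.list_variables is available):
import tensorflow as tf

# Print every variable name and shape stored in the checkpoint, so that
# mismatched names (e.g. missing Adam slot variables) are easy to spot.
ckpt = tf.train.latest_checkpoint('.')
for name, shape in tf.train.list_variables(ckpt):
    print(name, shape)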

This can also happen when you are not training every variable simultaneously, so that only some of the Adam slot variables are present in the checkpoint.
One possible fix is to "reset" Adam after loading the checkpoint. To do this, filter out the Adam-related variables when creating the saver:
vl = [v for v in tf.global_variables() if "Adam" not in v.name]
saver = tf.train.Saver(var_list=vl)
Make sure the excluded Adam variables still get initialized: either run tf.global_variables_initializer() before restoring, or initialize just the filtered-out variables after the restore.
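Putting it together, a minimal sketch of the full restore-then-reinitialize flow (using the checkpoint directory '.' from the experiment above):
vl = [v for v in tf.global_variables() if "Adam" not in v.name]
saver = tf.train.Saver(var_list=vl)
with tf.Session() as sess:
    # Initialize everything first, so the Adam slot variables get fresh values...
    sess.run(tf.global_variables_initializer())
    # ...then restore only the non-Adam variables, overwriting their initial values.
    saver.restore(sess, tf.train.latest_checkpoint('.'))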

Related

How to feed previous time-stamp prediction as additional input to the next time-stamp?

This question might have been asked, but I got confused.
I am trying to apply one of the RNN types, e.g. an LSTM, to time-series forecasting. I have inputs and y (stock returns). For each timestamp, I'd like to get the predictions. Q1: Am I correct in choosing a seq2seq approach?
I also want to use the prediction from the previous timestamp (initializing the initial value with some constant) as an additional input (alongside my existing inputs), in the form of the squared residual, i.e. using
eps_{t-1} = (y_{t-1} - y^_{t-1})^2 as an additional input at t (as well as the previous inputs).
So, how can I do this in TensorFlow or in PyTorch?
I tried to depict what I want in the attached graph.
P.S. Sorry if the question is poorly formulated.
Let's say your input is of dimension (32, 10, 1): batch size 32, 10 time steps, and a feature dimension of 1. The same goes for your target (the stock return). This code makes use of the tf.scan function, which is useful when implementing custom recurrent networks (it will iterate over the time steps). It remains to use the residual of t-1 at t wherever you would like to.
P.S.: it is a very basic implementation of an LSTM from scratch, without any biases or output activation.
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

BS = 32          # batch size
TS = 10          # time steps
inputs_dim = 1
target_dim = 1

inputs = tf.placeholder(shape=[BS, TS, inputs_dim], dtype=tf.float32)
stock_returns = tf.placeholder(shape=[BS, TS, target_dim], dtype=tf.float32)

state_size = 16

# initial hidden state (stacked [s, c] for the LSTM)
init_state = tf.placeholder(shape=[2, BS, state_size],
                            dtype=tf.float32, name='initial_state')

# initializer
xav_init = tf.contrib.layers.xavier_initializer

# params
W = tf.get_variable('W', shape=[4, state_size, state_size],
                    initializer=xav_init())
U = tf.get_variable('U', shape=[4, inputs_dim, state_size],
                    initializer=xav_init())
W_out = tf.get_variable('W_out', shape=[state_size, target_dim],
                        initializer=xav_init())

# the function to feed tf.scan with
def step(prev, inputs_):
    # unpack all inputs and previous outputs
    st_1, ct_1 = prev[0][0], prev[0][1]
    x = inputs_[0]
    target = inputs_[1]
    # get previous squared residual
    eps = prev[1]
    """
    here do whatever you want with eps_{t-1},
    like x += eps if x is of the same dimension,
    or include it somewhere else in your graph
    """
    # lstm gates (add bias if needed)
    #
    # input gate
    i = tf.sigmoid(tf.matmul(x, U[0]) + tf.matmul(st_1, W[0]))
    # forget gate
    f = tf.sigmoid(tf.matmul(x, U[1]) + tf.matmul(st_1, W[1]))
    # output gate
    o = tf.sigmoid(tf.matmul(x, U[2]) + tf.matmul(st_1, W[2]))
    # gate weights
    g = tf.tanh(tf.matmul(x, U[3]) + tf.matmul(st_1, W[3]))
    ct = ct_1 * f + g * i
    st = tf.tanh(ct) * o
    """
    make the prediction, compute the residual at t,
    and pass it on to t+1.
    Normally we would compute the prediction outside the scan function,
    but since we need it here, we can just keep it and return it
    as an output of the scan function
    """
    prediction_t = tf.matmul(st, W_out)  # + bias
    eps = (target - prediction_t)**2
    return [tf.stack((st, ct), axis=0), eps, prediction_t]

states, eps, preds = tf.scan(step, [tf.transpose(inputs, [1, 0, 2]),
                                    tf.transpose(stock_returns, [1, 0, 2])],
                             initializer=[init_state,
                                          tf.zeros((BS, 1), dtype=tf.float32),
                                          tf.zeros((BS, 1), dtype=tf.float32)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(preds, feed_dict={inputs: np.random.rand(BS, TS, inputs_dim),
                                     stock_returns: np.random.rand(BS, TS, target_dim),
                                     init_state: np.zeros((2, BS, state_size))})

out = tf.transpose(out, [1, 0, 2])
print(out)
And the output:
Tensor("transpose_2:0", shape=(32, 10, 1), dtype=float32)
Base code from here

How to store a dictionary and map words to ints when using Tensorflow Serving?

I have trained an LSTM RNN classification model in TensorFlow. I was saving and restoring checkpoints to retrain and to use the model for testing. Now I want to use TensorFlow Serving so that I can use the model in production.
Initially, I would parse through a corpus to create my dictionary, which is then used to map words in a string to integers. I would then store this dictionary in a pickle file, which could be reloaded when restoring a checkpoint and retraining on a data set, or just when using the model, so that the mapping is consistent. How do I store this dictionary when saving the model using SavedModelBuilder?
My code for the neural network is as follows. The code for saving the model is towards the end (I am including an overview of the whole structure for context):
...
# Read files and store them in variables
with open('./someReview.txt', 'r') as f:
    reviews = f.read()
with open('./someLabels.txt', 'r') as f:
    labels = f.read()
...
# Pre-processing functions
# Parse through dataset and create a vocabulary
vocab_to_int, reviews = RnnPreprocessing.map_vocab_to_int(reviews)
with open(pickle_path, 'wb') as handle:
    pickle.dump(vocab_to_int, handle, protocol=pickle.HIGHEST_PROTOCOL)
# More preprocessing functions
...
# Building the graph
lstm_size = 256
lstm_layers = 2
batch_size = 1000
learning_rate = 0.01
n_words = len(vocab_to_int) + 1

# Create the graph object
tf.reset_default_graph()
with tf.name_scope('inputs'):
    inputs_ = tf.placeholder(tf.int32, [None, None], name="inputs")
    labels_ = tf.placeholder(tf.int32, [None, None], name="labels")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")

# Create embedding layer, LSTM cell, LSTM layers
...
# Forward pass
with tf.name_scope("RNN_forward"):
    outputs, final_state = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)

# Output. We are only interested in the latest output of the lstm cell
with tf.name_scope('predictions'):
    predictions = tf.contrib.layers.fully_connected(outputs[:, -1], 1, activation_fn=tf.sigmoid)
    tf.summary.histogram('predictions', predictions)

# More functions for cost, accuracy, optimizer initialization
...
# Training
epochs = 1
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    iteration = 1
    for e in range(epochs):
        state = sess.run(initial_state)
        for ii, (x, y) in enumerate(get_batches(train_x, train_y, batch_size), 1):
            feed = {inputs_: x,
                    labels_: y[:, None],
                    keep_prob: 0.5,
                    initial_state: state}
            summary, loss, state, _ = sess.run([merged, cost, final_state, optimizer], feed_dict=feed)
            train_writer.add_summary(summary, iteration)
            if iteration % 1 == 0:
                print("Epoch: {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Train loss: {:.3f}".format(loss))
            if iteration % 2 == 0:
                val_acc = []
                val_state = sess.run(cell.zero_state(batch_size, tf.float32))
                for x, y in get_batches(val_x, val_y, batch_size):
                    feed = {inputs_: x,
                            labels_: y[:, None],
                            keep_prob: 1,
                            initial_state: val_state}
                    summary, batch_acc, val_state = sess.run([merged, accuracy, final_state], feed_dict=feed)
                    val_acc.append(batch_acc)
                print("Val acc: {:.3f}".format(np.mean(val_acc)))
            iteration += 1
            test_writer.add_summary(summary, iteration)

    # Saving the model
    export_path = './SavedModel'
    print('Exporting trained model to %s' % export_path)
    builder = saved_model_builder.SavedModelBuilder(export_path)

    # Build the signature_def_map.
    classification_inputs = utils.build_tensor_info(inputs_)
    classification_outputs_classes = utils.build_tensor_info(labels_)

    classification_signature = signature_def_utils.build_signature_def(
        inputs={signature_constants.CLASSIFY_INPUTS: classification_inputs},
        outputs={
            signature_constants.CLASSIFY_OUTPUT_CLASSES:
                classification_outputs_classes,
        },
        method_name=signature_constants.CLASSIFY_METHOD_NAME)

    legacy_init_op = tf.group(
        tf.tables_initializer(), name='legacy_init_op')

    # add the signatures to the servable
    builder.add_meta_graph_and_variables(
        sess, [tag_constants.SERVING],
        signature_def_map={
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                classification_signature
        },
        legacy_init_op=legacy_init_op)
    print("added meta graph and variables")

    # save it!
    builder.save()
    print("model saved")
I am not entirely sure if this is the correct way to save a model such as this, but it is the only implementation I have found in the documentation and in online tutorials.
I haven't found any example or explicit guide in the documentation on saving the dictionary, or on how to use it when restoring a SavedModel.
When using checkpoints, I would just load the pickle file before running the session. How do I restore this SavedModel so that I can use the same word-to-int mapping via the dictionary? Is there any specific way I should be saving the model or loading it?
I have also added inputs_ as the input for the input signature. This is a sequence of integers 'after' the words have been mapped. I can't specify a string as input because I get AttributeError: 'str' object has no attribute 'dtype'. In such cases, how exactly are words mapped to integers in models that are in production?
Implement your preprocessing using the utilities in tf.feature_column and it'll be straightforward to use the same mapping to integers in serving.
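For instance, here is a rough sketch of that idea (the vocab.txt file, the 'words' key, and the embedding dimension are placeholder assumptions, not taken from the question):
import tensorflow as tf

# Map raw word strings to integer ids using a vocabulary file; the mapping
# becomes part of the graph, so serving reuses exactly the same dictionary.
words_col = tf.feature_column.categorical_column_with_vocabulary_file(
    key='words', vocabulary_file='vocab.txt', num_oov_buckets=1)
embedded_col = tf.feature_column.embedding_column(words_col, dimension=256)
# `features` would be the dict produced by your input_fn / serving input receiver
net = tf.feature_column.input_layer(features, [embedded_col])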
One approach to this is storing the vocabulary in the model's graph. This will then be shipped with the model.
...
# assumes e.g.: from tensorflow.contrib import lookup
vocab_table = lookup.index_table_from_file(vocabulary_file='data/vocab.csv', num_oov_buckets=1, default_value=-1)
text = features[commons.FEATURE_COL]
words = tf.string_split(text)
dense_words = tf.sparse_tensor_to_dense(words, default_value=commons.PAD_WORD)
word_ids = vocab_table.lookup(dense_words)
padding = tf.constant([[0, 0], [0, commons.MAX_DOCUMENT_LENGTH]])
# Pad all the word_ids entries to the maximum document length
word_ids_padded = tf.pad(word_ids, padding)
word_id_vector = tf.slice(word_ids_padded, [0, 0], [-1, commons.MAX_DOCUMENT_LENGTH])
Source: https://github.com/KishoreKarunakaran/CloudML-Serving/blob/master/text/imdb_cnn/model/cnn_model.py#L83

Tensorflow - How is the graph executed?

I am trying to get the output of an activation function as the weights change; when the weights change, I expect the activation output to change as well.
When I simply change the weights before the activation, I do get a change in the value of the activation.
import tensorflow as tf

def sigmoid(x, derivative=False):
    if derivative == True:
        return (1.0/(1+tf.exp(-x))) * (1.0 - (1.0/(1+tf.exp(-x))))
    return 1.0/(1+tf.exp(-x))

def dummy(x):
    weights['h0'] = tf.assign(weights['h0'], tf.add(weights['h0'], 0.1))
    res = tf.add(weights['h0'], x)
    res = sigmoid(res)
    return res

# build computational graph
a = tf.placeholder('float', None)

weights = {
    'h0': tf.Variable(tf.random_normal([1]))
}

d = dummy(a)

# initialize variables
init = tf.global_variables_initializer()

# create session and run the graph
with tf.Session() as sess:
    sess.run(init)
    for i in range(10):
        print(sess.run(d, feed_dict={a: [2]}))

# close session
sess.close()
But when I try to change the weights after the activation such as in backprop, I get the same activation every time. Can anyone explain to me what is happening and what I can do to get the activation to change after every iteration?
import tensorflow as tf

def sigmoid(x, derivative=False):
    if derivative == True:
        return (1.0/(1+tf.exp(-x))) * (1.0 - (1.0/(1+tf.exp(-x))))
    return 1.0/(1+tf.exp(-x))

def dummy(x):
    res = tf.add(weights['h0'], x)
    res = sigmoid(res)
    weights['h0'] = tf.assign(weights['h0'], tf.add(weights['h0'], 0.1))
    return res

# build computational graph
a = tf.placeholder('float', None)

weights = {
    'h0': tf.Variable(tf.random_normal([1]))
}

d = dummy(a)

# initialize variables
init = tf.global_variables_initializer()

# create session and run the graph
with tf.Session() as sess:
    sess.run(init)
    for i in range(10):
        print(sess.run(d, feed_dict={a: [2]}))

# close session
sess.close()
EDIT:
It seems like it is not running the entire graph? I can do this:
with tf.Session() as sess:
    sess.run(init)
    for i in range(10):
        sess.run(weights['h0'])
        print(sess.run(d, feed_dict={a: [2]}))
Where I run the weights and it gives me different values. Is that correct?
This line isn't doing what you think it's doing:
print (sess.run(d, feed_dict={a: [2]}))
You need to call sess.run() and pass in a training operation, which is usually the op returned by an optimizer's minimize() method.
Below are some example usages.
From the super-simple Tensorflow MNIST example:
# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
...
for _ in range(1000):
    ...
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
From a TensorFlow multi-layer NN example:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
...
for i in range(total_batch):
    ...
    _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
The general pattern is:
1. Define the cost function J.
2. Add the cost variable J to an optimizer.
3. Call sess.run() with the optimizer's training op as an argument.
If you want to write your own optimizer, then you'll need to take a different approach. Writing your own cost function is typical, but writing your own optimizer is not. You should look at the code for AdamOptimizer or GradientDescentOptimizer for insight.
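For a rough sense of what an optimizer wraps, here is a hand-rolled gradient-descent step built from tf.gradients and tf.assign (a sketch only; the built-in optimizers additionally manage slot variables, learning-rate schedules, and so on):
# cost is assumed to be a scalar loss tensor defined elsewhere in the graph
grads = tf.gradients(cost, tf.trainable_variables())
lr = 0.01  # fixed learning rate for the sketch
# one assign op per variable: v <- v - lr * dJ/dv
updates = [tf.assign(v, v - lr * g)
           for v, g in zip(tf.trainable_variables(), grads)]
train_step = tf.group(*updates)  # running this op applies every update
# then, inside a session: sess.run(train_step, feed_dict={...})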

Why can't I restore this model?

I am currently having trouble restoring this model to make a prediction.
Code:
def neural_network(data):
    with tf.name_scope("network"):
        layer1 = tf.layers.dense(data, 1000, activation=tf.nn.relu, name="hidden_layer1")
        layer2 = tf.layers.dense(layer1, 1000, activation=tf.nn.relu, name="hidden_layer2")
        output = tf.layers.dense(layer2, 2, name="output_layer")
        return output

def evaluate():
    with tf.name_scope("loss"):
        global x
        xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=neural_network(x))
        loss = tf.reduce_mean(xentropy, name="loss")
    with tf.name_scope("train"):
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)
    with tf.name_scope("exec"):
        with tf.Session() as sess:
            for i in range(1, 10):
                sess.run(tf.global_variables_initializer())
                sess.run(training_op, feed_dict={x: np.array(train_data).reshape([-1, 1]), y: label})
                print "Training " + str(i)
            saver = tf.train.Saver()
            saver.save(sess, "saved_models/testing")
            print "Model Saved."

def predict():
    with tf.name_scope("predict"):
        output = neural_network(x)
        output = tf.nn.softmax(output)
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph("saved_models/testing.meta")
            # saver = tf.train.Saver()
            saver.restore(sess, "saved_models/testing")
            print sess.run(output, feed_dict={x: np.array([12003]).reshape([-1, 1])})
I have tried using tf.train.Saver() to restore, but it gives the same error.
The error given is ValueError: Variable hidden_layer1/kernel already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
I have tried setting reuse=True for tf.layers.dense(), but that leaves me unable to train the graph (it gives the same ValueError as above, but asking to set reuse=None).
I am guessing it has to do with the graph still existing in the session, so when I try to restore it, it detects a duplicate graph. However, I thought this should not happen, as the session has already closed.
link to entire code: gistlink
I think you are loading the variables into the same graph. For testing, try to create a new graph and load into it. Do something like this:
loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load the graph with the trained states
    loader = tf.train.import_meta_graph("saved_models/testing.meta")
    loader.restore(sess, "saved_models/testing")

Tensorflow Grid3LSTMCell visualization

I'm having a difficult time visualizing what this TensorFlow class creates. I want to implement an LSTM RNN that handles 3D data.
class Grid3LSTMCell(GridRNNCell):
    """3D BasicLSTM cell
    This creates a 2D cell which receives input and gives output in the first dimension.
    The first dimension can optionally be non-recurrent if `non_recurrent_fn` is specified.
    The second and third dimensions are LSTM.
    """
    def __init__(self, num_units, tied=False, non_recurrent_fn=None,
                 use_peepholes=False, forget_bias=1.0):
        super(Grid3LSTMCell, self).__init__(num_units=num_units, num_dims=3,
            input_dims=0, output_dims=0, priority_dims=0, tied=tied,
            non_recurrent_dims=None if non_recurrent_fn is None else 0,
            cell_fn=lambda n, i: rnn_cell.LSTMCell(
                num_units=n, input_size=i, forget_bias=forget_bias,
                use_peepholes=use_peepholes),
            non_recurrent_fn=non_recurrent_fn)
The class is found in `from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell`.
This is difficult to explain, so I've provided a drawing. Here is what I want it to do...
However, the comment sounds like it isn't doing this. The comment makes it sound like the RNN is still a flat RNN, where the first dimension outputs to what is commonly called the outputs variable (see below), the second dimension outputs to the next step in the RNN, and the third dimension outputs to the next hidden layer.
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
If this is the case, what is the point in having the first and second dimensions? Aren't they essentially the same thing? The BasicLSTMCell sends its output to the next step as outputs; in other words, the two are one and the same.
Clarity?
For reference, here is my example code...
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
import numpy as np

# define parameters
learning_rate = 0.01
batch_size = 2
n_input_x = 10
n_input_y = 10
n_input_z = 10
n_hidden = 128
n_classes = 2
n_output = n_input_x * n_classes

x = tf.placeholder("float", [n_input_x, n_input_y, n_input_z])
y = tf.placeholder("float", [n_input_x, n_input_y, n_input_z, n_classes])

weights = {}
biases = {}
for i in xrange(n_input_y * n_input_z):
    weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
    biases[i] = tf.Variable(tf.random_normal([n_output]))

# generate random data
input_data = np.random.rand(n_input_x, n_input_y, n_input_z)
ground_truth = np.random.rand(n_input_x, n_input_y, n_input_z, n_classes)

# build GridLSTM
def GridLSTM_network(x):
    x = tf.reshape(x, [-1, n_input_x])
    x = tf.split(0, n_input_y * n_input_z, x)
    lstm_cell = grid_rnn_cell.Grid3LSTMCell(n_hidden)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    output = []
    for i in xrange(n_input_y * n_input_z):
        output.append(tf.matmul(outputs[i], weights[i]) + biases[i])
    return output

# initialize network, cost, optimizer and all variables
pred = GridLSTM_network(x)
# import pdb
# pdb.set_trace()
pred = tf.pack(pred)
pred = tf.transpose(pred, [1, 0, 2])
pred = tf.reshape(pred, [-1, n_input_x, n_input_y, n_input_z, n_classes])
temp_pred = tf.reshape(pred, [-1, n_classes])
temp_y = tf.reshape(y, [-1, n_classes])

cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(0, tf.cast(tf.sub(tf.nn.sigmoid(temp_pred), temp_y), tf.int32))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 0
    while 1:
        print step
        step = step + 1
        # pdb.set_trace
        sess.run(optimizer, feed_dict={x: input_data, y: ground_truth})
