Tensorflow - How is the graph executed? - machine-learning

I am trying to get an output of an activation function as the weights change. When the weights change I expect the activation function to change as well.
I am simply changing the weights before the activation and I get a change in the value of the activation.
import tensorflow as tf
def sigmoid(x, derivative=False):
    """Logistic sigmoid of ``x``, or its derivative, built from TensorFlow ops.

    Args:
        x: Tensor-like input the function is applied to element-wise.
        derivative: If truthy, return sigmoid'(x) = s * (1 - s) instead of
            the sigmoid s itself.

    Returns:
        Tensor holding the sigmoid of ``x`` or its derivative.
    """
    # Build the sigmoid sub-graph once and reuse it for the derivative
    # instead of creating the same exp/divide ops several times.
    s = 1.0 / (1 + tf.exp(-x))
    if derivative:
        return s * (1.0 - s)
    return s
def dummy(x):
    """Bump ``weights['h0']`` by 0.1, then return sigmoid(h0 + x).

    The dict entry is rebound to the assign op before it is read again, so
    the returned activation is built on top of the assign op's output.
    """
    bumped = tf.add(weights['h0'], 0.1)
    weights['h0'] = tf.assign(weights['h0'], bumped)
    pre_activation = tf.add(weights['h0'], x)
    return sigmoid(pre_activation)
# build computational graph
# The weights must exist before dummy() is called, because dummy() reads
# weights['h0'] while it builds the graph.
weights = {
    'h0': tf.Variable(tf.random_normal([1]))
}
a = tf.placeholder('float', None)
d = dummy(a)
# initialize variables
init = tf.global_variables_initializer()
# create session and run the graph
with tf.Session() as sess:
    sess.run(init)
    for i in range(10):
        print(sess.run(d, feed_dict={a: [2]}))
# The with-block closes the session on exit, so no explicit sess.close()
# is needed.
But when I try to change the weights after the activation such as in backprop, I get the same activation every time. Can anyone explain to me what is happening and what I can do to get the activation to change after every iteration?
import tensorflow as tf
def sigmoid(x, derivative=False):
    """Logistic sigmoid of ``x``, or its derivative, built from TensorFlow ops.

    Args:
        x: Tensor-like input the function is applied to element-wise.
        derivative: If truthy, return sigmoid'(x) = s * (1 - s) instead of
            the sigmoid s itself.

    Returns:
        Tensor holding the sigmoid of ``x`` or its derivative.
    """
    # Build the sigmoid sub-graph once and reuse it for the derivative
    # instead of creating the same exp/divide ops several times.
    s = 1.0 / (1 + tf.exp(-x))
    if derivative:
        return s * (1.0 - s)
    return s
def dummy(x):
    """Return sigmoid(h0 + x); also rebind ``weights['h0']`` to an assign op.

    The returned tensor is built from the variable as it was *before* the
    rebinding, so it does not depend on the assign op created here.
    """
    activation = sigmoid(tf.add(weights['h0'], x))
    increment = tf.add(weights['h0'], 0.1)
    weights['h0'] = tf.assign(weights['h0'], increment)
    return activation
# build computational graph
# The weights must exist before dummy() is called, because dummy() reads
# weights['h0'] while it builds the graph.
weights = {
    'h0': tf.Variable(tf.random_normal([1]))
}
a = tf.placeholder('float', None)
d = dummy(a)
# initialize variables
init = tf.global_variables_initializer()
# create session and run the graph
with tf.Session() as sess:
    sess.run(init)
    for i in range(10):
        print(sess.run(d, feed_dict={a: [2]}))
# The with-block closes the session on exit, so no explicit sess.close()
# is needed.
EDIT:
It seems like it is not running the entire graph? I can do this:
with tf.Session() as sess:
sess.run(init)
for i in range(10):
sess.run(weights['h0'])
print (sess.run(d, feed_dict={a: [2]}))
Where I run the weights and it gives me different values. Is that correct?

This line isn't doing what you think it's doing:
print (sess.run(d, feed_dict={a: [2]}))
You need to call sess.run() and pass in a training operation, which is usually an optimizer's minimize() function.
Below are some example usages.
From the super-simple Tensorflow MNIST example:
# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
...
for _ in range(1000):
...
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
From a TensorFlow multi-layer NN example:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\
logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
...
for i in range(total_batch):
...
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
The general pattern is:
Define cost function J.
Add the cost variable J to an optimizer
Call sess.run() with the optimizer variable as an argument.
If you want to write your own optimizer, then you'll need to take a different approach. Writing your own cost function is typical, but writing your own optimizer is not. You should look at the code for AdamOptimizer or GradientDescentOptimizer for insight.

Related

Why does the location of saver in the script matter when there is a graph object in TensorFlow?

I was training some models and I noticed that when I explicitly defined a graph variable, then it mattered where my saver object was created. First my code looked like this:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("tmp_MNIST_data/", one_hot=True)
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.truncated_normal([784, 10], mean=0.0, stddev=0.1),name='w')
b = tf.Variable(tf.constant(0.1, shape=[10]),name='b')
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) # list of booleans indicating correct predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
train_step = tf.train.GradientDescentOptimizer(0.2).minimize(cross_entropy)
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(1001):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(fetches=train_step, feed_dict={x: batch_xs, y_: batch_ys})
if i % 100 == 0:
saver.save(sess=sess,save_path='./tmp/mdl_ckpt')
print(sess.run(fetches=accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
Then I decided to change it to something like the code below, and it turned out to be very sensitive to where I defined my variables and where I defined the saver. For example, if they were not defined right after the graph object was created, I got errors. Similarly, I noticed that the saver had to be defined right after one single variable (being after the definition of the graph was not enough) for all the variables to be captured together by the saver. This didn't make sense to me: it would make more sense to require that it come after the definition of all the variables rather than after a single one.
This is how the code looks now (with comments showing the locations of where I've defined saver):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("tmp_MNIST_data/", one_hot=True)
graph = tf.Graph()
with tf.Session(graph=graph) as sess:
#saver = tf.train.Saver()
x = tf.placeholder(tf.float32, [None, 784])
saver = tf.train.Saver()
y_ = tf.placeholder(tf.float32, [None, 10])
#saver = tf.train.Saver()
W = tf.Variable(tf.truncated_normal([784, 10], mean=0.0, stddev=0.1),name='w')
#saver = tf.train.Saver()
b = tf.Variable(tf.constant(0.1, shape=[10]),name='b')
y = tf.nn.softmax(tf.matmul(x, W) + b)
#saver = tf.train.Saver()
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) # list of booleans indicating correct predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
#saver = tf.train.Saver()
step = tf.Variable(0, name='step')
#saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
#saver = tf.train.Saver()
for i in range(1001):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(fetches=train_step, feed_dict={x: batch_xs, y_: batch_ys})
if i % 100 == 0:
step_assign = step.assign(i)
sess.run(step_assign)
saver.save(sess=sess,save_path='./tmp/mdl_ckpt')
print(step.eval())
print( [ op.name for op in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)] )
print(sess.run(fetches=accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
The code above should work, but I am having a hard time understanding why it behaves like this, or why it makes sense that this would happen. Does anyone know what the right thing to do is?
I'm not entirely sure what's going on here, but I suspect the issue is related to variables going into the wrong graph, or the session having an outdated version of the graph. You create a graph, but don't set it as the default, then create a session with that graph... but when you create variables, you don't specify which graph they should go into. Maybe the creation of the session sets the specified graph as the default, but that's not the way TensorFlow was designed to be used, so I wouldn't be surprised if it hasn't been thoroughly tested in this regime.
While I don't have an explanation for what's going on, I can suggest a simple solution: separate graph construction from session running.
graph = tf.Graph()
with graph.as_default():
build_graph()
saver = tf.train.Saver()
with tf.Session(graph=graph) as sess:
do_stuff_with(sess)
saver.save(sess, path)

Sparse autoencoders in tensorflow

Is there an example of sparse autoencoders in tensorflow? I was able to run and understand the normal one from here https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/autoencoder.py
For sparse, do I just need to modify the cost function?
from __future__ import division, print_function, absolute_import
import scipy.fftpack
import pdb, random
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from read_audio import read_audio
start,end= 3050,5723
#start=end=None
audio_data=read_audio("LDC93S1",start,end)
def overlapping_chunks(l, sub_array_size, overlap_size):
    """Split ``l`` into equal-length windows that overlap their neighbours.

    Args:
        l: Sliceable sequence (list, NumPy array, ...) to split.
        sub_array_size: Length of every returned window.
        overlap_size: Number of elements shared by consecutive windows;
            must be smaller than ``sub_array_size``.

    Returns:
        A list of slices of ``l``, each exactly ``sub_array_size`` long.
        A trailing remainder too short to fill a window is dropped, and an
        input shorter than one window yields an empty list.

    Raises:
        ValueError: If the window would not advance
            (``overlap_size >= sub_array_size``).
    """
    # The stride is what is left of the window after the shared part; using
    # overlap_size itself as the stride is only right when the overlap is
    # exactly half of the window.
    step = sub_array_size - overlap_size
    if step <= 0:
        raise ValueError("overlap_size must be smaller than sub_array_size")
    # Stop at the last start index that still leaves a full window, so every
    # chunk has the same length and the chunks can be stacked into a batch.
    last_start = len(l) - sub_array_size
    return [l[i:i + sub_array_size] for i in range(0, last_start + 1, step)]
def conv_frq_domain(signal, top_k=100):
    """Return the largest FFT magnitudes of ``signal`` in descending order.

    Args:
        signal: Sequence of samples (assumed 1-D).
        top_k: Maximum number of magnitudes to keep. Defaults to 100, the
            value that used to be hard-coded.

    Returns:
        NumPy array with the ``top_k`` largest values of ``|FFT(signal)|``
        sorted from largest to smallest; shorter when the signal has fewer
        than ``top_k`` samples.
    """
    # ``scipy.fft`` is a sub-module in current SciPy releases and cannot be
    # called; ``scipy.fftpack.fft`` is the function, and scipy.fftpack is
    # what this script imports.
    magnitudes = np.abs(scipy.fftpack.fft(signal))
    return np.sort(magnitudes)[::-1][:top_k]
sample_len=100
samples=overlapping_chunks(audio_data,sample_len,50)
freq_samples=[]
for sample in samples:
freq_samples.append(conv_frq_domain(sample))
examples=samples
print("Number of samples", str(len(examples)))
#pdb.set_trace()
# Parameters
learning_rate = 0.001
training_epochs = 2000
batch_size = 2
display_step = 100
# Network Parameters
n_hidden_1 = 1000 # 1st layer num features
n_hidden_2 = 650 # 2nd layer num features
n_input = sample_len
# tf Graph input (only pictures)
X = tf.placeholder("float", [None, n_input])
weights = {
'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'decoder_h1': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
'decoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
}
biases = {
'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
'decoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
'decoder_b2': tf.Variable(tf.random_normal([n_input])),
}
# Building the encoder
def encoder(x):
    """Run ``x`` through the two sigmoid encoder layers.

    Returns:
        Tuple ``(layer_1, layer_2)`` with the activations of both layers.
    """
    # First encoder layer: sigmoid(x . encoder_h1 + encoder_b1)
    first_in = tf.add(tf.matmul(x, weights['encoder_h1']),
                      biases['encoder_b1'])
    first_out = tf.nn.sigmoid(first_in)
    # Second encoder layer: sigmoid(layer_1 . encoder_h2 + encoder_b2)
    second_in = tf.add(tf.matmul(first_out, weights['encoder_h2']),
                       biases['encoder_b2'])
    second_out = tf.nn.sigmoid(second_in)
    return first_out, second_out
# Building the decoder
def decoder(x):
    """Reconstruct the input from the code ``x`` with two decoder layers.

    The first layer uses a sigmoid; the output layer is left linear.
    """
    # First decoder layer: sigmoid(x . decoder_h1 + decoder_b1)
    hidden_in = tf.add(tf.matmul(x, weights['decoder_h1']),
                       biases['decoder_b1'])
    hidden_out = tf.nn.sigmoid(hidden_in)
    # Output layer: hidden . decoder_h2 + decoder_b2 (no activation)
    reconstruction = tf.add(tf.matmul(hidden_out, weights['decoder_h2']),
                            biases['decoder_b2'])
    return reconstruction
def kl_divergence(p_1, p_hat):
    """Element-wise KL divergence between Bernoulli(p_1) and Bernoulli(p_hat).

    Computes ``p_1*log(p_1) + (1-p_1)*log(1-p_1)
    - p_1*log(p_hat) - (1-p_1)*log(1-p_hat)`` for every element.

    Args:
        p_1: Tensor of target probabilities.
        p_hat: Tensor of observed probabilities, broadcastable against
            ``p_1``.

    Returns:
        Tensor of per-element divergences (not reduced). Values of exactly
        0 or 1 in either argument make the logarithms non-finite.
    """
    # Plain arithmetic operators broadcast the constant 1 and exist in every
    # TensorFlow release, whereas tf.sub was renamed to tf.subtract in
    # TF 1.0 and is no longer available afterwards.
    one_minus_p = 1.0 - p_1
    one_minus_p_hat = 1.0 - p_hat
    entropy_terms = p_1 * tf.log(p_1) + one_minus_p * tf.log(one_minus_p)
    cross_terms = p_1 * tf.log(p_hat) + one_minus_p * tf.log(one_minus_p_hat)
    return entropy_terms - cross_terms
def sparsity_penalty(hidden_layer_acts, sparsity_level=0.05, sparse_reg=1e-3, batch_size=-1):
    """KL-divergence sparsity penalty for a layer of hidden activations.

    Args:
        hidden_layer_acts: 2-D tensor of activations. Axis 1 is treated as
            the hidden-unit axis (its static size sets the length of the
            target vector), so axis 0 is the example axis.
        sparsity_level: Target average activation of every hidden unit.
        sparse_reg: Weight applied to the summed divergence.
        batch_size: Unused; kept so existing callers keep working.

    Returns:
        Scalar tensor ``sparse_reg * sum_j KL(sparsity_level || avg_act_j)``.
    """
    n_hidden = hidden_layer_acts.get_shape()[1]
    target = tf.ones(n_hidden) * sparsity_level
    # Average each unit's activation over the examples (axis 0). Reducing
    # over axis 1 would give one value per example, which does not line up
    # with the per-unit target vector above.
    avg_act = tf.reduce_mean(hidden_layer_acts, 0)
    kl_div = kl_divergence(target, avg_act)
    return sparse_reg * tf.reduce_sum(kl_div, 0)
# Construct model
encoder_op1, encoder_op2 = encoder(X)
decoder_op = decoder(encoder_op2)
# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = X
# Define loss and optimizer, minimize the squared error
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2)+sparsity_penalty(encoder_op2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
train_data = examples
with tf.Session() as sess:
    sess.run(init)
    total_batch = len(train_data) // batch_size
    # Training cycle
    for epoch in range(training_epochs):
        #random.shuffle(train_data)
        # Loop over all batches
        for i in range(total_batch):
            # Walk the data in consecutive, non-overlapping windows of
            # batch_size examples; slicing [i:i+2] only ever touched the
            # first total_batch + 1 examples and ignored batch_size.
            start = i * batch_size
            batch_xs = train_data[start:start + batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
        # NOTE: the former `epoch == 2500` debugging branch was removed; it
        # could never fire because training_epochs is 2000.
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1),"cost=", "{:.9f}".format(c))
    print("Optimization Finished!")

How does one do Inference with Batch Normalization with Tensor Flow?

I was reading the original paper on BN and the Stack Overflow question "How could I use Batch Normalization in TensorFlow?", which provides a very useful piece of code for inserting a batch normalization block into a neural network but does not provide enough guidance on how to actually use it during training, during inference, and when evaluating models.
For example, I would like to track the train error and the test error during training to make sure I don't overfit. It's clear that the batch normalization block should be off during test, but when evaluating the error on the training set, should the batch normalization block be turned off too? My main questions are:
During inference and error evaluation, should the batch normalization block be turned off regardless of the data set?
Does that mean that the batch normalization block should only be on during the training step then?
To make it very clear, I will provide an extract (of simplified) code I have been using to run batch normalization with Tensor flow according to what is my understanding of what is the right thing to do:
## TRAIN
if phase_train is not None:
#DO BN
feed_dict_train = {x:X_train, y_:Y_train, phase_train: False}
feed_dict_cv = {x:X_cv, y_:Y_cv, phase_train: False}
feed_dict_test = {x:X_test, y_:Y_test, phase_train: False}
else:
#Don't do BN
feed_dict_train = {x:X_train, y_:Y_train}
feed_dict_cv = {x:X_cv, y_:Y_cv}
feed_dict_test = {x:X_test, y_:Y_test}
def get_batch_feed(X, Y, M, phase_train):
    """Build the feed dict for one randomly sampled training mini-batch.

    Args:
        X: Array of inputs, indexed by example along axis 0.
        Y: Array of targets, with one row per row of ``X``.
        M: Number of examples to draw (sampled with replacement).
        phase_train: Batch-normalization phase placeholder, or None when
            the model does not use batch normalization.

    Returns:
        Dict feeding the sampled rows to the ``x`` and ``y_`` placeholders,
        plus ``phase_train: True`` when ``phase_train`` is given.
    """
    # Draw the indices from the whole data set. Sampling from range(M)
    # would only ever select the first M examples.
    mini_batch_indices = np.random.randint(X.shape[0], size=M)
    feed_dict = {
        x: X[mini_batch_indices, :],   # ( M x D^(0) )
        y_: Y[mini_batch_indices, :],  # ( M x D^(L) )
    }
    if phase_train is not None:
        # Batch normalization is in use: run this step in training mode.
        feed_dict[phase_train] = True
    return feed_dict
with tf.Session() as sess:
sess.run( tf.initialize_all_variables() )
for iter_step in xrange(steps):
feed_dict_batch = get_batch_feed(X_train, Y_train, M, phase_train)
# Collect model statistics
if iter_step%report_error_freq == 0:
train_error = sess.run(fetches=l2_loss, feed_dict=feed_dict_train)
cv_error = sess.run(fetches=l2_loss, feed_dict=feed_dict_cv)
test_error = sess.run(fetches=l2_loss, feed_dict=feed_dict_test)
do_stuff_with_errors(train_error, cv_error, test_error)
# Run Train Step
sess.run(fetches=train_step, feed_dict=feed_dict_batch)
and the code I am using to produce batch normalization blocks is:
def standard_batch_norm(l, x, n_out, phase_train, scope='BN', decay=0.5, epsilon=1e-3):
    """Batch normalization on feedforward maps.

    Args:
        l: string suffix appended to ``scope`` and to the ``beta``/``gamma``
            variable names, so every layer gets its own variables
        x: tensor to normalize; the moments are taken over axis 0
        n_out: integer, length of the ``beta`` and ``gamma`` vectors
        phase_train: boolean tensor; true selects the batch statistics (and
            updates their moving averages), false selects the moving
            averages
        scope: string, variable scope prefix
        decay: decay of the exponential moving average of the batch
            statistics (default 0.5, the value previously hard-coded)
        epsilon: variance epsilon passed to ``tf.nn.batch_normalization``
            (default 1e-3, the value previously hard-coded)
    Return:
        normed: batch-normalized maps
    """
    with tf.variable_scope(scope+l):
        # Learnable shift (beta) and scale (gamma), stored as float64.
        init_beta = tf.constant(0.0, shape=[n_out], dtype=tf.float64)
        init_gamma = tf.constant(1.0, shape=[n_out],dtype=tf.float64)
        beta = tf.get_variable(name='beta'+l, dtype=tf.float64, initializer=init_beta, regularizer=None, trainable=True)
        gamma = tf.get_variable(name='gamma'+l, dtype=tf.float64, initializer=init_gamma, regularizer=None, trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def mean_var_with_update():
            # Training branch: update the moving averages first, then hand
            # back the statistics of the current batch.
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train, mean_var_with_update, lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon)
    return normed
I found that there is an 'official' batch_norm layer in TensorFlow. Try it out:
https://github.com/tensorflow/tensorflow/blob/b826b79718e3e93148c3545e7aa3f90891744cc0/tensorflow/contrib/layers/python/layers/layers.py#L100
Most likely it is not mentioned in the docs because it is included only in some RC or 'beta' version.
I haven't looked deeply into this yet, but as far as I can see from the documentation, you just use the boolean parameter is_training of this batch_norm layer and set it to true only for the training phase. Try it out.
UPDATE: Below is the code to load the data, build a network with one hidden ReLU layer and L2 regularization, and introduce batch normalization for both the hidden and the output layer. This runs fine and trains fine.
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
pickle_file = '/home/maxkhk/Documents/Udacity/DeepLearningCourse/SourceCode/tensorflow/examples/udacity/notMNIST.pickle'
with open(pickle_file, 'rb') as f:
save = pickle.load(f)
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10
def reformat(dataset, labels):
    """Flatten the images and one-hot encode the labels, both as float32.

    Returns:
        Tuple ``(dataset, labels)``: the images reshaped to rows of
        ``image_size * image_size`` values, and one row of ``num_labels``
        indicator values per label.
    """
    flat_images = dataset.reshape((-1, image_size * image_size))
    flat_images = flat_images.astype(np.float32)
    # Comparing every label against 0..num_labels-1 yields a one-hot row:
    # 0 -> [1.0, 0.0, 0.0 ...], 1 -> [0.0, 1.0, 0.0 ...]
    class_ids = np.arange(num_labels)
    one_hot = (class_ids == labels[:, None]).astype(np.float32)
    return flat_images, one_hot
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
def accuracy(predictions, labels):
    """Percentage of rows whose arg-max prediction equals the arg-max label."""
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    n_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * n_correct / predictions.shape[0]
#for NeuralNetwork model code is below
#We will use SGD for training to save our time. Code is from Assignment 2
#beta is the new parameter - controls level of regularization.
#Feel free to play with it - the best one I found is 0.001
#notice, we introduce L2 for both biases and weights of all layers
batch_size = 128
beta = 0.001
#building tensorflow graph
graph = tf.Graph()
with graph.as_default():
# Input data. For the training data, we use a placeholder that will be fed
# at run time with a training minibatch.
tf_train_dataset = tf.placeholder(tf.float32,
shape=(batch_size, image_size * image_size))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
tf_valid_dataset = tf.constant(valid_dataset)
tf_test_dataset = tf.constant(test_dataset)
#introduce batchnorm
tf_train_dataset_bn = tf.contrib.layers.batch_norm(tf_train_dataset)
#now let's build our new hidden layer
#that's how many hidden neurons we want
num_hidden_neurons = 1024
#its weights
hidden_weights = tf.Variable(
tf.truncated_normal([image_size * image_size, num_hidden_neurons]))
hidden_biases = tf.Variable(tf.zeros([num_hidden_neurons]))
#now the layer itself. It multiplies data by weights, adds biases
#and takes ReLU over result
hidden_layer = tf.nn.relu(tf.matmul(tf_train_dataset_bn, hidden_weights) + hidden_biases)
#adding the batch normalization layerhi()
hidden_layer_bn = tf.contrib.layers.batch_norm(hidden_layer)
#time to go for output linear layer
#out weights connect hidden neurons to output labels
#biases are added to output labels
out_weights = tf.Variable(
tf.truncated_normal([num_hidden_neurons, num_labels]))
out_biases = tf.Variable(tf.zeros([num_labels]))
#compute output
out_layer = tf.matmul(hidden_layer_bn,out_weights) + out_biases
#our real output is a softmax of prior result
#and we also compute its cross-entropy to get our loss
#Notice - we introduce our L2 here
loss = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
out_layer, tf_train_labels) +
beta*tf.nn.l2_loss(hidden_weights) +
beta*tf.nn.l2_loss(hidden_biases) +
beta*tf.nn.l2_loss(out_weights) +
beta*tf.nn.l2_loss(out_biases)))
#now we just minimize this loss to actually train the network
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
#nice, now let's calculate the predictions on each dataset for evaluating the
#performance so far
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(out_layer)
valid_relu = tf.nn.relu( tf.matmul(tf_valid_dataset, hidden_weights) + hidden_biases)
valid_prediction = tf.nn.softmax( tf.matmul(valid_relu, out_weights) + out_biases)
test_relu = tf.nn.relu( tf.matmul( tf_test_dataset, hidden_weights) + hidden_biases)
test_prediction = tf.nn.softmax(tf.matmul(test_relu, out_weights) + out_biases)
#now is the actual training on the ANN we built
#we will run it for some number of steps and evaluate the progress after
#every 500 steps
#number of steps we will train our ANN
num_steps = 3001
#actual training
with tf.Session(graph=graph) as session:
tf.initialize_all_variables().run()
print("Initialized")
for step in range(num_steps):
# Pick an offset within the training data, which has been randomized.
# Note: we could use better randomization across epochs.
offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
# Generate a minibatch.
batch_data = train_dataset[offset:(offset + batch_size), :]
batch_labels = train_labels[offset:(offset + batch_size), :]
# Prepare a dictionary telling the session where to feed the minibatch.
# The key of the dictionary is the placeholder node of the graph to be fed,
# and the value is the numpy array to feed to it.
feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
_, l, predictions = session.run(
[optimizer, loss, train_prediction], feed_dict=feed_dict)
if (step % 500 == 0):
print("Minibatch loss at step %d: %f" % (step, l))
print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
print("Validation accuracy: %.1f%%" % accuracy(
valid_prediction.eval(), valid_labels))
print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

How can I get the value of the error during training in Tensorflow?

In the TensorFlow MNIST beginners tutorial, code excerpts here:
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess = tf.Session()
sess.run(init)
#-----training loop starts here-----
for i in range(1000):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
Is it possible to access/retrieve the values of the cross_entropy error, Weights, and biases while inside the loop? I want to plot the error, and possibly a histogram of the weights.
Thanks!
As others have said, TensorBoard is the tool for that purpose.
Here is how to use it.
First, let's define a function for logging min, max, mean and std-dev for the tensor.
def variable_summaries(var, name):
    """Attach mean, stddev, max, min and histogram summaries to a tensor.

    Args:
        var: Tensor to summarize.
        name: Label appended to every summary tag (``'mean/' + name`` etc.)
            and used as the histogram tag.
    """
    with tf.name_scope("summaries"):
        mean = tf.reduce_mean(var)
        tf.scalar_summary('mean/' + name, mean)
        with tf.name_scope('stddev'):
            # Standard deviation is the root of the MEAN squared deviation;
            # summing instead would scale the value with the number of
            # elements in var.
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.scalar_summary('stddev/' + name, stddev)
        tf.scalar_summary('max/' + name, tf.reduce_max(var))
        tf.scalar_summary('min/' + name, tf.reduce_min(var))
        tf.histogram_summary(name, var)
Then, create a summarize operation after you build a graph like below.
This code saves weight and bias of first layer with cross-entropy in "mnist_tf_log" directory.
variable_summaries(W_fc1, "W_fc1")
variable_summaries(b_fc1, "b_fc1")
tf.scalar_summary("cross_entropy:", cross_entropy)
summary_op = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter("mnist_tf_log", graph_def=sess.graph)
Now you're all set.
You can log the data by fetching summary_op in sess.run() and passing the result to summary_writer.
Here is an example for logging every 10 training steps.
for i in range(1000):
batch_xs, batch_ys = mnist.train.next_batch(100)
if i % 10 == 0:
_, summary_str = sess.run( [train_step, summary_op], feed_dict={x: batch_xs, y_: batch_ys})
summary_writer.add_summary(summary_str, i)
summary_writer.flush()
else:
sess.run( train_step, feed_dict={x: batch_xs, y_: batch_ys})
Execute TensorBoard after you run the code.
python /path/to/tensorboard/tensorboard.py --logdir=mnist_tf_log
Then you can see the result by opening http://localhost:6006 with your web browser.
Tensorboard is made exactly for that:
https://www.tensorflow.org/versions/r0.7/how_tos/summaries_and_tensorboard/index.html

Dynamically changing weights in TensorFlow

In TensorFlow, I'm trying to change weights during training, but get no change in the results. I've tried to disrupt the weights (set to zero), but it seems to do nothing (other than take longer to complete). What am I missing? Is there a way to manipulate W like a regular matrix/tensor during session?
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
import tensorflow as tf
sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
W = tf.Variable(tf.zeros([784,10]), trainable=True)
W2 = tf.Variable(tf.zeros([784,10]), trainable=False)
b = tf.Variable(tf.zeros([10]))
sess.run(tf.initialize_all_variables())
y = tf.nn.softmax(tf.matmul(x,W) + b)
loss = tf.reduce_mean(tf.square(y_ - y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
for i in range(1000):
#try to change W during training
W = W2
W = tf.Variable(tf.zeros([784,10]))
W.assign(tf.Variable(tf.zeros([784,10])))
batch = mnist.train.next_batch(1)
train_step.run(feed_dict={x: batch[0], y_: batch[1]})
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
Accuracy remains the same (0.82).
I am not sure it's a good idea, but if you want to update W after W.assign, you need to evaluate it.
sess.run(W)
In addition, Since TensorFlow and most Neural Nets use forward/backpropagation to compute values/gradients to update weights, initializing weights with 0 kills all forward values and thus gradients. It's not a good idea.
You can try to initialize them with small random numbers:
tf.Variable(tf.random_normal([784, 10], stddev=0.01))
Or use the xavier initializer
W = tf.get_variable("W", shape=[784, 10],
initializer=tf.contrib.layers.xavier_initializer())
When you use tf.assign(), you need to give a name for this operation:
W= W.assign(tf.Variable(tf.zeros([784,10])))
Then when you use W again, the assign operation will be executed.

Resources