Occupies all GPU memory with the following tensorflow code? - memory

I was experimenting with word2vec with the code from https://github.com/chiphuyen/stanford-tensorflow-tutorials/blob/master/examples/04_word2vec_no_frills.py
However, it easily uses up all my GPU memories, any idea why?
with tf.name_scope('data'):
center_words = tf.placeholder(tf.int32, shape=[BATCH_SIZE], name='center_words')
target_words = tf.placeholder(tf.int32, shape=[BATCH_SIZE, 1], name='target_words')
with tf.name_scope("embedding_matrix"):
embed_matrix = tf.Variable(tf.random_uniform([VOCAB_SIZE, EMBED_SIZE], -1.0, 1.0), name="embed_matrix")
with tf.name_scope("loss"):
embed = tf.nn.embedding_lookup(embed_matrix, center_words, name="embed")
nce_weight = tf.Variable(tf.truncated_normal([VOCAB_SIZE, EMBED_SIZE], stddev=1.0/(EMBED_SIZE ** 0.5)), name="nce_weight")
nce_bias = tf.Variable(tf.zeros([VOCAB_SIZE]), name="nce_bias")
loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight, biases=nce_bias, labels=target_words, inputs=embed, num_sampled=NUM_SAMPLED, num_classes=VOCAB_SIZE), name="loss")
optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
total_loss = 0.0 # we use this to calculate the average loss in the last SKIP_STEP steps
writer = tf.summary.FileWriter('./graphs/no_frills/', sess.graph)
for index in range(NUM_TRAIN_STEPS):
centers, targets = next(batch_gen)
loss_batch, _ = sess.run([loss, optimizer], feed_dict={center_words:centers, target_words:targets})
total_loss += loss_batch
if (index + 1) % SKIP_STEP == 0:
print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP))
total_loss = 0.0
writer.close()

This is default tensorflow behaviour. If you want to limit GPU memory allocation to only what is needed, specify this in the session config.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
Alternatively you can specify a maximum fraction of GPU memory to use:
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
session = tf.Session(config=config)
It's not very well documented, but the message definitions are the place to start if you want to do more than that.

Related

I have a trained tensorflow model, how do i make predictions with the same?

I have trained my models by calling the 'train_neural_network' function which trains the model and I store the model, the accuracy comes to around 83%, the problem I'm facing is how do I make predictions using my saved model? Which variable to restore and how to pass the input(in batches or whole at once)?
def make_model(data,train_x):
n_nodes_hl1 = 2000
n_nodes_hl2 = 2000
n_nodes_hl3 = 2000
n_classes = 2 # No of classification
hidden_1_layer = {'weights': tf.Variable(tf.truncated_normal([len(train_x[0]), n_nodes_hl1], stddev=0.1),name= 'weights'),
'biases': tf.Variable(tf.constant(0.1, shape=[n_nodes_hl1]),name = 'biases')}
hidden_2_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_hl1, n_nodes_hl2], stddev=0.1),name= 'weights'),
'biases': tf.Variable(tf.constant(0.1, shape=[n_nodes_hl2]),name = 'biases')}
hidden_3_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_hl2, n_nodes_hl3], stddev=0.1,),name= 'weights'),
'biases': tf.Variable(tf.constant(0.1, shape=[n_nodes_hl3]),name = 'biases')}
output_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_hl3, n_classes], stddev=0.1),name= 'weights'),
'biases': tf.Variable(tf.constant(0.1, shape=[n_classes]),name = 'biases'), }
layer_1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
# now goes through an activation function - sigmoid function
layer_1 = tf.nn.relu(layer_1)
print ("Layer 1 done!!")
# input for layer 2 = result of activ_func for layer 1
layer_2 = tf.add(tf.matmul(layer_1, hidden_2_layer['weights']), hidden_2_layer['biases'])
layer_2 = tf.nn.relu(layer_2)
print ("Layer 2 done!!")
layer_3 = tf.add(tf.matmul(layer_2, hidden_3_layer['weights']), hidden_3_layer['biases'])
layer_3 = tf.nn.relu(layer_3)
print ("Layer 3 done!!")
output = tf.matmul(layer_3, output_layer['weights'],name = "output") + output_layer['biases']
return output
def train_neural_network(train_x,train_y,test_x,test_y):
tf.reset_default_graph()
with tf.name_scope('input'):
x = tf.placeholder('float', [None, len(train_x[0])],name= 'x_input')
y = tf.placeholder('float',name = 'y-input')
# Merge all the summaries and write them out to /tmp/mnist_logs (by default)
prediction = make_model(x,train_x)
print ('model ready!!')
with tf.name_scope('pred'):
pred = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)
with tf.name_scope('cost'):
cost = tf.reduce_mean(pred)
with tf.name_scope('train'):
optimizer = tf.train.AdamOptimizer().minimize(cost,name = 'optimizer')
tf.summary.scalar("cost", cost)
n_epochs = 10
batch_size = 100
with tf.Session() as sess:
sess.run(tf.global_variables_initializer()) # initializes our variables. Session has now begun.
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('train/2/',
sess.graph)
test_writer = tf.summary.FileWriter('test/')
for epoch in range(n_epochs):
epoch_loss = 0 # we'll calculate the loss as we go
i = 0
while i < len(train_x):
#we want to take batches(chunks); take a slice, then another size)
start = i
end = i+batch_size
batch_x = np.array(train_x[start:end])
batch_y = np.array(train_y[start:end])
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
if i%200 == 0:
train_writer.add_summary(_, i)
epoch_loss += c
i+=batch_size
print('Epoch', epoch, 'completed out of', n_epochs, 'loss:', epoch_loss)
with tf.name_scope('accuracy'):
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
tf.summary.scalar("accuracy", accuracy)
print('Accuracy:', accuracy.eval({x: test_x, y: test_y}))
saver = tf.train.Saver()
tf_log = 'tf.log'
saver.save(sess, "model3.ckpt")
return accuracy
This is how I am making predictions, but this fails everytime:
def test_neural_network(test_x):
batch_size = 100
i = 0
batch_x = np.array(test_x[i:i+batch_size])
tf.reset_default_graph()
x = tf.placeholder('float', [len(batch_x),len(test_x[0])])
y = tf.placeholder('float',[2])
prediction = make_model(x,batch_x)
# pred1 = tf.nn.softmax(logits=prediction)
# weight = tf.get_variable("weights_3", shape=[len(batch_x),2],initializer = tf.zeros_initializer)
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
saver.restore(sess, "model3.ckpt")
p = tf.argmax(prediction,1)
print (p.eval({x:batch_x))
gives and array of shape(batch_size,2),expected values [0,1] or [1,0] but getting decimal values.
You have a problem because you launch a session on your variable « weight ». But in your case you want to know the output of your network. Try to launch a session on your last layer 😉
How do I make predictions using my saved model? Which variable to
restore and how to pass the input (in batches or whole at once)?
Several comments regarding your design. You don't have to rebuild the graph at test time, because it's saved right next to the session checkpoint. Take a look at this question.
With this, your code will be simplified a lot, because you don't have to keep the placeholders and cross-entropy loss function separately. Add the name to the softmax layer like this:
with tf.name_scope('pred'):
pred = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y, name='softmax')
After you've restored the graph, you can find the target operation by:
graph = sess.graph
pred = graph.get_operation_by_name("pred/softmax")
If your test data is not big, you can freely feed all of it at once, but if it's significantly larger than your batch size, you can easily get out-of-memory. In this case, you should use mini-batches for testing as well.
As for your test accuracy, there can be plenty reasons for this, for instance, overfitting. Update the question with the full code, so that it could be reproduced.

Tensorflow NMT example outputs only the same word for inference

Trying to recreate the Tensorflow NMT example
https://github.com/tensorflow/nmt#bidirectional-rnns
with a baseline of just copying the original sentence instead of translating, but getting this weird bug for my inference part where the only outputs are the same words multiple times.
Using the TF attention wrapper and using Greedy Embedding Helper for inference.
Using TF 1.3 and python 3.6 if that helps
Screenshot of the bugged prediction
The weird thing is during training, the predictions are normal and the loss decreased to around 0.1
I have already checked the embeddings and they do change from each time step and I suspect that this has something to do with the decoding stage since it's the only part the really changes from training to inference.
tf.reset_default_graph()
sess = tf.InteractiveSession()
PAD = 0
EOS = 1
max_gradient_norm = 1
learning_rate = 0.02
num_layers = 1
total_epoch = 2
sentence_length = 19
vocab_size = 26236
input_embedding_size = 128
if mode == "training":
batch_size = 100
isReused = None
else:
batch_size = 1
isReused = True
with tf.name_scope("encoder"):
encoder_embeddings = tf.get_variable('encoder_embeddings', [vocab_size, input_embedding_size], tf.float32,
tf.random_uniform_initializer(-1.0, 1.0))
encoder_hidden_units = 128
encoder_inputs = tf.placeholder(shape=(batch_size, None), dtype=tf.int32, name='encoder_inputs')
encoder_lengths = tf.placeholder(shape=batch_size, dtype=tf.int32, name='encoder_lengths')
encoder_cell = tf.contrib.rnn.BasicLSTMCell(encoder_hidden_units, state_is_tuple=True)
encoder_inputs_embedded = tf.nn.embedding_lookup(encoder_embeddings, encoder_inputs)
encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell, encoder_inputs_embedded, dtype=tf.float32,
sequence_length=encoder_lengths, time_major=False)
with tf.variable_scope("decoder", isReused):
decoder_hidden_units = encoder_hidden_units
decoder_inputs = tf.placeholder(shape=(batch_size, None), dtype=tf.int32, name='decoder_inputs')
decoder_targets = tf.placeholder(shape=(batch_size, None), dtype=tf.int32, name='decoder_targets')
decoder_lengths = tf.placeholder(shape=batch_size, dtype=tf.int32, name="decoder_lengths")
decoder_embeddings = tf.get_variable('decoder_embeddings', [vocab_size, input_embedding_size], tf.float32,
tf.random_uniform_initializer(-1.0, 1.0))
decoder_inputs_embedded = tf.nn.embedding_lookup(decoder_embeddings, decoder_inputs)
decoder_cell = tf.contrib.rnn.BasicLSTMCell(decoder_hidden_units, state_is_tuple=True)
projection_layer = layers_core.Dense(vocab_size, use_bias=False)
attention_mechanism = tf.contrib.seq2seq.LuongAttention(encoder_hidden_units, encoder_outputs,
memory_sequence_length=encoder_lengths)
attn_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
attention_layer_size=encoder_hidden_units)
if mode == "training":
helper = tf.contrib.seq2seq.TrainingHelper(decoder_inputs_embedded, decoder_lengths, time_major=False)
maximum_iterations = None
else:
helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(decoder_embeddings, tf.fill([batch_size], EOS), EOS)
maximum_iterations = tf.round(tf.reduce_max(encoder_lengths) * 2)
# Decoder
init_state = attn_decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state)
decoder = tf.contrib.seq2seq.BasicDecoder(attn_decoder_cell, helper, init_state, output_layer=projection_layer)
# Dynamic decoding
decoder_outputs, decoder_final_state, _ = tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=False,
swap_memory=True,
maximum_iterations=maximum_iterations)
decoder_logits = decoder_outputs.rnn_output
decoder_prediction = decoder_outputs.sample_id
if mode == "training":
with tf.name_scope("cross_entropy"):
labels = tf.one_hot(decoder_targets, depth=vocab_size, dtype=tf.float32)
decoder_crossent = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=decoder_logits)
with tf.name_scope("loss"):
target_weights = tf.sequence_mask(decoder_lengths, maxlen=20, dtype=decoder_logits.dtype)
train_loss = tf.reduce_sum(decoder_crossent * target_weights) / (batch_size * 20)
tf.summary.scalar('loss', train_loss)
with tf.name_scope("clip_gradients"):
params = tf.trainable_variables()
gradients = tf.gradients(train_loss, params)
clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
with tf.name_scope("Optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate)
update_step = optimizer.apply_gradients(zip(clipped_gradients, params))
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(os.getcwd() + '/train', sess.graph)
test_writer = tf.summary.FileWriter(os.getcwd() + '/test', )
tf.global_variables_initializer().run()
sess.run(tf.global_variables_initializer())

Tensorflow RNN stuck at high cost

The following RNN model decreases the loss for the first one or two epochs and then fluctuates around the cost of 6. This seems like the model is so random and not learning at all. I varied the learning rate from 0.1 to 0.0001 and it didn't help. The data is fed with an input pipeline, which worked fine with other models, so the functions that extract the label and images are not presented here. I have looked at this for so many times but still couldn't find what's wrong with it. Here's the code:
n_steps = 224
n_inputs = 224
learning_rate = 0.00015
batch_size = 256 # n_neurons
epochs = 100
num_batch = int(len(trainnames)/batch_size)
keep_prob = tf.placeholder(tf.float32)
# TRAIN QUEUE
train_queue = tf.RandomShuffleQueue(len(trainnames)*1.5, 0, [tf.string, tf.float32], shapes=[[],[num_labels,]])
enqueue_train = train_queue.enqueue_many([trainnames, train_label])
train_image, train_image_label = train_queue.dequeue()
train_image = read_image_file(train_image)
train_batch, train_label_batch = tf.train.batch(
[train_image, train_image_label],
batch_size=batch_size,
num_threads=1,
capacity=10*batch_size,
enqueue_many=False,
shapes=[[224,224], [num_labels,]],
allow_smaller_final_batch=True
)
train_close = train_queue.close()
def RNN(inputs, reuse):
with tf.variable_scope('cells', reuse=reuse):
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=batch_size, reuse=reuse)
with tf.variable_scope('rnn'):
outputs, states = tf.nn.dynamic_rnn(basic_cell, inputs, dtype=tf.float32)
fc_drop = tf.nn.dropout(states, keep_prob)
logits = tf.contrib.layers.fully_connected(fc_drop, num_labels, activation_fn=None)
return logits
#Training
with tf.name_scope("cost_function") as scope:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=train_label_batch, logits=RNN(train_batch, reuse=None)))
train_step = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(cost)
cost_summary = tf.summary.scalar("cost_function", cost)
file_writer = tf.summary.FileWriter(logdir)
#Session
with tf.Session() as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord, start=True)
step = 0
for epoch in range(epochs):
sess.run(enqueue_train)
for batch in range(num_batch):
if step % 100 == 0:
summary_str = cost_summary.eval(feed_dict={keep_prob: 1.0})
file_writer.add_summary(summary_str, step)
else:
sess.run(train_step, feed_dict={keep_prob: 0.5})
step += 1
sess.run(train_close)
coord.request_stop()
coord.join(threads)
file_writer.close()

Batch Training Accuracy is always multiple of 10%

So I am training a CNN and compute the training accuracy for each batch. Most of the it gives out 100% batch training accuracy. which I though was okay because I'm testing my model against the data I trained it with. But at some iterations, I get a 90% or 90% batch training accuracy. And worst, sometimes it goes down to 0% real quick and bounces back to 100% batch training accuracy. And I used the algorithm in https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/04_Save_Restore.ipynb and they also computed the batch training accuracy but they don't get the same results I get. They started out with around 80% batch training accuracy and observed a gradual increase until 98%. Why is this?
I was suspecting that my network is overfitting.
Here is my exact code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
import pyfftw
from scipy import signal
import xlrd
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
import time
from datetime import timedelta
import math
import os
from sklearn.metrics import confusion_matrix
##matplotlib inline
plt.style.use('ggplot')
## define funtions
def read_data(file_path):
## column_names = ['user-id','activity','timestamp', 'x-axis', 'y-axis', 'z-axis']
column_names = ['activity','timestamp', 'Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz', 'Mx', 'My', 'Mz'] ## 3 sensors
data = pd.read_csv(file_path,header = None, names = column_names)
return data
def feature_normalize(dataset):
mu = np.mean(dataset,axis = 0)
sigma = np.std(dataset,axis = 0)
return (dataset - mu)/sigma
def plot_axis(ax, x, y, title):
ax.plot(x, y)
ax.set_title(title)
ax.xaxis.set_visible(False)
ax.set_ylim([min(y) - np.std(y), max(y) + np.std(y)])
ax.set_xlim([min(x), max(x)])
ax.grid(True)
def plot_activity(activity,data):
fig, (ax0, ax1, ax2) = plt.subplots(nrows = 3, figsize = (15, 10), sharex = True)
plot_axis(ax0, data['timestamp'], data['Ax'], 'x-axis')
plot_axis(ax1, data['timestamp'], data['Ay'], 'y-axis')
plot_axis(ax2, data['timestamp'], data['Az'], 'z-axis')
plt.subplots_adjust(hspace=0.2)
fig.suptitle(activity)
plt.subplots_adjust(top=0.90)
plt.show()
def windows(data, size):
start = 0
while start < data.count():
yield start, start + size
start += (size / 2)
def segment_signal(data, window_size = None, num_channels=None): # edited
segments = np.empty((0,window_size,num_channels)) #change from 3 to 9 channels for AGM fusion #use variable num_channels=9
labels = np.empty((0))
for (n_start, n_end) in windows(data['timestamp'], window_size):
## x = data["x-axis"][start:end]
## y = data["y-axis"][start:end]
## z = data["z-axis"][start:end]
n_start = int(n_start)
n_end = int(n_end)
Ax = data["Ax"][n_start:n_end]
Ay = data["Ay"][n_start:n_end]
Az = data["Az"][n_start:n_end]
Gx = data["Gx"][n_start:n_end]
Gy = data["Gy"][n_start:n_end]
Gz = data["Gz"][n_start:n_end]
Mx = data["Mx"][n_start:n_end]
My = data["My"][n_start:n_end]
Mz = data["Mz"][n_start:n_end]
if(len(dataset['timestamp'][n_start:n_end]) == window_size): # include only windows with size of 90
segments = np.vstack([segments,np.dstack([Ax,Ay,Az,Gx,Gy,Gz,Mx,My,Mz])])
labels = np.append(labels,stats.mode(data["activity"][n_start:n_end])[0][0])
return segments, labels
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev = 0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.0, shape = shape)
return tf.Variable(initial)
def depthwise_conv2d(x, W):
return tf.nn.depthwise_conv2d(x,W, [1, 1, 1, 1], padding='VALID')
def apply_depthwise_conv(x,weights,biases):
return tf.nn.relu(tf.add(depthwise_conv2d(x, weights),biases))
def apply_max_pool(x,kernel_size,stride_size):
return tf.nn.max_pool(x, ksize=[1, 1, kernel_size, 1],
strides=[1, 1, stride_size, 1], padding='VALID')
#------------------------get dataset----------------------#
## run shoaib_dataset.py to generate dataset_shoaib_total.txt
## get data from dataset_shoaib_total.txt
dataset = read_data('dataset_shoaib_total.txt')
#--------------------preprocessing------------------------#
dataset['Ax'] = feature_normalize(dataset['Ax'])
dataset['Ay'] = feature_normalize(dataset['Ay'])
dataset['Az'] = feature_normalize(dataset['Az'])
dataset['Gx'] = feature_normalize(dataset['Gx'])
dataset['Gy'] = feature_normalize(dataset['Gy'])
dataset['Gz'] = feature_normalize(dataset['Gz'])
dataset['Mx'] = feature_normalize(dataset['Mx'])
dataset['My'] = feature_normalize(dataset['My'])
dataset['Mz'] = feature_normalize(dataset['Mz'])
###--------------------plot activity data----------------#
##for activity in np.unique(dataset["activity"]):
## subset = dataset[dataset["activity"] == activity][:180]
## plot_activity(activity,subset)
#------------------fixed hyperparameters--------------------#
window_size = 200 #from 90 #FIXED at 4 seconds
#----------------input hyperparameters------------------#
input_height = 1
input_width = window_size
num_labels = 6
num_channels = 9 #from 3 channels #9 channels for AGM
#-------------------sliding time window----------------#
segments, labels = segment_signal(dataset, window_size=window_size, num_channels=num_channels)
labels = np.asarray(pd.get_dummies(labels), dtype = np.int8)
reshaped_segments = segments.reshape(len(segments), (window_size*num_channels)) #use variable num_channels instead of constant 3 channels
#------------divide data into test and training set-----------#
train_test_split = np.random.rand(len(reshaped_segments)) < 0.80
train_x_init = reshaped_segments[train_test_split]
train_y_init = labels[train_test_split]
test_x = reshaped_segments[~train_test_split]
test_y = labels[~train_test_split]
train_validation_split = np.random.rand(len(train_x_init)) < 0.80
train_x = train_x_init[train_validation_split]
train_y = train_y_init[train_validation_split]
validation_x = train_x_init[~train_validation_split]
validation_y = train_y_init[~train_validation_split]
#---------------training hyperparameters----------------#
batch_size = 10
kernel_size = 60 #from 60 #optimal 2
depth = 15 #from 60 #optimal 15
num_hidden = 1000 #from 1000 #optimal 80
learning_rate = 0.0001
training_epochs = 8
total_batches = train_x.shape[0] ##// batch_size
#---------define placeholders for input----------#
X = tf.placeholder(tf.float32, shape=[None,input_width * num_channels], name="input")
X_reshaped = tf.reshape(X,[-1,input_height,input_width,num_channels])
Y = tf.placeholder(tf.float32, shape=[None,num_labels])
#---------------------perform convolution-----------------#
# first convolutional layer
c_weights = weight_variable([1, kernel_size, num_channels, depth])
c_biases = bias_variable([depth * num_channels])
c = apply_depthwise_conv(X_reshaped,c_weights,c_biases)
p = apply_max_pool(c,20,2)
# second convolutional layer
c2_weights = weight_variable([1, 6,depth*num_channels,depth//10])
c2_biases = bias_variable([(depth*num_channels)*(depth//10)])
c = apply_depthwise_conv(p,c2_weights,c2_biases)
#--------------flatten data for fully connected layers----------#
shape = c.get_shape().as_list()
c_flat = tf.reshape(c, [-1, shape[1] * shape[2] * shape[3]])
#------------fully connected layers----------------#
f_weights_l1 = weight_variable([shape[1] * shape[2] * depth * num_channels * (depth//10), num_hidden])
f_biases_l1 = bias_variable([num_hidden])
f = tf.nn.tanh(tf.add(tf.matmul(c_flat, f_weights_l1),f_biases_l1))
#----------------------dropout------------------#
keep_prob = tf.placeholder(tf.float32)
drop_layer = tf.nn.dropout(f, keep_prob)
#----------------------softmax layer----------------#
out_weights = weight_variable([num_hidden, num_labels])
out_biases = bias_variable([num_labels])
y_ = tf.nn.softmax(tf.add(tf.matmul(drop_layer, out_weights),out_biases), name="y_")
#-----------------loss optimization-------------#
loss = -tf.reduce_sum(Y * tf.log(y_))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss)
#-----------------compute accuracy---------------#
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
cost_history = np.empty(shape=[1],dtype=float)
saver = tf.train.Saver()
session = tf.Session()
session.run(tf.global_variables_initializer())
#-------------early stopping-----------------#
# Best validation accuracy seen so far.
best_validation_accuracy = 0.0
# Iteration-number for last improvement to validation accuracy.
last_improvement = 0
# Stop optimization if no improvement found in this many iterations.
require_improvement = 1000
# Counter for total number of iterations performed so far.
total_iterations = 0
def validation_accuracy():
return session.run(accuracy, feed_dict={X: validation_x, Y: validation_y, keep_prob: 1.0})
def next_batch(b, batch_size, train_x, train_y):
##for b in range(total_batches):
offset = (b * batch_size) % (train_y.shape[0] - batch_size)
batch_x = train_x[offset:(offset + batch_size), :]
batch_y = train_y[offset:(offset + batch_size), :]
return batch_x, batch_y
def optimize(num_iterations):
# Ensure we update the global variables rather than local copies.
global total_iterations
global best_validation_accuracy
global last_improvement
# Start-time used for printing time-usage below.
start_time = time.time()
for i in range(num_iterations):
# Increase the total number of iterations performed.
# It is easier to update it in each iteration because
# we need this number several times in the following.
total_iterations += 1
# Get a batch of training examples.
# x_batch now holds a batch of images and
# y_true_batch are the true labels for those images.
##x_batch, y_true_batch = data.train.next_batch(train_batch_size)
x_batch, y_true_batch = next_batch(i, batch_size, train_x, train_y)
# Put the batch into a dict with the proper names
# for placeholder variables in the TensorFlow graph.
feed_dict_train = {X: x_batch,
Y: y_true_batch, keep_prob: 0.5}
# Run the optimizer using this batch of training data.
# TensorFlow assigns the variables in feed_dict_train
# to the placeholder variables and then runs the optimizer.
session.run(optimizer, feed_dict=feed_dict_train)
# Print status every 100 iterations and after last iteration.
if (total_iterations % 100 == 0) or (i == (num_iterations - 1)):
# Calculate the accuracy on the training-batch.
acc_train = session.run(accuracy, feed_dict={X: x_batch,
Y: y_true_batch, keep_prob: 1.0})
# Calculate the accuracy on the validation-set.
# The function returns 2 values but we only need the first.
##acc_validation, _ = validation_accuracy()
acc_validation = validation_accuracy()
# If validation accuracy is an improvement over best-known.
if acc_validation > best_validation_accuracy:
# Update the best-known validation accuracy.
best_validation_accuracy = acc_validation
# Set the iteration for the last improvement to current.
last_improvement = total_iterations
# Save all variables of the TensorFlow graph to file.
saver.save(sess=session, save_path="../shoaib-har_agm_es.ckpt")
# A string to be printed below, shows improvement found.
improved_str = '*'
else:
# An empty string to be printed below.
# Shows that no improvement was found.
improved_str = ''
# Status-message for printing.
msg = "Iter: {0:>6}, Train-Batch Accuracy: {1:>6.1%}, Validation Acc: {2:>6.1%} {3}"
# Print it.
print(msg.format(i + 1, acc_train, acc_validation, improved_str))
# If no improvement found in the required number of iterations.
if total_iterations - last_improvement > require_improvement:
print("No improvement found in a while, stopping optimization.")
# Break out from the for-loop.
break
# Ending time.
end_time = time.time()
# Difference between start and end-times.
time_dif = end_time - start_time
# Print the time-usage.
print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
optimize(10000)
With the output:
What exactly is training accuracy? Is it even computed? Or do you compute the training accuracy on the entire training data and not just the batch you trained your network with?
Here I printed the results such that it prints out the batch training accuracy and the training accuracy on the entire dataset set for every multiples of 20 iterations.
The data is divided to 3 sets: train, validation and test.
Batch training accuracy is computed on the train set (the difference between the label and the prediction).
Validation accuracy is the accuracy on the validation set.
The batch accuracy can be computed just after a forward pass in the network. The number of samples in one forward pass is the batch size. It is just a way to train models faster (mini-batch gradient descent)
Overfitting is when the model works really good on known data (training set) but performs poorly on new data.
As to the 10% multiples, it is just the printing format you are using.

DNN With embedded layer returning sin wave cost/accuracy

I am a total newbie to tensor flow and machine learning, but trying to model a DNN with an embedded layer infront of it. For some reason I keep getting a sin wave of cost results as well as accuracy. I imagine there is something wrong with my code, so here goes:
This is my model and training routines:
def neural_network_model(x):
W = tf.Variable(
tf.truncated_normal([vocab_size, embedding_size], stddev=1 / math.sqrt(vocab_size)),
name="W")
embedded = tf.nn.embedding_lookup(W, x)
embedding_aggregated = tf.reduce_sum(embedded, [1])
hidden_1_layer = {
'weights': tf.Variable(tf.random_normal([embedding_size, n_nodes_hl1])),
'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))
}
hidden_2_layer = {
'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))
}
hidden_3_layer = {
'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))
}
output = {
'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
'biases': tf.Variable(tf.random_normal([n_classes]))
}
l1 = tf.matmul(embedding_aggregated, hidden_1_layer['weights']) + hidden_1_layer['biases']
l1 = tf.nn.relu(l1)
l2 = tf.matmul(l1, hidden_2_layer['weights']) + hidden_2_layer['biases']
l2 = tf.nn.relu(l2)
l3 = tf.matmul(l2, hidden_3_layer['weights']) + hidden_3_layer['biases']
l3 = tf.nn.relu(l3)
output = tf.matmul(l3, output['weights']) + output['biases']
return output
def train_neural_network(x_batch, y_batch, test_x, test_y):
global_step = tf.Variable(0, trainable=False, name='global_step')
logits = neural_network_model(x_batch)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y_batch))
tf.scalar_summary('cost', cost)
optimizer = tf.train.AdagradOptimizer(0.01).minimize(cost, global_step = global_step)
test_logits = neural_network_model(test_x)
prediction = tf.nn.softmax(test_logits)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(test_y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
tf.scalar_summary('accuracy', accuracy)
merged = tf.merge_all_summaries()
saver = tf.train.Saver()
model_dir = "model_embedding"
latest_checkpoint = tf.train.latest_checkpoint(model_dir)
with tf.Session() as sess:
train_writer = tf.train.SummaryWriter(model_dir + "/eval", sess.graph)
if (latest_checkpoint != None):
print("Restoring: ", latest_checkpoint)
saver.restore(sess, latest_checkpoint)
else:
print("Nothing to restore")
sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
epoch = 1
while not coord.should_stop():
epoch_loss = 0
_, c, summary = sess.run([optimizer, cost, merged])
# embd = sess.run(emb)
# for idx in range(xb.size):
# print(xb[idx])
# print(yb[idx])
train_writer.add_summary(summary, global_step = global_step.eval())
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
print("Global step: ", global_step.eval())
print('Accuracy:',accuracy.eval())
#saver.save(sess, model_dir+'/model.ckpt', global_step=global_step) # default to last 5 checkpoint saves
epoch += 1
except tf.errors.OutOfRangeError:
print('Done training -- epoch limit reached')
finally:
coord.request_stop()
coord.join(threads)
sess.close()
My data is a bunch of word integer IDs padded to a size of 2056 uniformly with the padding token being added at the end so a lot of my tensors have a bunch of vocab_size integer value at the end, in order to pad up to 2056.
Is there something glaringly obvious about my code thats wrong?
For whoever runs into the same issue:
My error was reusing the neural_network_model() function and thus creating a new set of variables. The answer lies in reading how to share variables, and TF has a good page describing that at Sharing Variables

Resources