Tensorflow save model: GraphDef cannot be larger than 2GB - machine-learning

I'm getting the following error, apparently at the time of saving my model:
Step = 1799 | Tensorflow Accuracy = 1.0
Step = 1799 | My Accuracy = 0.0363355780022
Step = 1800 | Tensorflow Accuracy = 1.0
Step = 1800 | My Accuracy = 0.0364694929089
Traceback (most recent call last):
  File "CNN-LSTM-seg-reg-sigmoid.py", line 290, in <module>
    saver.save(sess, save_path)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1085, in save
    self.export_meta_graph(meta_graph_filename)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1103, in export_meta_graph
    add_shapes=True),
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2175, in as_graph_def
    result, _ = self._as_graph_def(from_version, add_shapes)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2138, in _as_graph_def
    raise ValueError("GraphDef cannot be larger than 2GB.")
ValueError: GraphDef cannot be larger than 2GB.
It was suggested here to look out for tf.constant nodes, but I have zero constants in my program. However, my weights and biases look like the following: tf.Variable(tf.random_normal([32]), name="bc1"). Could this be an issue?
If not that, then this tells me that somewhere I am adding to the graph on every loop iteration, but I'm unsure where it is occurring.
My first guess is when I make predictions, which I do via the following code:
# Make prediction
im = Image.open('/home/volcart/Documents/Data/input_crops/temp data0001.tif')
batch_x = np.array(im)
batch_x = batch_x.reshape((1, n_input_x, n_input_y))
batch_x = batch_x.astype(float)
prediction = sess.run(pred, feed_dict={x: batch_x})
prediction = tf.sigmoid(prediction.reshape((n_input_x * n_input_y, n_classes)))
prediction = prediction.eval().reshape((n_input_x, n_input_y, n_classes))
My second guess is when I calculate loss and accuracy via the following: loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})
My entire session code looks like the following:
# Initializing the variables
init = tf.initialize_all_variables()
saver = tf.train.Saver()
gpu_options = tf.GPUOptions()
config = tf.ConfigProto(gpu_options=gpu_options)
config.gpu_options.allow_growth = True
# Launch the graph
with tf.Session(config=config) as sess:
    sess.run(init)
    summary = tf.train.SummaryWriter('/tmp/logdir/', sess.graph)  # initialize graph for tensorboard
    step = 1
    # Import data
    data = scroll_data.read_data('/home/volcart/Documents/Data/')
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y = data.train.next_batch(batch_size)
        # Run optimization op (backprop)
        batch_x = batch_x.reshape((batch_size, n_input_x, n_input_y))
        batch_y = batch_y.reshape((batch_size, n_input_x, n_input_y))
        batch_y = convert_to_2_channel(batch_y, batch_size)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        step = step + 1
        loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                          y: batch_y})
        # Make prediction
        im = Image.open('/home/volcart/Documents/Data/input_crops/temp data0001.tif')
        batch_x = np.array(im)
        batch_x = batch_x.reshape((1, n_input_x, n_input_y))
        batch_x = batch_x.astype(float)
        prediction = sess.run(pred, feed_dict={x: batch_x})
        prediction = tf.sigmoid(prediction.reshape((n_input_x * n_input_y, n_classes)))
        prediction = prediction.eval().reshape((n_input_x, n_input_y, n_classes))
        # Temp arrays are to splice the prediction n_input_x x n_input_y x 2
        # into 2 matrices n_input_x x n_input_y
        temp_arr1 = np.empty((n_input_x, n_input_y))
        for i in xrange(n_input_x):
            for j in xrange(n_input_x):
                for k in xrange(n_classes):
                    if k == 0:
                        temp_arr1[i][j] = 1 - prediction[i][j][k]
        my_acc = accuracy_custom(temp_arr1, batch_y[0, :, :, 0])
        print "Step = " + str(step) + " | Tensorflow Accuracy = " + str(acc)
        print "Step = " + str(step) + " | My Accuracy = " + str(my_acc)
        if step % 100 == 0:
            save_path = "/home/volcart/Documents/CNN-LSTM-reg-model/CNN-LSTM-seg-step-" + str(step) + "-model.ckpt"
            saver.save(sess, save_path)
            csv_file = "/home/volcart/Documents/CNN-LSTM-reg/CNNLSTMreg-step-" + str(step) + "-accuracy-" + str(my_acc) + ".csv"
            np.savetxt(csv_file, temp_arr1, delimiter=",")

You are growing your graph at this line:
prediction = tf.sigmoid(prediction.reshape((n_input_x * n_input_y, n_classes)))
This converts your prediction NumPy array to a TensorFlow constant node, inlines it into the graph, and adds a Sigmoid node on top of that. Because this line runs on every loop iteration, new nodes are added each time and the graph eventually exceeds the 2GB limit.
You can catch problems like this by calling tf.get_default_graph().finalize() before starting your training loop; once the graph is finalized, any attempt to add a node raises an error immediately instead of silently growing the graph.
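For example, a minimal sketch (placed after all graph construction and before the training loop):
# Lock the graph: construction is done, only sess.run calls should follow.
tf.get_default_graph().finalize()
# From here on, anything that tries to add an op (such as the tf.sigmoid
# call inside the loop) raises a RuntimeError at the offending line.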

You can solve the issue by rewriting the line below to use a tf.placeholder, so that the sigmoid op is built once instead of being recreated from the NumPy array on every iteration:
prediction = tf.sigmoid(prediction.reshape((n_input_x * n_input_y, n_classes)))
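As an illustration, a minimal sketch of that rewrite, reusing the names from the question (n_input_x, n_input_y, n_classes, pred, x, sess) and introducing a hypothetical placeholder called raw_pred; the ops are created once before the loop, and only sess.run is called inside it:
# Build these ops once, before the training loop (and before finalize()).
raw_pred = tf.placeholder(tf.float32, [n_input_x * n_input_y, n_classes])  # hypothetical name
sigmoid_pred = tf.sigmoid(raw_pred)

# Inside the training loop: no new ops, only sess.run calls.
raw = sess.run(pred, feed_dict={x: batch_x}).reshape((n_input_x * n_input_y, n_classes))
probs = sess.run(sigmoid_pred, feed_dict={raw_pred: raw})
prediction = probs.reshape((n_input_x, n_input_y, n_classes))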

Related

Out of core learning for multi-label text classification problem

I am working on a multi-label text classification problem (90 target labels in total). The data distribution has a long tail and class imbalance. I am working with a sample of 100k records using the OVR (One-Versus-Rest) strategy. Since the dataset is huge, I am trying out the partial_fit method. I came to know that there were some issues previously, and a similar question was asked back in 2017. I tried partial_fit and found that the same issue still exists, or maybe I am not doing it correctly.
Scikit-learn version : 0.22.2.post1
Code
def stream_documents(data=None):
    """Iterate over documents of the dataset.
    Documents are represented as dictionaries
    """
    for index, row in data.iterrows():
        tmp_dict = dict()
        tmp_dict['text'] = row[TEXT_FEAT]
        tmp_dict['target'] = row[TARGET_LABEL]
        yield tmp_dict

def get_minibatch(doc_iter, size, mlb):
    """Extract a minibatch of examples, return a tuple X_text, y.
    Note: size is before excluding invalid docs with no topics assigned.
    """
    data = [(doc['text'], doc['target'])
            for doc in itertools.islice(doc_iter, size)]
    if not len(data):
        return np.asarray([], dtype=int), np.asarray([], dtype=int)
    X_text, y = zip(*data)
    y = pd.Series(data=y)
    y_encoded = mlb.transform(y.str.split(','))
    # print("Y SHAPE : ", np.asarray(y_encoded, dtype=int).shape)
    return X_text, np.asarray(y_encoded, dtype=int)

def iter_minibatches(doc_iter, minibatch_size):
    """Generator of minibatches."""
    X_text, y = get_minibatch(doc_iter, minibatch_size, mlb)
    while len(X_text):
        yield X_text, y
        X_text, y = get_minibatch(doc_iter, minibatch_size, mlb)

def progress(cls_name, stats):
    """Report progress information, return a string."""
    duration = time.time() - stats['t0']
    s = "%20s classifier : \t" % cls_name
    s += "%(n_train)6d train docs " % stats
    s += "%(n_test)6d test docs " % test_stats
    s += "Acc: %(accuracy).3f " % stats
    s += "f1: %(f1).3f " % stats
    s += "P: %(p).3f " % stats
    s += "in %.2fs (%5d docs/s)" % (duration, stats['n_train'] / duration)
    return s
vectorizer = HashingVectorizer(decode_error='ignore', n_features=2 ** 18)
data_stream = stream_documents(data=df_sample_xs) # X, y
partial_fit_classifiers = {
'SGD': OneVsRestClassifier(SGDClassifier(max_iter=1000, tol=1e-3)),
'Logistic':OneVsRestClassifier(LogisticRegression(solver='lbfgs',max_iter=500))
}
# test data statistics
test_stats = {'n_test': 0}
# First we hold out a number of examples to estimate accuracy
n_test_documents = 1000
tick = time.time()
X_test_text, y_test = get_minibatch(data_stream, 1000, mlb)
parsing_time = time.time() - tick
tick = time.time()
X_test = vectorizer.transform(X_test_text)
vectorizing_time = time.time() - tick
test_stats['n_test'] += len(y_test)
print("Test set is %d documents" % (len(y_test)))
cls_stats = {}
for cls_name in partial_fit_classifiers:
    stats = {'n_train': 0, 'n_train_pos': 0,
             'accuracy': 0.0,
             'accuracy_history': [(0, 0)],
             'f1': 0.0,
             'f1_history': [(0, 0)],
             'p': 0.0,
             'p_history': [(0, 0)],
             't0': time.time(),
             'runtime_history': [(0, 0)],
             'total_fit_time': 0.0}
    cls_stats[cls_name] = stats
get_minibatch(data_stream, n_test_documents, mlb)
minibatch_size = 2000
minibatch_iterators = iter_minibatches(data_stream, minibatch_size)
total_vect_time = 0.0
# Main loop: iterate over mini-batches of examples
for i, (X_train_text, y_train) in enumerate(minibatch_iterators):
    tick = time.time()
    X_train = vectorizer.transform(X_train_text)
    total_vect_time += time.time() - tick
    # print(X_train.shape, y_train.shape)
    for cls_name, cls in partial_fit_classifiers.items():
        tick = time.time()
        print(cls_name)
        # update estimator with examples in the current mini-batch
        # cls.partial_fit(X_train, y_train, classes=all_classes)
        cls.partial_fit(X_train, y_train, classes=mlb.transform(df_sample_xs[TARGET_LABEL].str.split(',')))
        # accumulate test accuracy stats
        cls_stats[cls_name]['total_fit_time'] += time.time() - tick
        cls_stats[cls_name]['n_train'] += X_train.shape[0]
        cls_stats[cls_name]['n_train_pos'] += sum(y_train)
        tick = time.time()
        cls_stats[cls_name]['accuracy'] = cls.score(X_test, y_test)
        cls_stats[cls_name]['f1'] = f1_score(y_test, cls.predict(X_test))
        cls_stats[cls_name]['p'] = precision_score(y_test, cls.predict(X_test))
        cls_stats[cls_name]['prediction_time'] = time.time() - tick
        acc_history = (cls_stats[cls_name]['accuracy'], cls_stats[cls_name]['n_train'])
        cls_stats[cls_name]['accuracy_history'].append(acc_history)
        f1_history = (cls_stats[cls_name]['f1'], cls_stats[cls_name]['n_train'])
        cls_stats[cls_name]['f1_history'].append(f1_history)
        p_history = (cls_stats[cls_name]['p'], cls_stats[cls_name]['n_train'])
        cls_stats[cls_name]['p_history'].append(p_history)
        run_history = (cls_stats[cls_name]['accuracy'],
                       cls_stats[cls_name]['f1'],
                       cls_stats[cls_name]['p'],
                       total_vect_time + cls_stats[cls_name]['total_fit_time'])
        cls_stats[cls_name]['runtime_history'].append(run_history)
        if i % 3 == 0:
            print(progress(cls_name, cls_stats[cls_name]))
    if i % 3 == 0:
        print('\n')
Error
SGD
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-87-cf38c633c6aa> in <module>
31 # update estimator with examples in the current mini-batch
32 # cls.partial_fit(X_train, y_train, classes=all_classes)
---> 33 cls.partial_fit(X_train, y_train, classes=mlb.transform(df_sample_xs[TARGET_LABEL].str.split(',')))
34 # accumulate test accuracy stats
35 cls_stats[cls_name]['total_fit_time'] += time.time() - tick
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/multiclass.py in partial_fit(self, X, y, classes)
287 self.classes_))
288
--> 289 Y = self.label_binarizer_.transform(y)
290 Y = Y.tocsc()
291 columns = (col.toarray().ravel() for col in Y.T)
/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/preprocessing/_label.py in transform(self, y)
478 y_is_multilabel = type_of_target(y).startswith('multilabel')
479 if y_is_multilabel and not self.y_type_.startswith('multilabel'):
--> 480 raise ValueError("The object was not fitted with multilabel"
481 " input.")
482
ValueError: The object was not fitted with multilabel input.

Predicting probabilities in classfier tensorflow

Hey, I am pretty new to TensorFlow. I am building a classification model that basically classifies into 0/1. Is there a way to predict the probability of the output being 1? Can predict_proba be used here? It's widely used in tflearn.dnn, but I can't find any reference for doing it in my case.
def main():
    train_x, test_x, train_y, test_y = load_csv_data()
    x_size = train_x.shape[1]
    y_size = train_y.shape[1]
    print(x_size)
    print(y_size)
    # variables
    X = tf.placeholder("float", shape=[None, x_size])
    y = tf.placeholder("float", shape=[None, y_size])
    weights_1 = initialize_weights((x_size, h_size))
    weights_2 = initialize_weights((h_size, y_size))
    # Forward propagation
    y_pred = forward_propagation(X, weights_1, weights_2)
    predict = tf.argmax(y_pred, dimension=1)
    # Backward propagation
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pred))
    updates_sgd = tf.train.GradientDescentOptimizer(sgd_step).minimize(cost)
    # Start tensorflow session
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        steps = 1
        sess.run(init)
        x = np.arange(steps)
        test_acc = []
        train_acc = []
        print("Step, train accuracy, test accuracy")
        for step in range(steps):
            # Train with each example
            batch_size = len(train_x)
            avg_cost = 0
            print(batch_size)
            for i in range(len(train_x)):
                _, c = sess.run([updates_sgd, cost], feed_dict={X: train_x[i: i + 1], y: train_y[i: i + 1]})
                print(c)
                avg_cost += c / batch_size
            train_accuracy = np.mean(np.argmax(train_y, axis=1) ==
                                     sess.run(predict, feed_dict={X: train_x, y: train_y}))
            test_accuracy = np.mean(np.argmax(test_y, axis=1) ==
                                    sess.run(predict, feed_dict={X: test_x, y: test_y}))
            print(avg_cost)
            print("%d, %.2f%%, %.2f%%"
                  % (step + 1, 100. * train_accuracy, 100. * test_accuracy))
            test_acc.append(100. * test_accuracy)
            train_acc.append(100. * train_accuracy)
        predict = tf.argmax(y_pred, 1)
        test_data = load_test_data()
        print(test_data)
        pred = predict.eval(feed_dict={X: test_data})
        print(pred)
        for x in range(0, 100):
            print(pred[x])
        print(np.unique(pred))

main()
Here you take the argmax of the probabilities:
predict = tf.argmax(y_pred, dimension=1)
If you simply return y_pred instead, you should get the probabilities.
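To be precise, y_pred in the question is fed to softmax_cross_entropy_with_logits, so it holds unnormalized logits; applying tf.nn.softmax to it gives the class probabilities. A minimal sketch, assuming the names from the question (y_pred, X, sess, test_data):
probs = tf.nn.softmax(y_pred)        # rows sum to 1; column 1 is P(class = 1)
predict = tf.argmax(y_pred, 1)       # hard class prediction, as before

probabilities, classes = sess.run([probs, predict], feed_dict={X: test_data})
print(probabilities[:5, 1])          # probability of class 1 for the first 5 rows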

Tensorflow RNN stuck at high cost

The following RNN model decreases the loss for the first one or two epochs and then fluctuates around a cost of 6. It seems as if the model is essentially random and not learning at all. I varied the learning rate from 0.1 to 0.0001 and it didn't help. The data is fed with an input pipeline, which worked fine with other models, so the functions that extract the labels and images are not shown here. I have looked at this many times but still couldn't find what's wrong. Here's the code:
n_steps = 224
n_inputs = 224
learning_rate = 0.00015
batch_size = 256 # n_neurons
epochs = 100
num_batch = int(len(trainnames)/batch_size)
keep_prob = tf.placeholder(tf.float32)
# TRAIN QUEUE
train_queue = tf.RandomShuffleQueue(len(trainnames)*1.5, 0, [tf.string, tf.float32], shapes=[[],[num_labels,]])
enqueue_train = train_queue.enqueue_many([trainnames, train_label])
train_image, train_image_label = train_queue.dequeue()
train_image = read_image_file(train_image)
train_batch, train_label_batch = tf.train.batch(
    [train_image, train_image_label],
    batch_size=batch_size,
    num_threads=1,
    capacity=10*batch_size,
    enqueue_many=False,
    shapes=[[224,224], [num_labels,]],
    allow_smaller_final_batch=True
)
train_close = train_queue.close()
def RNN(inputs, reuse):
    with tf.variable_scope('cells', reuse=reuse):
        basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=batch_size, reuse=reuse)
    with tf.variable_scope('rnn'):
        outputs, states = tf.nn.dynamic_rnn(basic_cell, inputs, dtype=tf.float32)
    fc_drop = tf.nn.dropout(states, keep_prob)
    logits = tf.contrib.layers.fully_connected(fc_drop, num_labels, activation_fn=None)
    return logits

# Training
with tf.name_scope("cost_function") as scope:
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=train_label_batch, logits=RNN(train_batch, reuse=None)))
    train_step = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(cost)
    cost_summary = tf.summary.scalar("cost_function", cost)
file_writer = tf.summary.FileWriter(logdir)
# Session
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord, start=True)
    step = 0
    for epoch in range(epochs):
        sess.run(enqueue_train)
        for batch in range(num_batch):
            if step % 100 == 0:
                summary_str = cost_summary.eval(feed_dict={keep_prob: 1.0})
                file_writer.add_summary(summary_str, step)
            else:
                sess.run(train_step, feed_dict={keep_prob: 0.5})
            step += 1
    sess.run(train_close)
    coord.request_stop()
    coord.join(threads)
    file_writer.close()

Batch Training Accuracy is always multiple of 10%

So I am training a CNN and computing the training accuracy for each batch. Most of the time it gives 100% batch training accuracy, which I thought was okay because I'm testing my model against the data I trained it with. But at some iterations I get a 90% batch training accuracy, and worse, sometimes it drops to 0% very quickly and bounces right back to 100%. I used the algorithm in https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/04_Save_Restore.ipynb, where they also compute the batch training accuracy, but they don't get the results I get: they start out at around 80% batch training accuracy and observe a gradual increase up to 98%. Why is this?
I suspect that my network is overfitting.
Here is my exact code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
import pyfftw
from scipy import signal
import xlrd
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
import time
from datetime import timedelta
import math
import os
from sklearn.metrics import confusion_matrix
##matplotlib inline
plt.style.use('ggplot')
## define functions
def read_data(file_path):
    ## column_names = ['user-id','activity','timestamp', 'x-axis', 'y-axis', 'z-axis']
    column_names = ['activity', 'timestamp', 'Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz', 'Mx', 'My', 'Mz']  ## 3 sensors
    data = pd.read_csv(file_path, header=None, names=column_names)
    return data

def feature_normalize(dataset):
    mu = np.mean(dataset, axis=0)
    sigma = np.std(dataset, axis=0)
    return (dataset - mu) / sigma

def plot_axis(ax, x, y, title):
    ax.plot(x, y)
    ax.set_title(title)
    ax.xaxis.set_visible(False)
    ax.set_ylim([min(y) - np.std(y), max(y) + np.std(y)])
    ax.set_xlim([min(x), max(x)])
    ax.grid(True)

def plot_activity(activity, data):
    fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, figsize=(15, 10), sharex=True)
    plot_axis(ax0, data['timestamp'], data['Ax'], 'x-axis')
    plot_axis(ax1, data['timestamp'], data['Ay'], 'y-axis')
    plot_axis(ax2, data['timestamp'], data['Az'], 'z-axis')
    plt.subplots_adjust(hspace=0.2)
    fig.suptitle(activity)
    plt.subplots_adjust(top=0.90)
    plt.show()

def windows(data, size):
    start = 0
    while start < data.count():
        yield start, start + size
        start += (size / 2)

def segment_signal(data, window_size=None, num_channels=None):  # edited
    segments = np.empty((0, window_size, num_channels))  # change from 3 to 9 channels for AGM fusion; use variable num_channels=9
    labels = np.empty((0))
    for (n_start, n_end) in windows(data['timestamp'], window_size):
        ## x = data["x-axis"][start:end]
        ## y = data["y-axis"][start:end]
        ## z = data["z-axis"][start:end]
        n_start = int(n_start)
        n_end = int(n_end)
        Ax = data["Ax"][n_start:n_end]
        Ay = data["Ay"][n_start:n_end]
        Az = data["Az"][n_start:n_end]
        Gx = data["Gx"][n_start:n_end]
        Gy = data["Gy"][n_start:n_end]
        Gz = data["Gz"][n_start:n_end]
        Mx = data["Mx"][n_start:n_end]
        My = data["My"][n_start:n_end]
        Mz = data["Mz"][n_start:n_end]
        if (len(dataset['timestamp'][n_start:n_end]) == window_size):  # include only windows with size of 90
            segments = np.vstack([segments, np.dstack([Ax, Ay, Az, Gx, Gy, Gz, Mx, My, Mz])])
            labels = np.append(labels, stats.mode(data["activity"][n_start:n_end])[0][0])
    return segments, labels

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.0, shape=shape)
    return tf.Variable(initial)

def depthwise_conv2d(x, W):
    return tf.nn.depthwise_conv2d(x, W, [1, 1, 1, 1], padding='VALID')

def apply_depthwise_conv(x, weights, biases):
    return tf.nn.relu(tf.add(depthwise_conv2d(x, weights), biases))

def apply_max_pool(x, kernel_size, stride_size):
    return tf.nn.max_pool(x, ksize=[1, 1, kernel_size, 1],
                          strides=[1, 1, stride_size, 1], padding='VALID')
#------------------------get dataset----------------------#
## run shoaib_dataset.py to generate dataset_shoaib_total.txt
## get data from dataset_shoaib_total.txt
dataset = read_data('dataset_shoaib_total.txt')
#--------------------preprocessing------------------------#
dataset['Ax'] = feature_normalize(dataset['Ax'])
dataset['Ay'] = feature_normalize(dataset['Ay'])
dataset['Az'] = feature_normalize(dataset['Az'])
dataset['Gx'] = feature_normalize(dataset['Gx'])
dataset['Gy'] = feature_normalize(dataset['Gy'])
dataset['Gz'] = feature_normalize(dataset['Gz'])
dataset['Mx'] = feature_normalize(dataset['Mx'])
dataset['My'] = feature_normalize(dataset['My'])
dataset['Mz'] = feature_normalize(dataset['Mz'])
###--------------------plot activity data----------------#
##for activity in np.unique(dataset["activity"]):
## subset = dataset[dataset["activity"] == activity][:180]
## plot_activity(activity,subset)
#------------------fixed hyperparameters--------------------#
window_size = 200 #from 90 #FIXED at 4 seconds
#----------------input hyperparameters------------------#
input_height = 1
input_width = window_size
num_labels = 6
num_channels = 9 #from 3 channels #9 channels for AGM
#-------------------sliding time window----------------#
segments, labels = segment_signal(dataset, window_size=window_size, num_channels=num_channels)
labels = np.asarray(pd.get_dummies(labels), dtype = np.int8)
reshaped_segments = segments.reshape(len(segments), (window_size*num_channels)) #use variable num_channels instead of constant 3 channels
#------------divide data into test and training set-----------#
train_test_split = np.random.rand(len(reshaped_segments)) < 0.80
train_x_init = reshaped_segments[train_test_split]
train_y_init = labels[train_test_split]
test_x = reshaped_segments[~train_test_split]
test_y = labels[~train_test_split]
train_validation_split = np.random.rand(len(train_x_init)) < 0.80
train_x = train_x_init[train_validation_split]
train_y = train_y_init[train_validation_split]
validation_x = train_x_init[~train_validation_split]
validation_y = train_y_init[~train_validation_split]
#---------------training hyperparameters----------------#
batch_size = 10
kernel_size = 60 #from 60 #optimal 2
depth = 15 #from 60 #optimal 15
num_hidden = 1000 #from 1000 #optimal 80
learning_rate = 0.0001
training_epochs = 8
total_batches = train_x.shape[0] ##// batch_size
#---------define placeholders for input----------#
X = tf.placeholder(tf.float32, shape=[None,input_width * num_channels], name="input")
X_reshaped = tf.reshape(X,[-1,input_height,input_width,num_channels])
Y = tf.placeholder(tf.float32, shape=[None,num_labels])
#---------------------perform convolution-----------------#
# first convolutional layer
c_weights = weight_variable([1, kernel_size, num_channels, depth])
c_biases = bias_variable([depth * num_channels])
c = apply_depthwise_conv(X_reshaped,c_weights,c_biases)
p = apply_max_pool(c,20,2)
# second convolutional layer
c2_weights = weight_variable([1, 6,depth*num_channels,depth//10])
c2_biases = bias_variable([(depth*num_channels)*(depth//10)])
c = apply_depthwise_conv(p,c2_weights,c2_biases)
#--------------flatten data for fully connected layers----------#
shape = c.get_shape().as_list()
c_flat = tf.reshape(c, [-1, shape[1] * shape[2] * shape[3]])
#------------fully connected layers----------------#
f_weights_l1 = weight_variable([shape[1] * shape[2] * depth * num_channels * (depth//10), num_hidden])
f_biases_l1 = bias_variable([num_hidden])
f = tf.nn.tanh(tf.add(tf.matmul(c_flat, f_weights_l1),f_biases_l1))
#----------------------dropout------------------#
keep_prob = tf.placeholder(tf.float32)
drop_layer = tf.nn.dropout(f, keep_prob)
#----------------------softmax layer----------------#
out_weights = weight_variable([num_hidden, num_labels])
out_biases = bias_variable([num_labels])
y_ = tf.nn.softmax(tf.add(tf.matmul(drop_layer, out_weights),out_biases), name="y_")
#-----------------loss optimization-------------#
loss = -tf.reduce_sum(Y * tf.log(y_))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss)
#-----------------compute accuracy---------------#
correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
cost_history = np.empty(shape=[1],dtype=float)
saver = tf.train.Saver()
session = tf.Session()
session.run(tf.global_variables_initializer())
#-------------early stopping-----------------#
# Best validation accuracy seen so far.
best_validation_accuracy = 0.0
# Iteration-number for last improvement to validation accuracy.
last_improvement = 0
# Stop optimization if no improvement found in this many iterations.
require_improvement = 1000
# Counter for total number of iterations performed so far.
total_iterations = 0
def validation_accuracy():
    return session.run(accuracy, feed_dict={X: validation_x, Y: validation_y, keep_prob: 1.0})

def next_batch(b, batch_size, train_x, train_y):
    ##for b in range(total_batches):
    offset = (b * batch_size) % (train_y.shape[0] - batch_size)
    batch_x = train_x[offset:(offset + batch_size), :]
    batch_y = train_y[offset:(offset + batch_size), :]
    return batch_x, batch_y
def optimize(num_iterations):
    # Ensure we update the global variables rather than local copies.
    global total_iterations
    global best_validation_accuracy
    global last_improvement
    # Start-time used for printing time-usage below.
    start_time = time.time()
    for i in range(num_iterations):
        # Increase the total number of iterations performed.
        # It is easier to update it in each iteration because
        # we need this number several times in the following.
        total_iterations += 1
        # Get a batch of training examples.
        # x_batch now holds a batch of images and
        # y_true_batch are the true labels for those images.
        ##x_batch, y_true_batch = data.train.next_batch(train_batch_size)
        x_batch, y_true_batch = next_batch(i, batch_size, train_x, train_y)
        # Put the batch into a dict with the proper names
        # for placeholder variables in the TensorFlow graph.
        feed_dict_train = {X: x_batch,
                           Y: y_true_batch, keep_prob: 0.5}
        # Run the optimizer using this batch of training data.
        # TensorFlow assigns the variables in feed_dict_train
        # to the placeholder variables and then runs the optimizer.
        session.run(optimizer, feed_dict=feed_dict_train)
        # Print status every 100 iterations and after last iteration.
        if (total_iterations % 100 == 0) or (i == (num_iterations - 1)):
            # Calculate the accuracy on the training-batch.
            acc_train = session.run(accuracy, feed_dict={X: x_batch,
                                                         Y: y_true_batch, keep_prob: 1.0})
            # Calculate the accuracy on the validation-set.
            # The function returns 2 values but we only need the first.
            ##acc_validation, _ = validation_accuracy()
            acc_validation = validation_accuracy()
            # If validation accuracy is an improvement over best-known.
            if acc_validation > best_validation_accuracy:
                # Update the best-known validation accuracy.
                best_validation_accuracy = acc_validation
                # Set the iteration for the last improvement to current.
                last_improvement = total_iterations
                # Save all variables of the TensorFlow graph to file.
                saver.save(sess=session, save_path="../shoaib-har_agm_es.ckpt")
                # A string to be printed below, shows improvement found.
                improved_str = '*'
            else:
                # An empty string to be printed below.
                # Shows that no improvement was found.
                improved_str = ''
            # Status-message for printing.
            msg = "Iter: {0:>6}, Train-Batch Accuracy: {1:>6.1%}, Validation Acc: {2:>6.1%} {3}"
            # Print it.
            print(msg.format(i + 1, acc_train, acc_validation, improved_str))
        # If no improvement found in the required number of iterations.
        if total_iterations - last_improvement > require_improvement:
            print("No improvement found in a while, stopping optimization.")
            # Break out from the for-loop.
            break
    # Ending time.
    end_time = time.time()
    # Difference between start and end-times.
    time_dif = end_time - start_time
    # Print the time-usage.
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
optimize(10000)
With the output:
What exactly is training accuracy? Is it even computed? Or do you compute the training accuracy on the entire training data and not just the batch you trained your network with?
Here I printed the results so that they show the batch training accuracy and the training accuracy on the entire training set for every multiple of 20 iterations.
The data is divided into 3 sets: train, validation and test.
Batch training accuracy is computed on the training set (by comparing the predictions with the labels for that batch).
Validation accuracy is the accuracy on the validation set.
The batch accuracy can be computed right after a forward pass through the network; the number of samples in one forward pass is the batch size. Mini-batches are just a way to train models faster (mini-batch gradient descent).
Overfitting is when the model works really well on known data (the training set) but performs poorly on new data.
As to the multiples of 10%: your batch size is 10, so the accuracy on a single batch can only take the values 0/10, 1/10, ..., 10/10, i.e. a multiple of 10%.
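As an illustration of the difference, a minimal sketch using the placeholders and session from the question's code (assuming train_x and train_y fit in memory):
# Accuracy on the mini-batch just used for training: with batch_size = 10
# there are only 11 possible values (0.0, 0.1, ..., 1.0).
batch_acc = session.run(accuracy, feed_dict={X: x_batch, Y: y_true_batch, keep_prob: 1.0})

# Accuracy over the whole training set: far more samples, so much finer resolution.
full_train_acc = session.run(accuracy, feed_dict={X: train_x, Y: train_y, keep_prob: 1.0})

print("batch accuracy: %.3f | full training-set accuracy: %.3f" % (batch_acc, full_train_acc))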

DNN With embedded layer returning sin wave cost/accuracy

I am a total newbie to TensorFlow and machine learning, but I am trying to model a DNN with an embedding layer in front of it. For some reason I keep getting a sine wave of cost results as well as accuracy. I imagine there is something wrong with my code, so here goes:
These are my model and training routines:
def neural_network_model(x):
    W = tf.Variable(
        tf.truncated_normal([vocab_size, embedding_size], stddev=1 / math.sqrt(vocab_size)),
        name="W")
    embedded = tf.nn.embedding_lookup(W, x)
    embedding_aggregated = tf.reduce_sum(embedded, [1])
    hidden_1_layer = {
        'weights': tf.Variable(tf.random_normal([embedding_size, n_nodes_hl1])),
        'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))
    }
    hidden_2_layer = {
        'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
        'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))
    }
    hidden_3_layer = {
        'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
        'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))
    }
    output = {
        'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }
    l1 = tf.matmul(embedding_aggregated, hidden_1_layer['weights']) + hidden_1_layer['biases']
    l1 = tf.nn.relu(l1)
    l2 = tf.matmul(l1, hidden_2_layer['weights']) + hidden_2_layer['biases']
    l2 = tf.nn.relu(l2)
    l3 = tf.matmul(l2, hidden_3_layer['weights']) + hidden_3_layer['biases']
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output['weights']) + output['biases']
    return output

def train_neural_network(x_batch, y_batch, test_x, test_y):
    global_step = tf.Variable(0, trainable=False, name='global_step')
    logits = neural_network_model(x_batch)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y_batch))
    tf.scalar_summary('cost', cost)
    optimizer = tf.train.AdagradOptimizer(0.01).minimize(cost, global_step=global_step)
    test_logits = neural_network_model(test_x)
    prediction = tf.nn.softmax(test_logits)
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(test_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    tf.scalar_summary('accuracy', accuracy)
    merged = tf.merge_all_summaries()
    saver = tf.train.Saver()
    model_dir = "model_embedding"
    latest_checkpoint = tf.train.latest_checkpoint(model_dir)
    with tf.Session() as sess:
        train_writer = tf.train.SummaryWriter(model_dir + "/eval", sess.graph)
        if (latest_checkpoint != None):
            print("Restoring: ", latest_checkpoint)
            saver.restore(sess, latest_checkpoint)
        else:
            print("Nothing to restore")
            sess.run(tf.initialize_all_variables())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            epoch = 1
            while not coord.should_stop():
                epoch_loss = 0
                _, c, summary = sess.run([optimizer, cost, merged])
                # embd = sess.run(emb)
                # for idx in range(xb.size):
                #     print(xb[idx])
                #     print(yb[idx])
                train_writer.add_summary(summary, global_step=global_step.eval())
                epoch_loss += c
                print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
                print("Global step: ", global_step.eval())
                print('Accuracy:', accuracy.eval())
                #saver.save(sess, model_dir+'/model.ckpt', global_step=global_step)  # default to last 5 checkpoint saves
                epoch += 1
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
            sess.close()
My data is a bunch of word integer IDs, uniformly padded to a length of 2056 with the padding token added at the end, so a lot of my tensors have a long run of the vocab_size integer value at the end in order to pad up to 2056.
Is there something glaringly obvious that's wrong with my code?
For whoever runs into the same issue:
My error was calling the neural_network_model() function a second time (for the test logits) and thus creating a new, independent set of variables. The answer lies in reading up on how to share variables; TF has a good page describing that at Sharing Variables.
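As an illustration of that fix, here is a minimal sketch (not the exact code from the question) of sharing variables between the train and test graphs by building them with tf.get_variable inside a tf.variable_scope and reusing the scope on the second call; x_batch, test_x, vocab_size, embedding_size and n_classes are assumed to come from the question's setup:
import tensorflow as tf

def model(x, vocab_size, embedding_size, n_classes):
    # tf.get_variable either creates a variable or, when the enclosing
    # scope has reuse enabled, returns the existing one with that name.
    W = tf.get_variable("embedding", [vocab_size, embedding_size],
                        initializer=tf.truncated_normal_initializer(stddev=0.1))
    embedded = tf.reduce_sum(tf.nn.embedding_lookup(W, x), [1])
    W_out = tf.get_variable("W_out", [embedding_size, n_classes])
    b_out = tf.get_variable("b_out", [n_classes], initializer=tf.constant_initializer(0.0))
    return tf.matmul(embedded, W_out) + b_out

# Build the training and test graphs over the same variables.
with tf.variable_scope("dnn") as scope:
    train_logits = model(x_batch, vocab_size, embedding_size, n_classes)
    scope.reuse_variables()   # the second call now reuses, not recreates, the variables
    test_logits = model(test_x, vocab_size, embedding_size, n_classes)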
