tensorflow, I want to change input image size - machine-learning

I check tutorial of Tnesorflow. And now, I want to change input IMAGE_SIZE from 28x28 to 56x56 for improving accuracy. So, I change IMAGE_SIZE variable but that program throw error. Below is an original code and I want to change input image size. Where should I change?
# -*- coding: utf-8 -*-
import sys
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
flags = tf.app.flags
flags.DEFINE_string('train', 'train.txt', 'File name of train data')
flags.DEFINE_string('test', 'test.txt', 'File name of train data')
flags.DEFINE_string('train_dir', '/tmp/data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 200, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 10, 'Batch size'
'Must divide evenly into the dataset sizes.')
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
def inference(images_placeholder, keep_prob):
""" 予測モデルを作成する関数
images_placeholder: 画像のplaceholder
keep_prob: dropout率のplace_holder
y_conv: 各クラスの確率(のようなもの)
# 重みを標準偏差0.1の正規分布で初期化
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
# バイアスを標準偏差0.1の正規分布で初期化
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
# 畳み込み層の作成
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
# プーリング層の作成
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
# 入力を28x28x3に変形
x_image = tf.reshape(images_placeholder, [-1, 28, 28, 3])
# 畳み込み層1の作成
with tf.name_scope('conv1') as scope:
W_conv1 = weight_variable([5, 5, 3, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# プーリング層1の作成
with tf.name_scope('pool1') as scope:
h_pool1 = max_pool_2x2(h_conv1)
# 畳み込み層2の作成
with tf.name_scope('conv2') as scope:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
# プーリング層2の作成
with tf.name_scope('pool2') as scope:
h_pool2 = max_pool_2x2(h_conv2)
# 全結合層1の作成
with tf.name_scope('fc1') as scope:
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# dropoutの設定
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# 全結合層2の作成
with tf.name_scope('fc2') as scope:
W_fc2 = weight_variable([1024, NUM_CLASSES])
b_fc2 = bias_variable([NUM_CLASSES])
# ソフトマックス関数による正規化
with tf.name_scope('softmax') as scope:
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# 各ラベルの確率のようなものを返す
return y_conv
def loss(logits, labels):
""" lossを計算する関数
logits: ロジットのtensor, float - [batch_size, NUM_CLASSES]
labels: ラベルのtensor, int32 - [batch_size, NUM_CLASSES]
cross_entropy: 交差エントロピーのtensor, float
# 交差エントロピーの計算
cross_entropy = -tf.reduce_sum(labels*tf.log(logits))
# TensorBoardで表示するよう指定
tf.scalar_summary("cross_entropy", cross_entropy)
return cross_entropy
def training(loss, learning_rate):
""" 訓練のOpを定義する関数
loss: 損失のtensor, loss()の結果
learning_rate: 学習係数
train_step: 訓練のOp
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
return train_step
def accuracy(logits, labels):
""" 正解率(accuracy)を計算する関数
logits: inference()の結果
labels: ラベルのtensor, int32 - [batch_size, NUM_CLASSES]
accuracy: 正解率(float)
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
tf.scalar_summary("accuracy", accuracy)
return accuracy
if __name__ == '__main__':
# ファイルを開く
f = open(FLAGS.train, 'r')
# データを入れる配列
train_image = []
train_label = []
for line in f:
# 改行を除いてスペース区切りにする
line = line.rstrip()
l = line.split()
# データを読み込んで28x28に縮小
img = cv2.imread('tmp/data/' + l[0])
img = cv2.resize(img, (28, 28))
# 一列にした後、0-1のfloat値にする
# ラベルを1-of-k方式で用意する
tmp = np.zeros(NUM_CLASSES)
tmp[int(l[1])] = 1
# numpy形式に変換
train_image = np.asarray(train_image)
train_label = np.asarray(train_label)
f = open(FLAGS.test, 'r')
test_image = []
test_label = []
for line in f:
line = line.rstrip()
l = line.split()
img = cv2.imread('tmp/data/' + l[0])
img = cv2.resize(img, (28, 28))
tmp = np.zeros(NUM_CLASSES)
tmp[int(l[1])] = 1
test_image = np.asarray(test_image)
test_label = np.asarray(test_label)
with tf.Graph().as_default():
# 画像を入れる仮のTensor
images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
# ラベルを入れる仮のTensor
labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
# dropout率を入れる仮のTensor
keep_prob = tf.placeholder("float")
# inference()を呼び出してモデルを作る
logits = inference(images_placeholder, keep_prob)
# loss()を呼び出して損失を計算
loss_value = loss(logits, labels_placeholder)
# training()を呼び出して訓練
train_op = training(loss_value, FLAGS.learning_rate)
# 精度の計算
acc = accuracy(logits, labels_placeholder)
# 保存の準備
saver = tf.train.Saver()
# Sessionの作成
sess = tf.Session()
# 変数の初期化
# TensorBoardで表示する値の設定
summary_op = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter("/tmp/log/loglog1", sess.graph)
# 訓練の実行
for step in range(FLAGS.max_steps):
for i in range(len(train_image)/FLAGS.batch_size):
# batch_size分の画像に対して訓練の実行
batch = FLAGS.batch_size*i
# feed_dictでplaceholderに入れるデータを指定する
sess.run(train_op, feed_dict={
images_placeholder: train_image[batch:batch+FLAGS.batch_size],
labels_placeholder: train_label[batch:batch+FLAGS.batch_size],
keep_prob: 0.5})
# 1 step終わるたびに精度を計算する
train_accuracy = sess.run(acc, feed_dict={
images_placeholder: train_image,
labels_placeholder: train_label,
keep_prob: 1.0})
print "step %d, training accuracy %g"%(step, train_accuracy)
# 1 step終わるたびにTensorBoardに表示する値を追加する
summary_str = sess.run(summary_op, feed_dict={
images_placeholder: test_image,
labels_placeholder: test_label,
keep_prob: 1.0})
summary_writer.add_summary(summary_str, step)
print "test accuracy %g"%sess.run(acc, feed_dict={
images_placeholder: test_image,
labels_placeholder: test_label,
keep_prob: 1.0})
# 訓練が終了したらテストデータに対する精度を表示
print "test accuracy %g"%sess.run(acc, feed_dict={
images_placeholder: test_image,
labels_placeholder: test_label,
keep_prob: 1.0})
# 最終的なモデルを保存
save_path = saver.save(sess, "model.ckpt")

There are at least two other places in the code that depend on the image size:
The definition of x_image hard-codes the image size:
x_image = tf.reshape(images_placeholder, [-1, 28, 28, 3])
Assuming you set IMAGE_SIZE to be 56, you should replace it with:
x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
The number of neurons in the output fully connected layer depends on the image size (downsampled by the pooling layers), and will increase by 4x when you increase the number of pixels in the input by 4x. The following lines:
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
...should be replaced with:
W_fc1 = weight_variable([14 * 14 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 14 * 14 *64])

You use some hardcoded numbers while building the model. Please change them to use IMAGE_SIZE instead as follows :
# 入力を28x28x3に変形
x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])


Tensorflow multi-GPU MNIST classifier: low accuracy

I am stuck with multiple GPU MNIST classifier in Tensorflow. Code runs without errors, but accuracy is very poor (30%). I am new to Tensorflow so I do not know where is the problem ? GPU: 2x GTX 1080 Ti.
I have found several tutorials for multiple GPU, but code is hard to follow. For this reason I am trying to develop MNIST CNN classifier from scratch.
from __future__ import print_function
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import datetime
def average_gradients(tower_grads):
average_grads = []
for grad_and_vars in zip(*tower_grads):
# Note that each grad_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
grads = []
for g, _ in grad_and_vars:
# Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below.
# Average over the 'tower' dimension.
grad = tf.concat(axis=0, values=grads)
grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared
# across towers. So .. we will just return the first tower's pointer to
# the Variable.
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
return average_grads
with tf.device('/cpu:0'):
x = tf.placeholder(tf.float32, [None, 784], name='x')
x_img=tf.reshape(x, [-1, 28, 28, 1])
x_dict['x0'],x_dict['x1'] = tf.split(x_img,2)
y = tf.placeholder(tf.float32, [None, 10], name='y')
y_dict['y0'],y_dict['y1'] = tf.split(y,2)
keep_prob = tf.placeholder(tf.float32)
w0=tf.get_variable('w0',initializer=tf.truncated_normal([5, 5,1,32], stddev=0.1))
w1=tf.get_variable('w1',initializer=tf.truncated_normal([5,5,32,64], stddev=0.1))
w2=tf.get_variable('w2',initializer=tf.truncated_normal([7*7*64,1024], stddev=0.1))
w3=tf.get_variable('w3',initializer=tf.truncated_normal([1024,10], stddev=0.1))
def conv2d(xx, W):
return tf.nn.conv2d(xx, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(xx):
return tf.nn.max_pool(xx, ksize=[1, 2, 2, 1],strides=[1, 2, 2, 1], padding='SAME')
def model_forward(xx):
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat,w2)+b2)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
y = tf.nn.sigmoid(tf.matmul(h_fc1_drop,w3)+b3)
return y
for i in range(0,2):
with tf.device(('/gpu:{0}').format(i)):
with tf.variable_scope(('scope_gpu_{0}').format(i)):
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_dict[('y{0}').format(i)] * tf.log(yy), reduction_indices=[1]))
with tf.device('/cpu:0'):
grad = average_gradients(grads)
train_step = opt.apply_gradients(grad)
correct_prediction = tf.equal(tf.argmax(yy, 1), tf.argmax(y_dict['y0'], 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
def main():
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
writer = tf.summary.FileWriter('C:\\tmp\\test\\', graph=tf.get_default_graph())
t1_1 = datetime.datetime.now()
for step in range(0,10000):
batch_x, batch_y = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})
if (step % 200) == 0:
print(step, sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}))
t2_1 = datetime.datetime.now()
print("Computation time: " + str(t2_1-t1_1))
if __name__ == "__main__":
The problems that I noticed:
Your cross-entropy loss is wrong (see this question for details, in short you're computing binary cross-entropy).
I dropped manual gradient computation in favor of tf.train.AdamOptimizer.
I dropped the split of the input of x (it's not the right way to do distributed computation in tensorflow).
The result model easily gets to 99% accuracy even on one GPU.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import datetime
x = tf.placeholder(tf.float32, [None, 784], name='x')
x_img = tf.reshape(x, [-1, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, 10], name='y')
keep_prob = tf.placeholder(tf.float32)
stddev = 0.1
w0 = tf.get_variable('w0', initializer=tf.truncated_normal([5, 5, 1, 32], stddev=stddev))
b0 = tf.get_variable('b0', initializer=tf.zeros([32]))
w1 = tf.get_variable('w1', initializer=tf.truncated_normal([5, 5, 32, 64], stddev=stddev))
b1 = tf.get_variable('b1', initializer=tf.zeros([64]))
w2 = tf.get_variable('w2', initializer=tf.truncated_normal([7 * 7 * 64, 1024], stddev=stddev))
b2 = tf.get_variable('b2', initializer=tf.zeros([1024]))
w3 = tf.get_variable('w3', initializer=tf.truncated_normal([1024, 10], stddev=stddev))
b3 = tf.get_variable('b3', initializer=tf.zeros([10]))
def conv2d(xx, W):
return tf.nn.conv2d(xx, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(xx):
return tf.nn.max_pool(xx, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def model_forward(xx):
h_conv1 = tf.nn.relu(conv2d(xx, w0) + b0)
h_pool1 = max_pool_2x2(h_conv1)
h_conv2 = tf.nn.relu(conv2d(h_pool1, w1) + b1)
h_pool2 = max_pool_2x2(h_conv2)
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w2) + b2)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
y = tf.matmul(h_fc1_drop, w3) + b3
return y
yy = model_forward(x_img)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=yy, labels=y))
train_step = tf.train.AdamOptimizer().minimize(loss)
correct_prediction = tf.equal(tf.argmax(yy, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
def main():
mnist = input_data.read_data_sets("/home/maxim/p/data/mnist-tf", one_hot=True)
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
t1_1 = datetime.datetime.now()
for step in range(0, 10000):
batch_x, batch_y = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})
if (step % 200) == 0:
print(step, sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}))
t2_1 = datetime.datetime.now()
print("Computation time: " + str(t2_1 - t1_1))
if __name__ == "__main__":
Now, if you really want it, you can do data or model parallelism to utilize your GPU power (there is a great post about it, but sometimes it doesn't render correctly due to hosting problems).
Along with the points mentioned in the first two answers, take a look at return average_grads in average_gradients function, it's returning from the 1st iteration of the first for loop, meaning the gradients will only apply to the first variable (probably w0). Hence only w0 is getting updated and so you are getting a very low accuracy since the rest of the variables stay to their original values (either random/zeros).
This is because the model is not using the same weights & biases for inference on CPU as well as on the other GPU devices.
For example:
for i in range(0,2):
with tf.device(('/gpu:{0}').format(i)):
with tf.variable_scope(('scope_gpu_{0}').format(i)) as infer_scope:
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_dict[('y{0}').format(i)] * tf.log(yy), reduction_indices=[1]))
The reason you are getting low accuracy is that without specifying reuse_variables() and you try to call the model inference inside each epoch, the graph would create a new model with random weights & biases initialization, which is not what you favored.

Model in Tensorflow is not Working need review of the code not sure whats going Wrong

i am modifying the deep mnist code for my own data. i modified a model a bit but i am facing some basic issues like i pass data to my model one by one and it runs reall fast but when i pass my model all examples at ones it gets really slow and i also getting 0% accuracy. Kindly review my code i am doing something horribly wrong but i do not know where and what steps should i follow to make it correct.
Here is my model
def deepnn(x):
"""deepnn builds the graph for a deep net for classifying digits.
x: an input tensor with the dimensions (N_examples, 784), where 784 is the
number of pixels in a standard MNIST image.
A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values
equal to the logits of classifying the digit into one of 10 classes (the
digits 0-9). keep_prob is a scalar placeholder for the probability of
x_image = tf.reshape(x, [-1, 28, 28, 1])
W_conv1 = weight_variable([5, 5, 1, 200])
b_conv1 = bias_variable([200])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 200, 100])
b_conv2 = bias_variable([100])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 100, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*100])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 19])
b_fc2 = bias_variable([19])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
return y_conv, keep_prob
Here are the fucntion my model calls.
def conv2d(x, W):
"""conv2d returns a 2d convolution layer with full stride."""
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
"""max_pool_2x2 downsamples a feature map by 2X."""
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
def weight_variable(shape):
"""weight_variable generates a weight variable of a given shape."""
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
"""bias_variable generates a bias variable of a given shape."""
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
and this is my main
def main(_):
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 19])
y_conv, keep_prob = deepnn(x)
cross_entropy tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
for i in range(34670):
#batch = mnist.train.next_batch(50)
if i % 1000 == 0:
train_accuracy = accuracy.eval(feed_dict={x: np.reshape(input_to_nn(i),(-1,784)), y_:np.reshape(output_of_nn(i),(-1,19)), keep_prob: 1.0})
print('step %d, training accuracy %g' % (i, train_accuracy))
train_step.run(feed_dict={x: np.reshape(input_to_nn(i),(-1,784)), y_:np.reshape(output_of_nn(i),(-1,19)), keep_prob: 0.5})
print('test accuracy %g' % accuracy.eval(feed_dict={x:input_nn, y_:output_nn, keep_prob: 1.0}))
I think that the problem is in these lines:
W_fc2 = weight_variable([1024, 19])
b_fc2 = bias_variable([19])
Your model trains to predict 19 classes. Normally there are 10 digit, if you don't really have images with 19 classes, better revert the values to original 10.

CNN for cifar10 dataset in Tensorflow

I am trying to replicate results obtained by a convolutional neural network for CIFAR10 using Tensorflow, however after some epochs (~60 epochs) my performance (accuracy) is around 10%, so I do not if the CNN is well trained?
This code is based on Deep mnist for experts https://www.tensorflow.org/get_started/mnist/pros , however in Cifar10 it does not work
import numpy as np
import tensorflow as tf
def unpickle(file):
import cPickle
fo = open(file, 'rb')
dict = cPickle.load(fo)
return dict
#unpacking training and test data
b1 = unpickle("~/cifar-10-batches-py/data_batch_1")
b2 = unpickle("~/cifar-10-batches-py/data_batch_2")
b3 = unpickle("~/cifar-10-batches-py/data_batch_3")
b4 = unpickle("~/cifar-10-batches-py/data_batch_4")
b5 = unpickle("~/cifar-10-batches-py/data_batch_5")
test = unpickle("~/cifar-10-batches-py/test_batch")
#Preparing test data
test_data = test['data']
test_label = test['labels']
#Preparing training data
train_data = np.concatenate([b1['data'],b2['data'],b3['data'],b4['data'],b5['data']],axis=0)
train_label = np.concatenate([b1['labels'],b2['labels'],b3['labels'],b4['labels'],b5['labels']],axis=0)
#Reshaping data
train_data = np.reshape(train_data,[50000,32,32,3])
test_data = np.reshape(test_data,[10000,32,32,3])
batch_size = 100
image_width = 32
image_height = 32
channels = 3
#Constructing Graph
x = tf.placeholder(tf.float32, [None, image_width, image_height, channels])#Training Data
y = tf.placeholder(tf.int32, [None])
one_hot = tf.one_hot(y,depth=10)#Converting in one hot vectors
#Constructing CNN Layers
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
#Given an input tensor of shape [batch, in_height, in_width, in_channels] and a filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels], taken from: http://textminingonline.com/dive-into-tensorflow-part-v-deep-mnist
W_conv1 = weight_variable([7, 7, 3, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_conv3 = weight_variable([5, 5, 32, 64])
b_conv3 = bias_variable([64])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
#Constructing MLP layers
W_fc1 = weight_variable([8 * 8 * 64, 64])
b_fc1 = bias_variable([64])
h_pool3_flat = tf.reshape(h_conv3, [-1, 8*8*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
W_fc2 = weight_variable([64, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)
#Computing Cost function
cross_entropy = -tf.reduce_sum(one_hot*tf.log(tf.clip_by_value(y_conv,1e-10,1e20)))
train_step = tf.train.MomentumOptimizer(learning_rate = 0.0001, momentum = 0.9).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(one_hot,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.initialize_all_variables()
sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=16))
epochs = 100
b_per = 0
row = []
for e in range(epochs):
print( "epoch", e)
avg_cost = 0
#foreach batch
for j in range(int(train_data.shape[0]/batch_size)):
data = train_data[subset,:,:,:]
label = train_label[subset]
_,c = sess.run([train_step,cross_entropy], feed_dict={x: data, y: label})
avg_cost += c / data.shape[0]
b_per = b_per + 1
if b_per%10==0 :
row.append(sess.run(accuracy, feed_dict={x: test_data, y: test_label }))
It is wrong in data reshape part! It should be,
# Reshaping data
train_data = train_data.reshape(50000, 3, 32, 32).transpose(
0, 2, 3, 1).astype("uint8")
test_data = test_data.reshape(10000, 3, 32, 32).transpose(
0, 2, 3, 1).astype("uint8")

Why does CNN with constant initialization learn at all?

Usually, weights for neural networks are initialized randomly so that they receive different gradients and learn different weights. In theory, if all weights are initialized the same way, all nodes will have the same weights no matter how long you train. Thus the training shouldn't work at all.
However, the code below gives 56% accuracy on MNIST after 7000 epochs. Why is that the case?
#!/usr/bin/env python
"""MNIST with Tensorflow."""
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import os
import numpy as np
epochs = 20000
model_checkpoint_path = 'checkpoints/mnist_tf_model.ckpt'
def weight_variable(shape):
#initial = tf.truncated_normal(shape, stddev=0.01)
initial = tf.constant(0.0, shape=shape)
return tf.get_variable(initializer=initial, name='weights')
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.get_variable(initializer=initial, name='biases')
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
def eval_network(sess, summary_writer, dataset, correct_prediction, epoch):
correct_sum = 0
total_test = 0
training_summary = tf.get_default_graph().get_tensor_by_name("training_accuracy:0")
loss_summary = tf.get_default_graph().get_tensor_by_name("loss:0")
for i in range(dataset.labels.shape[0] / 1000):
feed_dict = {x: dataset.images[i * 1000:(i + 1) * 1000],
y_: dataset.labels[i * 1000:(i + 1) * 1000]}
[test_correct, train_summ, loss_summ] = sess.run([correct_prediction,
summary_writer.add_summary(train_summ, epoch)
summary_writer.add_summary(loss_summ, epoch)
test_correct = correct_prediction.eval(feed_dict=feed_dict)
correct_sum += sum(test_correct)
total_test += len(test_correct)
return float(correct_sum) / total_test
def log_score(sess, summary_writer, filename, mnist, scoring, epoch):
with open(filename, "a") as myfile:
train = eval_network(sess, summary_writer, mnist.train, scoring, epoch)
test = eval_network(sess, summary_writer, mnist.test, scoring, epoch)
myfile.write("%i;%0.6f;%0.6f\n" % (epoch, train, test))
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
with tf.Session() as sess:
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1])
with tf.variable_scope('conv1') as scope:
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1, name='ReLU1')
h_pool1 = max_pool_2x2(h_conv1)
with tf.variable_scope('conv2') as scope:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='ReLU2')
h_pool2 = max_pool_2x2(h_conv2)
with tf.variable_scope('fc1'):
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
with tf.variable_scope('softmax'):
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv * 10**-7),
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.scalar_summary("training_accuracy", accuracy, name="training_accuracy")
tf.scalar_summary("loss", cross_entropy, name="loss")
summary_writer = tf.train.SummaryWriter('summary_dir', sess.graph)
for i in range(epochs):
batch = mnist.train.next_batch(50)
if i % 100 == 0:
log_score(sess, summary_writer,
mnist, correct_prediction, i)
train_step.run(feed_dict={x: batch[0],
y_: batch[1]})
log_score(sess, summary_writer, 'validation-curve-accuracy.csv',
mnist, correct_prediction, epochs)
Nr 1
After adding 10**-7 to the tf.log(..) term, the NANs are gone:
Nr 2
This is an old plot which did have a problem due to log(0) after 16k epochs.
The loss is plotted here. The triangles are NANs.
Here is the accuracy - due to the smoothing, it does not directly fall to ~10%.

Why is my convolution autoencoder not getting trained properly?

Why is my convolutional autoencoder not converging properly? I have a very simple layer stack.
Encoder: Conv/ReLU(Kernel size: 7x7, stride = 1, padding = 0) => maxPool(kernel size=2x2, stride = 2) => Conv/ReLU(Kernel size: 5x5, stride = 1, padding = 0) => MaxPool(kernel size=2x2, stride = 2)
Decoder: Nearest Neighbour Upsampling => Deconv/ReLU => Nearest Neighbour Upsampling => Deconv/ReLU
Training Images are of size 30x30x1.
I tried to train it with 1000 images over 1000 epoch, but the error (MSE) is still 120.
num_images = 1000
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.01))
def encoder(X, w, w2, wd, wd2):
l1a = tf.nn.relu(tf.nn.conv2d(X, w,strides=[1, 1, 1, 1], padding='VALID'))
l1 = tf.nn.max_pool(l1a, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
l2a = tf.nn.relu(tf.nn.conv2d(l1, w2,strides=[1, 1, 1, 1], padding='VALID'))
l2 = tf.nn.max_pool(l2a, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
l1da = tf.image.resize_images(l2, 8, 8, 1, align_corners=False)
output_shapel1d = tf.convert_to_tensor([BATCH_SIZE, 12, 12, 32], dtype=tf.int32);
l1d = tf.nn.relu(tf.nn.conv2d_transpose(l1da, wd, output_shapel1d, strides=[1, 1, 1, 1], padding='VALID'))
l2da = tf.image.resize_images(l1d, 24, 24, 1, align_corners=False)
output_shapel2d = tf.convert_to_tensor([BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS], dtype=tf.int32);
l2d = tf.nn.relu(tf.nn.conv2d_transpose(l2da, wd2, output_shapel2d, strides=[1, 1, 1, 1], padding='VALID'))
return l2d
complete_image = extract_data(0, 1000)
trX = complete_image[0:900]
trY = trX
teX = complete_image[900:1000]
teY = teX
X = tf.placeholder("float", [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS])
Y = tf.placeholder("float", [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS])
w = init_weights([7, 7, 1, 32])
w2 = init_weights([5, 5, 32, 64])
wd = init_weights([5, 5, 32, 64])
wd2 = init_weights([7, 7, 1, 32])
py_x = encoder(X, w, w2, wd, wd2)
cost = tf.reduce_mean(tf.squared_difference(py_x, Y, name = None))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = py_x;
global_step = tf.Variable(0, name='global_step', trainable=False)
saver = tf.train.Saver()
with tf.Session() as sess:
start = global_step.eval() # get last global_step
print "Start from:", start
if FLAGS.output == "train":
for i in range(start, 500):
training_batch = zip(range(0, num_images - BATCH_SIZE, batch_size),
range(batch_size, num_images - BATCH_SIZE, batch_size))
for start, end in training_batch:
sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
total_epoch_cost += sess.run(cost, feed_dict={X: trX[start:end], Y: trY[start:end]})
avg_epoch_cost = total_epoch_cost/BATCH_SIZE
print "cost during epoch " + `i` + "is ", avg_epoch_cost
I have added the complete code in this gist with slight modifications. I am training this with around 10,000 images, and the error after 488 epochs is 74.8.
