My conv layer has an output shape of (64, 3, 3, 80), where 64 is the batch size. The next layer is a dense layer of shape (3920, 4096). How do I reshape the output of my conv layer to fit the shape of my dense layer? I am implementing this in TensorFlow :)
This is the layer just before the dense layer.
stride_conv = [1,1,1,1]
padding='SAME'
filter_3 = tf.Variable(initial_value=tf.random_normal([3,3,112,80]))
conv_3 = tf.nn.conv2d(conv_2,filter_3,stride_conv,padding)
Thanks!
conv3 => Reshape => FC1 (720 -> 4096)
[64, 3, 3, 80] => [64, 720] => [64, 4096]
Note that 3 * 3 * 80 = 720, so the flattened feature size per example is 720 (not 3920); the dense weight matrix should therefore have shape (720, 4096). The following code does the Conv-to-FC step shown above:
# Flatten everything except the batch dimension: 3 * 3 * 80 = 720
# (np here is `import numpy as np`)
shape = int(np.prod(conv_3.get_shape()[1:]))
conv_3_flat = tf.reshape(conv_3, [-1, shape])
# Fully connected layer: 720 -> 4096
fc1w = tf.Variable(tf.truncated_normal([shape, 4096], dtype=tf.float32, stddev=1e-1), name='weights')
fc1b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
                   trainable=True, name='biases')
fc1 = tf.nn.bias_add(tf.matmul(conv_3_flat, fc1w), fc1b)
fc1 = tf.nn.relu(fc1)
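If your TensorFlow version has the tf.layers API, the same flatten-plus-dense step can be written more compactly (a sketch, not part of the original answer):
# Equivalent using tf.layers (TF >= 1.4): [64, 3, 3, 80] -> [64, 720] -> [64, 4096]
conv_3_flat = tf.layers.flatten(conv_3)
fc1 = tf.layers.dense(conv_3_flat, units=4096, activation=tf.nn.relu)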
Hope this helps.
Also, here is a simple MNIST model for reference (taken from here: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/convolutional_network.py):
def conv_net(x, weights, biases, dropout):
# Reshape input picture
x = tf.reshape(x, shape=[-1, 28, 28, 1])
# Convolution Layer
conv1 = conv2d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
conv1 = maxpool2d(conv1, k=2)
# Convolution Layer
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
conv2 = maxpool2d(conv2, k=2)
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout
fc1 = tf.nn.dropout(fc1, dropout)
# Output, class prediction
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
return out
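For reference, the weights and biases dictionaries that conv_net expects are defined in the linked example roughly as follows (the shapes are the important part: wd1 takes 7*7*64 inputs because two 2x2 max-poolings reduce the 28x28 image to 7x7 with 64 channels):
weights = {
    # 5x5 conv, 1 input channel, 32 output channels
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 input channels, 64 output channels
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # fully connected: 7*7*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([7 * 7 * 64, 1024])),
    # 1024 inputs, 10 class scores (MNIST)
    'out': tf.Variable(tf.random_normal([1024, 10]))
}
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([10]))
}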
I am following the D2L book, and one of the exercises in the GoogLeNet chapter is to adapt the model from the paper Rethinking the Inception Architecture for Computer Vision. I did that and essentially recreated the Inception V2 architecture on top of the Inception V1 architecture presented in the D2L book; the only thing I didn't include was the grid reduction described in the paper. I expected this to improve my results, but if anything it made them worse. I tested the models with the D2L code from the GoogLeNet chapter on the fashion_mnist dataset: Inception V1 reached a test accuracy of 0.908, while my V2 implementation from the paper only reached a test accuracy of 0.885, and that was the best I could get. I expected the V2 model to be better, so I don't really know where I went wrong, whether it is an error in my code or something else, and any insights into the V2 architecture or the paper would be greatly appreciated.
This is the code for the first 3 Inception blocks (Figure 5 in the paper):
class InceptionB1(nn.Module):
# `c1`--`c4` are the number of output channels for each path
def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
super(InceptionB1, self).__init__(**kwargs)
# Path 1 is a single 1 x 1 convolutional layer
self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
# Path 2 is a 1 x 1 convolutional layer followed by a 3 x 3
# convolutional layer
self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
# Path 3 is a 1 x 1 convolutional layer followed by two 3 x 3
# convolutional layers (factorizing the 5 x 5 convolution)
self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=3, padding=1)
# Replacing path3_2 with modified path using factorization through smaller convolutions
self.p3_3 = nn.Conv2d(c3[1], c3[1], kernel_size=3, padding = 1)
# Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
# convolutional layer
self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)
def forward(self, x):
p1 = F.relu(self.p1_1(x))
p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
p3 = F.relu(self.p3_3(self.p3_2(F.relu(self.p3_1(x)))))
p4 = F.relu(self.p4_2(self.p4_1(x)))
# Concatenate the outputs on the channel dimension
return torch.cat((p1, p2, p3, p4), dim=1)
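As a quick sanity check on a block like this (a sketch; the channel numbers below are the ones D2L uses for its first Inception block, any values work), you can push a dummy tensor through it and confirm that the spatial size is preserved and the channel count is the sum of the four paths:
import torch

# 192 input channels; paths produce 64 + 128 + 32 + 32 = 256 output channels
blk = InceptionB1(192, 64, (96, 128), (16, 32), 32)
x = torch.randn(1, 192, 28, 28)
print(blk(x).shape)  # expected: torch.Size([1, 256, 28, 28])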
This is my code for the middle 5 blocks (Figure 6 in the paper):
class InceptionB2(nn.Module):
# `c1`--`c4` are the number of output channels for each path
def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
super(InceptionB2, self).__init__(**kwargs)
# Path 1 is a single 1 x 1 convolutional layer
self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
# Path 2 is a 1 x 1 convolutional layer followed by a 1 x 3 and then a 3 x 1
# convolutional layer
self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=(1,3), padding=1)
self.p2_3 = nn.Conv2d(c2[1], c2[1], kernel_size=(3,1), padding=0)
# Path 3 is a 1 x 1 convolutional layer followed by 1 x 3, 3 x 1, 1 x 3, and 3 x 1
# convolutional layers
self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=(1,3), padding=1)
self.p3_3 = nn.Conv2d(c3[1], c3[1], kernel_size=(3,1), padding=0)
self.p3_4 = nn.Conv2d(c3[1], c3[1], kernel_size=(1,3), padding=0)
self.p3_5 = nn.Conv2d(c3[1], c3[1], kernel_size=(3,1), padding=1)
# Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
# convolutional layer
self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)
def forward(self, x):
p1 = F.relu(self.p1_1(x))
p2 = F.relu(self.p2_3(F.relu(self.p2_2(F.relu(self.p2_1(x))))))
p3 = F.relu(self.p3_5(F.relu(self.p3_4(F.relu(self.p3_3(F.relu(self.p3_2(F.relu(self.p3_1(x))))))))))
p4 = F.relu(self.p4_2(self.p4_1(x)))
# Concatenate the outputs on the channel dimension
return torch.cat((p1, p2, p3, p4), dim=1)
This is my code for the last 2 blocks (Figure 7 in the paper):
class InceptionB3(nn.Module):
# `c1`--`c4` are the number of output channels for each path
def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
super(InceptionB3, self).__init__(**kwargs)
# Path 1 is a single 1 x 1 convolutional layer
self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
# Path 2 is a 1 x 1 convolutional layer followed by a 1x3 and 3x1
# convolutional layer
self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
self.p2_2_1 = nn.Conv2d(c2[0], c2[1], kernel_size=(1,3), padding=0)
self.p2_2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=(3,1), padding=0)
# Path 3 is a 1 x 1 convolutional layer followed by a 3x3 then 1x3 and 3x1
# convolutional layer
self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=3, padding=1)
self.p3_3_1 = nn.Conv2d(c3[1], c3[1], kernel_size=(1,3), padding=0)
self.p3_3_2 = nn.Conv2d(c3[1], c3[1], kernel_size=(3,1), padding=0)
# Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
# convolutional layer
self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)
def forward(self, x):
p1 = F.relu(self.p1_1(x))
p2_1 = F.relu(self.p2_2_1(F.relu(self.p2_1(x))))
p2_2 = F.relu(self.p2_2_2(F.relu(self.p2_1(x))))
p2 = torch.matmul(p2_1, p2_2)
p3_1 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
p3_2_1 = F.relu(self.p3_3_1(p3_1))
p3_2_2 = F.relu(self.p3_3_2(p3_1))
p3 = torch.matmul(p3_2_1, p3_2_2)
p4 = F.relu(self.p4_2(self.p4_1(x)))
# Concatenate the outputs on the channel dimension
return torch.cat((p1, p2, p3, p4), dim=1)
Here is the link to the D2L chapter
https://d2l.ai/chapter_convolutional-modern/googlenet.html
(Training graphs for the V1 and V2 models were attached as images.)
I am pretty new to deep learning. I was experimenting with fine-tuning pretrained models on my own dataset, but I am not able to improve the test or training accuracy. Both losses hover around 62 from the beginning of training to the end. I am using Xception as the pretrained model, combined with GlobalAveragePooling2D, a dense layer, and a dropout of 0.2.
The training set consists of 3522 images belonging to 2 classes, and the test set of 881 images belonging to the same 2 classes. The problem is that I am not able to add any more images to the datasets; this is the maximum number I could collect. I tried ImageDataGenerator, but it was of no use. The images of the two classes look a bit similar. Under these constraints, can I increase the accuracy?
Code:
base_model = Xception(include_top=False, weights='imagenet')
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.2)(x)
predictions = Dense(2, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
num_training_img=3522
num_test_img=881
stepsPerEpoch = num_training_img/batch_size
validationSteps= num_test_img/batch_size
history= model.fit_generator(
train_data_gen,
steps_per_epoch=stepsPerEpoch,
epochs=20,
validation_data = test_data_gen,
validation_steps=validationSteps
)
layer_num = len(model.layers)
for layer in model.layers[:129]:
layer.trainable = False
for layer in model.layers[129:]:
layer.trainable = True
# update the weights
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy'])
num_training_img=3522
num_test_img=881
stepsPerEpoch = num_training_img/batch_size
validationSteps= num_test_img/batch_size
history= model.fit_generator(
train_data_gen,
steps_per_epoch=stepsPerEpoch,
epochs=20,
validation_data = test_data_gen,
validation_steps=validationSteps
)
You should make the layers non-trainable before creating the model.
base_model = Xception(include_top=False, weights='imagenet')
for layer in base_model.layers:
layer.trainable = False
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.2)(x)
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
Your last layer has 2 units, which suggests that softmax is a better fit.
predictions = Dense(2, activation='softmax')(x)
Try the Adam optimizer and change the loss to match:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
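One caveat (an assumption about your data pipeline, since the generator code is not shown): categorical_crossentropy with a 2-unit softmax expects one-hot labels, so the generators should produce them, for example:
# Hypothetical generator setup (paths and sizes are placeholders); the key
# point is class_mode='categorical', which yields one-hot labels.
from keras.preprocessing.image import ImageDataGenerator

train_data_gen = ImageDataGenerator(rescale=1./255).flow_from_directory(
    'data/train', target_size=(299, 299), batch_size=32,
    class_mode='categorical')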
I don't know how I could evaluate the training accuracy and test accuracy every epoch in the following code. This CNN is used for MNIST classification, and the code is copied from the TensorFlow tutorial https://www.tensorflow.org/tutorials/layers.
It seems that it only records the loss every epoch, and I cannot find a way to add accuracy to the code.
How could I do that?
def cnn_model_fn(features, labels, mode):
"""Model function for CNN."""
# Input Layer
input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
# Convolutional Layer #1
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
# Pooling Layer #1
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
# Convolutional Layer #2 and Pooling Layer #2
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
# Dense Layer
pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
dropout = tf.layers.dropout(
inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
# Logits Layer
logits = tf.layers.dense(inputs=dropout, units=10)
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=logits, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(logits, name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Calculate Loss (for both TRAIN and EVAL modes)
onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
loss = tf.losses.softmax_cross_entropy(
onehot_labels=onehot_labels, logits=logits)
# Configure the Training Op (for TRAIN mode)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(
loss=loss,
global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
# Add evaluation metrics (for EVAL mode)
eval_metric_ops = {
"accuracy": tf.metrics.accuracy(
labels=labels, predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(
mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def main(unused_argv):
# Load training and eval data
mnist = tf.contrib.learn.datasets.load_dataset("mnist")
train_data = mnist.train.images # Returns np.array
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_data = mnist.test.images # Returns np.array
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)
# Create the Estimator
mnist_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir="/tmp/mnist_convnet_model")
# Set up logging for predictions
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)
# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": train_data},
y=train_labels,
batch_size=100,
num_epochs=None,
shuffle=True)
mnist_classifier.train(
input_fn=train_input_fn,
steps=20000,
hooks=[logging_hook])
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": eval_data},
y=eval_labels,
num_epochs=1,
shuffle=False)
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
main(1)
When training a neural network, it is common to train the model for many epochs. It may be preferable to print the accuracy only every n epochs, where you set n based on the total number of epochs you plan to use. Personally, I prefer to log the data and view it in TensorBoard.
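If you also want a running training accuracy, one common approach (a sketch, assuming the TF 1.x tf.estimator API used above) is to add an accuracy metric and summary inside cnn_model_fn, right after the loss is computed; it then shows up in TensorBoard and can be picked up by a LoggingTensorHook:
# Inside cnn_model_fn, after `loss` is defined:
accuracy = tf.metrics.accuracy(
    labels=labels, predictions=predictions["classes"], name="acc_op")
tf.identity(accuracy[1], name="train_accuracy")   # named tensor for logging
tf.summary.scalar("accuracy", accuracy[1])        # visible in TensorBoard

# In main(), log it every 50 steps alongside the probabilities:
logging_hook = tf.train.LoggingTensorHook(
    tensors={"accuracy": "train_accuracy"}, every_n_iter=50)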
I am trying to implement a multi-input LSTM model using keras. The code is as follows:
data_1 -> shape (1150,50)
data_2 -> shape (1150,50)
y_train -> shape (1150,50)
input_1 = Input(shape=data_1.shape)
LSTM_1 = LSTM(100)(input_1)
input_2 = Input(shape=data_2.shape)
LSTM_2 = LSTM(100)(input_2)
concat = Concatenate(axis=-1)
x = concat([LSTM_1, LSTM_2])
dense_layer = Dense(1, activation='sigmoid')(x)
model = keras.models.Model(inputs=[input_1, input_2], outputs=[dense_layer])
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['acc'])
model.fit([data_1, data_2], y_train, epochs=10)
When I run this code, I get a ValueError:
ValueError: Error when checking model input: expected input_1 to have 3 dimensions, but got array with shape (1150, 50)
Does anyone have a solution to this problem?
Use data_1 = np.expand_dims(data_1, axis=2) before you define the model. An LSTM expects inputs with dimensions (batch_size, timesteps, features); in your case I am guessing you have 1150 samples, 50 time steps, and 1 feature, so you need to add a dimension at the end of your array.
This needs to be done before you define the model; otherwise, when you set input_1 = Input(shape=data_1.shape), you are telling Keras that each input sample has 1150 timesteps and 50 features, so it will expect inputs of shape (None, 1150, 50) (the None stands for "any batch size will be accepted"). Input(shape=...) should describe a single sample without the batch dimension, so after expanding the dimensions it should be Input(shape=(50, 1)).
The same holds for input_2 and data_2.
Hope this helps
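Putting the answer together, a minimal sketch of the corrected setup (assuming 1150 samples, 50 timesteps, and 1 feature, with standalone Keras imports):
import numpy as np
from keras.layers import Input, LSTM, Concatenate, Dense
from keras.models import Model

# (1150, 50) -> (1150, 50, 1): add a trailing "features" dimension
data_1 = np.expand_dims(data_1, axis=2)
data_2 = np.expand_dims(data_2, axis=2)

# shape is per-sample (timesteps, features), without the batch dimension
input_1 = Input(shape=(50, 1))
lstm_1 = LSTM(100)(input_1)
input_2 = Input(shape=(50, 1))
lstm_2 = LSTM(100)(input_2)

x = Concatenate(axis=-1)([lstm_1, lstm_2])
output = Dense(1, activation='sigmoid')(x)

# Note: with a single sigmoid unit the targets should have shape (1150, 1),
# one label per sample, rather than the (1150, 50) y_train in the question.
model = Model(inputs=[input_1, input_2], outputs=[output])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
model.fit([data_1, data_2], y_train, epochs=10)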
I'm trying to train a simple neural network that consists of:
A convolution layer with 8 filters of size 5x5, stride 2.
Max pooling 25x25 (the images have a rather low amount of detail).
Flattening the output into a (2x2x8) vector.
A logistic regression classifier.
Altogether the network has < 1000 weights.
File: nn.py
#!/bin/python
import tensorflow as tf
import create_batch
# Prepare data
batch = create_batch.batch
x = tf.reshape(batch[0], [-1,100,100,3])
y_ = batch[1]
# CONVOLUTION NETWORK
# For initialization
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.3)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.2, shape=shape)
return tf.Variable(initial)
# Convolution with stride 2
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding='SAME')
def max_pool_25x25(x):
return tf.nn.max_pool(x, ksize=[1, 25, 25, 1],
strides=[1, 25, 25, 1], padding='SAME')
# First layer
W_conv1 = weight_variable([5, 5, 3, 8])
b_conv1 = bias_variable([8])
x_image = tf.reshape(x, [-1,100,100,3])
# First conv1
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_25x25(h_conv1)
# Dense connection layer
# make data flat
W_fc1 = weight_variable([2 * 2 * 8, 2])
b_fc1 = bias_variable([2])
h_pool1_flat = tf.reshape(h_pool1, [-1, 2*2*8])
y_conv = tf.nn.softmax(tf.matmul(h_pool1_flat, W_fc1) + b_fc1)
#Learning
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Session
sess = tf.Session()
sess.run(tf.initialize_all_variables())
# Start input enqueue threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(200):
if i%10 == 0:
train_accuracy = accuracy.eval(session=sess)
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(session=sess)
File: create_batch.py
#!/bin/python
import tensorflow as tf
PATH1 = "../dane/trening/NK/"
PATH2 = "../dane/trening/K/"
def create_labeled_image_list():
filenames = [(PATH1 + "nk_%d.png" % i) for i in range(300)]
labels = [[1,0] for i in range(300)]
filenames += [(PATH2 + "kulki_%d.png" % i) for i in range(300)]
labels += [[0,1] for i in range(300)]
return filenames, labels
def read_images_from_disk(input_queue):
label = input_queue[1]
file_contents = tf.read_file(input_queue[0])
example = tf.image.decode_png(file_contents, channels=3)
example.set_shape([100, 100, 3])
example = tf.to_float(example)
print ("READ, label:")
print(label)
return example, label
# Start
image_list, label_list = create_labeled_image_list()
# Create appropriate tensors for naming
images = tf.convert_to_tensor(image_list, dtype=tf.string)
labels = tf.convert_to_tensor(label_list, dtype=tf.float32)
input_queue = tf.train.slice_input_producer([images, labels],
shuffle=True)
image, label = read_images_from_disk(input_queue)
batch = tf.train.batch([image, label], batch_size=600)
I'm feeding in 100x100 images; I have two classes with 300 images each.
Basically, the randomly initialized network at step 0 has better accuracy than the trained one.
The network stops learning after it reaches 0.5 accuracy (basically a coin flip). The images contain either a blue blobby thing (class 1) or grass (class 2).
I'm training the network on the whole image set at once (600 images), and the loss function is cross-entropy.
What am I doing wrong?
OK, I've found a fix. There were two errors; now the network is learning.
The images were RGBA despite the fact that I declared them as RGB in TensorFlow.
I did not normalize the images to [-1, 1] float32.
In TensorFlow it should be done with something like this:
# i use "im" for image
tf.image.convert_image_dtype(im, dtype=float32)
im = tf.sub(im, -0.5)
im = tf.mul(im, 2.0)
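For context, a sketch of where these fixes would go in read_images_from_disk from create_batch.py above (using the same old-style tf.sub / tf.mul ops as the question; the RGBA issue may also require checking the image files themselves):
def read_images_from_disk(input_queue):
    label = input_queue[1]
    file_contents = tf.read_file(input_queue[0])
    # request 3 channels so an alpha channel is not carried along
    example = tf.image.decode_png(file_contents, channels=3)
    example.set_shape([100, 100, 3])
    # convert to float32 in [0, 1], then rescale to [-1, 1]
    example = tf.image.convert_image_dtype(example, dtype=tf.float32)
    example = tf.mul(tf.sub(example, 0.5), 2.0)
    return example, label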
To all newbies to ML - prepare your data with caution!
Thanks.