Weights from conv layer applied to image give saturated output - machine-learning

I am visualizing the output of my first convolutional layer when the trained weights are applied to the input image. However, the visualization comes out as white (saturated) images.
Ignore the last four cells; the filters are 7x7 and there are 32 of them.
The model is built on the following architecture (code attached):
import numpy as np
import tensorflow as tf
import cv2
from matplotlib import pyplot as plt
%matplotlib inline

model_path = "T_set_4/Model/model.ckpt"

# Define the model parameters
# Convolutional Layer 1.
filter_size1 = 7   # Convolution filters are 7 x 7 pixels.
num_filters1 = 32  # There are 32 of these filters.
# Convolutional Layer 2.
filter_size2 = 7   # Convolution filters are 7 x 7 pixels.
num_filters2 = 64  # There are 64 of these filters.
# Fully-connected layer.
fc_size = 512      # Number of neurons in fully-connected layer.

# Define the data dimensions
# The input images are 48 pixels in each dimension.
img_size = 48
# Images are stored in one-dimensional arrays of this length.
img_size_flat = img_size * img_size
# Tuple with height and width of images used to reshape arrays.
img_shape = (img_size, img_size)
# Number of colour channels for the images: 1 channel for gray-scale.
num_channels = 1
# Number of classes.
num_classes = 2

def new_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))

def new_biases(length):
    return tf.Variable(tf.constant(0.05, shape=[length]))

def new_conv_layer(input,               # The previous layer.
                   num_input_channels,  # Num. channels in prev. layer.
                   filter_size,         # Width and height of each filter.
                   num_filters,         # Number of filters.
                   use_pooling=True):   # Use 2x2 max-pooling.
    # Shape of the filter-weights for the convolution.
    # This format is determined by the TensorFlow API.
    shape = [filter_size, filter_size, num_input_channels, num_filters]
    # Create new weights (aka filters) with the given shape.
    weights = new_weights(shape=shape)
    # Create new biases, one for each filter.
    biases = new_biases(length=num_filters)
    # Create the TensorFlow operation for convolution.
    # Note the strides are set to 1 in all dimensions.
    # The first and last stride must always be 1,
    # because the first is for the image-number and
    # the last is for the input-channel.
    # But e.g. strides=[1, 2, 2, 1] would mean that the filter
    # is moved 2 pixels across the x- and y-axis of the image.
    # The padding is set to 'SAME' which means the input image
    # is padded with zeroes so the size of the output is the same.
    layer = tf.nn.conv2d(input=input,
                         filter=weights,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    # Add the biases to the results of the convolution.
    # A bias-value is added to each filter-channel.
    layer += biases
    # Rectified Linear Unit (ReLU).
    # It calculates max(x, 0) for each input pixel x.
    # This adds some non-linearity to the formula and allows us
    # to learn more complicated functions.
    layer = tf.nn.relu(layer)
    # Use pooling to down-sample the image resolution?
    if use_pooling:
        # This is 2x2 max-pooling, which means that we
        # consider 2x2 windows and select the largest value
        # in each window. Then we move 2 pixels to the next window.
        layer = tf.nn.max_pool(value=layer,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')
    # Local response normalization. Note: norm1 is computed here but
    # never used; the un-normalized `layer` is what gets returned below.
    norm1 = tf.nn.lrn(layer, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm1')
    # Note that ReLU is normally executed before the pooling,
    # but since relu(max_pool(x)) == max_pool(relu(x)) we can
    # save 75% of the relu-operations by max-pooling first.
    # We return both the resulting layer and the filter-weights
    # because we will plot the weights later.
    return layer, weights

def flatten_layer(layer):
    # Get the shape of the input layer.
    layer_shape = layer.get_shape()
    # The shape of the input layer is assumed to be:
    # layer_shape == [num_images, img_height, img_width, num_channels]
    # The number of features is: img_height * img_width * num_channels
    # We can use a function from TensorFlow to calculate this.
    num_features = layer_shape[1:4].num_elements()
    # Reshape the layer to [num_images, num_features].
    # Note that we just set the size of the second dimension
    # to num_features and the size of the first dimension to -1
    # which means the size in that dimension is calculated
    # so the total size of the tensor is unchanged from the reshaping.
    layer_flat = tf.reshape(layer, [-1, num_features])
    # The shape of the flattened layer is now:
    # [num_images, img_height * img_width * num_channels]
    # Return both the flattened layer and the number of features.
    return layer_flat, num_features

def new_fc_layer(input,           # The previous layer.
                 num_inputs,      # Num. inputs from prev. layer.
                 num_outputs,     # Num. outputs.
                 use_relu=True):  # Use Rectified Linear Unit (ReLU)?
    # Create new weights and biases.
    weights = new_weights(shape=[num_inputs, num_outputs])
    biases = new_biases(length=num_outputs)
    # Calculate the layer as the matrix multiplication of
    # the input and weights, and then add the bias-values.
    layer = tf.matmul(input, weights) + biases
    # Use ReLU?
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer

# Create the model
tf.reset_default_graph()
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x')
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])
y_true = tf.placeholder(tf.float32, shape=[None, num_classes],
                        name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)

# Create the model footprint
layer_conv1, weights_conv1 = new_conv_layer(input=x_image,
                                            num_input_channels=num_channels,
                                            filter_size=filter_size1,
                                            num_filters=num_filters1,
                                            use_pooling=True)
layer_conv2, weights_conv2 = new_conv_layer(input=layer_conv1,
                                            num_input_channels=num_filters1,
                                            filter_size=filter_size2,
                                            num_filters=num_filters2,
                                            use_pooling=True)
layer_flat, num_features = flatten_layer(layer_conv2)
layer_fc1 = new_fc_layer(input=layer_flat,
                         num_inputs=num_features,
                         num_outputs=fc_size,
                         use_relu=True)
layer_fc2 = new_fc_layer(input=layer_fc1,
                         num_inputs=fc_size,
                         num_outputs=num_classes,
                         use_relu=False)
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension=1)

# Restore the model
saver = tf.train.Saver()
session = tf.Session()
saver.restore(session, model_path)
The code I followed to create the weight visualization is from the following:
Source code
Can someone tell me whether the problem is insufficient training, or is the network too shallow?

This is a perfectly fine visualization of the feature maps (not the weights) produced by the first convolutional layer in the early stages of training.
The first layers learn to extract simple features. The learning process is somewhat slow, so at first the network learns to "blur" the input images; once it starts to converge, you'll see the first layers begin extracting meaningful low-level features (edges and so on).
Just monitor the training process and let the network train a bit longer.
If, instead, you get bad performance (always look at the validation accuracy), your feature maps will always look noisy, and you should start tuning the hyperparameters (lowering the learning rate, regularizing, ...) in order to extract meaningful features and thus get good results.
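For reference, here is a minimal sketch of how the feature maps above could be plotted, assuming the graph and session restored in the question's code; the random input is a stand-in for a real test image, and the per-map rescaling is an assumption on my part (without some rescaling, large ReLU activations easily render as uniform white patches):
import math
import numpy as np
from matplotlib import pyplot as plt

image = np.random.rand(1, img_size_flat)  # stand-in for a real test image
maps = session.run(layer_conv1, feed_dict={x: image})  # shape [1, 24, 24, 32]

num_maps = maps.shape[-1]
grid = math.ceil(math.sqrt(num_maps))  # 6x6 grid: 36 cells for 32 maps
fig, axes = plt.subplots(grid, grid, figsize=(8, 8))
for i, ax in enumerate(axes.flat):
    if i < num_maps:
        m = maps[0, :, :, i]
        m = (m - m.min()) / (m.max() - m.min() + 1e-8)  # rescale to [0, 1]
        ax.imshow(m, cmap='gray')
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()
The four empty cells at the end of the grid correspond to the "ignore the last four" note in the question.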

Related

Pytorch VNet final softmax activation layer for segmentation. Different channel dimensions to labels. How do I get prediction output?

I am trying to build a V-Net. When I pass the images to segment during training, the output has 2 channels after the softmax activation (as specified in the architecture in the attached image), but the label and input have 1. How do I convert this so that the output is the segmented image? Do I just take one of the channels as the final output during training (e.g. output = output[:, 0, :, :, :]) and treat the other channel as background?
outputs = network(inputs)
batch_size = 32
outputs.shape: [32, 2, 64, 128, 128]
inputs.shape: [32, 1, 64, 128, 128]
labels.shape: [32, 1, 64, 128, 128]
Here is my Vnet forward pass:
def forward(self, x):
    # Initial input transition
    out = self.in_tr(x)
    # Downward transitions
    out, residual_0 = self.down_depth0(out)
    out, residual_1 = self.down_depth1(out)
    out, residual_2 = self.down_depth2(out)
    out, residual_3 = self.down_depth3(out)
    # Bottom layer
    out = self.up_depth4(out)
    # Upward transitions
    out = self.up_depth3(out, residual_3)
    out = self.up_depth2(out, residual_2)
    out = self.up_depth1(out, residual_1)
    out = self.up_depth0(out, residual_0)
    # Pass to convert to 2 channels
    out = self.final_conv(out)
    # Return softmax over the channel dimension
    out = F.softmax(out, dim=1)
    return out  # [batch_size, 2, 64, 128, 128]
V-Net architecture as described in https://arxiv.org/pdf/1606.04797.pdf
That paper has two output volumes because it predicts two classes:
The network predictions, which consist of two volumes having the same resolution as the original input data, are processed through a soft-max layer which outputs the probability of each voxel to belong to foreground and to background.
Therefore this is not an autoencoder, where your inputs are passed back through the model as outputs. They use a set of labels which distinguish between their pixels of interest (foreground) and everything else (background). You will need to change your data if you wish to use the V-Net in this manner.
It won't be as simple as designating a channel as output, because this is a classification task rather than a regression task: you will need annotated labels to work with this model architecture.
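As a concrete sketch of the inference-time conversion, assuming (a convention you would need to confirm against your label encoding) that the class index produced by argmax is what you want as the mask:
import torch
import torch.nn.functional as F

# Small stand-ins with the same layout as the question's
# [32, 2, 64, 128, 128] outputs and [32, 1, 64, 128, 128] labels.
logits = torch.randn(2, 2, 8, 16, 16)            # network output before softmax
labels = torch.randint(0, 2, (2, 1, 8, 16, 16))

# Inference: collapse the two channels into a single-channel mask.
probs = F.softmax(logits, dim=1)                 # normalize over the channel axis
mask = probs.argmax(dim=1, keepdim=True)         # [2, 1, 8, 16, 16], values in {0, 1}

# Training: the usual pattern is to skip the softmax and hand raw logits
# plus integer class indices to CrossEntropyLoss.
criterion = torch.nn.CrossEntropyLoss()
loss = criterion(logits, labels.squeeze(1).long())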

What is K Max Pooling? How to implement it in Keras?

I have to add a k-max pooling layer to a CNN model to detect fake reviews. Can you please let me know how to implement it using Keras?
I searched the internet but found no good resources.
As per this paper, k-max pooling is a pooling operation that is a generalisation of the max pooling over the time dimension used in the Max-TDNN sentence model, and is different from the local max pooling operations applied in a convolutional network for object recognition (LeCun et al., 1998). The k-max pooling operation makes it possible to pool the k most active features in p that may be a number of positions apart; it preserves the order of the features, but is insensitive to their specific positions.
There are a few resources which show how to implement it in TensorFlow or Keras:
How to implement K-Max pooling in Tensorflow or Keras?
https://github.com/keras-team/keras/issues/373
New Pooling Layers For Varying-Length Convolutional Networks
Keras implementation of K-Max Pooling with TensorFlow Backend
There seems to be a solution here, as @Anubhav_Singh suggested. This response got almost 5 times more thumbs up (24) than thumbs down (5) on the GitHub Keras issues link. I am quoting it as-is here; people can try it out and say whether it worked for them or not.
Original author: arbackus
from keras.engine import Layer, InputSpec
from keras.layers import Flatten
import tensorflow as tf

class KMaxPooling(Layer):
    """
    K-max pooling layer that extracts the k-highest activations from a
    sequence (2nd dimension). TensorFlow backend.
    """
    def __init__(self, k=1, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = InputSpec(ndim=3)
        self.k = k

    def compute_output_shape(self, input_shape):
        return (input_shape[0], (input_shape[2] * self.k))

    def call(self, inputs):
        # Swap last two dimensions since top_k is applied along the last dimension.
        shifted_input = tf.transpose(inputs, [0, 2, 1])
        # Extract top_k; returns two tensors [values, indices].
        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
        # Return flattened output.
        return Flatten()(top_k)
Note: it was reported to be running very slow (though it worked for people).
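For context, a hedged sketch of how that layer could be dropped into a small Keras text-CNN for review classification; all the sizes below are placeholders, not values from the question:
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, Dense

model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=128, input_length=200))
model.add(Conv1D(filters=64, kernel_size=5, activation='relu'))
model.add(KMaxPooling(k=5))  # -> (batch, 64 * 5), per compute_output_shape above
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])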
Check this out. Not thoroughly tested, but it works fine for me. Let me know what you think. (P.S. latest TensorFlow version.)
tf.nn.top_k does not preserve the order of occurrence of values, so that is the thing that needs to be worked on:
import tensorflow as tf
from tensorflow.keras import layers

class KMaxPooling(layers.Layer):
    """
    K-max pooling layer that extracts the k-highest activations from a
    sequence (2nd dimension). TensorFlow backend.
    """
    def __init__(self, k=1, axis=1, **kwargs):
        super(KMaxPooling, self).__init__(**kwargs)
        self.input_spec = layers.InputSpec(ndim=3)
        self.k = k
        assert axis in [1, 2], 'expected dimensions (samples, filters, convolved_values), ' \
                               'cannot fold along samples dimension or axis not in list [1,2]'
        self.axis = axis
        # Need to swap the chosen axis with the last element to perform
        # the transpose for the top k elements, since top_k works on the last axis.
        self.transpose_perm = [0, 1, 2]  # default
        self.transpose_perm[self.axis] = 2
        self.transpose_perm[2] = self.axis

    def compute_output_shape(self, input_shape):
        input_shape_list = list(input_shape)
        input_shape_list[self.axis] = self.k
        return tuple(input_shape_list)

    def call(self, x):
        # Swap the sequence dimension to get the top k elements along axis=1.
        transposed_for_topk = tf.transpose(x, perm=self.transpose_perm)
        # Extract top_k; returns two tensors [values, indices].
        top_k_vals, top_k_indices = tf.math.top_k(transposed_for_topk,
                                                  k=self.k, sorted=True,
                                                  name=None)
        # Maintain the order of values as in the paper: sort the indices.
        sorted_top_k_ind = tf.sort(top_k_indices)
        flatten_seq = tf.reshape(transposed_for_topk, (-1,))
        shape_seq = tf.shape(transposed_for_topk)
        len_seq = tf.shape(flatten_seq)[0]
        indices_seq = tf.range(len_seq)
        indices_seq = tf.reshape(indices_seq, shape_seq)
        indices_gather = tf.gather(indices_seq, 0, axis=-1)
        indices_sum = tf.expand_dims(indices_gather, axis=-1)
        sorted_top_k_ind += indices_sum
        k_max_out = tf.gather(flatten_seq, sorted_top_k_ind)
        # Transpose back: the tensor regains its original layout,
        # but the pooled axis now has only k elements.
        transposed_back = tf.transpose(k_max_out, perm=self.transpose_perm)
        return transposed_back
Here is my implementation of k-max pooling as explained in the comment of @Anubhav_Singh above (the order of the top-k values is preserved):
def test60_simple_test(a):
    # Extract top_k; returns two tensors [values, indices].
    res = tf.nn.top_k(a, k=3, sorted=True, name=None)
    # Sort the indices back into ascending order to preserve the
    # original order of occurrence.
    b = tf.sort(res[1], axis=0, direction='ASCENDING', name=None)
    e = tf.gather(a, b)
    return e
a = tf.constant([7, 2, 3, 9, 5], dtype = tf.float64)
print('*input:',a)
print('**output', test60_simple_test(a))
The result:
*input: tf.Tensor([7. 2. 3. 9. 5.], shape=(5,), dtype=float64)
**output tf.Tensor([7. 9. 5.], shape=(3,), dtype=float64)
Here is a PyTorch implementation of k-max pooling:
import torch

def kmax_pooling(x, dim, k):
    # Indices of the k largest values along `dim`, re-sorted into
    # their original positional order.
    index = x.topk(k, dim=dim)[1].sort(dim=dim)[0]
    return x.gather(dim, index)
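A quick sanity check of that function with made-up numbers (the top 3 of [7, 2, 3, 9, 5], order preserved):
x = torch.tensor([[7., 2., 3., 9., 5.]])
print(kmax_pooling(x, dim=1, k=3))  # tensor([[7., 9., 5.]])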
Hope it helps.

Exporting a neural network created in Python to CoreML, is that possible?

Is it possible to export a neural network algorithm, like this one published by this guy, to a CoreML model?
from numpy import exp, array, random, dot

class NeuralNetwork():
    def __init__(self):
        # Seed the random number generator, so it generates the same numbers
        # every time the program runs.
        random.seed(1)
        # We model a single neuron, with 3 input connections and 1 output connection.
        # We assign random weights to a 3 x 1 matrix, with values in the range -1 to 1
        # and mean 0.
        self.synaptic_weights = 2 * random.random((3, 1)) - 1

    # The Sigmoid function, which describes an S shaped curve.
    # We pass the weighted sum of the inputs through this function to
    # normalise them between 0 and 1.
    def __sigmoid(self, x):
        return 1 / (1 + exp(-x))

    # The derivative of the Sigmoid function.
    # This is the gradient of the Sigmoid curve.
    # It indicates how confident we are about the existing weight.
    def __sigmoid_derivative(self, x):
        return x * (1 - x)

    # We train the neural network through a process of trial and error,
    # adjusting the synaptic weights each time.
    def train(self, training_set_inputs, training_set_outputs, number_of_training_iterations):
        for iteration in xrange(number_of_training_iterations):
            # Pass the training set through our neural network (a single neuron).
            output = self.think(training_set_inputs)
            # Calculate the error (the difference between the desired output
            # and the predicted output).
            error = training_set_outputs - output
            # Multiply the error by the input and again by the gradient of the Sigmoid curve.
            # This means less confident weights are adjusted more.
            # This means inputs, which are zero, do not cause changes to the weights.
            adjustment = dot(training_set_inputs.T, error * self.__sigmoid_derivative(output))
            # Adjust the weights.
            self.synaptic_weights += adjustment

    # The neural network thinks.
    def think(self, inputs):
        # Pass inputs through our neural network (our single neuron).
        return self.__sigmoid(dot(inputs, self.synaptic_weights))

if __name__ == "__main__":
    # Initialise a single neuron neural network.
    neural_network = NeuralNetwork()
    print "Random starting synaptic weights: "
    print neural_network.synaptic_weights
    # The training set. We have 4 examples, each consisting of 3 input values
    # and 1 output value.
    training_set_inputs = array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1]])
    training_set_outputs = array([[0, 1, 1, 0]]).T
    # Train the neural network using a training set.
    # Do it 10,000 times and make small adjustments each time.
    neural_network.train(training_set_inputs, training_set_outputs, 10000)
    print "New synaptic weights after training: "
    print neural_network.synaptic_weights
    # Test the neural network with a new situation.
    print "Considering new situation [1, 0, 0] -> ?: "
    print neural_network.think(array([1, 0, 0]))
What should be done?
Yes, this is possible. You can use the NeuralNetworkBuilder class from coremltools for this.
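For illustration, a minimal sketch of that approach applied to the single-neuron network above. It assumes the coremltools 3.x-era NeuralNetworkBuilder API and that it runs after training, so neural_network.synaptic_weights holds the learned 3x1 weight matrix:
import coremltools
from coremltools.models import datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

# 3 inputs -> 1 sigmoid output, mirroring the single neuron above.
input_features = [('input', datatypes.Array(3))]
output_features = [('output', datatypes.Array(1))]
builder = NeuralNetworkBuilder(input_features, output_features)

# add_inner_product expects W with shape (output_channels, input_channels).
W = neural_network.synaptic_weights.T  # (1, 3)
builder.add_inner_product(name='dense', W=W, b=None,
                          input_channels=3, output_channels=1,
                          has_bias=False,
                          input_name='input', output_name='dense_out')
builder.add_activation(name='sigmoid', non_linearity='SIGMOID',
                       input_name='dense_out', output_name='output')

coremltools.models.MLModel(builder.spec).save('single_neuron.mlmodel')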

TensorFlow - Classification with thousands of labels

I'm very new to TensorFlow. I've been trying to use TensorFlow to create a function where I give it a vector with 6 features and get back a label.
I have a training data set in the form of 6 features and 1 label. The label is in the first column:
309,3,0,2,4,0,6
309,12,0,2,4,0,6
309,0,4,17,2,0,6
318,0,660,414,58,3,12
311,0,0,414,58,0,2
298,0,53,355,5,0,2
60,16,14,381,30,4,2
312,0,8,8,13,0,3
...
I have the index for the labels which is a list of thousand and thousands of names:
309,Joe
318,Joey
311,Bruce
...
How do I create a model and train it using TensorFlow to be able to predict the label, given a vector without the first column?
--
This is what I tried:
from __future__ import print_function
import tflearn
name_count = sum(1 for line in open('../../names.csv')) # this comes out to 24260
# Load CSV file, indicate that the first column represents labels
from tflearn.data_utils import load_csv
data, labels = load_csv('../../data.csv', target_column=0,
                        categorical_labels=True, n_classes=name_count)
# Build neural network
net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)
# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)
# Predict
pred = model.predict([[218,5,124,26,0,3]]) # 326
print("Name:", pred[0][1])
It's based on https://github.com/tflearn/tflearn/blob/master/tutorials/intro/quickstart.md
I get the error:
ValueError: Cannot feed value of shape (16, 24260) for Tensor u'TargetsData/Y:0', which has shape '(?, 2)'
24260 is the number of lines in names.csv
Thank you!
net = tflearn.fully_connected(net, 2, activation='softmax')
looks to be saying you have 2 output classes, but in reality you have 24260. 16 is the size of your minibatch, so you have 16 rows of 24260 columns (in each row, one of those 24260 values will be a 1 and the others all 0s).
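So the minimal fix would presumably be to make the output layer as wide as the number of names (untested, but it matches the shape in the error):
net = tflearn.fully_connected(net, name_count, activation='softmax')  # 24260 outputs
The prediction then comes back as a 24260-way distribution per input row, so the predicted label id is its argmax, e.g. np.argmax(model.predict([[218, 5, 124, 26, 0, 3]])[0]) with numpy imported as np.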

Parallelizing a tensorflow operation across multiple GPU's

In the code below for a single-hidden-layer neural network, I'm attempting to parallelize the gradient descent operation across two GPUs. I'm just trying to think about this conceptually at the moment; there does not appear to be much literature on how to perform this, and reading Training Multi-GPU on Tensorflow: a simpler way? does not provide a concrete answer. In the code below I've added two functions, runOnGPU1() and runOnGPU2(), as a conceptual idea of how to split the training of the network across two GPUs. Can these two loops be split in order to share the computation across multiple GPUs?
import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()
# a batch of inputs of 2 value each
inputs = tf.placeholder(tf.float32, shape=[None, 2])
# a batch of output of 1 value each
desired_outputs = tf.placeholder(tf.float32, shape=[None, 1])
# [!] define the number of hidden units in the first layer
HIDDEN_UNITS = 4
# connect 2 inputs to 3 hidden units
# [!] Initialize weights with random numbers, to make the network learn
weights_1 = tf.Variable(tf.truncated_normal([2, HIDDEN_UNITS]))
# [!] The biases are single values per hidden unit
biases_1 = tf.Variable(tf.zeros([HIDDEN_UNITS]))
# connect 2 inputs to every hidden unit. Add bias
layer_1_outputs = tf.nn.sigmoid(tf.matmul(inputs, weights_1) + biases_1)
# [!] The XOR problem is that the function is not linearly separable
# [!] An MLP (multi-layer perceptron) can learn to separate non-linearly separable points
# (you can think of it as learning hypercurves, not only hyperplanes)
# [!] Let's add a new layer and change layer 2 to output more than 1 value
# connect first hidden units to 2 hidden units in the second hidden layer
weights_2 = tf.Variable(tf.truncated_normal([HIDDEN_UNITS, 2]))
# [!] The same of above
biases_2 = tf.Variable(tf.zeros([2]))
# connect the hidden units to the second hidden layer
layer_2_outputs = tf.nn.sigmoid(
    tf.matmul(layer_1_outputs, weights_2) + biases_2)
# [!] create the new layer
weights_3 = tf.Variable(tf.truncated_normal([2, 1]))
biases_3 = tf.Variable(tf.zeros([1]))
logits = tf.nn.sigmoid(tf.matmul(layer_2_outputs, weights_3) + biases_3)
# [!] The error function chosen is good for a multiclass classification task, not for XOR.
error_function = 0.5 * tf.reduce_sum(tf.subtract(logits, desired_outputs) * tf.subtract(logits, desired_outputs))
train_step = tf.train.GradientDescentOptimizer(0.05).minimize(error_function)
sess.run(tf.global_variables_initializer())
training_inputs = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
training_outputs = [[0.0], [1.0], [1.0], [0.0]]
def runOnGPU1():
    for i in range(5):
        _, loss = sess.run([train_step, error_function],
                           feed_dict={inputs: np.array(training_inputs),
                                      desired_outputs: np.array(training_outputs)})
        print(loss)

def runOnGPU2():
    for i in range(5):
        _, loss = sess.run([train_step, error_function],
                           feed_dict={inputs: np.array(training_inputs),
                                      desired_outputs: np.array(training_outputs)})
        print(loss)
runOnGPU1()
runOnGPU2()
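For what it's worth, splitting the two Python loops as written won't distribute anything: both runOnGPU1() and runOnGPU2() run the same graph, and device placement in TensorFlow happens at graph-construction time with tf.device, not at sess.run time. Below is a hedged sketch of the conventional TF1 data-parallel ("multi-tower") pattern, reusing the question's placeholders; build_loss is a hypothetical helper, not part of the question's code, written with tf.get_variable so the two towers share one set of weights:
# Sketch only: each GPU computes the loss on half the batch, and the
# averaged gradients are applied once.
def build_loss(x, y):
    # Hypothetical single-layer tower; tf.get_variable + variable_scope
    # reuse is what lets the towers share weights.
    w = tf.get_variable('w', [2, 1], initializer=tf.truncated_normal_initializer())
    b = tf.get_variable('b', [1], initializer=tf.zeros_initializer())
    logits = tf.nn.sigmoid(tf.matmul(x, w) + b)
    return 0.5 * tf.reduce_sum(tf.square(logits - y))

optimizer = tf.train.GradientDescentOptimizer(0.05)
input_splits = tf.split(inputs, 2)            # half the batch per GPU
label_splits = tf.split(desired_outputs, 2)
tower_grads = []
for gpu_id in range(2):
    with tf.device('/gpu:%d' % gpu_id):
        with tf.variable_scope('model', reuse=(gpu_id > 0)):
            loss = build_loss(input_splits[gpu_id], label_splits[gpu_id])
        tower_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='model')
        tower_grads.append(optimizer.compute_gradients(loss, var_list=tower_vars))

# Average the per-tower gradients variable-by-variable, then apply once.
avg_grads = [(tf.reduce_mean(tf.stack([g for g, _ in gv]), axis=0), gv[0][1])
             for gv in zip(*tower_grads)]
train_step = optimizer.apply_gradients(avg_grads)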
