Hi I am currently struggling between converting between popular 2d keypoint output , from COCO keypoints to openpose . I have the following keypoint order from coco keypoints of the order x1,y1,c1 ....x17,y17,c17 where x,y are the x y cordinates and C is the confidence score of the joints being detected. I was wondering if any one has successfully mapped between Coco and openpose
def convert_coco_to_openpose_cords(coco_keypoints_list):
# coco keypoints: [x1,y1,v1,...,xk,yk,vk] (k=17)
# ['Nose', Leye', 'Reye', 'Lear', 'Rear', 'Lsho', 'Rsho', 'Lelb',
# 'Relb', 'Lwri', 'Rwri', 'Lhip', 'Rhip', 'Lkne', 'Rkne', 'Lank', 'Rank']
# openpose keypoints: [y1,...,yk], [x1,...xk] (k=18, with Neck)
# ['Nose', *'Neck'*, 'Rsho', 'Relb', 'Rwri', 'Lsho', 'Lelb', 'Lwri','Rhip',
# 'Rkne', 'Rank', 'Lhip', 'Lkne', 'Lank', 'Leye', 'Reye', 'Lear', 'Rear']
indices = [0, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 1, 2, 3, 4]
y_cords = []
x_cords = []
for i in indices:
xi, yi, vi = coco_keypoints_list[i*3:(i+1)*3]
if vi == 0: # not labeled
y_cords.append(MISSING_VALUE)
x_cords.append(MISSING_VALUE)
elif vi == 1: # labeled but not visible
y_cords.append(yi)
x_cords.append(xi)
elif vi == 2: # labeled and visible
y_cords.append(yi)
x_cords.append(xi)
else:
raise ValueError("vi value: {}".format(vi))
# Get 'Neck' keypoint by interpolating between 'Lsho' and 'Rsho' keypoints
l_shoulder_index = 5
r_shoulder_index = 6
l_shoulder_keypoint = coco_keypoints_list[l_shoulder_index*3:(l_shoulder_index+1)*3]
r_shoulder_keypoint = coco_keypoints_list[r_shoulder_index*3:(r_shoulder_index+1)*3]
if l_shoulder_keypoint[2] > 0 and r_shoulder_keypoint[2] > 0:
neck_keypoint_y = int((l_shoulder_keypoint[1]+r_shoulder_keypoint[1])/2.)
neck_keypoint_x = int((l_shoulder_keypoint[0]+r_shoulder_keypoint[0])/2.)
else:
neck_keypoint_y = neck_keypoint_x = MISSING_VALUE
open_pose_neck_index = 1
y_cords.insert(open_pose_neck_index, neck_keypoint_y)
x_cords.insert(open_pose_neck_index, neck_keypoint_x)
return np.concatenate([np.expand_dims(y_cords, -1),
np.expand_dims(x_cords, -1)], axis=1)
Related
I have a piece of code which I use to visualize a graph:
if (visualize == True):
# Black removed and is used for noise instead.
unique_labels = set(db.labels_)
colors = [plt.cm.Spectral(each)
for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = [0, 0, 0, 1]
class_member_mask = (db.labels_ == k)
xy = scaled_points[class_member_mask & core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
markeredgecolor='k', markersize=14)
xy = scaled_points[class_member_mask & ~core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
markeredgecolor='k', markersize=6)
# display the graph
plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
# get the image into a variable that OpenCV likes
# uh, how?
while this works, I want to have the end result (whatever is being shown) as an OpenCV image.
Since I don't even have the variable -image-, I have no idea how to achieve this.
Did anyone do something similar?
EDIT: I am actually getting close. Now I can create an OpenCV image out of a fig, but the contents are not right. The fig is empty. I wonder where I go wrong? Why doesn't it get the plt object from above and draw the actual content?
fig = plt.figure()
canvas = FigureCanvas(fig)
canvas.draw()
# convert canvas to image
graph_image = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
graph_image = graph_image.reshape(fig.canvas.get_width_height()[::-1] + (3,))
# it still is rgb, convert to opencv's default bgr
graph_image = cv2.cvtColor(graph_image,cv2.COLOR_RGB2BGR)
Okay, I finally got it! One has to create the fig object at the very beginning, then use the necessary plotting functions, then convert to canvas and then to OpenCV image.
EDIT: Thanks to the suggestion of #ImportanceOfBeingErnest, now the code is even more straightforward!
Here is the full code:
if (visualize == True):
# create a figure
fig = plt.figure()
# Black removed and is used for noise instead.
unique_labels = set(db.labels_)
colors = [plt.cm.Spectral(each)
for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = [0, 0, 0, 1]
class_member_mask = (db.labels_ == k)
xy = scaled_points[class_member_mask & core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
markeredgecolor='k', markersize=14)
xy = scaled_points[class_member_mask & ~core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
markeredgecolor='k', markersize=6)
# convert it to an OpenCV image/numpy array
canvas = FigureCanvas(fig)
canvas.draw()
# convert canvas to image
graph_image = np.array(fig.canvas.get_renderer()._renderer)
# it still is rgb, convert to opencv's default bgr
graph_image = cv2.cvtColor(graph_image,cv2.COLOR_RGB2BGR)
I created a very trivial LSTM to try to predict a short sequence, but it won't overfit and approach a loss of zero the way I expect.
Instead it just converges around a loss of ~1.5, even if it definitely has enough degrees of freedom to learn this sequence verbatim.
import tensorflow as tf
import time
tf.logging.set_verbosity(tf.logging.DEBUG)
#
# Training data, just a single sequence
#
train_input = [[0, 1, 2, 3, 4, 5, 0, 6, 7, 0]]
train_output = [[1, 2, 3, 4, 5, 0, 6, 7, 8, 0]]
#
# Training metadata
#
batch_size = 1
sequence_length = 10
n_classes = 9
# Network size
rnn_cell_size = 10
rnn_layers = 2
embedding_rank = 3
#
# Training hyperparameters
#
epochs = 100
n_batches = 100
learning_rate = 0.01
#
# Model
#
features = tf.placeholder(tf.int32, [None, sequence_length], name="features")
embeddings = tf.Variable(tf.random_uniform([n_classes, embedding_rank], -1.0, 1.0))
embed = tf.nn.embedding_lookup(embeddings, features)
cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.LSTMCell(rnn_cell_size) for i in range(rnn_layers)])
initial_state = cell.zero_state(batch_size, tf.float32)
cell, _ = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)
# Convert sequences x batches x outputs to (sequences * batches) x outputs
flat_lstm_output = tf.reshape(cell, [-1, rnn_cell_size])
output = tf.contrib.layers.fully_connected(inputs=flat_lstm_output, num_outputs=n_classes)
softmax = tf.nn.softmax(output)
#
# Training
#
targets = tf.placeholder(tf.int32, [None, sequence_length])
# Convert sequences x batches x targets to (sequences * batches) x targets
flat_targets = tf.reshape(targets, [-1])
loss = tf.losses.sparse_softmax_cross_entropy(flat_targets, softmax)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(epochs):
loss_sum = 0
epoch_start = time.time()
for j in range(n_batches):
_, step_loss = sess.run([train_op, loss], {
features: train_input,
targets: train_output,
})
loss_sum = loss_sum + step_loss
print('avg_loss', loss_sum / n_batches, 'avg_time', (time.time() - epoch_start) / n_batches)
I get the feeling something very basic is missing here - what am I doing wrong?
EDIT
I tried to simplify it even more, and now I'm down to the following even more trivial example (that also doesn't converge):
import tensorflow as tf
import time
tf.logging.set_verbosity(tf.logging.DEBUG)
#
# Training data, just a single sequence
#
train_input = [0, 1, 2, 3, 4]
train_output = [1, 2, 3, 4, 5]
#
# Training metadata
#
batch_size = 1
sequence_length = 5
n_classes = 6
#
# Training hyperparameters
#
epochs = 100
n_batches = 100
learning_rate = 0.01
#
# Model
#
features = tf.placeholder(tf.int32, [None])
one_hot = tf.contrib.layers.one_hot_encoding(features, n_classes)
output = tf.contrib.layers.fully_connected(inputs=one_hot, num_outputs=10)
output = tf.contrib.layers.fully_connected(inputs=output, num_outputs=n_classes)
#
# Training
#
targets = tf.placeholder(tf.int32, [None])
one_hot_targets = tf.one_hot(targets, depth=n_classes)
loss = tf.losses.softmax_cross_entropy(one_hot_targets, output)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(epochs):
loss_sum = 0
epoch_start = time.time()
for j in range(n_batches):
_, step_loss = sess.run([train_op, loss], {
features: train_input,
targets: train_output,
})
loss_sum = loss_sum + step_loss
print('avg_loss', loss_sum / n_batches, 'avg_time', (time.time() - epoch_start) / n_batches)
Did you check the lower values for the learning rate (e.g., 0.001 or 0.0001)?
Your networks aren't fitting (let alone overfitting) because you don't have enough data. The LSTM has only one sequence and the MLP has 5 datapoints.
Compare this with the number of parameters you need to estimate: your MLP has 120 parameters (if I'm counting correctly). There is no way you can estimate all these with only 5 datapoints unless you're very lucky. (you can make it more likely to converge by splitting your sequence into smaller batches, but even then it won't converge very often).
In short, neural networks need a decent amount of data to be usable.
The answer was three-fold.
1) The example without the RNN converges if I replace the default activation in the fully connected layers (relu) with tanh.
This seems to be because the relu ignores a lot of input (everything below zero) and doesn't provide a gradient at all. With more input it might have worked.
2) The example WITH the RNN needs to remove the activation in the final fully connected layer (before the softmax) completely using None - it doesn't converge well (or at all, in most combinations) with an activation of the fully connected layer in front of the softmax.
3) The RNN example also needs to remove the explicit softmax, since sparse_softmax_cross_entropy applies softmax already.
Finally working code:
import tensorflow as tf
import time
tf.logging.set_verbosity(tf.logging.DEBUG)
#
# Training data, just a single sequence
#
train_input = [[0, 1, 2, 3, 4, 5, 0, 6, 7, 0]]
train_output = [[1, 2, 3, 4, 5, 0, 6, 7, 8, 0]]
#
# Training metadata
#
batch_size = 1
sequence_length = 10
n_classes = 9
# Network size
rnn_cell_size = 10
rnn_layers = 2
embedding_rank = 3
#
# Training hyperparameters
#
epochs = 100
n_batches = 100
learning_rate = 0.01
#
# Model
#
features = tf.placeholder(tf.int32, [None, sequence_length], name="features")
embeddings = tf.Variable(tf.random_uniform([n_classes, embedding_rank], -1.0, 1.0))
embed = tf.nn.embedding_lookup(embeddings, features)
cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.LSTMCell(rnn_cell_size) for i in range(rnn_layers)])
initial_state = cell.zero_state(batch_size, tf.float32)
cell, _ = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)
# Convert [batche_size, sequence_length, rnn_cell_size] to [(batch_size * sequence_length), rnn_cell_size]
flat_lstm_output = tf.reshape(cell, [-1, rnn_cell_size])
output = tf.contrib.layers.fully_connected(inputs=flat_lstm_output, num_outputs=n_classes, activation_fn=None)
#
# Training
#
targets = tf.placeholder(tf.int32, [None, sequence_length])
# Convert [batch_size, sequence_length] to [batch_size * sequence_length]
flat_targets = tf.reshape(targets, [-1])
loss = tf.losses.sparse_softmax_cross_entropy(flat_targets, output)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(epochs):
loss_sum = 0
epoch_start = time.time()
for j in range(n_batches):
_, step_loss = sess.run([train_op, loss], {
features: train_input,
targets: train_output,
})
loss_sum = loss_sum + step_loss
print('avg_loss', loss_sum / n_batches, 'avg_time', (time.time() - epoch_start) / n_batches)
I am building CNN fitting my own data, based on this example
Basically, my data has 3640 features; I have a convolution layer followed by a pooling layer, that pools every other feature, so I end up with dimensions (?, 1, 1819, 1) because 3638 features after conv layer / 2 == 1819.
When I try to reshape my data after pooling to get it in the form [n_samples, n_fetures]
print("pool_shape", pool_shape) #pool (?, 1, 1819, 10)
print("y_shape", y_shape) #y (?,)
pool.set_shape([pool_shape[0], pool_shape[2]*pool_shape[3]])
y.set_shape([y_shape[0], 1])
I get an error:
ValueError: Shapes (?, 1, 1819, 10) and (?, 18190) are not compatible
My code:
N_FEATURES = 140*26
N_FILTERS = 1
WINDOW_SIZE = 3
def my_conv_model(x, y):
x = tf.cast(x, tf.float32)
y = tf.cast(y, tf.float32)
print("x ", x.get_shape())
print("y ", y.get_shape())
# to form a 4d tensor of shape batch_size x 1 x N_FEATURES x 1
x = tf.reshape(x, [-1, 1, N_FEATURES, 1])
# this will give you sliding window of 1 x WINDOW_SIZE convolution.
features = tf.contrib.layers.convolution2d(inputs=x,
num_outputs=N_FILTERS,
kernel_size=[1, WINDOW_SIZE],
padding='VALID')
print("features ", features.get_shape()) #features (?, 1, 3638, 10)
# Max pooling across output of Convolution+Relu.
pool = tf.nn.max_pool(features, ksize=[1, 1, 2, 1],
strides=[1, 1, 2, 1], padding='SAME')
pool_shape = pool.get_shape()
y_shape = y.get_shape()
print("pool_shape", pool_shape) #pool (?, 1, 1819, 10)
print("y_shape", y_shape) #y (?,)
### here comes the error ###
pool.set_shape([pool_shape[0], pool_shape[2]*pool_shape[3]])
y.set_shape([y_shape[0], 1])
pool_shape = pool.get_shape()
y_shape = y.get_shape()
print("pool_shape", pool_shape) #pool (?, 1, 1819, 10)
print("y_shape", y_shape) #y (?,)
prediction, loss = learn.models.logistic_regression(pool, y)
return prediction, loss
How to reshape the data to get any meaningful representation of it and to later pass it to logistic regression layer?
This looks like a confusion between the Tensor.set_shape() method and the tf.reshape() operator. In this case, you should use tf.reshape() because you are changing the shape of the pool and y tensors:
The tf.reshape(tensor, shape) operator takes a tensor of any shape, and returns a tensor with the given shape, as long as they have the same number of elements. This operator should be used to change the shape of the input tensor.
The tensor.set_shape(shape) method takes a tensor that might have a partially known or unknown shape, and asserts to TensorFlow that it actually has the given shape. This method should be used to provide more information about the shape of a particular tensor.
It can be used, e.g., when you take the output of an operator that has a data-dependent output shape (such as tf.image.decode_jpeg()) and assert that it has a static shape (e.g. based on knowledge about the sizes of images in your dataset).
In your program, you should replace the calls to set_shape() with something like the following:
pool_shape = tf.shape(pool)
pool = tf.reshape(pool, [pool_shape[0], pool_shape[2] * pool_shape[3]])
y_shape = tf.shape(y)
y = tf.reshape(y, [y_shape[0], 1])
# Or, more straightforwardly:
y = tf.expand_dims(y, 1)
I'm trying to train simple neural network
that consists of:
Convolution layer filter (5x5) x 8, stride 2.
Max pooling 25x25 (the image has kinda low amount of details)
Flatting output into (2x2x8) vector
Classifier with logistic regression
Altogether network has < 1000 weights.
File: nn.py
#!/bin/python
import tensorflow as tf
import create_batch
# Prepare data
batch = create_batch.batch
x = tf.reshape(batch[0], [-1,100,100,3])
y_ = batch[1]
# CONVOLUTION NETWORK
# For initialization
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.3)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.2, shape=shape)
return tf.Variable(initial)
# Convolution with stride 1
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding='SAME')
def max_pool_25x25(x):
return tf.nn.max_pool(x, ksize=[1, 25, 25, 1],
strides=[1, 25, 25, 1], padding='SAME')
# First layer
W_conv1 = weight_variable([5, 5, 3, 8])
b_conv1 = bias_variable([8])
x_image = tf.reshape(x, [-1,100,100,3])
# First conv1
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_25x25(h_conv1)
# Dense connection layer
# make data flat
W_fc1 = weight_variable([2 * 2 * 8, 2])
b_fc1 = bias_variable([2])
h_pool1_flat = tf.reshape(h_pool1, [-1, 2*2*8])
y_conv = tf.nn.softmax(tf.matmul(h_pool1_flat, W_fc1) + b_fc1)
#Learning
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Session
sess = tf.Session()
sess.run(tf.initialize_all_variables())
# Start input enqueue threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(200):
if i%10 == 0:
train_accuracy = accuracy.eval(session=sess)
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(session=sess)
File: create_batch.py
#!/bin/python
import tensorflow as tf
PATH1 = "../dane/trening/NK/"
PATH2 = "../dane/trening/K/"
def create_labeled_image_list():
filenames = [(PATH1 + "nk_%d.png" % i) for i in range(300)]
labels = [[1,0] for i in range(300)]
filenames += [(PATH2 + "kulki_%d.png" % i) for i in range(300)]
labels += [[0,1] for i in range(300)]
return filenames, labels
def read_images_from_disk(input_queue):
label = input_queue[1]
file_contents = tf.read_file(input_queue[0])
example = tf.image.decode_png(file_contents, channels=3)
example.set_shape([100, 100, 3])
example = tf.to_float(example)
print ("READ, label:")
print(label)
return example, label
# Start
image_list, label_list = create_labeled_image_list()
# Create appropriate tensors for naming
images = tf.convert_to_tensor(image_list, dtype=tf.string)
labels = tf.convert_to_tensor(label_list, dtype=tf.float32)
input_queue = tf.train.slice_input_producer([images, labels],
shuffle=True)
image, label = read_images_from_disk(input_queue)
batch = tf.train.batch([image, label], batch_size=600)
I'm feeding 100x100 images i have two classess 300 images each.
Basically randomly initialzied network at step 0 has better accuracy than trained one.
Network stops learning after it reaches 0.5 accuracy (basically coin flip). Images contain blue blooby thing (class 1) or grass (class 2).
I'm traning network using whole imageset at once (600 images), the loss function is cross entropy.
What I'm doing wrong?
OK, I've find a fix there were two errors, now the network is learning.
Images were RGBA despite the fact I declared them as RGB in tf
I did not perform normalization of Images to [-1,1] float32.
In tensorflow it should be done with something like this:
# i use "im" for image
tf.image.convert_image_dtype(im, dtype=float32)
im = tf.sub(im, -0.5)
im = tf.mul(im, 2.0)
To all newbies to ML - prepare your data with caution!
Thanks.
I am modifying the tutorial here, http://deeplearning.net/tutorial/lenet.html, to a 3D convolutional neural network.
However, I met a problem when I run, as,
Traceback (most recent call last):
File "cnnProgram.py", line 191, in <module>
minibatch_avg_cost = train_model(minibatch_index)
File "/Volumes/TONY/anaconda/lib/python2.7/site-packages/theano/compile/function_module.py", line 871, in __call__
storage_map=getattr(self.fn, 'storage_map', None))
File "/Volumes/TONY/anaconda/lib/python2.7/site-packages/theano/gof/link.py", line 314, in raise_with_op
reraise(exc_type, exc_value, exc_trace)
File "/Volumes/TONY/anaconda/lib/python2.7/site-packages/theano/compile/function_module.py", line 859, in __call__
outputs = self.fn()
ValueError: total size of new array must be unchanged
Apply node that caused the error: Reshape{4}(InplaceDimShuffle{0,4,1,2,3}.0, TensorConstant{[5000 5.. 12 5]})
Toposort index: 102
Inputs types: [TensorType(float64, 5D), TensorType(int64, vector)]
Inputs shapes: [(200, 10, 5, 12, 5), (4,)]
Inputs strides: [(24000, 8, 4800, 400, 80), (8,)]
Inputs values: ['not shown', array([5000, 5, 12, 5])]
Outputs clients: [[CorrMM_gradWeights{valid, (1, 1)}(Reshape{4}.0, Reshape{4}.0), CorrMM{valid, (1, 1)}(Reshape{4}.0, Subtensor{::, ::, ::int64, ::int64}.0)]]
Here is my code, it seems the problem occurs when I change the dimension order with .dimshuffle? This is too weird, and I completely can't figure out why.
Here is my code.
from __future__ import print_function
import scipy.io as sio
import numpy as np
import theano.tensor as T
import theano
from theano import shared
from lasagne.layers import InputLayer, DenseLayer
import os
import sys
import timeit
from mlp import LogRegr, HiddenLayer, DropoutLayer
from convnet3d import ConvLayer, NormLayer, PoolLayer, RectLayer
from activations import relu, tanh, sigmoid, softplus
# Get data
dataReadyForCNN_withValid = sio.loadmat("DataReadyForCNN_withValid.mat")
xTrain = dataReadyForCNN_withValid["xTrain"]
xTrain = xTrain.astype("float64")
yTrainCond = dataReadyForCNN_withValid["yTrainCond"]
yTrainCond = yTrainCond.astype("int32")
yTrainWord = dataReadyForCNN_withValid["yTrainWord"]
yTrainWord = yTrainWord.astype("int32")
xValidate = dataReadyForCNN_withValid["xTrain"]
xValidate = xValidate.astype("float64")
yValidateCond = dataReadyForCNN_withValid["yValidateCond"]
yValidateCond = yValidateCond.astype("int32")
yValidateWord = dataReadyForCNN_withValid["yValidateWord"]
yValidateWord = yValidateWord.astype("int32")
xTest = dataReadyForCNN_withValid["xTest"]
xTest = xTest.astype("float64")
yTestCond = dataReadyForCNN_withValid["yTestCond"]
yTestCond = yTestCond.astype("int32")
yTestWord = dataReadyForCNN_withValid["yTestWord"]
yTestWord = yTestWord.astype("int32")
##################################
# Build Model
#################################
# xTrain = np.random.rand(500, 1, 51, 61, 23).astype('float64')
dtensor5 = T.TensorType('float64', (False,)*5)
x = dtensor5('x') # the input data
y = T.ivector()
# allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch
# input = (nImages, nChannel(nFeatureMaps), nDim1, nDim2, nDim3)
# layer1 (500, 5, 47, 56, 22)
# layer2 (500, 5, 10, 12, 5)
# layer3 (500, 3, 9, 11, 4)
# layer4 (500, 3, 5, 6, 2)
kernel_shape = (5,6,2)
fMRI_shape = (51, 61, 23)
n_in_maps = 1 # channel
n_out_maps = 5 # num of feature maps, aka the depth of the neurons
batch_size = 200
# 1st: Convolution Layer
layer1_input = x
layer1 = ConvLayer(layer1_input, 1, 5, (5, 6, 2), fMRI_shape,
batch_size, tanh)
# print layer1.output.eval({x:xTrain[:500]}).shape
# 2nd: Pool layer
poolShape = (5, 5, 5)
layer2 = PoolLayer(layer1.output, poolShape)
# print layer2.output.eval({x:xTrain}).shape
# 3rd: Convolution Layer
layer3 = ConvLayer(layer2.output, 5, 3, (2, 2, 2), (10, 12, 5),
500, tanh)
# print layer3.output.eval({x:xTrain[:500]}).shape
# 4th: Pool layer
layer4 = PoolLayer(layer3.output, (2, 2, 2))
# print layer4.output.eval({x:xTrain[:500]}).shape
# 5th: Dense layer
layer5_input = T.flatten(layer4.output, outdim=2)
layer5 = HiddenLayer(layer5_input, n_in=180, n_out=500, activation=tanh)
# layer5.output.eval({x:xTrain[:500]}).shape
# 6th: Logistic layer
layer6 = LogRegr(layer5.output, 500, 12, tanh)
cost = layer6.negative_log_likelihood(y)
# create a function to compute the mistakes that are made by the model
test_model = theano.function(
[index],
layer6.errors(y),
givens={
x: shared(xTest)[index * batch_size: (index + 1) * batch_size],
y: shared(yTestCond[0])[index * batch_size: (index + 1) * batch_size]
}
)
validate_model = theano.function(
[index],
layer6.errors(y),
givens={
x: shared(xValidate)[index * batch_size: (index + 1) * batch_size],
y: shared(yValidateCond[0])[index * batch_size: (index + 1) * batch_size]
}
)
# create a list of all model parameters to be fit by gradient descent
params = layer5.params + layer3.params + layer1.params + layer6.params
# create a list of gradients for all model parameters
grads = T.grad(cost, params)
# train_model is a function that updates the model parameters by
# SGD Since this model has many parameters, it would be tedious to
# manually create an update rule for each model parameter. We thus
# create the updates list by automatically looping over all
# (params[i], grads[i]) pairs.
learning_rate=0.1
updates = [
(param_i, param_i - learning_rate * grad_i)
for param_i, grad_i in zip(params, grads)
]
train_model = theano.function(
[index],
cost,
updates=updates,
givens={
x: shared(xTrain)[index * batch_size: (index + 1) * batch_size],
y: shared(yTrainCond[0])[index * batch_size: (index + 1) * batch_size]
}
)
###############
# TRAIN MODEL #
###############
import timeit
print('... training')
n_train_batches = 10
n_test_batches = 10
n_validate_batches = 10
n_epochs=200
# early-stopping parameters
patience = 10000 # look as this many examples regardless
patience_increase = 2 # wait this much longer when a new best is
# found
improvement_threshold = 0.995 # a relative improvement of this much is
# considered significant
validation_frequency = min(n_train_batches, patience // 2)
# go through this many
# minibatche before checking the network
# on the validation set; in this case we
# check every epoch
best_validation_loss = np.inf
best_iter = 0
test_score = 0.
start_time = timeit.default_timer()
epoch = 0
done_looping = False
while (epoch < n_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in range(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
# iteration number
iter = (epoch - 1) * n_train_batches + minibatch_index
if (iter + 1) % validation_frequency == 0:
# compute zero-one loss on validation set
validation_losses = [validate_model(i) for i
in range(n_valid_batches)]
this_validation_loss = numpy.mean(validation_losses)
print(
'epoch %i, minibatch %i/%i, validation error %f %%' %
(
epoch,
minibatch_index + 1,
n_train_batches,
this_validation_loss * 100.
)
)
# if we got the best validation score until now
if this_validation_loss < best_validation_loss:
#improve patience if loss improvement is good enough
if (
this_validation_loss < best_validation_loss *
improvement_threshold
):
patience = max(patience, iter * patience_increase)
best_validation_loss = this_validation_loss
best_iter = iter
# test it on the test set
test_losses = [test_model(i) for i
in range(n_test_batches)]
test_score = numpy.mean(test_losses)
print((' epoch %i, minibatch %i/%i, test error of '
'best model %f %%') %
(epoch, minibatch_index + 1, n_train_batches,
test_score * 100.))
if patience <= iter:
done_looping = True
break
end_time = timeit.default_timer()
print(('Optimization complete. Best validation score of %f %% '
'obtained at iteration %i, with test performance %f %%') %
(best_validation_loss * 100., best_iter + 1, test_score * 100.))
Because I did not have your data. So I changed the getting data part of your code and then tested it. I did not get any bug.
Make sure your input data is in the right form(with the shape of (50, 1, 51, 61, 23) in your case).