attempt to index global 'optim' (a nil value) - lua

require 'torch';
require 'nn';
require 'nnx';
mnist = require 'mnist';

fullset = mnist.traindataset()
testset = mnist.testdataset()

trainset = {
    size = 50000,
    data = fullset.data[{{1,50000}}]:double(),
    label = fullset.label[{{1,50000}}]
}

validationset = {
    size = 10000,
    data = fullset.data[{{50001,60000}}]:double(),
    label = fullset.label[{{50001,60000}}]
}

-- MNIST dataset has 28x28 images
model = nn.Sequential()
model:add(nn.SpatialConvolutionMM(1, 32, 5, 5))  -- 32x24x24
model:add(nn.ReLU())
model:add(nn.SpatialMaxPooling(3, 3, 3, 3))      -- 32x8x8
model:add(nn.SpatialConvolutionMM(32, 64, 5, 5)) -- 64x4x4
model:add(nn.Tanh())
model:add(nn.SpatialMaxPooling(2, 2, 2, 2))      -- 64x2x2
model:add(nn.Reshape(64*2*2))
model:add(nn.Linear(64*2*2, 200))
model:add(nn.Tanh())
model:add(nn.Linear(200, 10))
model:add(nn.LogSoftMax())

criterion = nn.ClassNLLCriterion()

x, dldx = model:getParameters() -- x now stores the trainable parameters and dldx the gradients w.r.t. those parameters

sgd_params = {
    learningRate = 1e-2,
    learningRateDecay = 1e-4,
    weightDecay = 1e-3,
    momentum = 1e-4
}

step = function(batchsize)
    -- setting up variables
    local count = 0
    local current_loss = 0
    local shuffle = torch.randperm(trainset.size)
    -- default batch size is 200
    batchsize = batchsize or 200
    -- setting inputs and targets for minibatches
    for minibatch_number = 1, trainset.size, batchsize do
        local size = math.min(trainset.size - minibatch_number + 1, batchsize)
        local inputs = torch.Tensor(size, 28, 28)
        local targets = torch.Tensor(size)
        for index = 1, size do
            inputs[index] = trainset.data[shuffle[index + minibatch_number]]
            targets[index] = trainset.label[shuffle[index + minibatch_number]]
        end
        -- feval returns the loss and the gradients of the loss w.r.t. the parameters
        feval = function(x_new)
            --print ( "---------------------------------safe--------------------")
            if x ~= x_new then x:copy(x_new) end
            -- zeroing the parameter gradients
            dldx:zero()
            -- calculating the loss and parameter gradients
            local loss = criterion:forward(model.forward(inputs), targets)
            model:backward(inputs, criterion:backward(model.output, targets))
            return loss, dldx
        end
        -- getting the loss
        -- optim returns x*, {fx}, where x* is the new set of params and {fx} is {loss}, so fs[1] carries the loss from feval
        print(feval ~= nil and x ~= nil and sgd_params ~= nil)
        _, fs = optim.sgd(feval, x, sgd_params)
        count = count + 1
        current_loss = current_loss + fs[1]
    end
    -- returning the average loss over the minibatches
    return current_loss / count
end

max_iters = 30
for i = 1, max_iters do
    local loss = step()
    print(string.format('Epoch: %d Current loss: %4f', i, loss))
end
I am new to Torch and Lua, and I'm not able to find the error in the above code. Can anyone suggest a way to debug it?
The error:
/home/afroz/torch/install/bin/luajit: /home/afroz/test.lua:88: attempt to index global 'optim' (a nil value)
stack traceback:
/home/afroz/test.lua:88: in function 'step'
/home/afroz/test.lua:102: in main chunk
[C]: in function 'dofile'
...froz/torch/install/lib/luarocks/rocks/trepl/scm-1/bin/th:145: in main chunk
[C]: at 0x00406670

optim is not defined in the scope of your script. You try to call optim.sgd, which of course results in the error you see.
Like nn, optim is an extension package for Torch.
require 'torch';
require 'nn';
require 'nnx';
Remember these lines at the beginning of your script? They are what actually load those packages and bring their definitions into scope.
Make sure optim is installed, then try to require it.
https://github.com/torch/optim
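For example, the top of your script would then look like this (a minimal sketch; the luarocks command assumes a standard Torch installation):

-- install once from a shell: luarocks install optim
require 'torch';
require 'nn';
require 'nnx';
require 'optim';  -- without this line, optim stays nil and optim.sgd fails
mnist = require 'mnist';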

optim is not assigned anywhere in the script, so when the script references optim.sgd, its value is nil and you get the error you have shown. You need to double-check the script to make sure optim is assigned the correct value.
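One way to make this failure more obvious is to require the package defensively at the top of the script (a small sketch; the error text is just illustrative):

-- fail early with a readable message if the package is missing
local ok, result = pcall(require, 'optim')
if not ok then
    error("could not load the optim package; install it with 'luarocks install optim'")
end
optim = result  -- expose it under the name the rest of the script uses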

Related

I can't really figure out what's wrong with my neural network class (Lua)

local matrix = require("matrixx")
local Class = require("class")
NeuralNetwork = Class{}
function NeuralNetwork:init(input_nodes, hidden_nodes, output_nodes)
self.input_nodes = input_nodes
self.hidden_nodes = hidden_nodes
self.output_nodes = output_nodes
self.weights_ih = matrix(self.hidden_nodes, self.input_nodes, math.random())
self.weights_ho = matrix(self.output_nodes, self.hidden_nodes, math.random())
self.bias_h = matrix(self.hidden_nodes, 1, math.random())
self.bias_o = matrix(self.output_nodes, 1, math.random())
self.learning_rate = 0.1
end
function NeuralNetwork:feedforward(input_array)
--Generating the Hidden Outputs
local inputs = matrix(input_array)
for i=1, #input_array do
inputs[i][1] = input_array[i]
end
local hidden = self.weights_ih * inputs
hidden = hidden + self.bias_h
--Activation Function
hidden = matrix.map(hidden, tanh)
--Generating the output's output
local output = self.weights_ho * hidden
output = output + self.bias_o
output = matrix.map(output, tanh)
return output
end
function NeuralNetwork:train(input_array, target_array)
--Generating the Hidden Outputs
local inputs = matrix(input_array)
for i=1, #input_array do
inputs[i][1] = input_array[i]
end
local hidden = self.weights_ih * inputs
hidden = hidden + self.bias_h
--Activation Function
hidden = matrix.map(hidden, tanh)
--Generating the output's output
local outputs = self.weights_ho * hidden
outputs = outputs + self.bias_o
outputs = matrix.map(outputs, tanh)
--Convert Targets Array to Matrix object
local targets = matrix(#target_array, 1)
for i=1, #target_array do
targets[i][1] = target_array[i]
end
--Calculate the error
local output_errors = targets - outputs
--Calculate gradient
local gradients = matrix.map(outputs, tanhd)
gradients = gradients * output_errors
gradients = gradients * self.learning_rate
-- Calculate deltas
local hidden_T = matrix.transpose(hidden)
local weight_ho_deltas = gradients * hidden_T
-- Adjust the weights by deltas
self.weights_ho = self.weights_ho + weight_ho_deltas
-- Adjust the bias by its deltas (which is just the gradients)
self.bias_o = self.bias_o + gradients
-- Calculate the hidden layer errors
local who_t = matrix.transpose(self.weights_ho)
local hidden_errors = who_t * output_errors
-- Calculate hidden gradient
local hidden_gradient = matrix.map(hidden, tanhd)
hidden_gradient = hidden_gradient * hidden_errors * self.learning_rate
-- Calcuate input->hidden deltas
local inputs_T = matrix.transpose(inputs)
local weight_ih_deltas = hidden_gradient * inputs_T
self.weights_ih = self.weights_ih + weight_ih_deltas
-- Adjust the bias by its deltas (which is just the gradients)
self.bias_h = self.bias_h + hidden_gradient
self.weights_ih:print()
print()
self.weights_ho:print()
print()
end
function sigmoid(x)
return 1 / (1 + math.exp(-x))
end
function dsigmoid(x)
return sigmoid(x) * (1 - sigmoid(x))
end
function tanh(x)
return (math.exp(x) - math.exp(-x)) / (math.exp(x) + math.exp(-x))
end
function tanhd(x)
return 1 / math.cosh(x)^2
end
--MAIN
local nn = NeuralNetwork(2, 2, 1)
local training_data = {
{
inputs = {0, 1},
target = {1}
},
{
inputs = {1, 1},
target = {0}
},
{
inputs = {1, 0},
target = {1}
},
{
inputs = {0, 0},
target = {0}
}
}
for i = 1, 30 do
local data = training_data[math.floor(math.random(#training_data))]
nn:train(data.inputs, data.target)
end
nn:feedforward({0, 1}):print()
nn:feedforward({1, 1}):print()
nn:feedforward({0, 0}):print()
nn:feedforward({1, 0}):print()
I wrote this NeuralNetwork class using a class library and a matrix library (class and matrix, respectively).
It all seems correct to me (ideally, at least), but when I instantiate a new NN with 2 inputs, 2 hidden neurons and 1 output and try to solve XOR, it doesn't work.
What am I missing? Maybe I misunderstood the matrix library. I hope someone can help me.
EDIT:
I added a map function in the library to apply a math function to every number in a matrix.
function matrix.map( m1, func )
    local mtx = {}
    for i = 1, #m1 do
        mtx[i] = {}
        for j = 1, #m1[1] do
            mtx[i][j] = func(m1[i][j])
        end
    end
    return setmetatable( mtx, matrix_meta )
end
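For reference, this is how the added function is meant to be used (a small sketch; the matrix construction follows the (rows, columns, fill) form already used in the class above):

-- build a 2x3 matrix filled with 0.5, then squash every entry with tanh
local m = matrix(2, 3, 0.5)
local squashed = matrix.map(m, tanh) -- applies tanh elementwise, returns a new matrix
squashed:print()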

Error in function rembuff:floor() in lua code file: attempt to call method 'floor' (a nil value)

I have successfully pre-trained my model on a machine translation dataset in Lua. Now I am moving on to training the model, but I get an error in a Lua file in the function rembuff:floor():
Error: attempt to call method 'floor' (a nil value)
This is the specific function:
function MarginBatchBeamSearcher:nextSearchStep(t, batch_pred_inp, batch_ctx, beam_dec, beam_scorer, gold_scores, target, target_w, gold_rnn_state_dec, delts, losses, global_noise)
    local K = self.K
    local resval, resind, rembuff = self.resval, self.resind, self.rembuff
    local finalval, finalind = self.finalval, self.finalind
    self:synchDropout(t, global_noise)
    -- pred_inp should be what was predicted at the last step
    local outs = beam_dec:forward({batch_pred_inp, batch_ctx, unpack(self.prev_state)})
    local all_scores = beam_scorer:forward(outs[#outs]) -- should be (batch_l*K) x V matrix
    local V = all_scores:size(2)
    local mistaken_preds = {}
    for n = 1, self.batch_size do
        delts[n] = 0
        losses[n] = 0
        if t <= target_w[n]-1 then -- only do things if t <= length (incl end token) - 2
            local beam_size = #self.pred_pfxs[n]
            local nstart = (n-1)*K+1
            local nend = n*K
            local scores = all_scores:sub(nstart, nstart+beam_size-1):view(-1) -- scores for this example
            -- take top K
            torch.topk(resval, resind, scores, K, 1, true)
            -- see if we violated margin
            torch.min(finalval, finalind, resval, 1) -- resind[finalind[1]] is idx of K'th highest predicted word
            -- checking that true score at least 1 higher than K'th
            losses[n] = math.max(0, 1 - gold_scores[n][target[t+1][n]] + finalval[1])
            -- losses[n] = math.max(0, - gold_scores[n][target[t+1][n]] + finalval[1])
            if losses[n] > 0 then
                local parent_idx = math.ceil(resind[finalind[1]]/V)
                local pred_word = ((resind[finalind[1]]-1)%V) + 1
                mistaken_preds[n] = {prev = self.pred_pfxs[n][parent_idx], val = pred_word}
                delts[n] = 1 -- can change.....
            else
                -- put predicted next words in pred_inp
                rembuff:add(resind, -1) -- set rembuff = resind - 1
                rembuff:div(V)
                --if rembuff.floor then
                rembuff:floor()
I am unable to rectify this error. Please help!

Tensorflow Neural Network for Binary Classification; how do I use a placeholder?

Here is my code below. My target is a vector with shape (N,), i.e. a vector containing only binary values.
However, I'm running into the following error:
/Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6 /Users/Lai/Dropbox/PersonalProject/MachineLearningForSports/models/NeuralNetwork.py
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
Traceback (most recent call last):
File "/Users/Lai/Dropbox/PersonalProject/MachineLearningForSports/models/NeuralNetwork.py", line 102, in <module>
_, c = sess.run([optimizer,cost],feed_dict = {x:batch_x,y:batch_y})
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 943, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (100,) for Tensor 'Placeholder_1:0', which has shape '(?, 2)'
Since my batch size is 100, I believe the error occurs when my target is compared with my predictions: the tf.placeholder seems to make the prediction N x 2, although I'm not sure. Any help? Thanks.
import tensorflow as tf
import DataPrepare as dp
import numpy as np

def random_init(x, num_feature_1st, num_feature_2nd, num_class):
    W1 = tf.Variable(tf.random_normal([num_feature_1st, num_feature_2nd]))
    bias1 = tf.Variable(tf.random_normal([num_feature_2nd]))
    W2 = tf.Variable(tf.random_normal([num_feature_2nd, num_class]))
    bias2 = tf.Variable(tf.random_normal([num_class]))
    return [W1, bias1, W2, bias2]

def softsign(z):
    """The softsign function, applied elementwise."""
    return z / (1. + np.abs(z))

def multilayer_perceptron(x, num_feature_1st, num_feature_2nd, num_class):
    params = random_init(x, num_feature_1st, num_feature_2nd, num_class)
    layer_1 = tf.add(tf.matmul(x, params[0]), params[1])
    layer_1 = softsign(layer_1)
    #layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, params[2]), params[3])
    #output = tf.nn.softmax(layer_2)
    output = tf.nn.sigmoid(layer_2)
    return output

def next_batch(num, dataX, dataY):
    idx = np.arange(0, len(dataX))
    np.random.shuffle(idx)
    idx = idx[0:num]
    dataX_shuffle = [dataX[i] for i in idx]
    dataY_shuffle = [dataY[i] for i in idx]
    dataX_shuffle = np.asarray(dataX_shuffle)
    dataY_shuffle = np.asarray(dataY_shuffle)
    return dataX_shuffle, dataY_shuffle

if __name__ == "__main__":
    #sess = tf.InteractiveSession()
    learning_rate = 0.001
    training_epochs = 10
    batch_size = 100
    display_step = 1
    num_feature_1st = 6
    num_feature_2nd = 500
    num_class = 2

    x = tf.placeholder('float', [None, 6])
    y = tf.placeholder('float', [None, 2])

    data = dp.dataPrepare(dp.datas, dp.path)
    trainX = data[0]
    testX = data[1]   # a matrix
    trainY = data[2]  # a vector with binary numbers
    testY = data[3]

    params = random_init(x, num_feature_1st, num_feature_2nd, num_class)

    # construct model
    pred = multilayer_perceptron(x, num_feature_1st, num_feature_2nd, num_class)
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        # train
        for epoch in range(training_epochs):
            avg_cost = 0
            total_batch = int(len(trainX[:, 0]) / batch_size)
            for i in range(total_batch):
                batch_x, batch_y = next_batch(batch_size, trainX, trainY)
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
                avg_cost += c / total_batch
            if epoch % display_step == 0:
                print("Epoch: ", "%04d" % (epoch+1), " cost= ", "{:.9f}".format(avg_cost))
        print("Optimization Finished!")
Whenever you execute a dynamic node from the computation graph - which is pretty much any node that is not an input - you need to specify all the variables it depends on. Think about it this way: if you had a mathematical function of the form
y = f(x) = Ax + b (for example)
and you wanted to evaluate that function, you need to specify x as well. You need not, however, specify x if you wanted to evaluate (i.e. read) the value of A, since A is known (at least in this context).
Consequently, you can evaluate (by passing it to tf.Session.run(...)) the parameters of your network without specifying the inputs (A in the example above). You cannot, however, evaluate the output of your function without specifying the inputs (in the example, you need to specify x).
As for your code, the following line will thus not work:
print(sess.run(pred)), since you ask the session to evaluate a function without specifying its inputs.

Error while running # StochasticGradient: training. Torch, Lua

I'm trying to train a feed-forward neural network for the first time in Torch. Here's my dataset: http://ocw.mit.edu/courses/sloan-school-of-management/15-097-prediction-machine-learning-and-statistics-spring-2012/datasets/transfusion.csv
Here's the code (based on http://mdtux89.github.io/2015/12/11/torch-tutorial.html):
require 'nn'

mlp = nn.Sequential()
inputSize = 4
hiddenLayer1Size = 4
hiddenLayer2Size = 4
mlp:add(nn.Linear(inputSize, hiddenLayer1Size)) -- rows, columns
mlp:add(nn.Tanh())
mlp:add(nn.Linear(hiddenLayer1Size, hiddenLayer2Size))
mlp:add(nn.Tanh())

nclasses = 1
mlp:add(nn.Linear(hiddenLayer2Size, nclasses))
mlp:add(nn.LogSoftMax())

output = mlp:forward(torch.rand(1,4))
print(output)

-- TRAINING using the built-in stochastic gradient descent; 2 params: network, criterion --
LRate = 0.1
criterion = nn.ClassNLLCriterion()
trainer = nn.StochasticGradient(mlp, criterion)
trainer.learningRate = LRate

function string:splitAtCommas()
    local sep, values = ",", {}
    local pattern = string.format("([^%s]+)", sep)
    self:gsub(pattern, function(c) values[#values+1] = c end)
    return values
end

function loadData(dataFile)
    local dataset, i = {}, 0
    for line in io.lines(dataFile) do
        local values = line:splitAtCommas()
        local y = torch.Tensor(1)
        y[1] = values[#values] -- the target class is the last number in the line
        values[#values] = nil
        local x = torch.Tensor(values) -- the input data is all the other numbers
        dataset[i] = {x, y}
        i = i + 1
    end
    function dataset:size() return (i - 1) end -- the size() method the trainer requires
    return dataset
end

dataset = loadData("transfusion.csv")
trainer:train(dataset)
Here's the error report:
# StochasticGradient: training
/Users/drdre/torch/install/share/lua/5.1/nn/THNN.lua:109: Assertion `cur_target >= 0 && cur_target < n_classes' failed. at /Users/drdre/torch/extra/nn/lib/THNN/generic/ClassNLLCriterion.c:38
stack traceback:
[C]: in function 'v'
/Users/drdre/torch/install/share/lua/5.1/nn/THNN.lua:109: in function 'ClassNLLCriterion_updateOutput'
...dre/torch/install/share/lua/5.1/nn/ClassNLLCriterion.lua:41: in function 'forward'
...re/torch/install/share/lua/5.1/nn/StochasticGradient.lua:35: in function 'f'
[string "local f = function() return trainer:train(dat..."]:1: in main chunk
[C]: in function 'xpcall'
/Users/drdre/torch/install/share/lua/5.1/itorch/main.lua:209: in function </Users/drdre/torch/install/share/lua/5.1/itorch/main.lua:173>
/Users/drdre/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
/Users/drdre/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
/Users/drdre/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
/Users/drdre/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
/Users/drdre/torch/install/share/lua/5.1/itorch/main.lua:381: in main chunk
[C]: in function 'require'
(command line):1: in main chunk
[C]: at 0x0105e4cd10
Use nclasses = 2 and y[1] = values[#values] + 1. See the doc:
a desired output y (an integer 1 to n, in this case n = 2 classes)
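Concretely, the two changes would look like this (a minimal sketch of the fix described above, keeping the rest of the script unchanged):

nclasses = 2
mlp:add(nn.Linear(hiddenLayer2Size, nclasses))
mlp:add(nn.LogSoftMax())
...
y[1] = values[#values] + 1 -- shift the 0/1 label to 1/2; ClassNLLCriterion expects targets in 1..nclasses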

How to compute the gradient of loss with respect to an arbitrary layer/weight in Torch?

I'm transitioning from Theano to Torch, so please bear with me. In Theano it was fairly straightforward to compute the gradients of the loss function w.r.t. even a specific weight. How can one do this in Torch?
Assume we have the following code, which generates some data/labels and defines a model:
t = require 'torch'
require 'nn'
require 'cunn'
require 'cutorch'

-- Generate random labels
function randLabels(nExamples, nClasses)
    -- nClasses: number of classes
    -- nExamples: number of examples
    label = {}
    for i = 1, nExamples do
        label[i] = t.random(1, nClasses)
    end
    return t.FloatTensor(label)
end

inputs = t.rand(1000, 3, 32, 32) -- 1000 samples, 3 color channels
inputs = inputs:cuda()
labels = randLabels(inputs:size()[1], 10)
labels = labels:cuda()

net = nn.Sequential()
net:add(nn.SpatialConvolution(3, 6, 5, 5))
net:add(nn.ReLU())
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))
net:add(nn.View(6*14*14))
net:add(nn.Linear(6*14*14, 300))
net:add(nn.ReLU())
net:add(nn.Linear(300, 10))
net = net:cuda()

-- Loss
criterion = nn.CrossEntropyCriterion()
criterion = criterion:cuda()
forwardPass = net:forward(inputs)
net:zeroGradParameters()
dEd_WeightsOfLayer1 -- How to compute this?

forwardPass = nil
net = nil
criterion = nil
inputs = nil
labels = nil
collectgarbage()
How can I compute the gradient w.r.t. the weights of the convolutional layer?
Okay, I found the answer (thanks to alban desmaison on the Torch7 Google group).
The code in the question has a bug and does not work, so I rewrote it. Here's how you can get the gradients with respect to each node/parameter:
t = require 'torch'
require 'cunn'
require 'nn'
require 'cutorch'

-- A function to generate some random labels
function randLabels(nExamples, nClasses)
    -- nClasses: number of classes
    -- nExamples: number of examples
    label = {}
    for i = 1, nExamples do
        label[i] = t.random(1, nClasses)
    end
    return t.FloatTensor(label)
end

-- Declare some variables
nClass = 10
kernelSize = 5
stride = 2
poolKernelSize = 2
nData = 100
nChannel = 3
imageSize = 32

-- Generate some [random] data
data = t.rand(nData, nChannel, imageSize, imageSize) -- 100 random images with 3 channels
data = data:cuda() -- Transfer to the GPU (remove this line if you're not using a GPU)
label = randLabels(data:size()[1], nClass)
label = label:cuda() -- Transfer to the GPU (remove this line if you're not using a GPU)

-- Define the model
net = nn.Sequential()
net:add(nn.SpatialConvolution(3, 6, 5, 5))
net:add(nn.ReLU())
net:add(nn.SpatialMaxPooling(poolKernelSize, poolKernelSize, stride, stride))
net:add(nn.View(6*14*14))
net:add(nn.Linear(6*14*14, 350))
net:add(nn.ReLU())
net:add(nn.Linear(350, 10))
net = net:cuda() -- Transfer to the GPU (remove this line if you're not using a GPU)

criterion = nn.CrossEntropyCriterion()
criterion = criterion:cuda() -- Transfer to the GPU (remove this line if you're not using a GPU)

-- Do the forward pass and get the gradient for each node/parameter:
net:forward(data) -- Do the forward propagation
criterion:forward(net.output, label) -- Compute the overall negative log-likelihood error
criterion:backward(net.output, label); -- Don't forget the ';', otherwise everything gets printed on the screen
net:backward(data, criterion.gradInput); -- Don't forget the ';', otherwise everything gets printed on the screen

-- Now you can access the gradient values
layer1InputGrad = net:get(1).gradInput
layer1WeightGrads = net:get(1).gradWeight

net = nil
data = nil
label = nil
criterion = nil
Copy and paste the code and it works like a charm :)
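The same pattern gives you the gradients of any other module. For instance, with the network defined above, the first fully connected layer is the fifth module added to the Sequential container (a small sketch, assuming the layer order is unchanged):

-- gradients w.r.t. the parameters of the first nn.Linear (the fifth module above)
linear1WeightGrads = net:get(5).gradWeight
linear1BiasGrads = net:get(5).gradBias
-- and the convolution's bias gradient, to complete the first-layer picture
layer1BiasGrads = net:get(1).gradBias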
