Why are all the outputs from my neural network the same when using the ReLU activation function?

When using the ReLU activation function in my neural network, I noticed all the outputs were the same; the sigmoid activation function works just fine. Is there something wrong with my ReLU function? Why is this happening, and what can I do to fix it?
The example is a simple XOR neural network written in Lua (Luau, which is why the += compound operator and wait() appear).
math.randomseed(os.time())
local nn = require("NeuralNetwork")
network = nn.new(2, 1, 1, 4, 0.2, "ReLU")
local x = 0
local attempts = 100000
for i = 1, attempts do
    x += 1
    if x > 10000 then
        wait()
        x = 0
    end
    network:backPropagate({0, 0}, {0})
    network:backPropagate({1, 0}, {1})
    network:backPropagate({0, 1}, {1})
    network:backPropagate({1, 1}, {0})
end
print("0 0 | "..network:feedForward({0,0})[1])
print("1 0 | "..network:feedForward({1,0})[1])
print("0 1 | "..network:feedForward({0,1})[1])
print("1 1 | "..network:feedForward({1,1})[1])
Output:
0 0 | 0.48780487804878037
1 0 | 0.48780487804878037
0 1 | 0.48780487804878037
1 1 | 0.48780487804878037
- Library -
local nn = {}
nn.__index = nn

nn.ActivationFunctions = {
    sigmoid = function(x) return 1/(1+math.exp(-x)) end,
    ReLU = function(x) return math.max(0, x) end,
}

-- Note: these derivatives are applied to a neuron's activated output value,
-- not to its raw net input (see backPropagate below).
nn.Derivatives = {
    sigmoid = function(x) return x * (1 - x) end,
    ReLU = function(x) return x >= 0 and 1 or 0 end,
}

nn.CostFunctions = {
    MSE = function(outputs, expected)
        local sum = 0
        for i = 1, #outputs do
            sum += (expected[i] - outputs[i])^2
        end
        return sum/#outputs
    end,
}
function nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
    local self = setmetatable({}, nn)
    self.learningRate = learningRate or .3
    self.activationFunction = activationFunction or "ReLU" -- must match a key in nn.ActivationFunctions
    self.net = {}
    local net = self.net
    local layers = hiddenLayers+2
    for i = 1, layers do
        net[i] = {}
    end
    for i = 1, inputs do
        net[1][i] = {value = 0}
    end
    for i = 2, layers-1 do
        for x = 1, neurons do
            net[i][x] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
            for z = 1, #net[i-1] do
                net[i][x].weights[z] = math.random()*2-1
            end
        end
    end
    for i = 1, outputs do
        net[layers][i] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
        for z = 1, #net[layers-1] do
            net[layers][i].weights[z] = math.random()*2-1
        end
    end
    return self
end
function nn.newFromRawData(data)
    return setmetatable(data, nn)
end

function nn:feedForward(inputs)
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local inputLayer = net[1]
    local outputLayer = net[layers]
    for i = 1, #inputLayer do
        inputLayer[i].value = inputs[i]
    end
    for i = 2, layers do
        local layer = net[i]
        for x = 1, #layer do
            local sum = layer[x].bias
            for z = 1, #net[i-1] do
                sum += net[i-1][z].value * layer[x].weights[z]
            end
            layer[x].netInput = sum
            layer[x].value = nn.ActivationFunctions[activation](sum)
        end
    end
    local outputs = {}
    for i = 1, #outputLayer do
        table.insert(outputs, outputLayer[i].value)
    end
    return outputs
end
function nn:backPropagate(inputs, expected)
    local outputs = self:feedForward(inputs)
    --print(outputs)
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local lr = self.learningRate
    local inputLayer = net[1]
    local outputLayer = net[layers]
    for i = 1, #outputLayer do
        local delta = -(expected[i] - outputs[i]) * nn.Derivatives[activation](outputs[i])
        outputLayer[i].delta = delta
    end
    for i = layers-1, 2, -1 do
        local layer = net[i]
        local nextLayer = net[i+1]
        for x = 1, #layer do
            local delta = 0
            for z = 1, #nextLayer do
                delta += nextLayer[z].delta * nextLayer[z].weights[x]
            end
            layer[x].delta = delta * nn.Derivatives[activation](layer[x].value)
        end
    end
    for i = 2, layers do
        local lastLayer = net[i-1]
        for x = 1, #net[i] do
            net[i][x].bias -= lr * net[i][x].delta
            for z = 1, #lastLayer do
                net[i][x].weights[z] -= lr * net[i][x].delta * lastLayer[z].value
            end
        end
    end
end
return nn

Related

Why is Lua treating tables as numbers?

I was playing around with Lua and neural networks, and I stumbled across a weird bug in my code where Lua appears to be treating a table as a number. I have the following code:
function CreateEmptyNeuron()
    local neuron = {
        val = 0,
        forward = { },
        backward = { },
    }
    return neuron;
end

--layerSize is an integer representing the size of the layer we're creating.
function CreateLayer(layerSize)
    local layer = {}
    for i = 1, layerSize, 1 do
        local n = CreateEmptyNeuron();
        table.insert(layer, n)
    end
    return layer;
end

--layerSize is actually an array of integers representing the array sizes of
--each individual layer (first layer is the input layer, last layer is output layer)
function CreateLayers(numLayers, layerSize)
    local layers = {}
    for i = 1, numLayers, 1 do
        local layer = CreateLayer(layerSize[i]);
        table.insert(layers, layer)
    end
    return layers;
end

--This function initializes the "val" variable in each table, and
--forward connects each neuron to every node in the next layer...
function ForwardConnectLayers(network)
    for i = 1, #network, 1 do
        local layer = network[i]
        local next_layer = nil
        if (i+1) < #network then
            next_layer = network[i+1]
        else
            print("We have reached the output layer...")
            next_layer = nil
        end
        for j = 1, #layer, 1 do
            local neuron = layer[j]
            neuron.val = (math.random() + math.random(0, 100))/100;
            if next_layer ~= nil then
                for x = 1, #next_layer, 1 do
                    neuron.forward[x] = math.random(1, 100)/100
                    print("Found forward layer...")
                end
            else
                print("We cannot forward connect the output layer...\n")
            end
        end
    end
end

function BackwardConnectLayers(network)
    for i = 1, #network, 1 do
        local layer = network[i]
        local prev_layer = nil
        if (i-1) > 1 then
            prev_layer = network[i-1]
        else
            print("We have reached the input layer...")
            prev_layer = nil
        end
        for j = #layer, 1, -1 do
            local neuron = layer[j]
            --neuron.val = (math.random() + math.random(0, 100))/100;
            if prev_layer ~= nil then
                for x = 1, #prev_layer, 1 do
                    table.insert(neuron.backward, prev_layer[x])
                    print("Found input layer...")
                end
            else
                print("We cannot backward connect the input layer...\n")
            end
        end
    end
end

function CreateBrain()
    local LAYER_SIZES = {
        10, 20, 20, 5
    }
    local brain = CreateLayers(4, LAYER_SIZES)
    ForwardConnectLayers(brain)
    BackwardConnectLayers(brain)
    return brain;
end

AI = CreateBrain();
AI.Run = function(inputs, expectedOutputs)
    local input_layer = AI[1]
    local output_layer = AI[#AI]
    for i = 0, #inputs, 1 do
        input_layer[i] = inputs[i]
    end
    --For each layer in the network...
    for l = 1, #AI, 1 do
        --Get the next layer and this layer...
        local this_layer = AI[l]
        local next_layer = AI[l+1]
        --And for each neuron in the next layer...
        --Multiply the value of the neuron in this layer by
        --the value of the modifier, and set the value of the next
        --Neuron to be nextneuron.val + sum(<x>.val*<x>[k])
        if next_layer ~= nil then
            for m = 1, #next_layer, 1 do
                local prev_layer_sum = 0
                for n = 1, #this_layer, 1 do
                    local neuron = this_layer[n]
                    print(neuron)
                end
            end
        end
    end
end

ai_inputs = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }
expected_outputs = { 8, 8, 8, 8, 8 }
AI.Run(ai_inputs, expected_outputs)
When I run the code as shown, it prints a list of memory addresses for Lua tables, as expected. But then, if I alter AI.Run to the following:
AI.Run = function(inputs, expectedOutputs)
    local input_layer = AI[1]
    local output_layer = AI[#AI]
    for i = 0, #inputs, 1 do
        input_layer[i] = inputs[i]
    end
    --For each layer in the network...
    for l = 1, #AI, 1 do
        --Get the next layer and this layer...
        local this_layer = AI[l]
        local next_layer = AI[l+1]
        --And for each neuron in the next layer...
        --Multiply the value of the neuron in this layer by
        --the value of the modifier, and set the value of the next
        --Neuron to be nextneuron.val + sum(<x>.val*<x>[k])
        if next_layer ~= nil then
            for m = 1, #next_layer, 1 do
                local prev_layer_sum = 0
                for n = 1, #this_layer, 1 do
                    local neuron = this_layer[n]
                    for k, v in pairs(neuron) do
                        print(k .. ", " .. v)
                    end
                end
            end
        end
    end
end
I get an error message indicating that Lua thinks the table is a number... which is, suffice it to say, confusing. There's probably some language technicality behind this behavior, but I can't find good documentation on it (in part because I don't even know what term to search for; I'm not usually a Lua programmer).
EDIT:
So, it appears, somewhere in the code, numbers are getting inserted into the layers...
(Link to online Lua compiler where you can run the code: http://tpcg.io/_WPRPQV)
Your AI.Run function does
local input_layer = AI[1]
for i = 0, #inputs, 1 do
    input_layer[i] = inputs[i]
end
where inputs is a table of numbers, i.e. the first layer of your network is replaced by just numbers.
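That is exactly why pairs(neuron) later fails. A minimal sketch of the failure mode (my illustration, not code from the post):
local neurons = { {val = 0.5}, {val = 0.25} }
neurons[1] = 4                    -- a plain number now sits where a table used to be
for k, v in pairs(neurons[1]) do  -- error: bad argument #1 to 'pairs' (table expected, got number)
    print(k, v)
end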
You probably want to set the val of each neuron in that layer instead:
local input_layer = AI[1]
for i = 0, #inputs, 1 do
    input_layer[i].val = inputs[i]
end
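As a side note (my addition, not part of the original answer): Lua sequences are conventionally 1-based, and #inputs only counts those entries, so the loop should probably start at 1 as well; as written, input_layer[0] is nil and the first iteration would fail:
local input_layer = AI[1]
for i = 1, #inputs, 1 do
    input_layer[i].val = inputs[i]
end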

What is the problem with my gradient descent algorithm or how it's applied?

I've been trying to figure out what I've done wrong for many hours, but I just can't. I've even looked at other basic neural network libraries to make sure my gradient descent algorithm was correct, but it still isn't working.
I'm trying to teach it XOR, but it outputs:
input (0 0) | 0.011441891321516094
input (1 0) | 0.6558508610135193
input (0 1) | 0.6558003273099053
input (1 1) | 0.6563021185296245
after 1000 training passes, so clearly something is wrong.
The code is written in Lua, and I created the neural network from raw data so you can easily see how the data is formatted.
- Training code -
math.randomseed(os.time())
local nn = require("NeuralNetwork")
local network = nn.newFromRawData({
    ["activationFunction"] = "sigmoid",
    ["learningRate"] = 0.3,
    ["net"] = {
        [1] = {
            [1] = {
                ["value"] = 0
            },
            [2] = {
                ["value"] = 0
            }
        },
        [2] = {
            [1] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [2] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [3] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [4] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            }
        },
        [3] = {
            [1] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1,
                    [3] = 1,
                    [4] = 1
                }
            }
        }
    }
})
attempts = 1000
for i = 1, attempts do
    network:backPropagate({0,0},{0})
    network:backPropagate({1,0},{1})
    network:backPropagate({0,1},{1})
    network:backPropagate({1,1},{0})
end
print("Results:")
print("input (0 0) | "..network:feedForward({0,0})[1])
print("input (1 0) | "..network:feedForward({1,0})[1])
print("input (0 1) | "..network:feedForward({0,1})[1])
print("input (1 1) | "..network:feedForward({1,1})[1])
- Library -
local nn = {}
nn.__index = nn

nn.ActivationFunctions = {
    sigmoid = function(x) return 1/(1+math.exp(-x/1)) end,
    ReLu = function(x) return math.max(0, x) end,
}

nn.Derivatives = {
    sigmoid = function(x) return x * (1 - x) end,
    ReLu = function(x) return x > 0 and 1 or 0 end,
}

nn.CostFunctions = {
    MSE = function(outputs, expected)
        local sum = 0
        for i = 1, #outputs do
            sum += 1/2*(expected[i] - outputs[i])^2
        end
        return sum/#outputs
    end,
}

function nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
    local self = setmetatable({}, nn)
    self.learningRate = learningRate or .3
    self.activationFunction = activationFunction or "ReLu"
    self.net = {}
    local net = self.net
    local layers = hiddenLayers+2
    for i = 1, layers do
        net[i] = {}
    end
    for i = 1, inputs do
        net[1][i] = {value = 0}
    end
    for i = 2, layers-1 do
        for x = 1, neurons do
            net[i][x] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
            for z = 1, #net[i-1] do
                net[i][x].weights[z] = math.random()*2-1
            end
        end
    end
    for i = 1, outputs do
        net[layers][i] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
        for z = 1, #net[layers-1] do
            net[layers][i].weights[z] = math.random()*2-1
        end
    end
    return self
end

function nn.newFromRawData(data)
    return setmetatable(data, nn)
end

function nn:feedForward(inputs)
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local inputLayer = net[1]
    local outputLayer = net[layers]
    for i = 1, #inputLayer do
        inputLayer[i].value = inputs[i]
    end
    for i = 2, layers do
        local layer = net[i]
        for x = 1, #layer do
            local sum = layer[x].bias
            for z = 1, #net[i-1] do
                sum += net[i-1][z].value * layer[x].weights[z]
            end
            layer[x].netInput = sum
            layer[x].value = nn.ActivationFunctions[activation](sum)
        end
    end
    local outputs = {}
    for i = 1, #outputLayer do
        table.insert(outputs, outputLayer[i].value)
    end
    return outputs
end

function nn:backPropagate(inputs, expected)
    local outputs = self:feedForward(inputs)
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local lr = self.learningRate
    local inputLayer = net[1]
    local outputLayer = net[layers]
    for i = 1, #outputLayer do
        local delta = -(expected[i] - outputs[i]) * nn.Derivatives[activation](net[layers][i].value)
        outputLayer[i].delta = delta
    end
    for i = layers-1, 2, -1 do
        local layer = net[i]
        local nextLayer = net[i+1]
        for x = 1, #layer do
            local delta = 0
            for z = 1, #nextLayer do
                delta += nextLayer[z].delta * nextLayer[z].weights[x]
            end
            layer[x].delta = delta * nn.Derivatives[activation](layer[x].value)
        end
    end
    for i = 2, layers do
        local lastLayer = net[i-1]
        for x = 1, #net[i] do
            net[i][x].bias -= lr * net[i][x].delta
            for z = 1, #lastLayer do
                net[i][x].weights[z] -= lr * net[i][x].delta * lastLayer[z].value
            end
        end
    end
end

return nn
Any help would be highly appreciated, thanks!
All initial weights must be DIFFERENT numbers, otherwise backpropagation will not work: if every weight starts at the same value, every neuron in a layer computes the same output and receives the same gradient, so they can never diverge from one another. For example, you can replace each 1 with math.random().
Also, increase the number of attempts to 10000.
With these modifications, your code works fine:
Results:
input (0 0) | 0.028138230938126
input (1 0) | 0.97809448578087
input (0 1) | 0.97785000216126
input (1 1) | 0.023128477689456
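For reference, a minimal sketch of the symmetry-breaking initialization (my code, assuming the raw-data layout from the question; run it once before the training loop):
-- Randomize every bias and weight in the hidden and output layers.
for layerIndex = 2, #network.net do
    for _, neuron in ipairs(network.net[layerIndex]) do
        neuron.bias = math.random() * 2 - 1          -- uniform in (-1, 1)
        for w = 1, #neuron.weights do
            neuron.weights[w] = math.random() * 2 - 1
        end
    end
end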

Chainer: how to save and load a DQN model

I'm learning the deep reinforcement learning framework Chainer.
I've followed a tutorial and ended up with the following code:
# Assumed imports for this snippet:
import copy
import time

import numpy as np

import chainer
import chainer.functions as F
import chainer.links as L

def train_dddqn(env):

    class Q_Network(chainer.Chain):

        def __init__(self, input_size, hidden_size, output_size):
            super(Q_Network, self).__init__(
                fc1=L.Linear(input_size, hidden_size),
                fc2=L.Linear(hidden_size, hidden_size),
                fc3=L.Linear(hidden_size, hidden_size // 2),
                fc4=L.Linear(hidden_size, hidden_size // 2),
                state_value=L.Linear(hidden_size // 2, 1),
                advantage_value=L.Linear(hidden_size // 2, output_size)
            )
            self.input_size = input_size
            self.hidden_size = hidden_size
            self.output_size = output_size

        def __call__(self, x):
            h = F.relu(self.fc1(x))
            h = F.relu(self.fc2(h))
            hs = F.relu(self.fc3(h))
            ha = F.relu(self.fc4(h))
            state_value = self.state_value(hs)
            advantage_value = self.advantage_value(ha)
            advantage_mean = (F.sum(advantage_value, axis=1) / float(self.output_size)).reshape(-1, 1)
            q_value = F.concat([state_value for _ in range(self.output_size)], axis=1) + (
                advantage_value - F.concat([advantage_mean for _ in range(self.output_size)], axis=1))
            return q_value

        def reset(self):
            self.cleargrads()

    Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
    Q_ast = copy.deepcopy(Q)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(Q)

    epoch_num = 50
    step_max = len(env.data) - 1
    memory_size = 200
    batch_size = 50
    epsilon = 1.0
    epsilon_decrease = 1e-3
    epsilon_min = 0.1
    start_reduce_epsilon = 200
    train_freq = 10
    update_q_freq = 20
    gamma = 0.97
    show_log_freq = 5

    memory = []
    total_step = 0
    total_rewards = []
    total_losses = []

    start = time.time()
    for epoch in range(epoch_num):
        pobs = env.reset()
        step = 0
        done = False
        total_reward = 0
        total_loss = 0
        while not done and step < step_max:
            # select act
            pact = np.random.randint(3)
            if np.random.rand() > epsilon:
                pact = Q(np.array(pobs, dtype=np.float32).reshape(1, -1))
                pact = np.argmax(pact.data)
            # act
            obs, reward, done = env.step(pact)
            # add memory
            memory.append((pobs, pact, reward, obs, done))
            if len(memory) > memory_size:
                memory.pop(0)
            # train or update q
            if len(memory) == memory_size:
                if total_step % train_freq == 0:
                    shuffled_memory = np.random.permutation(memory)
                    memory_idx = range(len(shuffled_memory))
                    for i in memory_idx[::batch_size]:
                        batch = np.array(shuffled_memory[i:i + batch_size])
                        b_pobs = np.array(batch[:, 0].tolist(), dtype=np.float32).reshape(batch_size, -1)
                        b_pact = np.array(batch[:, 1].tolist(), dtype=np.int32)
                        b_reward = np.array(batch[:, 2].tolist(), dtype=np.int32)
                        b_obs = np.array(batch[:, 3].tolist(), dtype=np.float32).reshape(batch_size, -1)
                        b_done = np.array(batch[:, 4].tolist(), dtype=np.bool)
                        q = Q(b_pobs)
                        indices = np.argmax(q.data, axis=1)
                        maxqs = Q_ast(b_obs).data
                        target = copy.deepcopy(q.data)
                        for j in range(batch_size):
                            # The body of this loop was lost in the question's formatting; the
                            # double-DQN target update from the tutorial would read roughly:
                            target[j, b_pact[j]] = b_reward[j] + gamma * maxqs[j, indices[j]] * (not b_done[j])
                        Q.reset()
                        loss = F.mean_squared_error(q, target)
                        total_loss += loss.data
                        loss.backward()
                        optimizer.update()
                if total_step % update_q_freq == 0:
                    Q_ast = copy.deepcopy(Q)
            # epsilon
            if epsilon > epsilon_min and total_step > start_reduce_epsilon:
                epsilon -= epsilon_decrease
            # next step
            total_reward += reward
            pobs = obs
            step += 1
            total_step += 1
        total_rewards.append(total_reward)
        total_losses.append(total_loss)
        if (epoch + 1) % show_log_freq == 0:
            log_reward = sum(total_rewards[((epoch + 1) - show_log_freq):]) / show_log_freq
            log_loss = sum(total_losses[((epoch + 1) - show_log_freq):]) / show_log_freq
            elapsed_time = time.time() - start
            print('\t'.join(map(str, [epoch + 1, epsilon, total_step, log_reward, log_loss, elapsed_time])))
            start = time.time()
    return Q, total_losses, total_rewards

Q, total_losses, total_rewards = train_dddqn(Environment1(train))
My question is: how can I save and load this model once it has been trained? I know Keras has functions like model.save and load_model.
What's the specific code I need for this Chainer code?
You can use the serializers module to save/load a Chainer model's parameters (the Chain class).
from chainer import serializers
Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
Q_ast = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
# --- train Q here... ---
# copy Q's parameters into Q_ast by saving them to a file and loading into Q_ast
serializers.save_npz('my.model', Q)
serializers.load_npz('my.model', Q_ast)
See the official documentation for details:
http://docs.chainer.org/en/stable/guides/serializers.html
Also, you may refer to ChainerRL, which is a Chainer library for reinforcement learning:
https://github.com/chainer/chainerrl
ChainerRL has a utility function copy_param to copy parameters from a network source_link to a target_link:
https://github.com/chainer/chainerrl/blob/master/chainerrl/misc/copy_param.py#L12-L30
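For example, a direct parameter copy without going through a file might look like this (my sketch, assuming the copy_param signature at the link above):
from chainerrl.misc.copy_param import copy_param

# Copy the trained Q's parameters straight into Q_ast.
copy_param(target_link=Q_ast, source_link=Q)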

I can't really figure out what's wrong with my neural network class (Lua)

local matrix = require("matrixx")
local Class = require("class")
NeuralNetwork = Class{}
function NeuralNetwork:init(input_nodes, hidden_nodes, output_nodes)
    self.input_nodes = input_nodes
    self.hidden_nodes = hidden_nodes
    self.output_nodes = output_nodes
    self.weights_ih = matrix(self.hidden_nodes, self.input_nodes, math.random())
    self.weights_ho = matrix(self.output_nodes, self.hidden_nodes, math.random())
    self.bias_h = matrix(self.hidden_nodes, 1, math.random())
    self.bias_o = matrix(self.output_nodes, 1, math.random())
    self.learning_rate = 0.1
end

function NeuralNetwork:feedforward(input_array)
    --Generating the Hidden Outputs
    local inputs = matrix(input_array)
    for i = 1, #input_array do
        inputs[i][1] = input_array[i]
    end
    local hidden = self.weights_ih * inputs
    hidden = hidden + self.bias_h
    --Activation Function
    hidden = matrix.map(hidden, tanh)
    --Generating the output's output
    local output = self.weights_ho * hidden
    output = output + self.bias_o
    output = matrix.map(output, tanh)
    return output
end

function NeuralNetwork:train(input_array, target_array)
    --Generating the Hidden Outputs
    local inputs = matrix(input_array)
    for i = 1, #input_array do
        inputs[i][1] = input_array[i]
    end
    local hidden = self.weights_ih * inputs
    hidden = hidden + self.bias_h
    --Activation Function
    hidden = matrix.map(hidden, tanh)
    --Generating the output's output
    local outputs = self.weights_ho * hidden
    outputs = outputs + self.bias_o
    outputs = matrix.map(outputs, tanh)
    --Convert Targets Array to Matrix object
    local targets = matrix(#target_array, 1)
    for i = 1, #target_array do
        targets[i][1] = target_array[i]
    end
    --Calculate the error
    local output_errors = targets - outputs
    --Calculate gradient
    local gradients = matrix.map(outputs, tanhd)
    gradients = gradients * output_errors
    gradients = gradients * self.learning_rate
    -- Calculate deltas
    local hidden_T = matrix.transpose(hidden)
    local weight_ho_deltas = gradients * hidden_T
    -- Adjust the weights by deltas
    self.weights_ho = self.weights_ho + weight_ho_deltas
    -- Adjust the bias by its deltas (which is just the gradients)
    self.bias_o = self.bias_o + gradients
    -- Calculate the hidden layer errors
    local who_t = matrix.transpose(self.weights_ho)
    local hidden_errors = who_t * output_errors
    -- Calculate hidden gradient
    local hidden_gradient = matrix.map(hidden, tanhd)
    hidden_gradient = hidden_gradient * hidden_errors * self.learning_rate
    -- Calculate input->hidden deltas
    local inputs_T = matrix.transpose(inputs)
    local weight_ih_deltas = hidden_gradient * inputs_T
    self.weights_ih = self.weights_ih + weight_ih_deltas
    -- Adjust the bias by its deltas (which is just the gradients)
    self.bias_h = self.bias_h + hidden_gradient
    self.weights_ih:print()
    print()
    self.weights_ho:print()
    print()
end

function sigmoid(x)
    return 1 / (1 + math.exp(-x))
end

function dsigmoid(x)
    return sigmoid(x) * (1 - sigmoid(x))
end

function tanh(x)
    return (math.exp(x) - math.exp(-x)) / (math.exp(x) + math.exp(-x))
end

function tanhd(x)
    return 1 / math.cosh(x)^2
end

--MAIN
local nn = NeuralNetwork(2, 2, 1)
local training_data = {
    {
        inputs = {0, 1},
        target = {1}
    },
    {
        inputs = {1, 1},
        target = {0}
    },
    {
        inputs = {1, 0},
        target = {1}
    },
    {
        inputs = {0, 0},
        target = {0}
    }
}
for i = 1, 30 do
    local data = training_data[math.floor(math.random(#training_data))]
    nn:train(data.inputs, data.target)
end
nn:feedforward({0, 1}):print()
nn:feedforward({1, 1}):print()
nn:feedforward({0, 0}):print()
nn:feedforward({1, 0}):print()
I wrote this NeuralNetwork class using a class library and a matrix library.
It all seems correct to me (ideally, at least), but when I instantiate a new NN with 2 inputs, 2 hidden neurons, and 1 output and try to solve XOR, it doesn't work.
What am I missing? Maybe I misunderstood the matrix library; I hope someone can help me.
EDIT:
I added a map function to the library to apply a math function to every number in a matrix.
function matrix.map(m1, func)
    local mtx = {}
    for i = 1, #m1 do
        mtx[i] = {}
        for j = 1, #m1[1] do
            mtx[i][j] = func(m1[i][j])
        end
    end
    return setmetatable(mtx, matrix_meta)
end

Language Modelling with RNN and LSTM Cell in Tensorflow

My RNN for language modelling is predicting only "the", "and", and "unknown". What's wrong with my code?
Here I define the hyperparameters:
num_epochs = 300
total_series_length = len(uniqueSentence) - 4
truncated_backprop_length = 30
state_size = 100
num_classes = NUM_MEANINGFUL + 1
echo_step = 1
batch_size = 32
vocab_length = len(decoder)
num_batches = total_series_length//batch_size//truncated_backprop_length
learning_rate = 0.01
old_perplexity = 0
Here I generate the data (my inputs are 100-dimensional word embeddings computed with Word2Vec):
def generateData():
    uniqueSent = uniqueSentence[0 : len(uniqueSentence) - 4]
    x_tr = np.array([model_ted[word] for word in uniqueSent])
    # Roll array elements along a given axis.
    # Elements that roll beyond the last position are re-introduced at the first.
    x_tr = x_tr.reshape((100, batch_size, -1))  # The first index changing slowest, subseries as rows
    x = x_tr.transpose((1, 2, 0))
    print("hi")
    new_y = indexList[1 : len(indexList) - 4]
    new_y.append(indexList[len(indexList) - 3])
    y = np.array(new_y)
    print(len(y))
    y = y.reshape((batch_size, -1))
    return (x, y)
Define the placeholders:
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length, 100])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])
W = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
Inputs and desired outputs:
labels_series = tf.transpose(batchY_placeholder)
labels_series = tf.unstack(batchY_placeholder, axis=1)
inputs_series = batchX_placeholder
Forward pass:
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
print(tf.__version__)
#cell = tf.contrib.rnn.BasicRNNCell(state_size)
cell = tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple = False)
print(cell.state_size)
init_state = tf.zeros([batch_size, cell.state_size])
outputs, current_state = tf.nn.dynamic_rnn(cell, inputs_series, initial_state = init_state)
iterable_outputs = tf.unstack(outputs, axis = 1)
logits_series = [tf.matmul(state, W2) + b2 for state in iterable_outputs] #Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]
losses = [tf.losses.sparse_softmax_cross_entropy(labels, logits)
          for logits, labels in zip(logits_series, labels_series)]
total_loss = tf.add_n(losses)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
x,y = generateData()
del(model_ted)
Training:
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    sess.run(tf.initialize_all_variables())
    loss_list = []
    print("start")
    _current_state = np.zeros((batch_size, 2*state_size))
    # I previously had generateData outside and _current_state inside the loop
    for epoch_idx in range(num_epochs):
        print("New data, epoch", epoch_idx)
        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length
            batchX = x[:, start_idx:end_idx, :]
            batchY = y[:, start_idx:end_idx]
            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state
                })
            loss_list.append(_total_loss)
            del(batchX)
            del(batchY)
            perplexity = 2 ** (_total_loss / truncated_backprop_length)
            print(perplexity)
            del(perplexity)
            _predictions_series = np.array(_predictions_series)
            pr = _predictions_series.transpose([1, 0, 2])
            pr_ind = []
            for line in pr[0]:
                pr_ind.append(np.argmax(line))
            for index in pr_ind:
                print(decoder[index], end=" ")
            del(pr_ind)
            print("\n learning rate: ", end=" ")
            print(learning_rate)
