I have built a regular ANN–BP setup with one unit on input and output layer and 4 nodes in hidden with sigmoid. Giving it a simple task to approximate linear f(n) = n with n in range 0-100.
PROBLEM: Regardless of number of layers, units in hidden layer or whether or not I am using bias in node values it learns to approximate f(n) = Average(dataset) like so:
Code is written in JavaScript as a proof of concept. I have defined three classes: Net, Layer and Connection, where Layer is an array of input, bias and output values, Connection is a 2D array of weights and delta weights. Here is the Layer code where all important calculations happen:
// One layer of the network: holds the per-node inputs, outputs, gradients and
// biases, and implements the forward pass and the back-propagation updates.
//   nId          - identifier stored as this.id
//   oNet         - owning network; learningRate and learningMomentum are read from it
//   oConfig      - layer config; `nodes` (node count) and `activation` (a key into
//                  Ann.Activation / Ann.Activation.Derivative) are read from it
//   bUseBias     - when truthy, biases are randomised and later trained
//   aInitBiases  - two-element array handed to Ann.random for each bias
//                  (presumably a [min, max] range - confirm against Ann.random)
Ann.Layer = function(nId, oNet, oConfig, bUseBias, aInitBiases) {
var _oThis = this;
// Allocates the per-node arrays. inputs, outputs and biases start at 0;
// gradients is allocated but left unfilled until calculateGradients runs.
var _initialize = function() {
_oThis.id = nId;
_oThis.length = oConfig.nodes;
_oThis.outputs = new Array(oConfig.nodes);
_oThis.inputs = new Array(oConfig.nodes);
_oThis.gradients = new Array(oConfig.nodes);
_oThis.biases = new Array(oConfig.nodes);
_oThis.outputs.fill(0);
_oThis.inputs.fill(0);
_oThis.biases.fill(0);
// Without bUseBias every bias stays 0, so it adds nothing in feedForward.
if (bUseBias) {
for (var n=0; n<oConfig.nodes; n++) {
_oThis.biases[n] = Ann.random(aInitBiases[0], aInitBiases[1]);
}
}
};
/****************** PUBLIC ******************/
// The bare property references below have no runtime effect; they only list
// the fields of a layer. next/previous and inConnection/outConnection are not
// assigned anywhere in this constructor (presumably wired up by the network).
this.id;
this.length;
this.inputs;
this.outputs;
this.gradients;
this.biases;
this.next;
this.previous;
this.inConnection;
this.outConnection;
// A layer with no `previous` is the input layer; one with no `next` is the output layer.
this.isInput = function() { return !this.previous; }
this.isOutput = function() { return !this.next; }
// Computes this.gradients for every node.
//   aTarget - expected output values; only read when this is the output layer.
// The derivative function is called with the node's *output* (the already
// activated value), not with its pre-activation input.
this.calculateGradients = function(aTarget) {
var n, n1, nOutputError,
fDerivative = Ann.Activation.Derivative[oConfig.activation];
if (this.isOutput()) {
// Output layer: error is (actual - target).
for (n=0; n<oConfig.nodes; n++) {
nOutputError = this.outputs[n] - aTarget[n];
this.gradients[n] = nOutputError * fDerivative(this.outputs[n]);
}
} else {
// Hidden layer: error is the next layer's gradients weighted by the
// outgoing connection weights of this node.
for (n=0; n<oConfig.nodes; n++) {
nOutputError = 0.0;
for (n1=0; n1<this.outConnection.weights[n].length; n1++) {
nOutputError += this.outConnection.weights[n][n1] * this.next.gradients[n1];
}
// console.log(this.id, nOutputError, this.outputs[n], fDerivative(this.outputs[n]));
this.gradients[n] = nOutputError * fDerivative(this.outputs[n]);
}
}
}
// Adjusts the weights of the incoming connection (no-op on the input layer).
// Weights are indexed [node of previous layer][node of this layer].
// Each delta is -learningRate * previousOutput * gradient plus
// learningMomentum * the previous delta for the same weight.
this.updateInputWeights = function() {
if (!this.isInput()) {
var nY,
nX,
nOldDeltaWeight,
nNewDeltaWeight;
for (nX=0; nX<this.previous.length; nX++) {
for (nY=0; nY<this.length; nY++) {
nOldDeltaWeight = this.inConnection.deltaWeights[nX][nY];
nNewDeltaWeight =
- oNet.learningRate
* this.previous.outputs[nX]
* this.gradients[nY]
// Add momentum, a fraction of old delta weight
+ oNet.learningMomentum
* nOldDeltaWeight;
// Diagnostic only: reports a delta that became exactly 0 after being non-zero.
if (nNewDeltaWeight == 0 && nOldDeltaWeight != 0) {
console.log('Double overflow');
}
// The delta is remembered so the next update can apply momentum to it.
this.inConnection.deltaWeights[nX][nY] = nNewDeltaWeight;
this.inConnection.weights[nX][nY] += nNewDeltaWeight;
}
}
}
}
// Adjusts the biases by -learningRate * gradient. Only runs when bUseBias is
// set and this is not the input layer; no momentum term is applied to biases.
this.updateInputBiases = function() {
if (bUseBias && !this.isInput()) {
var n,
nNewDeltaBias;
for (n=0; n<this.length; n++) {
nNewDeltaBias =
- oNet.learningRate
* this.gradients[n];
this.biases[n] += nNewDeltaBias;
}
}
}
// Forward pass for this layer.
//   a - one value per node: the raw example for the input layer, otherwise
//       the weighted sums produced by the incoming connection.
// The array `a` is stored by reference as this.inputs.
this.feedForward = function(a) {
var fActivation = Ann.Activation[oConfig.activation];
this.inputs = a;
if (this.isInput()) {
// The input layer passes values through untouched; outputs becomes the same
// array object as inputs (no copy, no activation, no bias).
this.outputs = this.inputs;
} else {
for (var n=0; n<a.length; n++) {
this.outputs[n] = fActivation(a[n] + this.biases[n]);
}
}
// Hand the outputs to the outgoing connection, which drives the next layer.
if (!this.isOutput()) {
this.outConnection.feedForward(this.outputs);
}
}
_initialize();
}
The main feedForward and backProp functions are defined like so:
// Pushes one example through the network, starting at the first layer, and
// then resets the stored network error to 0.
//   a - input values handed to the first layer
this.feedForward = function(a) {
    var oFirstLayer = this.layers[0];
    oFirstLayer.feedForward(a);
    this.netError = 0;
};
// Runs one training step for a single example.
//   aExample - input values; length must equal getInputCount()
//   aTarget  - expected outputs; length must equal getOutputCount()
// Throws a string when either length does not match.
this.backPropagate = function(aExample, aTarget) {
    var iLayer,
        iOutput,
        oLayer;

    this.target = aTarget;

    if (aExample.length != this.getInputCount()) {
        throw "Wrong input count in training data";
    }
    if (aTarget.length != this.getOutputCount()) {
        throw "Wrong output count in training data";
    }

    this.feedForward(aExample);
    _calculateNetError(aTarget);

    iOutput = this.layers.length - 1;

    // Gradients: the output layer is the only one that receives the target;
    // the remaining layers follow from last to first. Layer 0 (input) is skipped.
    if (iOutput > 0) {
        this.layers[iOutput].calculateGradients(aTarget);
    }
    iLayer = iOutput - 1;
    while (iLayer > 0) {
        this.layers[iLayer].calculateGradients();
        iLayer--;
    }

    // Weight and bias updates, again from the output layer back to layer 1.
    iLayer = iOutput;
    while (iLayer > 0) {
        oLayer = this.layers[iLayer];
        oLayer.updateInputWeights();
        oLayer.updateInputBiases();
        iLayer--;
    }
};
Connection code is rather simple:
// Weighted link between two layers of a network.
//   oNet         - owning network; its `layers` array is indexed by the config
//   oConfig      - `from` / `to` are indices into oNet.layers
//   aInitWeights - two-element array handed to Ann.random for every weight
// Public fields set during construction: from, to, weights, deltaWeights.
// Both matrices are indexed [node of `from` layer][node of `to` layer].
Ann.Connection = function(oNet, oConfig, aInitWeights) {
    var self = this;

    // Resolves the two layers and allocates the weight / delta matrices.
    function setUp() {
        var nRows, nCols, iRow, iCol, aWeightRow, aDeltaRow;

        self.from = oNet.layers[oConfig.from];
        self.to = oNet.layers[oConfig.to];
        nRows = self.from.length;
        nCols = self.to.length;

        self.weights = new Array(nRows);
        self.deltaWeights = new Array(nRows);

        for (iRow = 0; iRow < nRows; iRow++) {
            aWeightRow = new Array(nCols);
            aDeltaRow = new Array(nCols);
            self.weights[iRow] = aWeightRow;
            self.deltaWeights[iRow] = aDeltaRow;
            // Deltas start at zero, weights start random.
            aDeltaRow.fill(0);
            for (iCol = 0; iCol < nCols; iCol++) {
                aWeightRow[iCol] = Ann.random(aInitWeights[0], aInitWeights[1]);
            }
        }
    }

    // Computes the weighted sum arriving at every node of the `to` layer and
    // passes the resulting array on to that layer's feedForward.
    //   a - outputs of the `from` layer
    this.feedForward = function(a) {
        var aSums = new Array(this.to.length),
            iTo = 0,
            iFrom,
            nSum;

        while (iTo < this.to.length) {
            nSum = 0;
            iFrom = 0;
            while (iFrom < this.from.length) {
                nSum += a[iFrom] * this.weights[iFrom][iTo];
                iFrom++;
            }
            aSums[iTo] = nSum;
            iTo++;
        }

        this.to.feedForward(aSums);
    };

    setUp();
};
And my activation functions and derivatives are defined like so:
// Activation functions, looked up by the `activation` name of a layer config.
// Each takes a node's summed input and returns the node's output.
Ann.Activation = {
    // Identity.
    linear: function(x) {
        return x;
    },
    // Logistic sigmoid.
    sigma: function(x) {
        var nDenominator = 1.0 + Math.exp(-x);
        return 1.0 / nDenominator;
    },
    // Hyperbolic tangent.
    tanh: function(x) {
        return Math.tanh(x);
    }
};
// Derivatives matching Ann.Activation, keyed by the same names.
// The sigma and tanh formulas are written in terms of the activation's
// output value (y), so callers are expected to pass a node's output.
Ann.Activation.Derivative = {
    // Constant slope; the argument is ignored.
    linear: function(y) {
        return 1.0;
    },
    sigma: function(y) {
        var nComplement = 1.0 - y;
        return y * nComplement;
    },
    tanh: function(y) {
        var nSquare = y * y;
        return 1.0 - nSquare;
    }
};
And configuration JSON for the network is as follows:
// Network definition from the question: 1 input node, one hidden layer of
// 4 sigmoid nodes, 1 linear output node, no biases and no momentum.
var Config = {
    id: "Config1",

    // Training parameters.
    learning_rate: 0.01,
    learning_momentum: 0,

    // Two-element ranges used when initialising weights and biases.
    init_weight: [-1, 1],
    init_bias: [-1, 1],
    use_bias: false,

    // Layers, listed from input to output.
    layers: [
        { nodes: 1 },
        { nodes: 4, activation: "sigma" },
        { nodes: 1, activation: "linear" }
    ],

    // Each connection joins two layers by their index in `layers`.
    connections: [
        { from: 0, to: 1 },
        { from: 1, to: 2 }
    ]
};
Perhaps, your experienced eye can spot the problem with my calculations?
See example in JSFiddle
I did not look extensively at the code (because it is a lot of code to look at, would need to take more time for that later, and I am not 100% familiar with javascript). Either way, I believe Stephen introduced some changes in how the weights are calculated, and his code seems to give correct results, so I'd recommend looking at that.
Here are a few points though that are not necessarily about the correctness of computations, but may still help:
How many examples are you showing the network for training? Are you showing the same input multiple times? You should show every example that you have (inputs) multiple times; showing every example only one time is not sufficient for algorithms based on gradient descent to learn, since they only move a little bit in the correct direction every time. It is possible that all of your code is correct, but you simply have to give it a bit more time to train.
Introducing more hidden layers like Stephen did may help to speed up training, or it may be detrimental. This is typically something you'd want to experiment with for your specific case. It definitely shouldn't be necessary for this simple problem though. I suspect a more important difference between your configuration and Stephen's configuration may be the activation function used in the hidden layer(s). You used a sigmoid, which means that all of the input values get squashed to lie below 1.0 in the hidden layer, and then you need very large weights to transform these numbers back to the desired output (which can be up to a value of 100). Stephen used linear activation functions for all layers, which in this specific case is likely to make training much easier because you are actually trying to learn a linear function. In many other cases it would be desirable to introduce non-linearities though.
It may be beneficial to transform (normalize) both your input and your desired output to lie in [0, 1] instead of [0, 100]. This would make it more likely for your sigmoid layer to produce good results (though I'm still not sure if it would be enough, because you're still introducing a nonlinearity in a case where you intend to learn a linear function, and you may need more hidden nodes to correct for that). In ''real-world'' cases, where you have multiple different input variables, this is also typically done, because it ensures that all input variables are treated as being equally important initially. You could always do a preprocessing step where you normalize the input to [0, 1], give that as input to the network, train it to produce output in [0, 1], and then add a postprocessing step where you transform the output back to the original range.
First... I really like this code. I know very little about NNs (just getting started) so pardon my lacking here if any.
Here is a summary of the changes I made:
// Excerpt of the answerer's modified updateInputWeights (the enclosing loops
// over nX / nY are not shown here).
// NOTE(review): compared with the question's version, the leading minus sign
// is gone and the gradient is DIVIDED by the previous layer's output instead
// of multiplied by it. Any previous output of exactly 0 (the question trains
// on n in 0-100, so 0 is a valid input) makes this a division by zero -
// confirm this is intended before reusing it.
//updateInputWeights has this in the middle now:
nNewDeltaWeight =
oNet.learningRate
* this.gradients[nY]
/ this.previous.outputs[nX]
// Add momentum, a fraction of old delta weight
+ oNet.learningMomentum
* nOldDeltaWeight;
// NOTE(review): with `+=` the stored delta is a running total of all past
// deltas, so the momentum term above scales that total rather than only the
// previous step's delta.
//updateInputWeights has this at the bottom now:
this.inConnection.deltaWeights[nX][nY] += nNewDeltaWeight; // += added
this.inConnection.weights[nX][nY] += nNewDeltaWeight;
// I modified the following:
// Stores the network error for one example in _oThis.netError.
//   aTarget - expected output values, one per output node
// The error is the plain (signed) sum of target - output over all output
// nodes; it is neither squared nor made absolute.
_calculateNetError2 = function(aTarget) {
    var oLayer = _oThis.getOutputLayer();
    var nNodes = oLayer.length;
    var nTotal = 0.0;
    var i = 0;

    while (i < nNodes) {
        nTotal += aTarget[i] - oLayer.outputs[i];
        i++;
    }

    _oThis.netError = nTotal;
};
The config section looks like this now:
// Network definition used in the answer: every layer is linear, with four
// hidden layers of 2 nodes each between the single input and output node.
var Config = {
    id: "Config1",

    // Training parameters.
    learning_rate: 0.001,
    learning_momentum: 0.001,

    // Two-element ranges used when initialising weights and biases.
    init_weight: [-1.0, 1.0],
    init_bias: [-1.0, 1.0],
    use_bias: false,

    // Alternative kept for reference (disabled): a single hidden layer.
    //   layers:      1 linear -> 5 linear -> 1 linear
    //   connections: 0 -> 1, 1 -> 2

    // Layers, listed from input to output.
    layers: [
        { nodes: 1, activation: "linear" },
        { nodes: 2, activation: "linear" },
        { nodes: 2, activation: "linear" },
        { nodes: 2, activation: "linear" },
        { nodes: 2, activation: "linear" },
        { nodes: 1, activation: "linear" }
    ],

    // Each connection joins two consecutive layers by index.
    connections: [
        { from: 0, to: 1 },
        { from: 1, to: 2 },
        { from: 2, to: 3 },
        { from: 3, to: 4 },
        { from: 4, to: 5 }
    ]
};
Related
I'm trying to write my own implementation of NEAT, and I can't get myself to understand how speciation works.
I tried my best to follow the pseudocode I found in this paper (start of page 13),
but I think I'm doing it really wrong and I don't understand the right way to do it. Here is my code.
The speciate function that splits the population into species:
/**
 * Splits a population into species.
 *
 * Each network joins the first species that contains at least one member it
 * is compatible with (as decided by `sh(net, member)`); a network that fits
 * no existing species starts a new one. Members placed earlier in the same
 * call count as members for later networks.
 *
 * The `species` argument is never modified: every species array is copied
 * before networks are added, so the caller's previous-generation species stay
 * intact. (Previously only the outer array was copied, so the inner arrays
 * passed by the caller were mutated in place.)
 *
 * NOTE(review): the compatibility check is made against every member of a
 * species, not against a single representative - confirm this is the
 * intended variant of NEAT speciation.
 *
 * @param {Array} population - networks to place.
 * @param {Array<Array>} [species=[]] - existing species to seed the result with.
 * @returns {Array<Array>} new list of species (arrays of networks).
 */
function speciate(population, species = []) {
    // Copy each species so pushes below never touch the caller's arrays.
    const newSpecies = species.map(members => [...members]);

    for (const net of population) {
        const home = newSpecies.find(
            members => members.some(member => sh(net, member))
        );
        if (home) {
            home.push(net);
        } else {
            newSpecies.push([net]);
        }
    }

    return newSpecies;
}
the repopulation function that generates a new population using the number of offsprings:
/**
 * Splits `popCount` offspring slots between species in proportion to each
 * species' average fitness, using the largest-remainder method so the quotas
 * are whole numbers that add up to exactly `popCount`.
 *
 * Averages that are not finite or not positive count as 0. If no species has
 * a positive average, the slots are shared evenly.
 *
 * @param {number} popCount - desired total number of offspring.
 * @param {number[]} averages - average fitness per species.
 * @returns {number[]} offspring count per species, same order as `averages`.
 */
function _allocateOffspring(popCount, averages) {
    const count = averages.length;
    const target = Number.isFinite(popCount) ? Math.max(0, Math.floor(popCount)) : 0;
    if (count === 0) {
        return [];
    }

    const weights = averages.map(avg => (Number.isFinite(avg) && avg > 0 ? avg : 0));
    const total = weights.reduce((sum, w) => sum + w, 0);
    const exact = weights.map(w => (total > 0 ? w / total : 1 / count) * target);
    const quotas = exact.map(value => Math.floor(value));

    // Species indices ordered by the fractional part that flooring discarded,
    // largest first; these receive the left-over slots.
    const order = exact
        .map((value, index) => index)
        .sort((a, b) => (exact[b] - quotas[b]) - (exact[a] - quotas[a]));

    let remaining = target - quotas.reduce((sum, q) => sum + q, 0);
    for (let k = 0; remaining > 0; k++, remaining--) {
        quotas[order[k % count]]++;
    }
    // Guard against floating-point overshoot: give slots back, starting with
    // the species that had the smallest fractional part.
    for (let k = count - 1; remaining < 0; k = (k + count - 1) % count) {
        if (quotas[order[k]] > 0) {
            quotas[order[k]]--;
            remaining++;
        }
    }
    return quotas;
}

/**
 * Produces one child network from two parents selected within a species.
 *
 * @param {Array} members - networks of one species.
 * @returns {NeuralNetwork} network wired from the mutated child genome.
 */
function _breedChild(members) {
    const parent1 = wheelSelect(members);
    const parent2 = wheelSelect(members);
    const child = parent1.genome.crossover(parent2.genome);
    child.mutateAddNeuron(0.01);
    child.mutateAddConnection(0.01);
    child.mutateWeight(0.01);
    child.mutateEnabledToggle(0.01);
    child.layerNeurons();
    const net = new NeuralNetwork();
    net.wireUsingGenome(child);
    return net;
}

/**
 * Builds the next generation: every species gets a share of `popCount`
 * offspring proportional to its average fitness.
 *
 * The result always contains exactly `popCount` networks (rounded down to a
 * whole number) as long as at least one species has members. Previously the
 * fractional per-species share was used directly as a loop bound, which
 * rounds every species up and made the population size drift between
 * generations; an empty species or an all-zero fitness sum produced NaN
 * shares and therefore no offspring at all.
 *
 * Empty species are skipped.
 *
 * @param {number} popCount - desired size of the new population.
 * @param {Array<Array>} species - species, each an array of networks exposing
 *     `genome.fitness` and `genome.crossover`.
 * @returns {Array} the new population.
 */
function repopulate(popCount, species) {
    const populated = species.filter(members => members.length > 0);
    const averages = populated.map(
        members => members.reduce((sum, net) => sum + net.genome.fitness, 0) / members.length
    );
    const quotas = _allocateOffspring(popCount, averages);

    const newPop = [];
    populated.forEach((members, index) => {
        for (let i = 0; i < quotas[index]; i++) {
            newPop.push(_breedChild(members));
        }
    });
    return newPop;
}
The problem I'm facing is that the population size seems to change every generation: sometimes it goes up and sometimes down. So I'm guessing I'm calculating the offspring count wrong, or my speciation isn't working correctly, but I can't figure it out.
any help is appreciated!
I have a highcharts sankey diagram with two sides:
There are situations where some of my nodes have empty links (=with 0 weight). I would like the node to being displayed despite having no link from or to it.
Any chance I can achieve this?
I read on this thread that I have to fake it with weight=1 connections: I could make the link transparent and tweak the tooltip to hide those, but that's very painful for something that feels pretty basic.
Maybe a custom call of the generateNode call or something?
Thanks for the help
You can use the following wrap to show a node when the weight is 0.
// Local aliases for the two Highcharts utilities used by getDLOptions.
const { isObject, merge } = Highcharts;
/**
 * Builds the data-label options for a node.
 *
 * Starts from `{ style: {} }` and merges in the level's `dataLabels`, then
 * the point's `dataLabels`, so point settings win over level settings.
 * A side that is not an object contributes an empty object.
 *
 * @param {Object} params
 * @param {Object} [params.optionsPoint] - point options; `dataLabels` is read from it.
 * @param {Object} [params.level] - level options; `dataLabels` is read from it.
 * @returns {Object} merged data-label options.
 */
function getDLOptions(params) {
    const pointLabels = isObject(params.optionsPoint)
        ? params.optionsPoint.dataLabels
        : {};
    const levelLabels = isObject(params.level)
        ? params.level.dataLabels
        : {};

    return merge({ style: {} }, levelLabels, pointLabels);
}
// Wraps the sankey series' translateNode so that a node is always given a
// shape, tooltip anchor and data-label options - including nodes whose links
// sum to 0.
//
// The wrapper first computes the node geometry itself, lets the original
// translateNode run, and then unconditionally (re)assigns the node's drawing
// properties. The former `if (1) { ... } else { ... }` wrapper has been
// removed: its else branch could never run. (Presumably the constant replaced
// a check on the node's sum in the stock implementation - confirm against the
// Highcharts version in use.) The unused local `series` is gone as well.
Highcharts.wrap(
    Highcharts.seriesTypes.sankey.prototype,
    'translateNode',
    function(proceed, node, column) {
        var translationFactor = this.translationFactor,
            chart = this.chart,
            options = this.options,
            sum = node.getSum(),
            // Never thinner than minLinkWidth, so a zero-sum node stays visible.
            nodeHeight = Math.max(
                Math.round(sum * translationFactor),
                this.options.minLinkWidth
            ),
            nodeWidth = Math.round(this.nodeWidth),
            // Half-pixel shift for odd border widths.
            crisp = Math.round(options.borderWidth) % 2 / 2,
            nodeOffset = column.sankeyColumn.offset(node, translationFactor),
            fromNodeTop = Math.floor(
                Highcharts.pick(
                    nodeOffset.absoluteTop,
                    (column.sankeyColumn.top(translationFactor) +
                        nodeOffset.relativeTop)
                )
            ) + crisp,
            left = Math.floor(
                this.colDistance * node.column + options.borderWidth / 2
            ) + Highcharts.relativeLength(
                node.options.offsetHorizontal || 0,
                nodeWidth
            ) + crisp,
            nodeLeft = chart.inverted ? chart.plotSizeX - left : left;

        node.sum = sum;

        // Run the original implementation, then override its result below.
        proceed.apply(this, Array.prototype.slice.call(arguments, 1));

        // Draw the node
        node.shapeType = 'rect';
        node.nodeX = nodeLeft;
        node.nodeY = fromNodeTop;

        let x = nodeLeft,
            y = fromNodeTop,
            width = node.options.width || options.width || nodeWidth,
            height = node.options.height || options.height || nodeHeight;

        // Inverted charts swap the axes, so width and height trade places.
        if (chart.inverted) {
            x = nodeLeft - nodeWidth;
            y = chart.plotSizeY - fromNodeTop - nodeHeight;
            width = node.options.height || options.height || nodeWidth;
            height = node.options.width || options.width || nodeHeight;
        }

        // Calculate data label options for the point
        node.dlOptions = getDLOptions({
            level: (this.mapOptionsToLevel)[node.level],
            optionsPoint: node.options
        });

        // Pass test in drawPoints
        node.plotX = 1;
        node.plotY = 1;

        // Set the anchor position for tooltips
        node.tooltipPos = chart.inverted ? [
            (chart.plotSizeY) - y - height / 2,
            (chart.plotSizeX) - x - width / 2
        ] : [
            x + width / 2,
            y + height / 2
        ];

        node.shapeArgs = {
            x,
            y,
            width,
            height,
            display: node.hasShape() ? '' : 'none'
        };
    }
);
Demo:
http://jsfiddle.net/BlackLabel/uh6fp89j/
In the above solution, another node arrangement would be difficult to achieve and may require a lot of modifications beyond our scope of support.
You can consider using the mentioned "tricky solution", since it might return a better positioning result. This solution is based on changing the 0-weight links in the chart's load event and adjusting the tooltip as well, so it may require adjustment to your project.
// Chart options fragment: fakes zero-weight links so their nodes get drawn.
chart: {
events: {
// Runs once when the chart has loaded.
load() {
// Only the first series is processed.
this.series[0].points.forEach(point => {
// A link with weight exactly 0 is given a small weight and made
// transparent, so it takes up space without being visible.
if (point.weight === 0) {
point.update({
weight: 0.1,
color: 'transparent'
})
}
})
}
}
},
// Tooltip formatters round values down, so the faked 0.1 weight is shown as 0.
tooltip: {
nodeFormatter: function() {
return `${this.name}: <b>${Math.floor(this.sum)}</b><br/>`
},
pointFormatter: function() {
return `${this.fromNode.name} → ${this.toNode.name}: <b>${Math.floor(this.weight)}</b><br/>`
}
},
Demo:
http://jsfiddle.net/BlackLabel/0dqpabku/
Is there a method in Dart for finding the combinations of elements in a given array that add up to a desired number? I can do this for pairs of elements, but I can't do it for code that finds the sum of 3 or more elements.
For example
Input: candidates = [10,1,2,7,6,1,5], target = 8
Output:
[
[1,1,6],
[1,2,5],
[1,7],
[2,6]
]
This is the code I have written so far:
/// Collects every pair of candidates (at two different positions, earlier
/// position first) whose sum equals the target. Only pairs are handled; the
/// collected list is not printed or returned.
void main() {
  final candidates = [10, 1, 2, 7, 6, 1, 5];
  final target = 8;
  final answer = [];

  for (var first = 0; first < candidates.length; first++) {
    // Starting after `first` covers exactly the pairs with first < second.
    for (var second = first + 1; second < candidates.length; second++) {
      final pairSum = candidates[first] + candidates[second];
      if (pairSum == target) {
        answer.add([candidates[first], candidates[second]]);
      }
    }
  }
}
I am sure this can be done more efficient but since the solution is for some Leetcode assignment, I don't really want to spend too much time on optimizations.
I have tried to add some comments in the code which explain my way of doing it:
/// Prints every combination found by [getSumLists] for two sample inputs,
/// one combination per line.
void main() {
  for (final combination in getSumLists([10, 1, 2, 7, 6, 1, 5], 8)) {
    print(combination);
  }
  // [5, 1, 2]
  // [1, 6, 1]
  // [1, 7]
  // [6, 2]

  for (final combination in getSumLists([2, 5, 2, 1, 2], 5)) {
    print(combination);
  }
  // [2, 1, 2]
  // [5]
}
/// Lazily yields every distinct combination of [candidates] whose elements
/// add up to [target]. Each candidate position is used at most once per
/// combination, and equal values never produce the same combination twice.
///
/// [tempAnswer] and [sum] carry the partial combination and its running
/// total between recursive calls; external callers leave them unset.
///
/// A branch is abandoned as soon as its running total exceeds [target].
/// The input list is not modified.
Iterable<List<int>> getSumLists(
  List<int> candidates,
  int target, {
  List<int>? tempAnswer,
  int sum = 0,
}) sync* {
  // A literal default would be const (unmodifiable), hence the null check.
  final partial = tempAnswer ?? [];

  if (sum > target) {
    // Already past the target: nothing below this branch can match.
    return;
  }
  if (sum == target) {
    // Hand out a copy, because `partial` keeps being modified afterwards.
    yield List<int>.of(partial);
  }

  // Work on a private, growable copy so the caller's list stays untouched.
  final remaining = List<int>.of(candidates);
  while (remaining.isNotEmpty) {
    // Taking from the end avoids shifting the rest of the list.
    final picked = remaining.removeLast();

    // Explore every combination that extends the partial one with `picked`.
    partial.add(picked);
    yield* getSumLists(
      remaining,
      target,
      tempAnswer: partial,
      sum: sum + picked,
    );
    partial.removeLast();

    // Drop all other occurrences of the value just tried, so the same
    // combination is not generated again from a duplicate.
    remaining.removeWhere((value) => value == picked);
  }
}
Situation: a chart with some analog series and some digital series (0-1).
The problem is with the digital series. I would like to make sure that the series do not overlap as they do in image 1.
My idea is to set a "y-offset" on each digital series, to get a result like image 2.
This is a part of the y Axis configuration of the digital series. All the digital-series is related to a single y Axis (with id digital).
// y-axis shared by all digital series; series refer to it by this id.
id : "digital",
// Axis range is pinned to the two digital levels, 0 and 1.
min: 0,
max : 1,
ceiling:1,
floor : 0,
// One tick per unit, i.e. ticks at 0 and 1 only.
tickInterval: 1
Image 1.
Image 2.
In the documentation i can't find anything that can help me. And this is not my case.
UPDATE
Example on JSFIDDLE. Look (yes it's impossible currently) at the digital series with green color.
If you add a function in the load event, you can change the value of y in a way that makes it look offset. Like this:
// Chart options fragment: shifts the "high" level of each digital series so
// that the series no longer draw on top of each other.
chart: {
events: {
// Runs once when the chart has loaded.
load: function() {
var series = this.series;
for (var i = 0; i < series.length; i++) {
// Only series attached to the y-axis whose id is "digital" are changed.
if (series[i].yAxis.userOptions.id == "digital") {
for (var j = 0; j < series[i].data.length; j++) {
// High points (y == 1) are raised by 0.1 per series index; every other
// value is forced to 0, so only the high level is offset.
if (series[i].data[j].y == 1) {
series[i].data[j].y = 1 + 0.1 * i;
} else {
series[i].data[j].y = 0
}
}
// NOTE(review): point.y is assigned directly and the chart is then updated
// with the series array as its options argument - presumably this is what
// triggers the redraw; confirm against the Highcharts Chart.update API.
// NOTE(review): the index i counts all series of the chart, not only the
// digital ones, so the offset depends on the series' position in the chart.
this.update(series, true, false);
}
}
}
}
}
This sets every high (1) value of a digital series to 1 + 0.1 * the series index, while all other values are set to 0.
Working example: https://jsfiddle.net/u2pzrhgk/27/
In my case, the input is a List<List<Float>> (a list of word representation vectors), and the output is one Double per sequence.
So I am building the following structure (first index - example number, second - sentence item number, third - word vector element number): http://pastebin.com/KGdjwnki
And for the output: http://pastebin.com/fY8zrxEL
But when I pass one of the following (http://pastebin.com/wvFFC4Hw) to model.output, I get the vector [0.25, 0.24, 0.25, 0.25] instead of a single value.
What can be wrong? The code (in Kotlin) is attached. classCount is one.
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.deeplearning4j.nn.conf.NeuralNetConfiguration.Builder
import org.deeplearning4j.nn.api.OptimizationAlgorithm
import org.deeplearning4j.nn.conf.Updater
import org.deeplearning4j.nn.weights.WeightInit
import org.deeplearning4j.nn.conf.layers.GravesLSTM
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer
import org.deeplearning4j.nn.conf.BackpropType
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.cpu.nativecpu.NDArray
import org.nd4j.linalg.indexing.NDArrayIndex
import org.nd4j.linalg.factory.Nd4j
import org.nd4j.linalg.lossfunctions.LossFunctions
import java.util.*
/**
 * Recurrent network built from three stacked GravesLSTM layers followed by an
 * RnnOutputLayer, trained with truncated back-propagation through time.
 *
 * @param wordVectorSize number of inputs of the first LSTM layer
 * @param classCount number of outputs of the output layer
 */
class ClassifierNetwork(wordVectorSize: Int, classCount: Int) {
    /** Shapes of the training input ([x]) and label ([y]) arrays. */
    data class Dimension(val x: Array<Int>, val y: Array<Int>)

    val model: MultiLayerNetwork
    val optimization = OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT
    val iterations = 1
    val learningRate = 0.1
    val rmsDecay = 0.95
    val seed = 12345
    val l2 = 0.001
    val weightInit = WeightInit.XAVIER
    val updater = Updater.RMSPROP
    val backtropType = BackpropType.TruncatedBPTT
    val tbpttLength = 50
    val epochs = 50

    // Placeholder until train() records the real shapes; predict() reads it.
    var dimensions = Dimension(intArrayOf(0).toTypedArray(), intArrayOf(0).toTypedArray())

    init {
        val baseConfiguration = Builder().optimizationAlgo(optimization)
                .iterations(iterations).learningRate(learningRate).rmsDecay(rmsDecay).seed(seed).regularization(true).l2(l2)
                .weightInit(weightInit).updater(updater)
                .list()
        baseConfiguration.layer(0, GravesLSTM.Builder().nIn(wordVectorSize).nOut(64).activation("tanh").build())
        baseConfiguration.layer(1, GravesLSTM.Builder().nIn(64).nOut(32).activation("tanh").build())
        baseConfiguration.layer(2, GravesLSTM.Builder().nIn(32).nOut(16).activation("tanh").build())
        // NOTE(review): softmax normalises the outputs to sum to 1, so with
        // classCount == 1 the single output is always 1. For a single
        // regression value an identity activation with an MSE loss is the
        // usual choice - confirm the intended task before changing this.
        baseConfiguration.layer(3, RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT)
                .activation("softmax").weightInit(WeightInit.XAVIER).nIn(16).nOut(classCount).build())
        val cfg = baseConfiguration.build()!!
        cfg.backpropType = backtropType
        cfg.tbpttBackLength = tbpttLength
        cfg.tbpttFwdLength = tbpttLength
        cfg.isPretrain = false
        cfg.isBackprop = true
        model = MultiLayerNetwork(cfg)
    }

    /**
     * Derives the array shapes from the training data.
     *
     * x is indexed [example][sentence item][word vector element]; the returned
     * x shape is [exampleCount, wordVectorLength, sentenceLength] and the y
     * shape is [exampleCount, classCount]. Sizes are taken from the first
     * example only.
     */
    private fun dataDimensions(x: List<List<Array<Double>>>, y: List<Array<Double>>): Dimension {
        assert(x.size == y.size)
        val exampleCount = x.size
        assert(x.size > 0)
        val sentenceLength = x[0].size
        assert(sentenceLength > 0)
        val wordVectorLength = x[0][0].size
        assert(wordVectorLength > 0)
        val classCount = y[0].size
        assert(classCount > 0)
        return Dimension(
                intArrayOf(exampleCount, wordVectorLength, sentenceLength).toTypedArray(),
                intArrayOf(exampleCount, classCount).toTypedArray()
        )
    }

    /** Training input and labels converted to ND4J arrays. */
    data class Fits(val x: INDArray, val y: INDArray)

    /**
     * Converts the nested training lists into ND4J arrays.
     *
     * Every example is transposed so that the word-vector element is the row
     * index and the sentence position is the column index.
     * NOTE(review): the label array built here is two-dimensional
     * [example, class]; the answers in this thread state that LSTM input and
     * output must be rank 3 - confirm the expected label shape.
     */
    private fun fitConversion(x: List<List<Array<Double>>>, y: List<Array<Double>>): Fits {
        val dim = dataDimensions(x, y)
        val xItems = ArrayList<INDArray>()
        for (i in 0 until dim.x[0]) {
            val itemList = ArrayList<DoubleArray>()
            for (j in 0 until dim.x[1]) {
                val rowList = ArrayList<Double>()
                for (k in 0 until dim.x[2]) {
                    rowList.add(x[i][k][j])
                }
                itemList.add(rowList.toTypedArray().toDoubleArray())
            }
            xItems.add(Nd4j.create(itemList.toTypedArray()))
        }
        val xFits = Nd4j.create(xItems, dim.x.toIntArray(), 'c')
        val yItems = ArrayList<DoubleArray>()
        for (i in 0 until y.size) {
            yItems.add(y[i].toDoubleArray())
        }
        val yFits = Nd4j.create(yItems.toTypedArray())
        return Fits(xFits, yFits)
    }

    /**
     * Prints the root of the summed squared differences between the expected
     * outputs and the current predictions over all examples.
     */
    private fun error(epoch: Int, x: List<List<Array<Double>>>, y: List<Array<Double>>) {
        var totalDiff = 0.0
        for (i in 0 until x.size) {
            val source = x[i]
            val result = y[i]
            val realResult = predict(source)
            var diff = 0.0
            // Assumes predict() returns at least result.size values.
            for (j in 0 until result.size) {
                val elementDiff = result[j] - realResult[j]
                diff += Math.pow(elementDiff, 2.0)
            }
            diff = Math.sqrt(diff)
            totalDiff += Math.pow(diff, 2.0)
        }
        totalDiff = Math.sqrt(totalDiff)
        print("Epoch ")
        print(epoch)
        print(", diff ")
        println(totalDiff)
    }

    /**
     * Fits the model on the whole data set for [epochs] rounds and prints the
     * error after every round. Also records the data shapes for predict().
     */
    fun train(x: List<List<Array<Double>>>, y: List<Array<Double>>) {
        dimensions = dataDimensions(x, y)
        val (xFit, yFit) = fitConversion(x, y)
        for (i in 0 until epochs) {
            model.input = xFit
            model.labels = yFit
            model.fit()
            error(i + 1, x, y)
        }
    }

    /**
     * Runs the model on one sentence and returns the network output as a flat
     * array.
     *
     * The sentence is transposed the same way as in training, using the
     * shapes recorded by the last train() call, so train() must run first.
     *
     * Previously the network output was computed but discarded and an empty
     * array was always returned, which made error() fail when indexing it.
     *
     * NOTE(review): the input passed to the model here is two-dimensional;
     * the answers in this thread state that LSTM input must be rank 3 and
     * suggest rnnTimeStep for recurrent prediction - confirm the intended
     * call and which part of the output is the prediction.
     */
    fun predict(x: List<Array<Double>>): Array<Double> {
        val xList = ArrayList<DoubleArray>()
        for (i in 0 until dimensions.x[1]) {
            val row = ArrayList<Double>()
            for (j in 0 until dimensions.x[2]) {
                row.add(x[j][i])
            }
            xList.add(row.toDoubleArray())
        }
        val xItem = Nd4j.create(xList.toTypedArray())
        val y = model.output(xItem)
        // dup() yields a standalone copy, so data() covers exactly this output.
        return y.dup().data().asDouble().toTypedArray()
    }
}
upd. Seems like next example have "near" task, so later I'll check it and post solution : https://github.com/deeplearning4j/dl4j-0.4-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/word2vecsentiment/Word2VecSentimentRNN.java
LSTM input/output can only be rank 3: see:
http://deeplearning4j.org/usingrnns
next to the recommendation to post this in the very active gitter and the hint of Adam to check out the great documentation, which explains how to set up the in- and output being of rank 3, I want to point out a few other things in your code, as I was struggling with similar problems:
check out the basic example here in examples/recurrent/basic/BasicRNNExample.java, here you see that for RNN you don't use model.output(xItem), but model.rnnTimeStep(xItem);
with a class count of one you seem to be performing a regression; for that, also check out the regression examples at examples/feedforward/regression/RegressionSum.java and the documentation here, where you can see that you should use "identity" as the activation function. "softmax" actually normalizes the output to sum up to one (see the glossary), so if you have just one output it will always output 1 (at least it did for my problem).
Not sure if I understand your requirements correctly, but if you want a single output (that is, to predict a number, i.e. regression), you usually go with the identity activation and the MSE loss function. You've used softmax, which is usually used in classification.