Prediction with FanChenLinSupportVectorRegression - machine-learning

I want to use FanChenLinSupportVectorRegression in Accord.NET. The predictions are correct for the training inputs, but the model doesn't work for other inputs. I don't understand my mistake.
In the example below, the first prediction is good; however, if we predict a configuration that was not learned, the prediction is always the same regardless of the inputs:
// Declare a very simple regression problem
// with only 2 input variables (x and y):
double[][] inputs =
{
new[] { 3.0, 1.0 },
new[] { 7.0, 1.0 },
new[] { 3.0, 1.0 },
new[] { 3.0, 2.0 },
new[] { 6.0, 1.0 },
};
// The task is to output a weighted sum of those numbers
// plus an independent constant term: 7.4x + 1.1y + 42
double[] outputs =
{
7.4*3.0 + 1.1*1.0 + 42.0,
7.4*7.0 + 1.1*1.0 + 42.0,
7.4*3.0 + 1.1*1.0 + 42.0,
7.4*3.0 + 1.1*2.0 + 42.0,
7.4*6.0 + 1.1*1.0 + 42.0,
};
// Create a LibSVM-based support vector regression algorithm
var teacher = new FanChenLinSupportVectorRegression<Gaussian>()
{
Tolerance = 1e-5,
// UseKernelEstimation = true,
// UseComplexityHeuristic = true
Complexity = 10000,
Kernel = new Gaussian(0.1)
};
// Use the algorithm to learn the machine
var svm = teacher.Learn(inputs, outputs);
// Get machine's predictions for inputs
double[] prediction = svm.Score(inputs);
// OK: the predictions for the training inputs are correct
double[][] inputs1 =
{
new[] { 2.0, 2.0 },
new[] { 5.0, 1.0 },
};
prediction = svm.Score(inputs1);
// These predictions are wrong! What is my mistake?
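A likely explanation, for reference: Gaussian(0.1) is an extremely narrow kernel, so k(x, z) = exp(-||x - z||^2 / (2 * 0.1^2)) is essentially zero for any test point that is not almost identical to a training point, and Score() then collapses to the machine's constant bias (threshold) term. Since the target here is linear, a Linear kernel would also fit it exactly. A minimal sketch of the kind of change that should help, assuming the same Accord.NET API as in the question:
// Sketch (untested): let the teacher estimate the kernel width and the
// complexity instead of hard-coding Gaussian(0.1) and Complexity = 10000
var teacher = new FanChenLinSupportVectorRegression<Gaussian>()
{
    Tolerance = 1e-5,
    UseKernelEstimation = true,    // estimate the Gaussian width from the data
    UseComplexityHeuristic = true  // pick Complexity automatically
};
var svm = teacher.Learn(inputs, outputs);
double[] prediction = svm.Score(inputs1); // should now vary with the inputs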

Related

Time Series Forecasting in TensorFlow.js

I'm new to this field.
I made this code, but it doesn't work well: I only get an average price, not a real forecast.
I created a 3D tensor with some previous open, high, low and close prices, with 5 time steps, and I need to forecast the next close value.
Example of input:
open, high, low, close for 5 time steps, for 75 samples
[/*samples size (75)*/
[/*timestep1*/
/* open, high, low, close*/
[1905,1906,1903,1904]
[1904,1905,1904,1906]
[1906,1907,1904,1907]
[1907,1908,1902,1905]
[1905,1906,1904,1904]
],
[/*timestep2*/
[1904,1905,1904,1906]
[1906,1907,1904,1907]
[1907,1908,1902,1905]
[1905,1906,1904,1904]
[1904,1906,1902,1903] /* this last close is the label for the first window */
],
The output is simply the close value of the time step that follows each window (the 6th step onward).
Example:
/*input*/
[/*timestep1*/
/* open, high, low, close*/
[1905,1906,1903,1904]
[1904,1905,1904,1906]
[1906,1907,1904,1907]
[1907,1908,1902,1905]
[1905,1906,1904,1904]
]
/*output*/
1903 (timestep2's last close), ...
What is wrong?
/* global tf, tfvis */
async function getData() {
// Import from CSV
const dataSet = tf.data.csv('http://localhost:8888/ts2/eurusd2.csv');
// Extract x and y values to plot
const pointsDataSet = dataSet.map(record => ({
/*date: record["<DTYYYYMMDD>"]+record["<TIME>"],*/
open: record["<OPEN>"] * 10000,
high: record["<HIGH>"] * 10000,
low: record["<LOW>"] * 10000,
close: record["<CLOSE>"] * 10000
}));
const points = await pointsDataSet.toArray();
return points;
}
function preparaDatiInput(data, time_steps) {
// e.g. 5 time steps
// the 6th is the prediction
if (data.length > time_steps) {
let arr = new Array();
for (let i = 0; i < data.length - time_steps; i++) {
arr.push(data.slice(i, i + time_steps).map(d => {
return [d.open, d.high, d.low, d.close];
}));
}
return arr;
} else
{
return false;
}
}
function preparaDatiOutput(data, time_steps) {
/* the output is always one value */
if (data.length > time_steps) {
let arr = new Array();
for (let i = time_steps; i < data.length; i++) {
arr.push(data[i].close);
}
return arr;
} else
{
return false;
}
}
async function train_data(data) {
const size = 75;
const time_steps = 5;
const input = preparaDatiInput(data.slice(0, size), time_steps);
const output = preparaDatiOutput(data.slice(0, size), time_steps);
const testing = preparaDatiInput(data.slice(size), time_steps);
const risultatiTesting = preparaDatiOutput(data.slice(size), time_steps);
/* first dimension for the 3d tensor */
const trainingData = tf.tensor3d(input, [input.length, input[0].length, input[0][0].length]);
const outputData = tf.tensor1d(output);
const testingData = tf.tensor3d(testing, [testing.length, testing[0].length, testing[0][0].length]);
const trainingDataMax = trainingData.max();
const trainingDataMin = trainingData.min();
// note: min and max are swapped in the four lines below (the improved version further down fixes this)
const testingDataMax = testingData.min();
const testingDataMin = testingData.max();
const outputDataMax = outputData.min();
const outputDataMin = outputData.max();
const normalizedTrainingData = trainingData.sub(trainingDataMin).div(trainingDataMax.sub(trainingDataMin));
const normalizedTestingData = testingData.sub(testingDataMin).div(testingDataMax.sub(testingDataMin));
const normalizedOutputData = outputData.sub(outputDataMin).div(outputDataMax.sub(outputDataMin));
const model = tf.sequential();
/* time_steps, features */
model.add(tf.layers.lstm({units: 20, inputShape: [5, 4], returnSequences: false}));
/* 1 output */
model.add(tf.layers.dense({units: 1, activation: 'sigmoid'}));
model.summary();
const sgdoptimizer = tf.train.adam(0.03);
model.compile({
optimizer: sgdoptimizer,
loss: tf.losses.meanSquaredError,
metrics: ["mse"]
});
console.log('......Loss History.......');
for (let i = 0; i < 10; i++) {
let res = await model.fit(normalizedTrainingData, normalizedOutputData, {epochs: 10});
console.log(`Iteration ${i}: ${res.history.loss[0]}`);
}
console.log('....Model Prediction .....');
const preds = model.predict(normalizedTestingData);
const unNormPreds = preds
.mul(outputDataMax.sub(outputDataMin))
.add(outputDataMin).dataSync();
console.log(unNormPreds);
const risultati_veri = risultatiTesting.map((d, i) => {
return {
x: i, y: d
};
});
const previsioni = Array.from(unNormPreds).map((d, i) => {
return {
x: i, y: d
};
});
tfvis.render.linechart(
{name: 'Model Predictions vs Original Data'},
{values: [risultati_veri, previsioni], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
}
async function main() {
const data = await getData();
await train_data(data);
}
main();
eurusd2.csv example:
<TICKER>,<DTYYYYMMDD>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>
EURUSD,20010102,230100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,230200,0.9506,0.9506,0.9505,0.9505,4
EURUSD,20010102,230300,0.9505,0.9507,0.9505,0.9506,4
EURUSD,20010102,230400,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230500,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230600,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230700,0.9505,0.9507,0.9505,0.9507,4
EURUSD,20010102,230800,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,230900,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231100,0.9507,0.9507,0.9506,0.9507,4
EURUSD,20010102,231200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231600,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010102,232000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233100,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,233500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233600,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233700,0.9507,0.9508,0.9507,0.9508,4
EURUSD,20010102,233800,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,233900,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,234000,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,234100,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234400,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234500,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234700,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,234900,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235000,0.9507,0.9508,0.9506,0.9506,4
EURUSD,20010102,235100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,235200,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,235300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235400,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010102,235500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235600,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235700,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235800,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235900,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000000,0.9506,0.9507,0.9506,0.9507,4
EURUSD,20010103,000100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000500,0.9507,0.9507,0.9506,0.9507,4
EURUSD,20010103,000600,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000700,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,000800,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000900,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001200,0.9506,0.9506,0.9505,0.9506,4
EURUSD,20010103,001300,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001400,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001500,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001700,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001800,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001900,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002000,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002200,0.9506,0.9507,0.9506,0.9507,4
EURUSD,20010103,002300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,002400,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,002500,0.9508,0.9510,0.9508,0.9510,4
EURUSD,20010103,002600,0.9510,0.9510,0.9509,0.9509,4
EURUSD,20010103,002700,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010103,002800,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010103,002900,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,003000,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,003100,0.9507,0.9507,0.9507,0.9507,4
There are all kinds of things you can do in this space (TensorFlow & Time Series Analysis). Here is some sample code to help you get going.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from yahoo_fin import stock_info as si
from collections import deque
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
import random
# set seed, so we can get the same results after rerunning several times
np.random.seed(314)
tf.random.set_seed(314)
random.seed(314)
def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1,
test_size=0.2, feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
# see if ticker is already a loaded stock from yahoo finance
if isinstance(ticker, str):
# load it from yahoo_fin library
df = si.get_data(ticker)
elif isinstance(ticker, pd.DataFrame):
# already loaded, use it directly
df = ticker
# this will contain all the elements we want to return from this function
result = {}
# we will also return the original dataframe itself
result['df'] = df.copy()
# make sure that the passed feature_columns exist in the dataframe
for col in feature_columns:
assert col in df.columns, f"'{col}' does not exist in the dataframe."
if scale:
column_scaler = {}
# scale the data (prices) from 0 to 1
for column in feature_columns:
scaler = preprocessing.MinMaxScaler()
df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
column_scaler[column] = scaler
# add the MinMaxScaler instances to the result returned
result["column_scaler"] = column_scaler
# add the target column (label) by shifting by `lookup_step`
df['future'] = df['adjclose'].shift(-lookup_step)
# the last `lookup_step` rows contain NaN in the `future` column
# get them before dropping NaNs
last_sequence = np.array(df[feature_columns].tail(lookup_step))
# drop NaNs
df.dropna(inplace=True)
sequence_data = []
sequences = deque(maxlen=n_steps)
for entry, target in zip(df[feature_columns].values, df['future'].values):
sequences.append(entry)
if len(sequences) == n_steps:
sequence_data.append([np.array(sequences), target])
# get the last sequence by appending the last `n_step` sequence with `lookup_step` sequence
# for instance, if n_steps=50 and lookup_step=10, last_sequence should be of 59 (that is 50+10-1) length
# this last_sequence will be used to predict in future dates that are not available in the dataset
last_sequence = list(sequences) + list(last_sequence)
# shift the last sequence by -1
last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
# add to result
result['last_sequence'] = last_sequence
# construct the X's and y's
X, y = [], []
for seq, target in sequence_data:
X.append(seq)
y.append(target)
# convert to numpy arrays
X = np.array(X)
y = np.array(y)
# reshape X to fit the neural network
X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))
# split the dataset
result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(X, y, test_size=test_size, shuffle=shuffle)
# return the result
return result
def create_model(sequence_length, units=256, cell=LSTM, n_layers=2, dropout=0.3,
loss="mean_absolute_error", optimizer="rmsprop", bidirectional=False):
model = Sequential()
for i in range(n_layers):
if i == 0:
# first layer
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=True), input_shape=(None, sequence_length)))
else:
model.add(cell(units, return_sequences=True, input_shape=(None, sequence_length)))
elif i == n_layers - 1:
# last layer
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=False)))
else:
model.add(cell(units, return_sequences=False))
else:
# hidden layers
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=True)))
else:
model.add(cell(units, return_sequences=True))
# add dropout after each layer
model.add(Dropout(dropout))
model.add(Dense(1, activation="linear"))
model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)
return model
# Window size or the sequence length
N_STEPS = 100
# Lookup step, 1 is the next day
LOOKUP_STEP = 1
# test ratio size, 0.2 is 20%
TEST_SIZE = 0.2
# features to use
FEATURE_COLUMNS = ["adjclose", "volume", "open", "high", "low"]
# date now
date_now = time.strftime("%Y-%m-%d")
### model parameters
N_LAYERS = 3
# LSTM cell
CELL = LSTM
# 256 LSTM neurons
UNITS = 256
# 40% dropout
DROPOUT = 0.4
# whether to use bidirectional RNNs
BIDIRECTIONAL = False
### training parameters
# mean absolute error loss
# LOSS = "mae"
# huber loss
LOSS = "huber_loss"
OPTIMIZER = "adam"
BATCH_SIZE = 64
EPOCHS = 100
# Apple stock market
ticker = "AAPL"
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
# model name to save, making it as unique as possible based on parameters
model_name = f"{date_now}_{ticker}-{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
if BIDIRECTIONAL:
model_name += "-b"
# create these folders if they do not exist
if not os.path.isdir("results"):
os.mkdir("results")
if not os.path.isdir("logs"):
os.mkdir("logs")
if not os.path.isdir("data"):
os.mkdir("data")
# load the data
data = load_data(ticker, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE, feature_columns=FEATURE_COLUMNS)
# save the dataframe
data["df"].to_csv(ticker_data_filename)
# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)
# some tensorflow callbacks
checkpointer = ModelCheckpoint(os.path.join("results", model_name + ".h5"), save_weights_only=True, save_best_only=True, verbose=1)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))
history = model.fit(data["X_train"], data["y_train"],
batch_size=BATCH_SIZE,
epochs=EPOCHS,
validation_data=(data["X_test"], data["y_test"]),
callbacks=[checkpointer, tensorboard],
verbose=1)
model.save(os.path.join("results", model_name) + ".h5")
# after the model ends running...or during training, run this
# tensorboard --logdir="logs"
# http://localhost:6006/
data = load_data(ticker, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE,
feature_columns=FEATURE_COLUMNS, shuffle=False)
# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)
model_path = os.path.join("results", model_name) + ".h5"
model.load_weights(model_path)
# evaluate the model
mse, mae = model.evaluate(data["X_test"], data["y_test"], verbose=0)
# calculate the mean absolute error (inverse scaling)
mean_absolute_error = data["column_scaler"]["adjclose"].inverse_transform([[mae]])[0][0]
print("Mean Absolute Error:", mean_absolute_error)
def predict(model, data, classification=False):
# retrieve the last sequence from data
last_sequence = data["last_sequence"][:N_STEPS]
# retrieve the column scalers
column_scaler = data["column_scaler"]
# reshape the last sequence
last_sequence = last_sequence.reshape((last_sequence.shape[1], last_sequence.shape[0]))
# expand dimension
last_sequence = np.expand_dims(last_sequence, axis=0)
# get the prediction (scaled from 0 to 1)
prediction = model.predict(last_sequence)
# get the price (by inverting the scaling)
predicted_price = column_scaler["adjclose"].inverse_transform(prediction)[0][0]
return predicted_price
# predict the future price
future_price = predict(model, data)
print(f"Future price after {LOOKUP_STEP} days is {future_price:.2f}$")
# Result:
Mean Absolute Error: 3.4357253022539096
Future price after 1 days is 311.41$
def plot_graph(model, data):
y_test = data["y_test"]
X_test = data["X_test"]
y_pred = model.predict(X_test)
y_test = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(np.expand_dims(y_test, axis=0)))
y_pred = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(y_pred))
# last 200 days, feel free to edit that
plt.plot(y_test[-200:], c='b')
plt.plot(y_pred[-200:], c='r')
plt.xlabel("Days")
plt.ylabel("Price")
plt.legend(["Actual Price", "Predicted Price"])
plt.show()
plot_graph(model, data)
Run through 100 iterations...
Epoch 99/100
7872/7885 [============================>.] - ETA: 0s - loss: 1.0276e-04 - mean_absolute_error: 0.0086
Epoch 00099: val_loss did not improve from 0.00002
7885/7885 [==============================] - 11s 1ms/sample - loss: 1.0276e-04 - mean_absolute_error: 0.0086 - val_loss: 3.8095e-05 - val_mean_absolute_error: 0.0057
Epoch 100/100
7872/7885 [============================>.] - ETA: 0s - loss: 1.1034e-04 - mean_absolute_error: 0.0086
Epoch 00100: val_loss did not improve from 0.00002
7885/7885 [==============================] - 11s 1ms/sample - loss: 1.1040e-04 - mean_absolute_error: 0.0086 - val_loss: 2.9450e-05 - val_mean_absolute_error: 0.0035
Finally, you get a chart of actual vs. predicted prices from plot_graph.
Again, you can go in many different directions with this!
I am improving my code by doing this:
/* global tf, tfvis */
async function getData() {
//QOUA4VUTZJXS3M01
return new Promise((resolve, reject) => {
//const url='https://www.alphavantage.co/query?function=FX_INTRADAY&from_symbol=EUR&to_symbol=USD&interval=1min&outputsize=full&apikey=QOUA4VUTZJXS3M01';
const url = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=QOUA4VUTZJXS3M01';
$.getJSON(url, function (data) {
let rawData = Object.values(data["Time Series (Daily)"]).map(d => ({open: parseFloat(d["1. open"]), high: parseFloat(d["2. high"]), low: parseFloat(d["3. low"]), close: parseFloat(d["4. close"])}));
resolve(rawData.reverse());
});
});
}
function prepareInputDatas(data, time_steps) {
/* if the data length is greater than time_steps */
if (data.length > time_steps) {
/* indicator examples */
/*
let rsi = RSI.calculate({period: time_steps, values: data.map(d => d.close)});
let sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});
for (let i = 0; i < data.length; i++) {
data[i].sma = 0;
}
let d = 0;
for (let i = time_steps - 1; i < data.length; i++) {
data[i].sma = sma[d];
d++;
}
for (let i = 1; i < data.length; i++) {
if (data[i].close > data[i - 1].close) {
data[i].ind = 1;
} else if (data[i].close < data[i - 1].close) {
data[i].ind = 0;
} else {
data[i].ind = 0.5;
}
}
*/
let arr = new Array();
for (let i = 0; i < data.length - time_steps; i++) {
/*let sma = SMA.calculate({period: time_steps, values: data.slice(i, i + time_steps).map(d => d.close)})[0];*/
/* create the training or testing array, with x values (features) and batch size (batch size is the samples' first dimension of array) */
arr.push(data.slice(i, i + time_steps).map(d => {
return [d.open, d.high, d.low, d.close /*,d.sma*/];
}));
}
return arr;
} else
{
return false;
}
}
function prepareOutputDatas(data, time_steps) {
if (data.length > time_steps) {
let arr = new Array();
/* create output training set (or testing values) (y values) */
for (let i = time_steps; i < data.length; i++) {
arr.push(data[i].close);
}
return arr;
} else
{
return false;
}
}
function prepareInputTestingDatas(data, time_steps) {
/* if the data length is greater than time_steps */
if (data.length > time_steps) {
/* indicator examples */
/*
let rsi = RSI.calculate({period: time_steps, values: data.map(d => d.close)});
let sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});
for (let i = 0; i < data.length; i++) {
data[i].sma = 0;
}
let d = 0;
for (let i = time_steps - 1; i < data.length; i++) {
data[i].sma = sma[d];
d++;
}
for (let i = 1; i < data.length; i++) {
if (data[i].close > data[i - 1].close) {
data[i].ind = 1;
} else if (data[i].close < data[i - 1].close) {
data[i].ind = 0;
} else {
data[i].ind = 0.5;
}
}
*/
let arr = new Array();
for (let i = 0; i <= data.length - time_steps; i++) {
/*let sma = SMA.calculate({period: time_steps, values: data.slice(i, i + time_steps).map(d => d.close)})[0];*/
/* create the training or testing array, with x values (features) and batch size (batch size is the samples' first dimension of array) */
arr.push(data.slice(i, i + time_steps).map(d => {
return [d.open, d.high, d.low, d.close /*,d.sma*/];
}));
}
return arr;
} else
{
return false;
}
}
function prepareOutputTestingDatas(data, time_steps) {
if (data.length > time_steps) {
let arr = new Array();
/* create output training set (or testing values) (y values) */
for (let i = time_steps; i <= data.length; i++) {
if (data[i]) {
arr.push(data[i].close);
}
}
return arr;
} else
{
return false;
}
}
async function train_data(data) {
/* sometimes Chrome crashes and you need to open a new window */
const size = Math.floor(data.length / 100 * 98);
const time_steps = 30;
const predict_size = data.length - size;
const start = data.length - size - predict_size;
const input = prepareInputDatas(data.slice(start, start + size), time_steps);
const output = prepareOutputDatas(data.slice(start, start + size), time_steps);
const testing = prepareInputTestingDatas(data.slice(start + size, start + size + predict_size), time_steps);
const testingResults = prepareOutputTestingDatas(data.slice(start + size, start + size + predict_size), time_steps);
/* Creating tensors (input 3d tensor, and output 1d tensor) */
const input_size_2 = input[0].length;
const input_size = input[0][0].length;
const trainingData = tf.tensor3d(input, [input.length, input_size_2, input_size]);
const outputData = tf.tensor1d(output);
const testing_size_2 = testing[0].length;
const testing_size = testing[0][0].length;
const testingData = tf.tensor3d(testing, [testing.length, testing_size_2, testing_size]);
const outputTestingData = tf.tensor1d(testingResults);
/* normalizing data */
const trainingDataMax = trainingData.max();
const trainingDataMin = trainingData.min();
const testingDataMax = testingData.max();
const testingDataMin = testingData.min();
const outputDataMax = outputData.max();
const outputDataMin = outputData.min();
const outputTestingDataMax = outputTestingData.max();
const outputTestingDataMin = outputTestingData.min();
const normalizedTrainingData = trainingData.sub(trainingDataMin).div(trainingDataMax.sub(trainingDataMin));
const normalizedTestingData = testingData.sub(testingDataMin).div(testingDataMax.sub(testingDataMin));
const normalizedOutputData = outputData.sub(outputDataMin).div(outputDataMax.sub(outputDataMin));
const normalizedTestingOutputData = outputTestingData.sub(outputTestingDataMin).div(outputTestingDataMax.sub(outputTestingDataMin));
/* creating model */
const model = tf.sequential();
model.add(tf.layers.lstm({inputShape: [input_size_2, input_size], units: input_size_2, returnSequences: false}));
/* optional hidden layer (not needed because this is a linear operation (regression)) */
//model.add(tf.layers.lstm({units: Math.floor(input_size_2/2), returnSequences: false}));
model.add(tf.layers.dense({units: 1, activation: "sigmoid"}));
model.summary();
/* setting training */
const learningRate = 0.01;
/* selecting the best training optimizer */
const optimizer = tf.train.rmsprop(learningRate, 0.95);
/* compiling model with optimizer, loss and metrics */
model.compile({
optimizer: optimizer,
loss: tf.losses.meanSquaredError,
metrics: tf.metrics.meanAbsoluteError
});
/* training ... */
console.log('Loss Log');
for (let i = 0; i < 25; i++) {
let res = await model.fit(normalizedTrainingData, normalizedOutputData, {epochs: 1});
console.log(`Iteration ${i + 1}: ${res.history.loss[0] }`);
}
/* training prediction (validation) */
const validation = model.predict(normalizedTrainingData);
const unNormValidation = validation
.mul(outputDataMax.sub(outputDataMin))
.add(outputDataMin).dataSync();
const trainingResults = output.map((d, i) => {
if (d) {
return {
x: i, y: d
};
}
});
const trainingValidation = Array.from(unNormValidation).map((d, i) => {
if (d) {
return {
x: i, y: d
};
}
});
/* creating training chart */
tfvis.render.linechart(
{name: 'Validation Results'},
{values: [trainingResults, trainingValidation], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
/* predicting */
console.log('Real prediction');
const preds = model.predict(normalizedTestingData);
const unNormPredictions = preds
.mul(outputTestingDataMax.sub(outputTestingDataMin))
.add(outputTestingDataMin).dataSync();
const realResults = testingResults.map((d, i) => {
if (d) {
return {
x: i, y: d.toFixed(4)
};
}
});
const predictions = Array.from(unNormPredictions).map((d, i) => {
if (d) {
return {
x: i, y: d.toFixed(4)
};
}
});
console.log("INPUT",testing);
console.log("OUTPUT",realResults);
console.log("PREDICTIONS",predictions);
/* creating prediction chart */
tfvis.render.linechart(
{name: 'Real Predictions'},
{values: [realResults, predictions], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
}
async function main() {
const data = await getData();
await train_data(data);
}
main();
and I'm hoping for good results.
I could add some technical indicators now, but I still don't know how the LSTM handles the "features" dimension of the input array.
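On that last point, a short sketch (shapes invented for illustration, not taken from the code above) of how tf.layers.lstm consumes the three dimensions of its [samples, timeSteps, features] input:
/* global tf */
// 2 samples, 5 time steps, 4 features (e.g. open, high, low, close)
const x = tf.randomNormal([2, 5, 4]);
const model = tf.sequential();
// the LSTM reads one 4-element feature vector per time step and carries
// its internal state across the 5 steps of each sample
model.add(tf.layers.lstm({units: 8, inputShape: [5, 4]}));
model.add(tf.layers.dense({units: 1}));
model.predict(x).print(); // shape [2, 1]: one prediction per sample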

Why concatenate features in machine learning?

I am learning the Microsoft ML.NET framework and am confused about why features need to be concatenated. In the Iris flower example from Microsoft here:
https://learn.microsoft.com/en-us/dotnet/machine-learning/tutorials/iris-clustering
... features are concatenated:
string featuresColumnName = "Features";
var pipeline = mlContext.Transforms
.Concatenate(featuresColumnName, "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
...
Are multiple features treated as a single feature in order to do calculations like linear regression? If so, how is this accurate? What is happening behind the scenes?
According to the official documentation,
concatenation is necessary because trainers take feature vectors as
inputs.
It essentially transforms the features in the form of separate columns into a single column of feature vectors. The feature values themselves remain intact; only their format and type are changed. This is clearer with an example:
Before transformation:
var samples = new List<InputData>()
{
new InputData(){ Feature1 = 0.1f, Feature2 = new[]{ 1.1f, 2.1f, 3.1f }, Feature3 = 1 },
new InputData(){ Feature1 = 0.2f, Feature2 = new[]{ 1.2f, 2.2f, 3.2f }, Feature3 = 2 },
new InputData(){ Feature1 = 0.3f, Feature2 = new[]{ 1.3f, 2.3f, 3.3f }, Feature3 = 3 },
new InputData(){ Feature1 = 0.4f, Feature2 = new[]{ 1.4f, 2.4f, 3.4f }, Feature3 = 4 },
new InputData(){ Feature1 = 0.5f, Feature2 = new[]{ 1.5f, 2.5f, 3.5f }, Feature3 = 5 },
new InputData(){ Feature1 = 0.6f, Feature2 = new[]{ 1.6f, 2.6f, 3.6f }, Feature3 = 6 },
};
After:
// "Features" column obtained post-transformation.
// 0.1 1.1 2.1 3.1 1
// 0.2 1.2 2.2 3.2 2
// 0.3 1.3 2.3 3.3 3
// 0.4 1.4 2.4 3.4 4
// 0.5 1.5 2.5 3.5 5
// 0.6 1.6 2.6 3.6 6
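For completeness, a minimal sketch of the call that produces that "Features" column (assuming the standard ML.NET API and the InputData class from the sample above):
var mlContext = new MLContext();
var data = mlContext.Data.LoadFromEnumerable(samples);
// concatenate the three columns into one vector-valued "Features" column
var transformer = mlContext.Transforms
    .Concatenate("Features", "Feature1", "Feature2", "Feature3")
    .Fit(data);
var transformed = transformer.Transform(data);
// each "Features" row is now a single vector of length 5:
// [Feature1, Feature2[0], Feature2[1], Feature2[2], Feature3]
The trainer (for example, the clustering trainer in the tutorial) then consumes that one vector column as its input.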

Deeplearning4j xor example

I'm trying to train an XOR network with Deeplearning4j, but I don't think I've really understood how to use the DataSet.
I wanted to create a NN with two inputs, two hidden neurons and one output neuron.
Here is what I have:
package org.deeplearning4j.examples.xor;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
public class XorExample {
public static void main(String[] args) {
INDArray input = Nd4j.zeros(4, 2);
INDArray labels = Nd4j.zeros(4, 1);
input.putScalar(new int[] { 0, 0 }, 0);
input.putScalar(new int[] { 0, 1 }, 0);
input.putScalar(new int[] { 1, 0 }, 1);
input.putScalar(new int[] { 1, 1 }, 0);
input.putScalar(new int[] { 2, 0 }, 0);
input.putScalar(new int[] { 2, 1 }, 1);
input.putScalar(new int[] { 3, 0 }, 1);
input.putScalar(new int[] { 3, 1 }, 1);
labels.putScalar(new int[] { 0, 0 }, 0);
labels.putScalar(new int[] { 1, 0 }, 1);
labels.putScalar(new int[] { 2, 0 }, 1);
labels.putScalar(new int[] { 3, 0 }, 0);
DataSet ds = new DataSet(input,labels);
//Set up network configuration:
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.learningRate(0.1)
.list(2)
.layer(0, new GravesLSTM.Builder().nIn(2).nOut(2)
.updater(Updater.RMSPROP)
.activation("tanh").weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build())
.layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation("softmax") //MCXENT + softmax for classification
.updater(Updater.RMSPROP)
.nIn(2).nOut(1).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build())
.pretrain(false).backprop(true)
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setListeners(new ScoreIterationListener(1));
//Print the number of parameters in the network (and for each layer)
Layer[] layers = net.getLayers();
int totalNumParams = 0;
for( int i=0; i<layers.length; i++ ){
int nParams = layers[i].numParams();
System.out.println("Number of parameters in layer " + i + ": " + nParams);
totalNumParams += nParams;
}
System.out.println("Total number of network parameters: " + totalNumParams);
net.fit(ds);
Evaluation eval = new Evaluation(3);
INDArray output = net.output(ds.getFeatureMatrix());
eval.eval(ds.getLabels(), output);
System.out.println(eval.stats());
}
}
The output looks like this:
Mär 20, 2016 7:03:06 PM com.github.fommil.jni.JniLoader liberalLoad
INFORMATION: successfully loaded C:\Users\LuckyPC\AppData\Local\Temp\jniloader5209513403648831212netlib-native_system-win-x86_64.dll
Number of parameters in layer 0: 46
Number of parameters in layer 1: 3
Total number of network parameters: 49
o.d.o.s.BaseOptimizer - Objective function automatically set to minimize. Set stepFunction in neural net configuration to change default settings.
o.d.o.l.ScoreIterationListener - Score at iteration 0 is 0.6931495070457458
Exception in thread "main" java.lang.IllegalArgumentException: Unable to getFloat row of non 2d matrix
at org.nd4j.linalg.api.ndarray.BaseNDArray.getRow(BaseNDArray.java:3640)
at org.deeplearning4j.eval.Evaluation.eval(Evaluation.java:107)
at org.deeplearning4j.examples.xor.XorExample.main(XorExample.java:80)
Here is a solution I came up with.
public static void main(String[] args) throws IOException, InterruptedException {
CSVDataSet dataSet = new CSVDataSet(new File("./train.csv"));
CSVDataSetIterator trainingSetIterator = new CSVDataSetIterator(dataSet, dataSet.size());
MultiLayerConfiguration configuration = new NeuralNetConfiguration.Builder()
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1)).iterations(1150)
.learningRate(1).seed(1)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
.list(2)
.backprop(true).pretrain(false)
.layer(0, new DenseLayer.Builder().nIn(2).nOut(3).updater(Updater.SGD).build())
.layer(1, new OutputLayer.Builder().nIn(3).nOut(1).build()).build();
MultiLayerNetwork network = new MultiLayerNetwork(configuration);
network.setListeners(new HistogramIterationListener(10), new ScoreIterationListener(100));
network.init();
long start = System.currentTimeMillis();
network.fit(trainingSetIterator);
System.out.println(System.currentTimeMillis() - start);
try(DataOutputStream dos = new DataOutputStream(Files.newOutputStream(Paths.get("xor-coefficients.bin")))){
Nd4j.write(network.params(), dos);
}
FileUtils.write(new File("xor-network-conf.json"), network.getLayerWiseConfigurations().toJson());
}
To test:
MultiLayerConfiguration configuration = MultiLayerConfiguration.fromJson(FileUtils.readFileToString(new File("xor-network-conf.json")));
try (DataInputStream dis = new DataInputStream(new FileInputStream("xor-coefficients.bin"))) {
INDArray parameters = Nd4j.read(dis);
MultiLayerNetwork network = new MultiLayerNetwork(configuration, parameters);
network.init();
List<INDArray> inputs = ImmutableList.of(Nd4j.create(new double[]{1, 0}),
Nd4j.create(new double[]{0, 1}),
Nd4j.create(new double[]{1, 1}),
Nd4j.create(new double[]{0, 0}));
List<INDArray> networkResults = inputs.stream().map(network::output).collect(toList());
System.out.println(networkResults);
}
}
with training data:
0,1,1
1,0,1
1,1,0
0,0,0
I believe there is an XOR example straight from their Git repository!
The code is well documented and you can find it in the repository here: https://github.com/deeplearning4j/dl4j-0.4-examples.git

LinearRegressionWithSGD() returns NaN

I am trying to use LinearRegressionWithSGD on the Million Song Dataset and my model returns NaNs as weights and 0.0 as the intercept. What might be causing this? I am using Spark 1.4.0 in standalone mode.
Sample data: http://www.filedropper.com/part-00000
Here is my full code:
// Import Dependencies
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.mllib.regression.LinearRegressionModel
import org.apache.spark.mllib.regression.GeneralizedLinearAlgorithm
import org.apache.spark.mllib.regression.LinearRegressionWithSGD
// Define RDD
val data = sc.textFile("/home/naveen/Projects/millionSong/YearPredictionMSD.txt")
// Convert to Labelled Point
def parsePoint (line: String): LabeledPoint = {
val x = line.split(",")
val head = x.head.toDouble
val tail = Vectors.dense(x.tail.map(x => x.toDouble))
return LabeledPoint(head,tail)
}
// Find Range
val parsedDataInit = data.map(x => parsePoint(x))
val onlyLabels = parsedDataInit.map(x => x.label)
val minYear = onlyLabels.min()
val maxYear = onlyLabels.max()
// Shift Labels
val parsedData = parsedDataInit.map(x => LabeledPoint(x.label - minYear, x.features))
// Training, validation, and test sets
val splits = parsedData.randomSplit(Array(0.8, 0.1, 0.1), seed = 123)
val parsedTrainData = splits(0).cache()
val parsedValData = splits(1).cache()
val parsedTestData = splits(2).cache()
val nTrain = parsedTrainData.count()
val nVal = parsedValData.count()
val nTest = parsedTestData.count()
// RMSE
def squaredError(label: Double, prediction: Double): Double = {
return scala.math.pow(label - prediction,2)
}
def calcRMSE(labelsAndPreds: RDD[List[Double]]): Double = {
return scala.math.sqrt(labelsAndPreds.map(x => squaredError(x(0), x(1))).mean())
}
val numIterations = 100
val stepSize = 1.0
val regParam = 0.01
val regType = "L2"
val algorithm = new LinearRegressionWithSGD()
algorithm.optimizer
.setNumIterations(numIterations)
.setStepSize(stepSize)
.setRegParam(regParam)
val model = algorithm.run(parsedTrainData)
I am not familiar with this specific implementation of SGD, but generally, if a gradient descent solver goes to NaN, it means that the learning rate is too big (in this case, I think it is the stepSize variable).
Try lowering it by an order of magnitude each time until it starts to converge.
I can think of two possibilities.
stepSize is too big. You should try something like 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, ...
Your training data may contain NaNs. If so, the result will likely be NaN.
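A sketch combining both suggestions, using the same Spark 1.x MLlib API and variables as the question (the StandardScaler import is the only new piece); feature scaling matters here because the 90 MSD features have very different ranges:
import org.apache.spark.mllib.feature.StandardScaler
// standardize the features to zero mean / unit variance so a single
// step size works across all columns
val scaler = new StandardScaler(withMean = true, withStd = true)
  .fit(parsedTrainData.map(_.features))
val scaledTrain = parsedTrainData.map(p => LabeledPoint(p.label, scaler.transform(p.features)))
// sweep the step size downward until the weights stop diverging to NaN
for (step <- Seq(1.0, 0.3, 0.1, 0.03, 0.01)) {
  val algorithm = new LinearRegressionWithSGD()
  algorithm.optimizer
    .setNumIterations(numIterations)
    .setStepSize(step)
    .setRegParam(regParam)
  val model = algorithm.run(scaledTrain)
  println(s"stepSize=$step weights=${model.weights}")
}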

How to get a prediction using Torch7

I'm still familiarizing myself with Torch, and so far, so good. However, I have hit a dead end that I'm not sure how to get around: how can I get Torch7 (or, more specifically, the dp library) to evaluate a single input and return the predicted output?
Here's my setup (basically the dp demo):
require 'dp'
--[[hyperparameters]]--
opt = {
nHidden = 100, --number of hidden units
learningRate = 0.1, --training learning rate
momentum = 0.9, --momentum factor to use for training
maxOutNorm = 1, --maximum norm allowed for output neuron weights
batchSize = 128, --number of examples per mini-batch
maxTries = 100, --maximum number of epochs without reduction in validation error.
maxEpoch = 1000 --maximum number of epochs of training
}
--[[data]]--
datasource = dp.Mnist{input_preprocess = dp.Standardize()}
print("feature size: ", datasource:featureSize())
--[[Model]]--
model = dp.Sequential{
models = {
dp.Neural{
input_size = datasource:featureSize(),
output_size = opt.nHidden,
transfer = nn.Tanh(),
sparse_init = true
},
dp.Neural{
input_size = opt.nHidden,
output_size = #(datasource:classes()),
transfer = nn.LogSoftMax(),
sparse_init = true
}
}
}
--[[Propagators]]--
train = dp.Optimizer{
loss = dp.NLL(),
visitor = { -- the ordering here is important:
dp.Momentum{momentum_factor = opt.momentum},
dp.Learn{learning_rate = opt.learningRate},
dp.MaxNorm{max_out_norm = opt.maxOutNorm}
},
feedback = dp.Confusion(),
sampler = dp.ShuffleSampler{batch_size = opt.batchSize},
progress = true
}
valid = dp.Evaluator{
loss = dp.NLL(),
feedback = dp.Confusion(),
sampler = dp.Sampler{}
}
test = dp.Evaluator{
loss = dp.NLL(),
feedback = dp.Confusion(),
sampler = dp.Sampler{}
}
--[[Experiment]]--
xp = dp.Experiment{
model = model,
optimizer = train,
validator = valid,
tester = test,
observer = {
dp.FileLogger(),
dp.EarlyStopper{
error_report = {'validator','feedback','confusion','accuracy'},
maximize = true,
max_epochs = opt.maxTries
}
},
random_seed = os.time(),
max_epoch = opt.maxEpoch
}
xp:run(datasource)
You have two options.
One. Use the encapsulated nn.Module to forward your torch.Tensor:
mlp = model:toModule(datasource:trainSet():sub(1,2))
mlp:float()
input = torch.FloatTensor(1, 1, 32, 32) -- replace this with your input
output = mlp:forward(input)
Two. Encapsulate your torch.Tensor into a dp.ImageView and forward that through your dp.Model:
input = torch.FloatTensor(1, 1, 32, 32) -- replace with your input
inputView = dp.ImageView('bchw', input)
outputView = model:forward(inputView, dp.Carry{nSample=1})
output = outputView:forward('b')
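In either case, the model above ends in nn.LogSoftMax, so the forwarded output is a row of log-probabilities over the 10 MNIST classes. A small sketch (assuming the `output` tensor from option one) of turning that into a predicted class:
-- `output` is a 1x10 tensor of log-probabilities (one column per class)
local logProbs, classIndex = output:max(2) -- max over the class dimension
print('predicted class:', classIndex[1][1])
print('log-probability:', logProbs[1][1])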
