I'm new to this field.
I wrote this code, but it doesn't work well: I only get back something like an average price, not a real forecast.
I created a 3-D tensor with previous open, high, low and close prices, using 5 time steps, and I need to forecast the next close value.
Example of input:
open, high, low, close for 5 time steps, for 75 samples
[ /* 75 samples */
    [ /* sample 1 (5 time steps) */
        /* open, high, low, close */
        [1905,1906,1903,1904],
        [1904,1905,1904,1906],
        [1906,1907,1904,1907],
        [1907,1908,1902,1905],
        [1905,1906,1904,1904]
    ],
    [ /* sample 2 */
        [1904,1905,1904,1906],
        [1906,1907,1904,1907],
        [1907,1908,1902,1905],
        [1905,1906,1904,1904],
        [1904,1906,1902,1903] /* <- this close is the label for sample 1 */
    ],
    ...
The output is simply the close value of the 6th time step (and so on: each window's label is the close that follows it).
Example:
/* input */
[ /* sample 1 */
    /* open, high, low, close */
    [1905,1906,1903,1904],
    [1904,1905,1904,1906],
    [1906,1907,1904,1907],
    [1907,1908,1902,1905],
    [1905,1906,1904,1904]
]
/* output */
1903 (the last close of sample 2), ...
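As a sanity check, the tensor built from that nested array should have shape [samples, time steps, features]. A minimal sketch (`windows` is just a placeholder name for the nested array above):
const t = tf.tensor3d(windows);
console.log(t.shape); // should print [75, 5, 4]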
What is wrong?
/* global tf, tfvis */
async function getData() {
// Import from CSV
const dataSet = tf.data.csv('http://localhost:8888/ts2/eurusd2.csv');
// Extract x and y values to plot
const pointsDataSet = dataSet.map(record => ({
/*date: record["<DTYYYYMMDD>"] + record["<TIME>"],*/
open: record["<OPEN>"] * 10000,
high: record["<HIGH>"] * 10000,
low: record["<LOW>"] * 10000,
close: record["<CLOSE>"] * 10000
}));
const points = await pointsDataSet.toArray();
return points;
}
function preparaDatiInput(data, time_steps) {
    /* e.g. 5 time steps; the 6th close is the value to predict */
    if (data.length > time_steps) {
        let arr = [];
        for (let i = 0; i < data.length - time_steps; i++) {
            arr.push(data.slice(i, i + time_steps).map(d => {
                return [d.open, d.high, d.low, d.close];
            }));
        }
        return arr;
    } else {
        return false;
    }
}
function preparaDatiOutput(data, time_steps) {
    /* the output is always a single value */
    if (data.length > time_steps) {
        let arr = [];
        for (let i = time_steps; i < data.length; i++) {
            arr.push(data[i].close);
        }
        return arr;
    } else {
        return false;
    }
}
async function train_data(data) {
const size = 75;
const time_steps = 5;
const input = preparaDatiInput(data.slice(0, size), time_steps);
const output = preparaDatiOutput(data.slice(0, size), time_steps);
const testing = preparaDatiInput(data.slice(size), time_steps);
const risultatiTesting = preparaDatiOutput(data.slice(size), time_steps);
/* the first dimension of a 3-D tensor is the sample count */
const trainingData = tf.tensor3d(input, [input.length, input[0].length, input[0][0].length]);
const outputData = tf.tensor1d(output);
const testingData = tf.tensor3d(testing, [testing.length, testing[0].length, testing[0][0].length]);
const trainingDataMax = trainingData.max();
const trainingDataMin = trainingData.min();
const testingDataMax = testingData.min();
const testingDataMin = testingData.max();
const outputDataMax = outputData.min();
const outputDataMin = outputData.max();
const normalizedTrainingData = trainingData.sub(trainingDataMin).div(trainingDataMax.sub(trainingDataMin));
const normalizedTestingData = testingData.sub(testingDataMin).div(testingDataMax.sub(testingDataMin));
const normalizedOutputData = outputData.sub(outputDataMin).div(outputDataMax.sub(outputDataMin));
const model = tf.sequential();
/* time_steps, features */
model.add(tf.layers.lstm({units: 20, inputShape: [5, 4], returnSequences: false}));
/* 1 output */
model.add(tf.layers.dense({units: 1, activation: 'sigmoid'}));
model.summary();
const sgdoptimizer = tf.train.adam(0.03);
model.compile({
optimizer: sgdoptimizer,
loss: tf.losses.meanSquaredError,
metrics: ["mse"]
});
console.log('......Loss History.......');
for (let i = 0; i < 10; i++) {
let res = await model.fit(normalizedTrainingData, normalizedOutputData, {epochs: 10});
console.log(`Iteration ${i}: ${res.history.loss[0]}`);
}
console.log('....Model Prediction .....');
const preds = model.predict(normalizedTestingData);
const unNormPreds = preds
.mul(outputDataMax.sub(outputDataMin))
.add(outputDataMin).dataSync();
console.log(unNormPreds);
const risultati_veri = risultatiTesting.map((d, i) => {
return {
x: i, y: d
};
});
const previsioni = Array.from(unNormPreds).map((d, i) => {
return {
x: i, y: d
};
});
tfvis.render.linechart(
{name: 'Model Predictions vs Original Data'},
{values: [risultati_veri, previsioni], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
}
async function main() {
const data = await getData();
await train_data(data);
}
main();
eurusd2.csv example:
<TICKER>,<DTYYYYMMDD>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>
EURUSD,20010102,230100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,230200,0.9506,0.9506,0.9505,0.9505,4
EURUSD,20010102,230300,0.9505,0.9507,0.9505,0.9506,4
EURUSD,20010102,230400,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230500,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230600,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230700,0.9505,0.9507,0.9505,0.9507,4
EURUSD,20010102,230800,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,230900,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231100,0.9507,0.9507,0.9506,0.9507,4
EURUSD,20010102,231200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231600,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010102,232000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233100,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,233500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233600,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233700,0.9507,0.9508,0.9507,0.9508,4
EURUSD,20010102,233800,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,233900,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,234000,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,234100,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234400,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234500,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234700,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,234900,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235000,0.9507,0.9508,0.9506,0.9506,4
EURUSD,20010102,235100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,235200,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,235300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235400,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010102,235500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235600,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235700,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235800,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235900,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000000,0.9506,0.9507,0.9506,0.9507,4
EURUSD,20010103,000100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000500,0.9507,0.9507,0.9506,0.9507,4
EURUSD,20010103,000600,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000700,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,000800,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000900,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001200,0.9506,0.9506,0.9505,0.9506,4
EURUSD,20010103,001300,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001400,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001500,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001700,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001800,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001900,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002000,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002200,0.9506,0.9507,0.9506,0.9507,4
EURUSD,20010103,002300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,002400,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,002500,0.9508,0.9510,0.9508,0.9510,4
EURUSD,20010103,002600,0.9510,0.9510,0.9509,0.9509,4
EURUSD,20010103,002700,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010103,002800,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010103,002900,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,003000,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,003100,0.9507,0.9507,0.9507,0.9507,4
There are all kinds of things you can do in this space (TensorFlow & Time Series Analysis). Here is some sample code to help you get going.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from yahoo_fin import stock_info as si
from collections import deque
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
import random
# set seed, so we can get the same results after rerunning several times
np.random.seed(314)
tf.random.set_seed(314)
random.seed(314)
def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1,
test_size=0.2, feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
# see if ticker is already a loaded stock from yahoo finance
if isinstance(ticker, str):
# load it from yahoo_fin library
df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        # already loaded, use it directly
        df = ticker
    else:
        raise TypeError("ticker can be either a str or a pd.DataFrame instance")
# this will contain all the elements we want to return from this function
result = {}
# we will also return the original dataframe itself
result['df'] = df.copy()
# make sure that the passed feature_columns exist in the dataframe
for col in feature_columns:
assert col in df.columns, f"'{col}' does not exist in the dataframe."
if scale:
column_scaler = {}
# scale the data (prices) from 0 to 1
for column in feature_columns:
scaler = preprocessing.MinMaxScaler()
df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
column_scaler[column] = scaler
# add the MinMaxScaler instances to the result returned
result["column_scaler"] = column_scaler
# add the target column (label) by shifting by `lookup_step`
df['future'] = df['adjclose'].shift(-lookup_step)
    # the last `lookup_step` rows contain NaN in the `future` column
    # grab them before dropping the NaNs
last_sequence = np.array(df[feature_columns].tail(lookup_step))
# drop NaNs
df.dropna(inplace=True)
sequence_data = []
sequences = deque(maxlen=n_steps)
for entry, target in zip(df[feature_columns].values, df['future'].values):
sequences.append(entry)
if len(sequences) == n_steps:
sequence_data.append([np.array(sequences), target])
# get the last sequence by appending the last `n_step` sequence with `lookup_step` sequence
# for instance, if n_steps=50 and lookup_step=10, last_sequence should be of 59 (that is 50+10-1) length
# this last_sequence will be used to predict in future dates that are not available in the dataset
last_sequence = list(sequences) + list(last_sequence)
# shift the last sequence by -1
last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
# add to result
result['last_sequence'] = last_sequence
# construct the X's and y's
X, y = [], []
for seq, target in sequence_data:
X.append(seq)
y.append(target)
# convert to numpy arrays
X = np.array(X)
y = np.array(y)
    # reshape X so its last axis matches the model's input_shape=(None, sequence_length)
    X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))
# split the dataset
result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(X, y, test_size=test_size, shuffle=shuffle)
# return the result
return result
def create_model(sequence_length, units=256, cell=LSTM, n_layers=2, dropout=0.3,
loss="mean_absolute_error", optimizer="rmsprop", bidirectional=False):
model = Sequential()
for i in range(n_layers):
if i == 0:
# first layer
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=True), input_shape=(None, sequence_length)))
else:
model.add(cell(units, return_sequences=True, input_shape=(None, sequence_length)))
elif i == n_layers - 1:
# last layer
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=False)))
else:
model.add(cell(units, return_sequences=False))
else:
# hidden layers
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=True)))
else:
model.add(cell(units, return_sequences=True))
# add dropout after each layer
model.add(Dropout(dropout))
model.add(Dense(1, activation="linear"))
model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)
return model
# Window size or the sequence length
N_STEPS = 100
# Lookup step, 1 is the next day
LOOKUP_STEP = 1
# test ratio size, 0.2 is 20%
TEST_SIZE = 0.2
# features to use
FEATURE_COLUMNS = ["adjclose", "volume", "open", "high", "low"]
# date now
date_now = time.strftime("%Y-%m-%d")
### model parameters
N_LAYERS = 3
# LSTM cell
CELL = LSTM
# 256 LSTM neurons
UNITS = 256
# 40% dropout
DROPOUT = 0.4
# whether to use bidirectional RNNs
BIDIRECTIONAL = False
### training parameters
# mean absolute error loss
# LOSS = "mae"
# huber loss
LOSS = "huber_loss"
OPTIMIZER = "adam"
BATCH_SIZE = 64
EPOCHS = 100
# Apple stock market
ticker = "AAPL"
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
# model name to save, making it as unique as possible based on parameters
model_name = f"{date_now}_{ticker}-{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
if BIDIRECTIONAL:
model_name += "-b"
# create these folders if they do not exist
if not os.path.isdir("results"):
os.mkdir("results")
if not os.path.isdir("logs"):
os.mkdir("logs")
if not os.path.isdir("data"):
os.mkdir("data")
# load the data
data = load_data(ticker, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE, feature_columns=FEATURE_COLUMNS)
# save the dataframe
data["df"].to_csv(ticker_data_filename)
# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)
# some tensorflow callbacks
checkpointer = ModelCheckpoint(os.path.join("results", model_name + ".h5"), save_weights_only=True, save_best_only=True, verbose=1)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))
history = model.fit(data["X_train"], data["y_train"],
batch_size=BATCH_SIZE,
epochs=EPOCHS,
validation_data=(data["X_test"], data["y_test"]),
callbacks=[checkpointer, tensorboard],
verbose=1)
model.save(os.path.join("results", model_name) + ".h5")
# after the model ends running...or during training, run this
# tensorboard --logdir="logs"
# http://localhost:6006/
data = load_data(ticker, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE,
feature_columns=FEATURE_COLUMNS, shuffle=False)
# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)
model_path = os.path.join("results", model_name) + ".h5"
model.load_weights(model_path)
# evaluate the model
loss, mae = model.evaluate(data["X_test"], data["y_test"], verbose=0)
# calculate the mean absolute error (inverse scaling)
mean_absolute_error = data["column_scaler"]["adjclose"].inverse_transform([[mae]])[0][0]
print("Mean Absolute Error:", mean_absolute_error)
def predict(model, data, classification=False):
# retrieve the last sequence from data
last_sequence = data["last_sequence"][:N_STEPS]
# retrieve the column scalers
column_scaler = data["column_scaler"]
# reshape the last sequence
last_sequence = last_sequence.reshape((last_sequence.shape[1], last_sequence.shape[0]))
# expand dimension
last_sequence = np.expand_dims(last_sequence, axis=0)
# get the prediction (scaled from 0 to 1)
prediction = model.predict(last_sequence)
# get the price (by inverting the scaling)
predicted_price = column_scaler["adjclose"].inverse_transform(prediction)[0][0]
return predicted_price
# predict the future price
future_price = predict(model, data)
print(f"Future price after {LOOKUP_STEP} days is {future_price:.2f}$")
# Result:
Mean Absolute Error: 3.4357253022539096
Future price after 1 days is 311.41$
def plot_graph(model, data):
y_test = data["y_test"]
X_test = data["X_test"]
y_pred = model.predict(X_test)
y_test = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(np.expand_dims(y_test, axis=0)))
y_pred = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(y_pred))
# last 200 days, feel free to edit that
plt.plot(y_test[-200:], c='b')
plt.plot(y_pred[-200:], c='r')
plt.xlabel("Days")
plt.ylabel("Price")
plt.legend(["Actual Price", "Predicted Price"])
plt.show()
plot_graph(model, data)
Running through the 100 training epochs gives output like this:
Epoch 99/100
7872/7885 [============================>.] - ETA: 0s - loss: 1.0276e-04 - mean_absolute_error: 0.0086
Epoch 00099: val_loss did not improve from 0.00002
7885/7885 [==============================] - 11s 1ms/sample - loss: 1.0276e-04 - mean_absolute_error: 0.0086 - val_loss: 3.8095e-05 - val_mean_absolute_error: 0.0057
Epoch 100/100
7872/7885 [============================>.] - ETA: 0s - loss: 1.1034e-04 - mean_absolute_error: 0.0086
Epoch 00100: val_loss did not improve from 0.00002
7885/7885 [==============================] - 11s 1ms/sample - loss: 1.1040e-04 - mean_absolute_error: 0.0086 - val_loss: 2.9450e-05 - val_mean_absolute_error: 0.0035
Finally, you get a chart of the actual vs. predicted prices.
Again, you can go in many different directions with this!
I improved my code like this:
/* global tf, tfvis */
async function getData() {
//QOUA4VUTZJXS3M01
return new Promise((resolve, reject) => {
//const url='https://www.alphavantage.co/query?function=FX_INTRADAY&from_symbol=EUR&to_symbol=USD&interval=1min&outputsize=full&apikey=QOUA4VUTZJXS3M01';
const url = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=QOUA4VUTZJXS3M01';
$.getJSON(url, function (data) {
let rawData = Object.values(data["Time Series (Daily)"]).map(d => ({open: parseFloat(d["1. open"]), high: parseFloat(d["2. high"]), low: parseFloat(d["3. low"]), close: parseFloat(d["4. close"])}));
resolve(rawData.reverse());
});
});
}
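/* For reference, the Alpha Vantage TIME_SERIES_DAILY payload parsed above has
   roughly this shape (an abridged sketch; the dates and numbers are made up):
   {
     "Time Series (Daily)": {
       "2020-01-03": {"1. open": "158.32", "2. high": "159.95", "3. low": "158.06", "4. close": "158.62", "5. volume": "21116200"},
       "2020-01-02": {"1. open": "...", ...}
     }
   }
   The API returns the newest date first, which is why rawData is reversed
   above to get an oldest-first series. */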
function prepareInputDatas(data, time_steps) {
/* if the data length is greater than the number of time steps */
if (data.length > time_steps) {
/* indicator examples */
/*
let rsi = RSI.calculate({period: time_steps, values: data.map(d => d.close)});
let sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});
for (let i = 0; i < data.length; i++) {
data[i].sma = 0;
}
let d = 0;
for (let i = time_steps - 1; i < data.length; i++) {
data[i].sma = sma[d];
d++;
}
for (let i = 1; i < data.length; i++) {
if (data[i].close > data[i - 1].close) {
data[i].ind = 1;
} else if (data[i].close < data[i - 1].close) {
data[i].ind = 0;
} else {
data[i].ind = 0.5;
}
}
*/
let arr = new Array();
for (let i = 0; i < data.length - time_steps; i++) {
/*let sma = SMA.calculate({period: time_steps, values: data.slice(i, i + time_steps).map(d => d.close)})[0];*/
/* build the training (or testing) windows: the first dimension is the batch of samples, each sample holds time_steps rows of features (the x values) */
arr.push(data.slice(i, i + time_steps).map(d => {
return [d.open, d.high, d.low, d.close /*,d.sma*/];
}));
}
return arr;
} else
{
return false;
}
}
function prepareOutputDatas(data, time_steps) {
if (data.length > time_steps) {
let arr = new Array();
/* create output training set (or testing values) (y values) */
for (let i = time_steps; i < data.length; i++) {
arr.push(data[i].close);
}
return arr;
} else
{
return false;
}
}
function prepareInputTestingDatas(data, time_steps) {
/* if the data length is greater than the number of time steps */
if (data.length > time_steps) {
/* indicator examples */
/*
let rsi = RSI.calculate({period: time_steps, values: data.map(d => d.close)});
let sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});
for (let i = 0; i < data.length; i++) {
data[i].sma = 0;
}
let d = 0;
for (let i = time_steps - 1; i < data.length; i++) {
data[i].sma = sma[d];
d++;
}
for (let i = 1; i < data.length; i++) {
if (data[i].close > data[i - 1].close) {
data[i].ind = 1;
} else if (data[i].close < data[i - 1].close) {
data[i].ind = 0;
} else {
data[i].ind = 0.5;
}
}
*/
let arr = new Array();
for (let i = 0; i <= data.length - time_steps; i++) {
/*let sma = SMA.calculate({period: time_steps, values: data.slice(i, i + time_steps).map(d => d.close)})[0];*/
/* build the training (or testing) windows: the first dimension is the batch of samples, each sample holds time_steps rows of features (the x values) */
arr.push(data.slice(i, i + time_steps).map(d => {
return [d.open, d.high, d.low, d.close /*,d.sma*/];
}));
}
return arr;
} else
{
return false;
}
}
function prepareOutputTestingDatas(data, time_steps) {
if (data.length > time_steps) {
let arr = new Array();
/* create output training set (or testing values) (y values) */
for (let i = time_steps; i <= data.length; i++) {
if (data[i]) {
arr.push(data[i].close);
}
}
return arr;
} else
{
return false;
}
}
async function train_data(data) {
/* sometimes Chrome crashes and you need to open a new window */
const size = Math.floor(data.length / 100 * 98);
const time_steps = 30;
const predict_size = data.length - size;
const start = data.length - size - predict_size;
const input = prepareInputDatas(data.slice(start, start + size), time_steps);
const output = prepareOutputDatas(data.slice(start, start + size), time_steps);
const testing = prepareInputTestingDatas(data.slice(start + size, start + size + predict_size), time_steps);
const testingResults = prepareOutputTestingDatas(data.slice(start + size, start + size + predict_size), time_steps);
/* Creating tensors (input 3d tensor, and output 1d tensor) */
const input_size_2 = input[0].length;
const input_size = input[0][0].length;
const trainingData = tf.tensor3d(input, [input.length, input_size_2, input_size]);
const outputData = tf.tensor1d(output);
const testing_size_2 = testing[0].length;
const testing_size = testing[0][0].length;
const testingData = tf.tensor3d(testing, [testing.length, testing_size_2, testing_size]);
const outputTestingData = tf.tensor1d(testingResults);
/* normalizing the data */
const trainingDataMax = trainingData.max();
const trainingDataMin = trainingData.min();
const testingDataMax = testingData.max();
const testingDataMin = testingData.min();
const outputDataMax = outputData.max();
const outputDataMin = outputData.min();
const outputTestingDataMax = outputTestingData.max();
const outputTestingDataMin = outputTestingData.min();
const normalizedTrainingData = trainingData.sub(trainingDataMin).div(trainingDataMax.sub(trainingDataMin));
const normalizedTestingData = testingData.sub(testingDataMin).div(testingDataMax.sub(testingDataMin));
const normalizedOutputData = outputData.sub(outputDataMin).div(outputDataMax.sub(outputDataMin));
const normalizedTestingOutputData = outputTestingData.sub(outputTestingDataMin).div(outputTestingDataMax.sub(outputTestingDataMin));
/* creating model */
const model = tf.sequential();
model.add(tf.layers.lstm({inputShape: [input_size_2, input_size], units: input_size_2, returnSequences: false}));
/* optional extra LSTM layer (not needed here, since the final step is a plain linear regression);
   note the first layer would need returnSequences: true before uncommenting this */
//model.add(tf.layers.lstm({units: Math.floor(input_size_2 / 2), returnSequences: false}));
model.add(tf.layers.dense({units: 1, activation: "sigmoid"}));
model.summary();
/* setting training */
const learningRate = 0.01;
/* RMSProp gave the best results among the optimizers I tried */
const optimizer = tf.train.rmsprop(learningRate, 0.95);
/* compiling model with optimizer, loss and metrics */
model.compile({
optimizer: optimizer,
loss: tf.losses.meanSquaredError,
metrics: tf.metrics.meanAbsoluteError
});
/* training ... */
console.log('Loss Log');
for (let i = 0; i < 25; i++) {
let res = await model.fit(normalizedTrainingData, normalizedOutputData, {epochs: 1});
console.log(`Iteration ${i + 1}: ${res.history.loss[0] }`);
}
/* training prediction (validation) */
const validation = model.predict(normalizedTrainingData);
const unNormValidation = validation
.mul(outputDataMax.sub(outputDataMin))
.add(outputDataMin).dataSync();
const trainingResults = output.map((d, i) => {
if (d) {
return {
x: i, y: d
};
}
});
const trainingValidation = Array.from(unNormValidation).map((d, i) => {
if (d) {
return {
x: i, y: d
};
}
});
/* creating training chart */
tfvis.render.linechart(
{name: 'Validation Results'},
{values: [trainingResults, trainingValidation], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
/* predicting */
console.log('Real prediction');
const preds = model.predict(normalizedTestingData);
const unNormPredictions = preds
.mul(outputTestingDataMax.sub(outputTestingDataMin))
.add(outputTestingDataMin).dataSync();
const realResults = testingResults.map((d, i) => {
if (d) {
return {
x: i, y: d.toFixed(4)
};
}
});
const predictions = Array.from(unNormPredictions).map((d, i) => {
if (d) {
return {
x: i, y: d.toFixed(4)
};
}
});
console.log("INPUT",testing);
console.log("OUTPUT",realResults);
console.log("PREDICTIONS",predictions);
/* creating prediction chart */
tfvis.render.linechart(
{name: 'Real Predictions'},
{values: [realResults, predictions], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
}
async function main() {
const data = await getData();
await train_data(data);
}
main();
and I'm hoping for good results.
I would like to add some technical indicators now, but I still don't understand how the LSTM handles the "features" dimension of the input array.
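For what it's worth, the "features" dimension is just the innermost array of each time step, so an indicator simply becomes one more column there, and the LSTM sees it at every step. A minimal sketch, building on the commented-out SMA code above (it assumes SMA.calculate from the technicalindicators library; prepareInputWithSma is a hypothetical name):
function prepareInputWithSma(data, time_steps) {
    /* sma[k] averages the closes data[k] .. data[k + time_steps - 1],
       i.e. it is the trailing SMA at bar k + time_steps - 1 */
    const sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});
    let arr = [];
    /* start at time_steps - 1 so every bar inside a window already has an SMA */
    for (let i = time_steps - 1; i < data.length - time_steps; i++) {
        arr.push(data.slice(i, i + time_steps).map((d, j) => {
            /* five features per time step instead of four */
            return [d.open, d.high, d.low, d.close, sma[i + j - (time_steps - 1)]];
        }));
    }
    /* the matching label is still data[i + time_steps].close for each window */
    return arr;
}
/* the only change in the model is the feature count:
model.add(tf.layers.lstm({inputShape: [time_steps, 5], units: time_steps, returnSequences: false}));
*/
The only model change is the last number in inputShape, from 4 to 5; the min-max normalization still works because the SMA lives on the same price scale as the other features.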