Time Series Forecasting in TensorFlow.js - machine-learning

I'm new to this field.
I made this code, but it doesn't work well: I only get something close to an average price, not a real forecast.
I created a 3D tensor with the previous open, high, low and close prices, using 5 time steps, and I need to forecast the next close value.
Example of input:
open, high, low, close for 5 time steps, for 75 samples
[/*samples (75)*/
[/*sample 1: 5 time steps*/
/* open, high, low, close*/
[1905,1906,1903,1904],
[1904,1905,1904,1906],
[1906,1907,1904,1907],
[1907,1908,1902,1905],
[1905,1906,1904,1904]
],
[/*sample 2*/
[1904,1905,1904,1906],
[1906,1907,1904,1907],
[1907,1908,1902,1905],
[1905,1906,1904,1904],
[1904,1906,1902,**1903**]
],
The output is simply the close value of the 6th time step, and so on for the following windows.
Example:
/*input*/
[/*sample 1*/
/* open, high, low, close*/
[1905,1906,1903,1904]
[1904,1905,1904,1906]
[1906,1907,1904,1907]
[1907,1908,1902,1905]
[1905,1906,1904,1904]
]
/*output*/
1903 (the last close of sample 2, i.e. the 6th close), ...
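For reference, here is the same windowing written as a small NumPy sketch (just the shape logic, not the tf.js code), using the six rows from the example above:
import numpy as np

# the six OHLC rows from the example
ohlc = np.array([[1905, 1906, 1903, 1904],
                 [1904, 1905, 1904, 1906],
                 [1906, 1907, 1904, 1907],
                 [1907, 1908, 1902, 1905],
                 [1905, 1906, 1904, 1904],
                 [1904, 1906, 1902, 1903]], dtype=np.float32)

time_steps = 5
# one window of 5 consecutive steps per sample...
X = np.stack([ohlc[i:i + time_steps] for i in range(len(ohlc) - time_steps)])
# ...and the close of the step right after each window as the target
y = ohlc[time_steps:, 3]

print(X.shape)  # (1, 5, 4) -> [samples, time_steps, features]
print(y)        # [1903.]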
What is wrong?
/* global tf, tfvis */
async function getData() {
// Import from CSV
const dataSet = tf.data.csv('http://localhost:8888/ts2/eurusd2.csv');
// Extract x and y values to plot
const pointsDataSet = dataSet.map(record => ({
/*date: record["\<DTYYYYMMDD>"]+record["\<TIME>"],*/
open: record["\<OPEN\>"] * 10000,
high: record["\<HIGH\>"] * 10000,
low: record["\<LOW\>"] * 10000,
close: record["\<CLOSE\>"] * 10000
}));
const points = await pointsDataSet.toArray();
return points;
}
function preparaDatiInput(data, time_steps) {
//e.g. 5 time steps
//the 6th is the prediction
if (data.length > time_steps) {
let arr = new Array();
for (let i = 0; i < data.length - time_steps; i++) {
arr.push(data.slice(i, i + 5).map(d => {
return [d.open, d.high, d.low, d.close];
}));
}
return arr;
} else
{
return false;
}
}
function preparaDatiOutput(data, time_steps) {
/* the output is always 1 value */
if (data.length > time_steps) {
let arr = new Array();
for (let i = time_steps; i < data.length; i++) {
arr.push(data[i].close);
}
return arr;
} else
{
return false;
}
}
async function train_data(data) {
const size = 75;
const time_steps = 5;
const input = preparaDatiInput(data.slice(0, size), time_steps);
const output = preparaDatiOutput(data.slice(0, size), time_steps);
const testing = preparaDatiInput(data.slice(size), time_steps);
const risultatiTesting = preparaDatiOutput(data.slice(size), time_steps);
/* first dimension for the 3D tensor */
const trainingData = tf.tensor3d(input, [input.length, input[0].length, input[0][0].length]);
const outputData = tf.tensor1d(output);
const testingData = tf.tensor3d(testing, [testing.length, testing[0].length, testing[0][0].length]);
const trainingDataMax = trainingData.max();
const trainingDataMin = trainingData.min();
const testingDataMax = testingData.min();
const testingDataMin = testingData.max();
const outputDataMax = outputData.min();
const outputDataMin = outputData.max();
const normalizedTrainingData = trainingData.sub(trainingDataMin).div(trainingDataMax.sub(trainingDataMin));
const normalizedTestingData = testingData.sub(testingDataMin).div(testingDataMax.sub(testingDataMin));
const normalizedOutputData = outputData.sub(outputDataMin).div(outputDataMax.sub(outputDataMin));
const model = tf.sequential();
/* time_steps, features */
model.add(tf.layers.lstm({units: 20, inputShape: [5, 4], returnSequences: false}));
/* 1 output */
model.add(tf.layers.dense({units: 1, activation: 'sigmoid'}));
model.summary();
const sgdoptimizer = tf.train.adam(0.03);
model.compile({
optimizer: sgdoptimizer,
loss: tf.losses.meanSquaredError,
metrics: ["mse"]
});
console.log('......Loss History.......');
for (let i = 0; i < 10; i++) {
let res = await model.fit(normalizedTrainingData, normalizedOutputData, {epochs: 10});
console.log(`Iteration ${i}: ${res.history.loss[0]}`);
}
console.log('....Model Prediction .....');
const preds = model.predict(normalizedTestingData);
const unNormPreds = preds
.mul(outputDataMax.sub(outputDataMin))
.add(outputDataMin).dataSync();
console.log(unNormPreds);
const risultati_veri = risultatiTesting.map((d, i) => {
return {
x: i, y: d
};
});
const previsioni = Array.from(unNormPreds).map((d, i) => {
return {
x: i, y: d
};
});
tfvis.render.linechart(
{name: 'Model Predictions vs Original Data'},
{values: [risultati_veri, previsioni], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
}
async function main() {
const data = await getData();
await train_data(data);
}
main();
eurusd2.csv example:
<TICKER>,<DTYYYYMMDD>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>
EURUSD,20010102,230100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,230200,0.9506,0.9506,0.9505,0.9505,4
EURUSD,20010102,230300,0.9505,0.9507,0.9505,0.9506,4
EURUSD,20010102,230400,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230500,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230600,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230700,0.9505,0.9507,0.9505,0.9507,4
EURUSD,20010102,230800,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,230900,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231100,0.9507,0.9507,0.9506,0.9507,4
EURUSD,20010102,231200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231600,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010102,232000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233100,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,233500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233600,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233700,0.9507,0.9508,0.9507,0.9508,4
EURUSD,20010102,233800,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,233900,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,234000,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,234100,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234400,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234500,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234700,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,234900,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235000,0.9507,0.9508,0.9506,0.9506,4
EURUSD,20010102,235100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,235200,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,235300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235400,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010102,235500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235600,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235700,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235800,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235900,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000000,0.9506,0.9507,0.9506,0.9507,4
EURUSD,20010103,000100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000500,0.9507,0.9507,0.9506,0.9507,4
EURUSD,20010103,000600,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000700,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,000800,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000900,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001200,0.9506,0.9506,0.9505,0.9506,4
EURUSD,20010103,001300,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001400,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001500,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001700,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001800,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001900,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002000,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002200,0.9506,0.9507,0.9506,0.9507,4
EURUSD,20010103,002300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,002400,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,002500,0.9508,0.9510,0.9508,0.9510,4
EURUSD,20010103,002600,0.9510,0.9510,0.9509,0.9509,4
EURUSD,20010103,002700,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010103,002800,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010103,002900,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,003000,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,003100,0.9507,0.9507,0.9507,0.9507,4

There are all kinds of things you can do in this space (TensorFlow & Time Series Analysis). Here is some sample code to help you get going.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from yahoo_fin import stock_info as si
from collections import deque
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
import random
# set seed, so we can get the same results after rerunning several times
np.random.seed(314)
tf.random.set_seed(314)
random.seed(314)
def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1,
test_size=0.2, feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
# see if ticker is already a loaded stock from yahoo finance
if isinstance(ticker, str):
# load it from yahoo_fin library
df = si.get_data(ticker)
elif isinstance(ticker, pd.DataFrame):
# already loaded, use it directly
df = ticker
# this will contain all the elements we want to return from this function
result = {}
# we will also return the original dataframe itself
result['df'] = df.copy()
# make sure that the passed feature_columns exist in the dataframe
for col in feature_columns:
assert col in df.columns, f"'{col}' does not exist in the dataframe."
if scale:
column_scaler = {}
# scale the data (prices) from 0 to 1
for column in feature_columns:
scaler = preprocessing.MinMaxScaler()
df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
column_scaler[column] = scaler
# add the MinMaxScaler instances to the result returned
result["column_scaler"] = column_scaler
# add the target column (label) by shifting by `lookup_step`
df['future'] = df['adjclose'].shift(-lookup_step)
# the last `lookup_step` rows contain NaN in the future column
# get them before dropping NaNs
last_sequence = np.array(df[feature_columns].tail(lookup_step))
# drop NaNs
df.dropna(inplace=True)
sequence_data = []
sequences = deque(maxlen=n_steps)
for entry, target in zip(df[feature_columns].values, df['future'].values):
sequences.append(entry)
if len(sequences) == n_steps:
sequence_data.append([np.array(sequences), target])
# get the last sequence by appending the last `n_step` sequence with `lookup_step` sequence
# for instance, if n_steps=50 and lookup_step=10, last_sequence should be of 59 (that is 50+10-1) length
# this last_sequence will be used to predict in future dates that are not available in the dataset
last_sequence = list(sequences) + list(last_sequence)
# shift the last sequence by -1
last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
# add to result
result['last_sequence'] = last_sequence
# construct the X's and y's
X, y = [], []
for seq, target in sequence_data:
X.append(seq)
y.append(target)
# convert to numpy arrays
X = np.array(X)
y = np.array(y)
# reshape X to fit the neural network
X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))
# split the dataset
result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(X, y, test_size=test_size, shuffle=shuffle)
# return the result
return result
def create_model(sequence_length, units=256, cell=LSTM, n_layers=2, dropout=0.3,
loss="mean_absolute_error", optimizer="rmsprop", bidirectional=False):
model = Sequential()
for i in range(n_layers):
if i == 0:
# first layer
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=True), input_shape=(None, sequence_length)))
else:
model.add(cell(units, return_sequences=True, input_shape=(None, sequence_length)))
elif i == n_layers - 1:
# last layer
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=False)))
else:
model.add(cell(units, return_sequences=False))
else:
# hidden layers
if bidirectional:
model.add(Bidirectional(cell(units, return_sequences=True)))
else:
model.add(cell(units, return_sequences=True))
# add dropout after each layer
model.add(Dropout(dropout))
model.add(Dense(1, activation="linear"))
model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)
return model
# Window size or the sequence length
N_STEPS = 100
# Lookup step, 1 is the next day
LOOKUP_STEP = 1
# test ratio size, 0.2 is 20%
TEST_SIZE = 0.2
# features to use
FEATURE_COLUMNS = ["adjclose", "volume", "open", "high", "low"]
# date now
date_now = time.strftime("%Y-%m-%d")
### model parameters
N_LAYERS = 3
# LSTM cell
CELL = LSTM
# 256 LSTM neurons
UNITS = 256
# 40% dropout
DROPOUT = 0.4
# whether to use bidirectional RNNs
BIDIRECTIONAL = False
### training parameters
# mean absolute error loss
# LOSS = "mae"
# huber loss
LOSS = "huber_loss"
OPTIMIZER = "adam"
BATCH_SIZE = 64
EPOCHS = 100
# Apple stock market
ticker = "AAPL"
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
# model name to save, making it as unique as possible based on parameters
model_name = f"{date_now}_{ticker}-{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
if BIDIRECTIONAL:
model_name += "-b"
# create these folders if they do not exist
if not os.path.isdir("results"):
os.mkdir("results")
if not os.path.isdir("logs"):
os.mkdir("logs")
if not os.path.isdir("data"):
os.mkdir("data")
# load the data
data = load_data(ticker, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE, feature_columns=FEATURE_COLUMNS)
# save the dataframe
data["df"].to_csv(ticker_data_filename)
# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)
# some tensorflow callbacks
checkpointer = ModelCheckpoint(os.path.join("results", model_name + ".h5"), save_weights_only=True, save_best_only=True, verbose=1)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))
history = model.fit(data["X_train"], data["y_train"],
batch_size=BATCH_SIZE,
epochs=EPOCHS,
validation_data=(data["X_test"], data["y_test"]),
callbacks=[checkpointer, tensorboard],
verbose=1)
model.save(os.path.join("results", model_name) + ".h5")
# after the model ends running...or during training, run this
# tensorboard --logdir="logs"
# http://localhost:6006/
data = load_data(ticker, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE,
feature_columns=FEATURE_COLUMNS, shuffle=False)
# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)
model_path = os.path.join("results", model_name) + ".h5"
model.load_weights(model_path)
# evaluate the model
mse, mae = model.evaluate(data["X_test"], data["y_test"], verbose=0)
# calculate the mean absolute error (inverse scaling)
mean_absolute_error = data["column_scaler"]["adjclose"].inverse_transform([[mae]])[0][0]
print("Mean Absolute Error:", mean_absolute_error)
def predict(model, data, classification=False):
# retrieve the last sequence from data
last_sequence = data["last_sequence"][:N_STEPS]
# retrieve the column scalers
column_scaler = data["column_scaler"]
# reshape the last sequence
last_sequence = last_sequence.reshape((last_sequence.shape[1], last_sequence.shape[0]))
# expand dimension
last_sequence = np.expand_dims(last_sequence, axis=0)
# get the prediction (scaled from 0 to 1)
prediction = model.predict(last_sequence)
# get the price (by inverting the scaling)
predicted_price = column_scaler["adjclose"].inverse_transform(prediction)[0][0]
return predicted_price
# predict the future price
future_price = predict(model, data)
print(f"Future price after {LOOKUP_STEP} days is {future_price:.2f}$")
# Result:
Mean Absolute Error: 3.4357253022539096
Future price after 1 days is 311.41$
def plot_graph(model, data):
y_test = data["y_test"]
X_test = data["X_test"]
y_pred = model.predict(X_test)
y_test = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(np.expand_dims(y_test, axis=0)))
y_pred = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(y_pred))
# last 200 days, feel free to edit that
plt.plot(y_test[-200:], c='b')
plt.plot(y_pred[-200:], c='r')
plt.xlabel("Days")
plt.ylabel("Price")
plt.legend(["Actual Price", "Predicted Price"])
plt.show()
plot_graph(model, data)
Run through 100 iterations...
Epoch 99/100
7872/7885 [============================>.] - ETA: 0s - loss: 1.0276e-04 - mean_absolute_error: 0.0086
Epoch 00099: val_loss did not improve from 0.00002
7885/7885 [==============================] - 11s 1ms/sample - loss: 1.0276e-04 - mean_absolute_error: 0.0086 - val_loss: 3.8095e-05 - val_mean_absolute_error: 0.0057
Epoch 100/100
7872/7885 [============================>.] - ETA: 0s - loss: 1.1034e-04 - mean_absolute_error: 0.0086
Epoch 00100: val_loss did not improve from 0.00002
7885/7885 [==============================] - 11s 1ms/sample - loss: 1.1040e-04 - mean_absolute_error: 0.0086 - val_loss: 2.9450e-05 - val_mean_absolute_error: 0.0035
Finally, you get the actual-vs-predicted price chart from plot_graph.
Again, you can go in many different directions with this!

I am improving my code by doing this:
/* global tf, tfvis */
async function getData() {
//QOUA4VUTZJXS3M01
return new Promise((resolve, reject) => {
//const url='https://www.alphavantage.co/query?function=FX_INTRADAY&from_symbol=EUR&to_symbol=USD&interval=1min&outputsize=full&apikey=QOUA4VUTZJXS3M01';
const url = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=QOUA4VUTZJXS3M01';
$.getJSON(url, function (data) {
let rawData = Object.values(data["Time Series (Daily)"]).map(d => ({open: parseFloat(d["1. open"]), high: parseFloat(d["2. high"]), low: parseFloat(d["3. low"]), close: parseFloat(d["4. close"])}));
resolve(rawData.reverse());
});
});
}
function prepareInputDatas(data, time_steps) {
/* if the data length is greater than the number of time steps */
if (data.length > time_steps) {
/* indicator examples */
/*
let rsi = RSI.calculate({period: time_steps, values: data.map(d => d.close)});
let sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});
for (let i = 0; i < data.length; i++) {
data[i].sma = 0;
}
let d = 0;
for (let i = time_steps - 1; i < data.length; i++) {
data[i].sma = sma[d];
d++;
}
for (let i = 1; i < data.length; i++) {
if (data[i].close > data[i - 1].close) {
data[i].ind = 1;
} else if (data[i].close < data[i - 1].close) {
data[i].ind = 0;
} else {
data[i].ind = 0.5;
}
}
*/
let arr = new Array();
for (let i = 0; i < data.length - time_steps; i++) {
/*let sma = SMA.calculate({period: time_steps, values: data.slice(i, i + time_steps).map(d => d.close)})[0];*/
/* create the training or testing array, with x values (features) and batch size (batch size is the samples' first dimension of array) */
arr.push(data.slice(i, i + time_steps).map(d => {
return [d.open, d.high, d.low, d.close /*,d.sma*/];
}));
}
return arr;
} else
{
return false;
}
}
function prepareOutputDatas(data, time_steps) {
if (data.length > time_steps) {
let arr = new Array();
/* create output training set (or testing values) (y values) */
for (let i = time_steps; i < data.length; i++) {
arr.push(data[i].close);
}
return arr;
} else
{
return false;
}
}
function prepareInputTestingDatas(data, time_steps) {
/* if the data length is greater than the number of time steps */
if (data.length > time_steps) {
/* indicator examples */
/*
let rsi = RSI.calculate({period: time_steps, values: data.map(d => d.close)});
let sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});
for (let i = 0; i < data.length; i++) {
data[i].sma = 0;
}
let d = 0;
for (let i = time_steps - 1; i < data.length; i++) {
data[i].sma = sma[d];
d++;
}
for (let i = 1; i < data.length; i++) {
if (data[i].close > data[i - 1].close) {
data[i].ind = 1;
} else if (data[i].close < data[i - 1].close) {
data[i].ind = 0;
} else {
data[i].ind = 0.5;
}
}
*/
let arr = new Array();
for (let i = 0; i <= data.length - time_steps; i++) {
/*let sma = SMA.calculate({period: time_steps, values: data.slice(i, i + time_steps).map(d => d.close)})[0];*/
/* create the training or testing array, with x values (features) and batch size (batch size is the samples' first dimension of array) */
arr.push(data.slice(i, i + time_steps).map(d => {
return [d.open, d.high, d.low, d.close /*,d.sma*/];
}));
}
return arr;
} else
{
return false;
}
}
function prepareOutputTestingDatas(data, time_steps) {
if (data.length > time_steps) {
let arr = new Array();
/* create output training set (or testing values) (y values) */
for (let i = time_steps; i <= data.length; i++) {
if (data[i]) {
arr.push(data[i].close);
}
}
return arr;
} else
{
return false;
}
}
async function train_data(data) {
/* sometimes Chrome crashes and you need to open a new window */
const size = Math.floor(data.length / 100 * 98);
const time_steps = 30;
const predict_size = data.length - size;
const start = data.length - size - predict_size;
const input = prepareInputDatas(data.slice(start, start + size), time_steps);
const output = prepareOutputDatas(data.slice(start, start + size), time_steps);
const testing = prepareInputTestingDatas(data.slice(start + size, start + size + predict_size), time_steps);
const testingResults = prepareOutputTestingDatas(data.slice(start + size, start + size + predict_size), time_steps);
/* Creating tensors (input 3d tensor, and output 1d tensor) */
const input_size_2 = input[0].length;
const input_size = input[0][0].length;
const trainingData = tf.tensor3d(input, [input.length, input_size_2, input_size]);
const outputData = tf.tensor1d(output);
const testing_size_2 = testing[0].length;
const testing_size = testing[0][0].length;
const testingData = tf.tensor3d(testing, [testing.length, testing_size_2, testing_size]);
const outputTestingData = tf.tensor1d(testingResults);
/* normalizing data */
const trainingDataMax = trainingData.max();
const trainingDataMin = trainingData.min();
const testingDataMax = testingData.max();
const testingDataMin = testingData.min();
const outputDataMax = outputData.max();
const outputDataMin = outputData.min();
const outputTestingDataMax = outputTestingData.max();
const outputTestingDataMin = outputTestingData.min();
const normalizedTrainingData = trainingData.sub(trainingDataMin).div(trainingDataMax.sub(trainingDataMin));
const normalizedTestingData = testingData.sub(testingDataMin).div(testingDataMax.sub(testingDataMin));
const normalizedOutputData = outputData.sub(outputDataMin).div(outputDataMax.sub(outputDataMin));
const normalizedTestingOutputData = outputTestingData.sub(outputTestingDataMin).div(outputTestingDataMax.sub(outputTestingDataMin));
/* creating model */
const model = tf.sequential();
model.add(tf.layers.lstm({inputShape: [input_size_2, input_size], units: input_size_2, returnSequences: false}));
/* optional hidden layer (not needed because this is a linear regression task) */
//model.add(tf.layers.lstm({units: Math.floor(input_size_2/2), returnSequences: false}));
model.add(tf.layers.dense({units: 1, activation: "sigmoid"}));
model.summary();
/* setting training */
const learningRate = 0.01;
/* selecting the best training optimizer */
const optimizer = tf.train.rmsprop(learningRate, 0.95);
/* compiling model with optimizer, loss and metrics */
model.compile({
optimizer: optimizer,
loss: tf.losses.meanSquaredError,
metrics: tf.metrics.meanAbsoluteError
});
/* training ... */
console.log('Loss Log');
for (let i = 0; i < 25; i++) {
let res = await model.fit(normalizedTrainingData, normalizedOutputData, {epochs: 1});
console.log(`Iteration ${i + 1}: ${res.history.loss[0] }`);
}
/* training prediction (validation) */
const validation = model.predict(normalizedTrainingData);
const unNormValidation = validation
.mul(outputDataMax.sub(outputDataMin))
.add(outputDataMin).dataSync();
const trainingResults = output.map((d, i) => {
if (d) {
return {
x: i, y: d
};
}
});
const trainingValidation = Array.from(unNormValidation).map((d, i) => {
if (d) {
return {
x: i, y: d
};
}
});
/* creating training chart */
tfvis.render.linechart(
{name: 'Validation Results'},
{values: [trainingResults, trainingValidation], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
/* predicting */
console.log('Real prediction');
const preds = model.predict(normalizedTestingData);
const unNormPredictions = preds
.mul(outputTestingDataMax.sub(outputTestingDataMin))
.add(outputTestingDataMin).dataSync();
const realResults = testingResults.map((d, i) => {
if (d) {
return {
x: i, y: d.toFixed(4)
};
}
});
const predictions = Array.from(unNormPredictions).map((d, i) => {
if (d) {
return {
x: i, y: d.toFixed(4)
};
}
});
console.log("INPUT",testing);
console.log("OUTPUT",realResults);
console.log("PREDICTIONS",predictions);
/* creating prediction chart */
tfvis.render.linechart(
{name: 'Real Predictions'},
{values: [realResults, predictions], series: ['original', 'predicted']},
{
xLabel: 'counter',
yLabel: 'price',
height: 300,
zoomToFit: true
}
);
}
async function main() {
const data = await getData();
await train_data(data);
}
main();
and I'm hoping for good results.
I would like to use some technical indicators now, but I still don't understand how the LSTM handles the "features" dimension of the input array.
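For intuition, here is a small Python/Keras sketch (not the code above; the data and names are made up) showing that an extra indicator such as an SMA is just one more column in the last, "features", dimension, and that the LSTM reads all the features of a time step together:
import numpy as np
import tensorflow as tf

time_steps, n_samples = 30, 200
closes = np.cumsum(np.random.randn(n_samples + time_steps)) + 100.0   # toy price series
opens, highs, lows = closes + 0.1, closes + 0.3, closes - 0.3
sma = np.convolve(closes, np.ones(5) / 5, mode="same")                # toy 5-period SMA

# 5 features per time step: open, high, low, close, sma
features = np.stack([opens, highs, lows, closes, sma], axis=1)
X = np.stack([features[i:i + time_steps] for i in range(n_samples)])  # (samples, steps, features)
y = closes[time_steps:time_steps + n_samples]                         # next close after each window

model = tf.keras.Sequential([
    tf.keras.layers.LSTM(32, input_shape=(time_steps, features.shape[1])),
    tf.keras.layers.Dense(1),  # linear output, since we predict a price rather than a probability
])
model.compile(optimizer="adam", loss="mse")
model.fit(X, y, epochs=2, verbose=0)
So adding an indicator only changes the size of the last dimension; nothing else in the model needs to change.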

Related

Struggling with Normalization values when training model

I am trying to train an adversarial patch located at the bottom left corner of the image to cause a misclassification. Currently, I am using these parameters to normalize the CIFAR10 dataset.
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.201))])
This would result in the images having maximum and minimum values of around 2.55 and -2.55 respectively. However, I'm not sure how to work with this range when training my patch. I struggle with converting the patch from the range (0,1) to the range (-2.55,2.55). Any help is appreciated!
My code for training is below (I don't think it's training properly for now):
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
import numpy as np
from torch.autograd import Variable
import torchattacks
import random
import torch.nn.functional as F
dictionary ={
'0':'airplane',
'1':'automobile',
'2':'bird',
'3':'cat',
'4':'deer',
'5':'dog',
'6':'frog',
'7':'horse',
'8':'ship',
'9':'truck',
}
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.201))])
#transform1 = transforms.Compose([transforms.ToTensor()])
normalize = transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.201))
mean =(0.4914,0.4822,0.4465)
std =(0.2023,0.1994,0.201)
inv_normalize = transforms.Normalize(
mean=[-0.4914/0.2023, -0.4822/0.1994, -0.4465/0.201],
std=[1/0.2023, 1/0.1994, 1/0.201])
batch_size = 1
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
shuffle=False, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
shuffle=True, num_workers=2)
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_resnet20", pretrained=True)
model = model.cuda()
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
patch = np.random.rand(3,32,32)
model.eval()
def mask_generation(mask_type='rectangle', patch = patch, image_size=(3, 7, 7)):
applied_patch = np.zeros(image_size) #0,1
#patch = torch.tensor(patch)
#padding = (3,3,3,3)
#patch = F.pad(patch, padding)
if mask_type == 'rectangle':
rotation_angle = 0
for i in range(patch.shape[0]):
patch[i] = np.rot90(patch[i], rotation_angle)
x_location , y_location = 25,0
for i in range(patch.shape[0]):
applied_patch[:, x_location:x_location + patch.shape[1], y_location:y_location + patch.shape[2]] = patch
mask = applied_patch.copy()
mask[mask != 0] = 1.0
return patch , applied_patch, mask, x_location, y_location , rotation_angle
def patch_attack(image, applied_patch, mask, target, probability_threshold, model, lr, max_iteration):
applied_patch = torch.from_numpy(applied_patch)
mask = torch.from_numpy(mask)
image = inv_normalize(image)
target_probability, count = 0,0
perturbated_image = torch.mul(mask.type(torch.FloatTensor), applied_patch.type(torch.FloatTensor)) + torch.mul((1 - mask.type(torch.FloatTensor)), image.type(torch.FloatTensor))
perturbated_image = normalize(perturbated_image)
while target_probability < probability_threshold and count < max_iteration:
count += 1
# Optimize the patch
perturbated_image = Variable(perturbated_image.data, requires_grad=True)
per_image = perturbated_image.cuda()
output = model(per_image)
target_log_softmax = torch.nn.functional.log_softmax(output, dim=1)[0][target]
target_log_softmax.backward()
patch_grad = perturbated_image.grad.clone().cpu()
applied_patch = (lr * patch_grad) + applied_patch.type(torch.FloatTensor)
applied_patch = torch.clamp(applied_patch,0,1)
perturbated_image.grad.data.zero_()
# Test the patch
perturbated_image = torch.mul(mask.type(torch.FloatTensor), applied_patch.type(torch.FloatTensor)) + torch.mul((1-mask.type(torch.FloatTensor)), image.type(torch.FloatTensor))
perturbated_image = normalize(perturbated_image)
perturbated_image = perturbated_image.cuda()
output = model(perturbated_image)
target_probability = torch.nn.functional.softmax(output, dim=1).data[0][target]
perturbated_image = perturbated_image.detach().cpu().numpy()
applied_patch = applied_patch.cpu().numpy()
return perturbated_image, applied_patch
def test_patch(patch_type, target, patch, test_loader, model):
test_total, test_actual_total, test_success = 0, 0, 0
for (image, label) in test_loader:
test_total += label.shape[0]
assert image.shape[0] == 1, 'Only one picture should be loaded each time.'
image = image.cuda() #-3,3
label = label.cuda()
output = model(image)
_, predicted = torch.max(output.data, 1)
if predicted[0] != label and predicted[0].data.cpu().numpy() != target:
test_actual_total += 1
patch ,applied_patch, mask, x_location, y_location = mask_generation('rectangle', patch, (3, 32, 32))
applied_patch = torch.from_numpy(applied_patch)
mask = torch.from_numpy(mask)
mask = normalize(mask)
applied_patch = normalize(applied_patch)
perturbated_image = torch.mul(mask.type(torch.FloatTensor), applied_patch.type(torch.FloatTensor)) + torch.mul((1 - mask.type(torch.FloatTensor)), image.type(torch.FloatTensor))
perturbated_image = perturbated_image.cuda() #-3,3
output = model(perturbated_image)
_, predicted = torch.max(output.data, 1)
if predicted[0].data.cpu().numpy() == target:
test_success += 1
return test_success / test_actual_total
#training parameters
epochs = 1
target = 0
probability_threshold = 0.99
lr = 1/255
max_iteration = 1
runs = 0
for epoch in range(epochs):
train_total, train_actual_total, train_success = 0, 0, 0
for (image, label) in trainloader:
runs+=1
assert image.shape[0] == 1
image = image.cuda()
label = label.cuda()
train_total += label.shape[0]
output = model(image)
_, predicted = torch.max(output.data, 1)
if predicted[0] != label or predicted[0].data.cpu().numpy() != target:
train_actual_total += 1
patch , applied_patch, mask, x_location, y_location ,rotation_angle = mask_generation('rectangle', patch, (3, 32, 32))
perturbated_image, applied_patch = patch_attack(image, applied_patch, mask, target, probability_threshold, model, lr,max_iteration)
perturbated_image = torch.from_numpy(perturbated_image).cuda()
output = model(perturbated_image)
_, predicted = torch.max(output.data, 1)
if predicted[0].data.cpu().numpy() == target:
train_success += 1
patch = applied_patch[0][:, x_location:x_location + patch.shape[1], y_location:y_location + patch.shape[2]]
patch = np.array(patch)
To convert a number x in the range [0,1] to the range [-2.55,2.55]:
Multiply by (size of final range / size of original range), in this case 5.1/1.0.
Add (min of final range - scaled min of starting range) to the result, in this case -2.55 - 0 = -2.55.
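As a quick sketch of that (the per-channel mean/std are taken from the question; the single 5.1 / -2.55 mapping is only approximate, since each channel really has its own scale):
import torch

patch01 = torch.rand(3, 32, 32)               # patch kept in [0, 1]
patch_rough = patch01 * (5.1 / 1.0) - 2.55    # rough mapping into [-2.55, 2.55]

# exact per-channel version: reuse the dataset's own normalization
mean = torch.tensor([0.4914, 0.4822, 0.4465]).view(3, 1, 1)
std = torch.tensor([0.2023, 0.1994, 0.201]).view(3, 1, 1)
patch_norm = (patch01 - mean) / std           # same transform the dataloader applies
patch_back = patch_norm * std + mean          # inverse, back to [0, 1]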

Oversampling or SMOTE in PySpark

I have 7 classes and the total number of records is 115, and I wanted to run a Random Forest model over this data. But the data is not enough to get high accuracy, so I wanted to apply oversampling over all the classes in a way that the majority class itself gets a higher count and then the minority classes accordingly. Is this possible in PySpark?
+---------+-----+
| SubTribe|count|
+---------+-----+
| Chill| 10|
| Cool| 18|
|Adventure| 18|
| Quirk| 13|
| Mystery| 25|
| Party| 18|
|Glamorous| 13|
+---------+-----+
Here is another implementation of PySpark and Scala SMOTE that I have used in the past. I have copied the code across and referenced the source because it's quite small:
Pyspark:
import random
import numpy as np
from pyspark.sql import Row
from sklearn import neighbors
from pyspark.ml.feature import VectorAssembler
def vectorizerFunction(dataInput, TargetFieldName):
if(dataInput.select(TargetFieldName).distinct().count() != 2):
raise ValueError("Target field must have only 2 distinct classes")
columnNames = list(dataInput.columns)
columnNames.remove(TargetFieldName)
dataInput = dataInput.select((','.join(columnNames)+','+TargetFieldName).split(','))
assembler=VectorAssembler(inputCols = columnNames, outputCol = 'features')
pos_vectorized = assembler.transform(dataInput)
vectorized = pos_vectorized.select('features',TargetFieldName).withColumn('label',pos_vectorized[TargetFieldName]).drop(TargetFieldName)
return vectorized
def SmoteSampling(vectorized, k = 5, minorityClass = 1, majorityClass = 0, percentageOver = 200, percentageUnder = 100):
if(percentageUnder > 100 or percentageUnder < 10):
raise ValueError("Percentage Under must be in range 10 - 100")
if(percentageOver < 100):
raise ValueError("Percentage Over must be at least 100")
dataInput_min = vectorized[vectorized['label'] == minorityClass]
dataInput_maj = vectorized[vectorized['label'] == majorityClass]
feature = dataInput_min.select('features')
feature = feature.rdd
feature = feature.map(lambda x: x[0])
feature = feature.collect()
feature = np.asarray(feature)
nbrs = neighbors.NearestNeighbors(n_neighbors=k, algorithm='auto').fit(feature)
neighbours = nbrs.kneighbors(feature)
gap = neighbours[0]
neighbours = neighbours[1]
min_rdd = dataInput_min.drop('label').rdd
pos_rddArray = min_rdd.map(lambda x : list(x))
pos_ListArray = pos_rddArray.collect()
min_Array = list(pos_ListArray)
newRows = []
nt = len(min_Array)
nexs = percentageOver // 100
for i in range(nt):
for j in range(nexs):
neigh = random.randint(1,k)
difs = min_Array[neigh][0] - min_Array[i][0]
newRec = (min_Array[i][0]+random.random()*difs)
newRows.insert(0,(newRec))
newData_rdd = sc.parallelize(newRows)
newData_rdd_new = newData_rdd.map(lambda x: Row(features = x, label = 1))
new_data = newData_rdd_new.toDF()
new_data_minor = dataInput_min.unionAll(new_data)
new_data_major = dataInput_maj.sample(False, (float(percentageUnder)/float(100)))
return new_data_major.unionAll(new_data_minor)
dataInput = spark.read.format('csv').options(header='true',inferSchema='true').load("sam.csv").dropna()
SmoteSampling(vectorizerFunction(dataInput, 'Y'), k = 2, minorityClass = 1, majorityClass = 0, percentageOver = 90, percentageUnder = 5)
Scala:
// Import the necessary packages
import org.apache.spark.ml.feature.BucketedRandomProjectionLSH
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.sql.expressions.Window
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.functions.rand
import org.apache.spark.sql.functions._
object smoteClass{
def KNNCalculation(
dataFinal:org.apache.spark.sql.DataFrame,
feature:String,
reqrows:Int,
BucketLength:Int,
NumHashTables:Int):org.apache.spark.sql.DataFrame = {
val b1 = dataFinal.withColumn("index", row_number().over(Window.partitionBy("label").orderBy("label")))
val brp = new BucketedRandomProjectionLSH().setBucketLength(BucketLength).setNumHashTables(NumHashTables).setInputCol(feature).setOutputCol("values")
val model = brp.fit(b1)
val transformedA = model.transform(b1)
val transformedB = model.transform(b1)
val b2 = model.approxSimilarityJoin(transformedA, transformedB, 2000000000.0)
require(b2.count > reqrows, println("Change bucket length or reduce the percentageOver"))
val b3 = b2.selectExpr("datasetA.index as id1",
"datasetA.feature as k1",
"datasetB.index as id2",
"datasetB.feature as k2",
"distCol").filter("distCol>0.0").orderBy("id1", "distCol").dropDuplicates().limit(reqrows)
return b3
}
def smoteCalc(key1: org.apache.spark.ml.linalg.Vector, key2: org.apache.spark.ml.linalg.Vector)={
val resArray = Array(key1, key2)
val res = key1.toArray.zip(key2.toArray.zip(key1.toArray).map(x => x._1 - x._2).map(_*0.2)).map(x => x._1 + x._2)
resArray :+ org.apache.spark.ml.linalg.Vectors.dense(res)}
def Smote(
inputFrame:org.apache.spark.sql.DataFrame,
feature:String,
label:String,
percentOver:Int,
BucketLength:Int,
NumHashTables:Int):org.apache.spark.sql.DataFrame = {
val groupedData = inputFrame.groupBy(label).count
require(groupedData.count == 2, println("Only 2 labels allowed"))
val classAll = groupedData.collect()
val minorityclass = if (classAll(0)(1).toString.toInt > classAll(1)(1).toString.toInt) classAll(1)(0).toString else classAll(0)(0).toString
val frame = inputFrame.select(feature,label).where(label + " == " + minorityclass)
val rowCount = frame.count
val reqrows = (rowCount * (percentOver/100)).toInt
val md = udf(smoteCalc _)
val b1 = KNNCalculation(frame, feature, reqrows, BucketLength, NumHashTables)
val b2 = b1.withColumn("ndtata", md($"k1", $"k2")).select("ndtata")
val b3 = b2.withColumn("AllFeatures", explode($"ndtata")).select("AllFeatures").dropDuplicates
val b4 = b3.withColumn(label, lit(minorityclass).cast(frame.schema(1).dataType))
return inputFrame.union(b4).dropDuplicates
}
}
Source
Maybe this project can be useful for your goal:
Spark SMOTE
But I think that 115 records aren't enough for a random forest. You could use a simpler technique like decision trees.
You can check this answer:
Is Random Forest suitable for very small data sets?
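If plain random per-class oversampling (rather than SMOTE) is enough, a minimal PySpark sketch could look like this; it assumes a DataFrame df with the SubTribe label column from the question, and sample() is approximate, so the resulting counts will not be exact:
from pyspark.sql import functions as F

counts = {r["SubTribe"]: r["count"] for r in df.groupBy("SubTribe").count().collect()}
target = max(counts.values())   # upsample every class towards the largest one

balanced = None
for label, n in counts.items():
    part = df.filter(F.col("SubTribe") == label) \
             .sample(withReplacement=True, fraction=target / n, seed=42)
    balanced = part if balanced is None else balanced.unionAll(part)

balanced.groupBy("SubTribe").count().show()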

How to use multiple GPUs effectively when training deep networks?

I am using a machine which has 2 Titan Black GPUs to train my deep learning model, which has 3 layers (3x3, 3x3 and 5x5).
The training runs pretty well, but when I watch nvidia-smi (watching every 1 sec), I realize that my program uses only one GPU for computation; the second one is always at 0% even when the first one reaches 100%.
I am trying to use tf.device to assign specific tasks to each of them, but then they run one after another, not in parallel, and the total time even increases instead of decreasing (I guess because the 2 GPUs have to exchange values with each other).
Below is my program. It is quite messy; maybe you just need to pay attention to the part of the graph where I use tf.device...
Thank you so much!
import tensorflow as tf
import numpy as np
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt
from os import listdir, sys
from os.path import isfile, join
from time import gmtime, strftime
import time
def validatePath(path):
path = path.replace("\\","/")
if (path[len(path)-1] != "/"):
path = path + "/"
return path
hidden_size_default = np.array([16, 32, 64, 32])
cnn1_default = 3
cnn2_default = 3
cnn3_default = 5
SIZE_BATCH_VALID = 200
input_path = 'ARCHIVES-sub-dataset'
output_path = 'ARCHIVES-model'
log_address = "trainlog.txt"
tf.app.flags.DEFINE_integer('h0', hidden_size_default[0], 'Size of hidden layer 0th')
tf.app.flags.DEFINE_integer('h1', hidden_size_default[1], 'Size of hidden layer 1st')
tf.app.flags.DEFINE_integer('h2', hidden_size_default[2], 'Size of hidden layer 2nd')
tf.app.flags.DEFINE_integer('h3', hidden_size_default[3], 'Size of hidden layer 3rd')
tf.app.flags.DEFINE_integer('k1', cnn1_default , 'Size of kernel 1st')
tf.app.flags.DEFINE_integer('k2', cnn2_default , 'Size of kernel 2nd')
tf.app.flags.DEFINE_integer('k3', cnn3_default , 'Size of kernel 3rd')
tf.app.flags.DEFINE_string('input_path', input_path, 'The parent directory which contains 2 directories: dataset and label')
tf.app.flags.DEFINE_string('output_path', output_path, 'The directory which will store models (you have to create)')
tf.app.flags.DEFINE_string('log_address', log_address, 'The file name which will store the log')
FLAGS = tf.app.flags.FLAGS
load_path = FLAGS.input_path
save_model_path = FLAGS.output_path
log_addr = FLAGS.log_address
load_path = validatePath(load_path)
save_model_path = validatePath(save_model_path)
cnn1 = FLAGS.k1
cnn2 = FLAGS.k2
cnn3 = FLAGS.k3
hidden_size = np.array([FLAGS.h0, FLAGS.h1, FLAGS.h2, FLAGS.h3])
# Shuffle the dataset and its label
def randomize(dataset, labels):
permutation = np.random.permutation(labels.shape[0])
shuffled_dataset = dataset[permutation,:]
shuffled_labels = labels[permutation]
return shuffled_dataset, shuffled_labels
def writemyfile(mystring):
with open(log_addr, "a") as myfile:
myfile.write(str(mystring + "\n"))
num_labels = 5
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/ predictions.shape[0])
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def DivideSets(input_set):
length_set = input_set.shape[0]
index_70 = int(length_set*0.7)
index_90 = int(length_set*0.9)
set_train = input_set[0:index_70]
set_valid = input_set[index_70:index_90]
set_test = input_set[index_90:length_set]
return np.float32(set_train), np.float32(set_valid), np.float32(set_test)
# from 1-value labels to 5 values of (0 and 1)
def LabelReconstruct(label_set):
label_set = label_set.astype(int)
new_label_set = np.zeros(shape=(len(label_set),num_labels))
for i in range(len(label_set)):
new_label_set[i][label_set[i]] = 1
return new_label_set.astype(int)
def LoadDataSet(load_path):
list_data = [f for f in listdir(load_path + "dataset/") if isfile(join(load_path + "dataset/", f))]
list_label = [f for f in listdir(load_path + "label/") if isfile(join(load_path + "dataset/", f))]
if list_data.sort() == list_label.sort():
return list_data
else:
print("data and labels are not suitable")
return 0
# load, randomize, normalize images and reconstruct labels
def PrepareData(*arg):
filename = arg[0]
loaded_dataset = pickle.load( open( load_path + "dataset/" + filename, "rb" ))
loaded_labels = pickle.load( open( load_path + "label/" + filename, "rb" ))
if len(arg) == 1:
datasize = len(loaded_labels)
elif len(arg) == 2:
datasize = int(arg[1])
else:
print("not more than 2 arguments please!")
dataset_full,labels_full = randomize(loaded_dataset[0:datasize], loaded_labels[0:datasize])
return NormalizeData(dataset_full), LabelReconstruct(labels_full)
def NormalizeData(dataset):
dataset = dataset - (dataset.mean())
dataset = dataset / (dataset.std())
return dataset
### LOAD DATA
listfiles = LoadDataSet(load_path)
# divide
listfiles_train = listfiles[0:15]
listfiles_valid = listfiles[15:25]
listfiles_test = listfiles[25:len(listfiles)]
graphCNN = tf.Graph()
with graphCNN.as_default():
with tf.device('/gpu:0'):
x = tf.placeholder(tf.float32, shape=(None, 224,224,3)) # X
y_ = tf.placeholder(tf.float32, shape=(None, num_labels)) # Y_
dropout = tf.placeholder(tf.float32)
if dropout == 1.0:
keep_prob = tf.constant([0.2, 0.3, 0.5], dtype=tf.float32)
else:
keep_prob = tf.constant([1.0, 1.0, 1.0], dtype=tf.float32)
weights_1 = weight_variable([cnn1,cnn1,3, hidden_size[0]])
biases_1 = bias_variable([hidden_size[0]])
weights_2 = weight_variable([cnn2,cnn2,hidden_size[0], hidden_size[1]])
biases_2 = bias_variable([hidden_size[1]])
weights_3 = weight_variable([cnn3,cnn3,hidden_size[1], hidden_size[2]])
biases_3 = bias_variable([hidden_size[2]])
weights_4 = weight_variable([56 * 56 * hidden_size[2], hidden_size[3]])
biases_4 = bias_variable([hidden_size[3]])
weights_5 = weight_variable([hidden_size[3], num_labels])
biases_5 = bias_variable([num_labels])
def model(data):
with tf.device('/gpu:1'):
train_hidden_1 = tf.nn.relu(conv2d(data, weights_1) + biases_1)
train_hidden_2 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_1, weights_2) + biases_2))
train_hidden_2_drop = tf.nn.dropout(train_hidden_2, keep_prob[0])
train_hidden_3 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_2_drop, weights_3) + biases_3))
train_hidden_3_drop = tf.nn.dropout(train_hidden_3, keep_prob[1])
train_hidden_3_drop = tf.reshape(train_hidden_3_drop,[-1, 56 * 56 * hidden_size[2]])
train_hidden_4 = tf.nn.relu(tf.matmul(train_hidden_3_drop, weights_4) + biases_4)
train_hidden_4_drop = tf.nn.dropout(train_hidden_4, keep_prob[2])
logits = tf.matmul(train_hidden_4_drop, weights_5) + biases_5
return logits
t_train_labels = tf.argmax(y_, 1) # From one-hot (one and zeros) vectors to values
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model(x), labels=t_train_labels))
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
y = tf.nn.softmax(model(x))
### RUNNING
print("log address: %s" % (log_addr))
#num_steps = 10001
times_repeat = 20 # number of epochs
batch_size = 100
with tf.Session(graph=graphCNN,config=tf.ConfigProto(log_device_placement=True)) as session:
tf.initialize_all_variables().run()
saver = tf.train.Saver(max_to_keep=0)
writemyfile("---ARCHIVES_M1----")
mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
writemyfile(str("\nTime: %s \nLayers: %d,%d,%d \epochs: %d" % (mytime,cnn1,cnn2,cnn3,times_repeat)))
writemyfile("Train files:" + str(listfiles_train))
writemyfile("Valid files:" + str(listfiles_valid))
writemyfile("Test files:" + str(listfiles_test))
print("Model will be saved in file: %s" % save_model_path)
writemyfile(str("Model will be saved in file: %s" % save_model_path))
### TRAINING & VALIDATION
valid_accuracies_epochs = np.array([])
for time_repeat in range(times_repeat):
print("- time_repeat:",time_repeat)
writemyfile("- time_repeat:"+str(time_repeat))
for file_train in listfiles_train:
file_train_id = int(file_train[0:len(file_train)-4])
time_start_this_file = time.time()
#LOAD DATA
print("- - file:",file_train_id, end=' ')
writemyfile("- - file:" + str(file_train_id))
Data_train, Label_train= PrepareData(file_train)
for step in range(0,len(Data_train)-batch_size,batch_size):
batch_data = Data_train[step:step+batch_size]
batch_labels = Label_train[step:step+batch_size]
feed_dict = {x : batch_data, y_ : batch_labels, dropout: 1.0}
opti, l, predictions = session.run([optimizer, loss, y], feed_dict=feed_dict)
train_accuracies = np.array([])
for index_tr_accu in range(0,len(Data_train)-SIZE_BATCH_VALID,SIZE_BATCH_VALID):
current_predictions = y.eval(feed_dict={x: Data_train[index_tr_accu:index_tr_accu+SIZE_BATCH_VALID],dropout: 0.0})
current_accuracy = accuracy(current_predictions, Label_train[index_tr_accu:index_tr_accu+SIZE_BATCH_VALID])
train_accuracies = np.r_[train_accuracies,current_accuracy]
train_accuracy = train_accuracies.mean()
print("batch accu: %.2f%%" %(train_accuracy),end=" | ")
writemyfile("batch accu: %.2f%%" %(train_accuracy))
time_done_this_file = time.time() - time_start_this_file
print("time: %.2fs" % (time_done_this_file))
writemyfile("time: %.2fs" % (time_done_this_file))
# save model
model_addr = save_model_path + "model335" + "-epoch-" + str(time_repeat) + ".ckpt"
save_path = saver.save(session, model_addr,) # max_to_keep default was 5
mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
print("epoch finished at %s \n model address: %s" % (mytime,model_addr))
writemyfile("epoch finished at %s \n model address: %s" % (mytime,model_addr))
# validation
valid_accuracies = np.array([])
for file_valid in listfiles_valid:
file_valid_id = int(file_valid[0:len(file_valid)-4])
Data_valid, Label_valid = PrepareData(file_valid)
for index_vl_accu in range(0,len(Data_valid)-SIZE_BATCH_VALID,SIZE_BATCH_VALID):
current_predictions = y.eval(feed_dict={x: Data_valid[index_vl_accu:index_vl_accu+SIZE_BATCH_VALID],dropout: 0.0})
current_accuracy = accuracy(current_predictions, Label_valid[index_vl_accu:index_vl_accu+SIZE_BATCH_VALID])
valid_accuracies = np.r_[valid_accuracies,current_accuracy]
valid_accuracy = valid_accuracies.mean()
print("epoch %d - valid accu: %.2f%%" %(time_repeat,valid_accuracy))
writemyfile("epoch %d - valid accu: %.2f%%" %(time_repeat,valid_accuracy))
valid_accuracies_epochs = np.hstack([valid_accuracies_epochs,valid_accuracy])
print('Done!!')
writemyfile(str('Done!!'))
session.close()
Update: I found that cifar10_multi_gpu_train.py seems to be a good example for training with multiple GPUs, but honestly I don't know how to apply it to my case.
I think you need to change
def model(data):
with tf.device('/gpu:1'):
to:
def model(data):
for d in ['/gpu:0', '/gpu:1']:
with tf.device(d):
and ditch the line with tf.device('/gpu:0'):
Since in the first with tf.device... you are only doing initialization
of variables, and then you are resetting your devices with the next with tf.device.
Let me know if this works since I can't test it.
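For what it's worth, here is a minimal sketch of the data-parallel "tower" pattern that cifar10_multi_gpu_train.py uses, assuming TensorFlow 1.x as in the question; build_tower is a toy stand-in for the real model, not the question's code:
import tensorflow as tf

def build_tower(x):
    # toy model; AUTO_REUSE shares the weights between the two towers
    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        h = tf.layers.dense(x, 64, activation=tf.nn.relu)
        return tf.layers.dense(h, 5)

x = tf.placeholder(tf.float32, [None, 224 * 224 * 3])
y_ = tf.placeholder(tf.int64, [None])
optimizer = tf.train.AdamOptimizer(0.001)

tower_grads, tower_losses = [], []
for i, (xs, ys) in enumerate(zip(tf.split(x, 2), tf.split(y_, 2))):
    with tf.device("/gpu:%d" % i):   # each GPU gets half of the batch
        logits = build_tower(xs)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ys, logits=logits))
        tower_losses.append(loss)
        tower_grads.append(optimizer.compute_gradients(loss))

with tf.device("/cpu:0"):            # average the per-tower gradients in one place
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = tf.stack([g for g, _ in grads_and_vars])
        averaged.append((tf.reduce_mean(grads, axis=0), grads_and_vars[0][1]))
    train_op = optimizer.apply_gradients(averaged)
    total_loss = tf.add_n(tower_losses) / 2.0

# run train_op in a session created with allow_soft_placement=True, feeding the full batch
The key point is that the whole forward/backward pass of each half-batch lives on its own GPU, while the variables and the gradient averaging stay in one place.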

Language Modelling with RNN and LSTM Cell in Tensorflow

My RNN for language modelling is predicting only "the", "and" and "unknown". What's wrong with my code?
Here I define the hyperparameters:
num_epochs = 300
total_series_length = len(uniqueSentence) - 4
truncated_backprop_length = 30
state_size = 100
num_classes = NUM_MEANINGFUL + 1
echo_step = 1
batch_size = 32
vocab_length = len(decoder)
num_batches = total_series_length//batch_size//truncated_backprop_length
learning_rate = 0.01
old_perplexity = 0
Here I generate the data (my input is given by word embeddings of length 100, computed with Word2Vec):
def generateData():
uniqueSent = uniqueSentence[0 : len(uniqueSentence) - 4]
x_tr = np.array([model_ted[word] for words in uniqueSent])
#Roll array elements along a given axis.
#Elements that roll beyond the last position are re-introduced at the first.
x_tr = x_tr.reshape((100, batch_size, -1)) # The first index changing slowest, subseries as rows
x = x_tr.transpose((1, 2, 0))
print("hi")
new_y = indexList[1: len(indexList)- 4]
new_y.append(indexList[len(indexList)-3])
y = np.array(new_y)
print(len(y))
y = y.reshape((batch_size, -1))
return (x, y)
Define the placeholders:
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length, 100])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])
W = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
Inputs and desired outputs:
labels_series = tf.transpose(batchY_placeholder)
labels_series = tf.unstack(batchY_placeholder, axis=1)
inputs_series = batchX_placeholder
Forward pass:
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
print(tf.__version__)
#cell = tf.contrib.rnn.BasicRNNCell(state_size)
cell = tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple = False)
print(cell.state_size)
init_state = tf.zeros([batch_size, cell.state_size])
outputs, current_state = tf.nn.dynamic_rnn(cell, inputs_series, initial_state = init_state)
iterable_outputs = tf.unstack(outputs, axis = 1)
logits_series = [tf.matmul(state, W2) + b2 for state in iterable_outputs] #Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]
losses = [tf.losses.sparse_softmax_cross_entropy(labels, logits)
for logits, labels in zip(logits_series, labels_series)]
total_loss = tf.add_n(losses)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
x,y = generateData()
del(model_ted)
Training:
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
sess.run(tf.initialize_all_variables())
loss_list = []
print("start")
_current_state = np.zeros((batch_size, 2*state_size))
# I had generateData outside and _current_state inside
for epoch_idx in range(num_epochs):
print("New data, epoch", epoch_idx)
for batch_idx in range(num_batches):
start_idx = batch_idx * truncated_backprop_length
end_idx = start_idx + truncated_backprop_length
batchX = x[:,start_idx:end_idx,:]
batchY = y[:,start_idx:end_idx]
_total_loss, _train_step, _current_state, _predictions_series = sess.run(
[total_loss, train_step, current_state, predictions_series],
feed_dict={
batchX_placeholder:batchX,
batchY_placeholder:batchY,
init_state:_current_state
})
loss_list.append(_total_loss)
del(batchX)
del(batchY)
perplexity = 2 ** (_total_loss/truncated_backprop_length )
print(perplexity)
del(perplexity)
_predictions_series = np.array(_predictions_series)
pr = _predictions_series.transpose([1, 0, 2])
pr_ind = []
for line in pr[0]:
pr_ind.append(np.argmax(line))
for index in pr_ind:
print(decoder[index], end = " " )
del(pr_ind)
print("\n learning rate: ", end = " ")
print(learning_rate)

LinearRegressionWithSGD() returns NaN

I am trying to use LinearRegressionWithSGD on the Million Song Dataset and my model returns NaNs as weights and 0.0 as the intercept. What might be the cause of the error? I am using Spark 1.4.0 in standalone mode.
Sample data: http://www.filedropper.com/part-00000
Here is my full code:
// Import Dependencies
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.mllib.regression.LinearRegressionModel
import org.apache.spark.mllib.regression.GeneralizedLinearAlgorithm
import org.apache.spark.mllib.regression.LinearRegressionWithSGD
// Define RDD
val data =
sc.textFile("/home/naveen/Projects/millionSong/YearPredictionMSD.txt")
// Convert to Labelled Point
def parsePoint (line: String): LabeledPoint = {
val x = line.split(",")
val head = x.head.toDouble
val tail = Vectors.dense(x.tail.map(x => x.toDouble))
return LabeledPoint(head,tail)
}
// Find Range
val parsedDataInit = data.map(x => parsePoint(x))
val onlyLabels = parsedDataInit.map(x => x.label)
val minYear = onlyLabels.min()
val maxYear = onlyLabels.max()
// Shift Labels
val parsedData = parsedDataInit.map(x => LabeledPoint(x.label-minYear
, x.features))
// Training, validation, and test sets
val splits = parsedData.randomSplit(Array(0.8, 0.1, 0.1), seed = 123)
val parsedTrainData = splits(0).cache()
val parsedValData = splits(1).cache()
val parsedTestData = splits(2).cache()
val nTrain = parsedTrainData.count()
val nVal = parsedValData.count()
val nTest = parsedTestData.count()
// RMSE
def squaredError(label: Double, prediction: Double): Double = {
return scala.math.pow(label - prediction,2)
}
def calcRMSE(labelsAndPreds: RDD[List[Double]]): Double = {
return scala.math.sqrt(labelsAndPreds.map(x =>
squaredError(x(0),x(1))).mean())
}
val numIterations = 100
val stepSize = 1.0
val regParam = 0.01
val regType = "L2"
val algorithm = new LinearRegressionWithSGD()
algorithm.optimizer
.setNumIterations(numIterations)
.setStepSize(stepSize)
.setRegParam(regParam)
val model = algorithm.run(parsedTrainData)
I am not familiar with this specific implementation of SGD, but generally if a gradient descent solver goes to NaN it means that the learning rate is too big (in this case I think it is the stepSize variable).
Try lowering it by an order of magnitude each time until it starts to converge.
I can think of two possibilities:
1. stepSize is too big. You should try something like 0.01, 0.03, 0.1, 0.3, 1.0, 3.0...
2. Your training data contain NaN. If so, the result will likely be NaN.
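As a sketch of that advice (rewritten in PySpark for brevity; parsedTrainData and parsedValData are assumed to be RDDs of LabeledPoint built the same way as in the Scala code above):
import math
from pyspark.mllib.regression import LinearRegressionWithSGD

def rmse(model, data):
    # (label, prediction) pairs -> root mean squared error
    preds = data.map(lambda p: (p.label, model.predict(p.features)))
    return math.sqrt(preds.map(lambda lp: (lp[0] - lp[1]) ** 2).mean())

for step in [0.01, 0.03, 0.1, 0.3, 1.0]:
    model = LinearRegressionWithSGD.train(parsedTrainData, iterations=100,
                                          step=step, regParam=0.01, regType="l2")
    if any(math.isnan(w) for w in model.weights):
        print("step=%g diverged (NaN weights)" % step)
    else:
        print("step=%g  validation RMSE = %.3f" % (step, rmse(model, parsedValData)))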
