my LSTM time-series forecast predictions have a shift to the future - time-series

I am trying to make predcitions on a time-series data (univariate) using Tensorflow. However, when I see the results, the predictions have a time shift (please see the figure). Anyone ideas what the problem is?
Below is the function I use to prepare the set of inputs and outputs (X and Y) as well as normalizing the data
# define a fucntion to prepare the X and Y for training the model
def df_to_X_y_group(df_as_np, window_size, num): # input is numpy array. j_start and j_end show the features, num is the number of wavelength in a group
X=[] # make an empty list
y=[]
for j in range(0,df_as_np.shape[1]-num,num): #(start, stop, step). defining a window over features of size num wihtout any overlaps
for i in range(len(df_as_np)-window_size): # there is overlap between windows in time
row=[a for a in df_as_np[i:i+window_size, j:j+num]]
X.append(row)
label=[r for r in df_as_np[i+window_size, j:j+num]]
y.append(label)
return np.array(X), np.array(y)
# convert the data to X and y format
window_size_group=50
output_group=2
num=2
X_group , y_group= df_to_X_y_group(df_as_np, window_size_group, num) # 70085 rows (number of training samples), 6 is the size of the window (number of time steps) and 5 is the number of features
X_group.shape , y_group.shape
# Split the data to train, val and test by 70,20 and 10% of the data
X_train_group , y_train_group = X_group[:int(.7 * X_group.shape[0])], y_group[:int(.7 * X_group.shape[0])]
X_val_group , y_val_group = X_group[int(.7 * X_group.shape[0]):int(.9 * X_group.shape[0])] , y_group[int(.7 * X_group.shape[0]):int(.9 * X_group.shape[0])]
X_test_group , y_test_group = X_group[int(.9 * X_group.shape[0]):] , y_group[int(.9 * X_group.shape[0]):]
# Normalize the data
X_train_mean_group = np.mean(X_train_group)
X_train_std_group = np.std(X_train_group)
def preprocess_group(X):
X_norm = (X - X_train_mean_group) / X_train_std_group
return X_norm
# Now convert the data back to before normalization
def postprocess_group(pred):
actual = (pred * X_train_std_group) + X_train_mean_group
return actual
# apply the preprocess to the inputs and outputs
X_train_norm_group=preprocess_group(X_train_group)
X_val_norm_group=preprocess_group(X_val_group)
X_test_norm_group=preprocess_group(X_test_group)
y_train_norm_group=preprocess_group(y_train_group)
y_val_norm_group=preprocess_group(y_val_group)
y_test_norm_group=preprocess_group(y_test_group)
X_train_group.shape , y_train_group.shape , X_val_group.shape , y_val_group.shape , X_test_group.shape , y_test_group .shape
Here is the model:
# Build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint # to save the model
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.losses import Huber
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
model_group=Sequential()
model_group.add(InputLayer((X_group.shape[1],X_group.shape[2]))) # size of the input (windows (time steps) of size 6 and 1024 features )
model_group.add(LSTM(128))
model_group.add(Dense(20, 'relu'))
model_group.add(Dense(output_group))
model_group.summary()
# Train the model
cp2=ModelCheckpoint('model/group/overlapnum2' , save_best_only=True) # it saves the best model based on val error
model_group.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])
model_group.fit(X_train_norm_group, y_train_norm_group, validation_data=(X_val_norm_group, y_val_norm_group), epochs=15, , callbacks=[cp2])
Below is the function to predict and plot the data:
# We make a function which predict and plot
from sklearn.metrics import mean_squared_error as mse
def plot_predictions(model, X, y, start=0, end=100):
predictions = model.predict(X[start:end])
predictions_actual=postprocess(predictions)
y_actual=postprocess(y[start:end])
plt.plot(predictions_actual, c='r', label='Predictions')
plt.plot(y_actual, label= 'Actuals')
plt.xlabel("Order of the data (TIme)")
plt.ylabel("Number of Photons")
plt.legend() # so the label shows up
return mse(y_actual, predictions_actual)
# plot
plot_predictions(model, X_test_norm, y_test_norm, start=0, end=300)
Here is the predcitions results which are shifted in time (for the true results, y(t) should be replaced with y(t+1)):
enter image description here
I have tried tuning several hyperparameters such as batch size, window size, or adding layers but none of these helped.

Related

PyTorch - scaling data for training and then rescaling results back

I am working on an autoencoder network using pytorch. I have a dataset of rows that have 10 columns each containing values in roughly [-0.2, 0.2].
Since all builtin function for automated data preparation I know about work for images and other data types, I assume I have to rescale these into [0, 1] range myself, train the network and then scale every result back into the original dataset's size scale.
The scaling algorithm I used was (input is scaled data for training, output is result of network):
input -= min(data)
input /= max(input)
output *= (abs(min(data)) + max(data)) //last division was by "shifted" max
output += min(data)
Here is an actual code:
class AirfoilDataset(torch.utils.data.Dataset):
def __init__(self, data):
self.airfoils = np.copy(data)
self.airfoils -= self.airfoils.min()
self.airfoils /= self.airfoils.max()
def __len__(self):
return len(self.airfoils)
def __getitem__(self, idx):
return torch.from_numpy(self.airfoils[idx]), idx
class Autoencoder(torch.nn.Module):
def __init__(self):
super().__init__()
self.encoder = torch.nn.Sequential(
torch.nn.Linear(10, 5),
torch.nn.Sigmoid()
)
self.decoder = torch.nn.Sequential(
torch.nn.Linear(5, 10),
torch.nn.Sigmoid()
)
def forward(self, x):
x = self.encoder(x)
x = self.decoder(x)
return x
The results I get from this are really bad, but somehow deformed (don't know the proper terminology). It visibly follows the shape of the original dataset, but really badly.
On the other hand, if I don't scale the data put into training, the positive range of original dataset is represented perfectly by the autoencoder, without distortions. Obviously, the negative part is reduced to zero.
How to preserve "shape" of input dataset through training?
You can use sklearn for that
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit_transform(input)
#here your model
output = scaler.inverse_transform(output)

How to feed previous time-stamp prediction as additional input to the next time-stamp?

This question might have been asked, but I got confused.
I am trying to apply one of RNN types, e.g. LSTM for time-series forecasting. I have inputs, y (stock returns). For each timestamp, I'd like to get the predictions. Q1 - Am I correct choosing seq2seq approach?
I also want to use predictions from previous timestamp (initializing initial values with some constant) as additional (still using my existing inputs) input in the form of squared residuals, i.e. using
eps_{t-1} = (y_{t-1} - y^_{t-1})^2 as additional input at t (as well as previous inputs).
So, how can I do this in tensorflow or in pytorch?
I tried to depict what I want on the attached graph. The graph
p.s. Sorry, it the question is poorly formulated
Let say your input if of dimension (32,10,1) with batch_size 32, time steps of length 10 and dimension of 1. Same for your target (stock return). This code make use of the tf.scan function, which is usefull when implementing custom recurrent networks (it will iterate over the timesteps). It remains to use the residual of t-1 in t somewhere, as you would like to.
ps: it is a very basic implementation of lstm from scratch, without any bias or output activation.
import tensorflow as tf
import numpy as np
tf.reset_default_graph()
BS = 32
TS = 10
inputs_dim = 1
target_dim = 1
inputs = tf.placeholder(shape=[BS, TS, inputs_dim], dtype=tf.float32)
stock_returns = tf.placeholder(shape=[BS, TS, target_dim], dtype=tf.float32)
state_size = 16
# initial hidden state
init_state = tf.placeholder(shape=[2, BS, state_size],
dtype=tf.float32, name='initial_state')
# initializer
xav_init = tf.contrib.layers.xavier_initializer
# params
W = tf.get_variable('W', shape=[4, state_size, state_size],
initializer=xav_init())
U = tf.get_variable('U', shape=[4, inputs_dim, state_size],
initializer=xav_init())
W_out = tf.get_variable('W_out', shape=[state_size, target_dim],
initializer=xav_init())
#the function to feed tf.scan with
def step(prev, inputs_):
#unpack all inputs and previous outputs
st_1, ct_1 = prev[0][0], prev[0][1]
x = inputs_[0]
target = inputs_[1]
#get previous squared residual
eps = prev[1]
"""
here do whatever you want with eps_t-1
like x += eps if x if of the same dimension
or include it somewhere in your graph
"""
# lstm gates (add bias if needed)
#
# input gate
i = tf.sigmoid(tf.matmul(x,U[0]) + tf.matmul(st_1,W[0]))
# forget gate
f = tf.sigmoid(tf.matmul(x,U[1]) + tf.matmul(st_1,W[1]))
# output gate
o = tf.sigmoid(tf.matmul(x,U[2]) + tf.matmul(st_1,W[2]))
# gate weights
g = tf.tanh(tf.matmul(x,U[3]) + tf.matmul(st_1,W[3]))
ct = ct_1*f + g*i
st = tf.tanh(ct)*o
"""
make prediction, compute residual in t
and pass it to t+1
Normaly, we would compute prediction outside the scan function,
but as we need it here, we could just keep it and return it back
as an output of the scan function
"""
prediction_t = tf.matmul(st, W_out) # + bias
eps = (target - prediction_t)**2
return [tf.stack((st, ct), axis=0), eps, prediction_t]
states, eps, preds = tf.scan(step, [tf.transpose(inputs, [1,0,2]),
tf.transpose(stock_returns, [1,0,2])], initializer=[init_state,
tf.zeros((32,1), dtype=tf.float32),
tf.zeros((32,1),dtype=tf.float32)])
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
out = sess.run(preds, feed_dict=
{inputs:np.random.rand(BS,TS,inputs_dim),
stock_returns:np.random.rand(BS,TS,target_dim),
init_state:np.zeros((2,BS,state_size))})
out = tf.transpose(out,[1,0,2])
print(out)
And the output :
Tensor("transpose_2:0", shape=(32, 10, 1), dtype=float32)
Base code from here

keras neural network predicts the same number for every handwritten digit

I am new to machine learning so as a first project I've tried to built a handwritten digit recognition neural network based on the mnist dataset and when I test it with the test images provided by the data set itself it seems to work pretty well (that's what the function test_predict is for). Now I would like to step it up and have the network recognise some actual handwritten digits that I've taken photos of.
The function partial_img_rec takes on an image containing multiple digits and it will be called by multiple_digits. I know it might seem weird that I use recursion here and I'm sure there are some more efficient ways to do this but that's not the matter. In order to test partial_img_rec I provide some photos of individual digits that are stored in the folder .\individual_test and they all look something like this:
The problem is: My neural network's prediction for every single one of my test images is "5". The probability is always about 22% no matter the actual digit displayed. I totally get why the results are not as great as those achieved with the mnist dataset's test images but I certainly didn't expect this. Do you have any idea why this is happening? Any advise is welcome.
Thank you in advance.
Here's my code (edited, now working):
# import keras and the MNIST dataset
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.utils import np_utils
# numpy is necessary since keras uses numpy arrays
import numpy as np
# imports for pictures
from PIL import Image
from PIL import ImageOps
# imports for tests
import random
import os
class mnist_network():
def __init__(self):
""" load data, create and train model """
# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# flatten 28*28 images to a 784 vector for each image
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape((X_train.shape[0], num_pixels)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], num_pixels)).astype('float32')
# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255
# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
# create model
self.model = Sequential()
self.model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
self.model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
# Compile model
self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# train the model
self.model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
self.train_img = X_train
self.train_res = y_train
self.test_img = X_test
self.test_res = y_test
def test_all(self):
""" evaluates the success rate using all the test data """
scores = self.model.evaluate(self.test_img, self.test_res, verbose=0)
print("Baseline Error: %.2f%%" % (100-scores[1]*100))
def predict_result(self, img, num_pixels = None, show=False):
""" predicts the number in a picture (vector) """
assert type(img) == np.ndarray and img.shape == (784,)
"""if show:
# show the picture!!!! some problem here
plt.imshow(img, cmap='Greys')
plt.show()"""
num_pixels = img.shape[0]
# the actual number
res_number = np.argmax(self.model.predict(img.reshape(-1,num_pixels)), axis = 1)
# the probabilities
res_probabilities = self.model.predict(img.reshape(-1,num_pixels))
return (res_number[0], res_probabilities.tolist()[0]) # we only need the first element since they only have one
def test_predict(self, amount_test = 100):
""" test some random numbers from the test part of the data set """
assert type(amount_test) == int and amount_test <= 10000
cnt_right = 0
cnt_wrong = 0
for i in range(amount_test):
ind = random.randrange(0,10000) # there are 10000 images in the test part of the data set
""" correct_res is the actual result stored in the data set
It's represented as a list of 10 elements one of which being 1, the rest 0 """
correct_list = self.test_res.tolist()
correct_list = correct_list[ind] # the correct sublist
correct_res = correct_list.index(1.0)
predicted_res = self.predict_result(self.test_img[ind])[0]
if correct_res != predicted_res:
cnt_wrong += 1
print("Error in predict ! \
index = ", ind, " predicted result = ", predicted_res, " correct result = ", correct_res)
else:
cnt_right += 1
print("The machine predicted correctly ",cnt_right," out of ",amount_test," examples. That is a success rate of ", (cnt_right/amount_test)*100,"%.")
def partial_img_rec(self, image, upper_left, lower_right, results=[]):
""" partial is a part of an image """
left_x, left_y = upper_left
right_x, right_y = lower_right
print("current test part: ", upper_left, lower_right)
print("results: ", results)
# condition to stop recursion: we've reached the full width of the picture
width, height = image.size
if right_x > width:
return results
partial = image.crop((left_x, left_y, right_x, right_y))
# rescale image to 28 *28 dimension
partial = partial.resize((28,28), Image.ANTIALIAS)
partial.show()
# transform to vector
partial = ImageOps.invert(partial)
partial = np.asarray(partial, "float32")
partial = partial / 255.
partial[partial < 0.5] = 0.
# flatten image to 28*28 = 784 vector
num_pixels = partial.shape[0] * partial.shape[1]
partial = partial.reshape(num_pixels)
step = height // 10
# is there a number in this part of the image?
res, prop = self.predict_result(partial)
print("result: ", res, ". probabilities: ", prop)
# only count this result if the network is >= 50% sure
if prop[res] >= 0.5:
results.append(res)
# step is 80% of the partial image's size (which is equivalent to the original image's height)
step = int(height * 0.8)
print("found valid result")
else:
# if there is no number found we take smaller steps
step = height // 20
print("step: ", step)
# recursive call with modified positions ( move on step variables )
return self.partial_img_rec(image, (left_x+step, left_y), (right_x+step, right_y), results=results)
def test_individual_digits(self):
""" test partial_img_rec with some individual digits (square shaped images)
saved in the folder 'individual_test' following the pattern 'number_digit.jpg' """
cnt_right, cnt_wrong = 0,0
folder_content = os.listdir(".\individual_test")
for imageName in folder_content:
# image file must be a jpg or png
assert imageName[-4:] == ".jpg" or imageName[-4:] == ".png"
correct_res = int(imageName[0])
image = Image.open(".\\individual_test\\" + imageName).convert("L")
# only square images in this test
if image.size[0] != image.size[1]:
print(imageName, " has the wrong proportions: ", image.size,". It has to be a square.")
continue
predicted_res = self.partial_img_rec(image, (0,0), (image.size[0], image.size[1]), results=[])
if predicted_res == []:
print("No prediction possible for ", imageName)
else:
predicted_res = predicted_res[0]
if predicted_res != correct_res:
print("error in partial_img-rec! Predicted ", predicted_res, ". The correct result would have been ", correct_res)
cnt_wrong += 1
else:
cnt_right += 1
print("correctly predicted ",imageName)
print(cnt_right, " out of ", cnt_right + cnt_wrong," digits were correctly recognised. The success rate is therefore ", (cnt_right / (cnt_right + cnt_wrong)) * 100," %.")
def multiple_digits(self, img):
""" takes as input an image without unnecessary whitespace surrounding the digits """
#assert type(img) == myImage
width, height = img.size
# start with the first quadratic part of the image
res_list = self.partial_img_rec(img, (0,0),(height ,height))
res_str =""
for elem in res_list:
res_str += str(elem)
return res_str
network = mnist_network()
network.test_individual_digits()
EDIT
#Geecode's answer was very helpful and the network now predicts correctly some of the pictures including the one shown above. Yet the overall success rate is lower than 50%. Do you have any ideas how to improve this?
Examples for images returning bad results:
Nothing wrong with your image in itself, your model can correctly classify it.
The issue is that you made a Floor Division on your partial:
partial = partial // 255
which always results in 0. So you always get a black image.
You have to do a "normal" division and some preparation, because your model was trained on black i.e. 0. valued pixel backgrounded negative images:
# transform to vector
partial = ImageOps.invert(partial)
partial = np.asarray(partial, "float32")
partial = partial / 255.
partial[partial < 0.5] = 0.
After then your model will classify correctly:
Out:
result: 1 . probabilities: [0.000431705528171733, 0.7594985961914062, 0.0011404436081647873, 0.00018972357793245465, 0.03162384033203125, 0.008697531186044216, 0.0014472954208031297, 0.18429973721504211, 0.006838776171207428, 0.005832481198012829]
found valid result
Note, that of course you can play on the image preparation yet, that was not the purpose of this answer.
Update:
My detailed answer regarding how to achive better performance in this task, see here.

MXNet - application of GANs to MNIST

So this question is about GANs.
I am trying to do a trivial example for my own proof of concept; namely, generate images of hand written digits (MNIST). While most will approach this via deep convolutional gans (dgGANs), I am just trying to achieve this via the 1D array (i.e. instead of 28x28 gray-scale pixel values, a 28*28 1d array).
This git repo features a "vanilla" gans which treats the MNIST dataset as a 1d array of 784 values. Their output values look pretty acceptable so I wanted to do something similar.
Import statements
from __future__ import print_function
import matplotlib as mpl
from matplotlib import pyplot as plt
import mxnet as mx
from mxnet import nd, gluon, autograd
from mxnet.gluon import nn, utils
import numpy as np
import os
from math import floor
from random import random
import time
from datetime import datetime
import logging
ctx = mx.gpu()
np.random.seed(3)
Hyper parameters
batch_size = 100
epochs = 100
generator_learning_rate = 0.001
discriminator_learning_rate = 0.001
beta1 = 0.5
latent_z_size = 100
Load data
mnist = mx.test_utils.get_mnist()
# convert imgs to arrays
flattened_training_data = mnist["test_data"].reshape(10000, 28*28)
define models
G = nn.Sequential()
with G.name_scope():
G.add(nn.Dense(300, activation="relu"))
G.add(nn.Dense(28 * 28, activation="tanh"))
D = nn.Sequential()
with D.name_scope():
D.add(nn.Dense(128, activation="relu"))
D.add(nn.Dense(64, activation="relu"))
D.add(nn.Dense(32, activation="relu"))
D.add(nn.Dense(2, activation="tanh"))
loss = gluon.loss.SoftmaxCrossEntropyLoss()
init stuff
G.initialize(mx.init.Normal(0.02), ctx=ctx)
D.initialize(mx.init.Normal(0.02), ctx=ctx)
trainer_G = gluon.Trainer(G.collect_params(), 'adam', {"learning_rate": generator_learning_rate, "beta1": beta1})
trainer_D = gluon.Trainer(D.collect_params(), 'adam', {"learning_rate": discriminator_learning_rate, "beta1": beta1})
metric = mx.metric.Accuracy()
dynamic plot (for juptyer notebook)
import matplotlib.pyplot as plt
import time
def dynamic_line_plt(ax, y_data, colors=['r', 'b', 'g'], labels=['Line1', 'Line2', 'Line3']):
x_data = []
y_max = 0
y_min = 0
x_min = 0
x_max = 0
for y in y_data:
x_data.append(list(range(len(y))))
if max(y) > y_max:
y_max = max(y)
if min(y) < y_min:
y_min = min(y)
if len(y) > x_max:
x_max = len(y)
ax.set_ylim(y_min, y_max)
ax.set_xlim(x_min, x_max)
if ax.lines:
for i, line in enumerate(ax.lines):
line.set_xdata(x_data[i])
line.set_ydata(y_data[i])
else:
for i in range(len(y_data)):
l = ax.plot(x_data[i], y_data[i], colors[i], label=labels[i])
ax.legend()
fig.canvas.draw()
train
stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
logging.basicConfig(level=logging.DEBUG)
# arrays to store data for plotting
loss_D = nd.array([0], ctx=ctx)
loss_G = nd.array([0], ctx=ctx)
acc_d = nd.array([0], ctx=ctx)
labels = ['Discriminator Loss', 'Generator Loss', 'Discriminator Acc.']
%matplotlib notebook
fig, ax = plt.subplots(1, 1)
ax.set_xlabel('Time')
ax.set_ylabel('Loss')
dynamic_line_plt(ax, [loss_D.asnumpy(), loss_G.asnumpy(), acc_d.asnumpy()], labels=labels)
for epoch in range(epochs):
tic = time.time()
data_iter.reset()
for i, batch in enumerate(data_iter):
####################################
# Update Disriminator: maximize log(D(x)) + log(1-D(G(z)))
####################################
# extract batch of real data
data = batch.data[0].as_in_context(ctx)
# add noise
# Produce our noisey input to the generator
latent_z = mx.nd.random_normal(0,1,shape=(batch_size, latent_z_size), ctx=ctx)
# soft and noisy labels
# real_label = mx.nd.ones((batch_size, ), ctx=ctx) * nd.random_uniform(.7, 1.2, shape=(1)).asscalar()
# fake_label = mx.nd.ones((batch_size, ), ctx=ctx) * nd.random_uniform(0, .3, shape=(1)).asscalar()
# real_label = nd.random_uniform(.7, 1.2, shape=(batch_size), ctx=ctx)
# fake_label = nd.random_uniform(0, .3, shape=(batch_size), ctx=ctx)
real_label = mx.nd.ones((batch_size, ), ctx=ctx)
fake_label = mx.nd.zeros((batch_size, ), ctx=ctx)
with autograd.record():
# train with real data
real_output = D(data)
errD_real = loss(real_output, real_label)
# train with fake data
fake = G(latent_z)
fake_output = D(fake.detach())
errD_fake = loss(fake_output, fake_label)
errD = errD_real + errD_fake
errD.backward()
trainer_D.step(batch_size)
metric.update([real_label, ], [real_output,])
metric.update([fake_label, ], [fake_output,])
####################################
# Update Generator: maximize log(D(G(z)))
####################################
with autograd.record():
output = D(fake)
errG = loss(output, real_label)
errG.backward()
trainer_G.step(batch_size)
####
# Plot Loss
####
# append new data to arrays
loss_D = nd.concat(loss_D, nd.mean(errD), dim=0)
loss_G = nd.concat(loss_G, nd.mean(errG), dim=0)
name, acc = metric.get()
acc_d = nd.concat(acc_d, nd.array([acc], ctx=ctx), dim=0)
# plot array
dynamic_line_plt(ax, [loss_D.asnumpy(), loss_G.asnumpy(), acc_d.asnumpy()], labels=labels)
name, acc = metric.get()
metric.reset()
logging.info('Binary training acc at epoch %d: %s=%f' % (epoch, name, acc))
logging.info('time: %f' % (time.time() - tic))
output
img = G(mx.nd.random_normal(0,1,shape=(100, latent_z_size), ctx=ctx))[0].reshape((28, 28))
plt.imshow(img.asnumpy(),cmap='gray')
plt.show()
Now this doesn't get nearly as good as the repo's example from above. Although fairly similar.
Thus I was wondering if you could take a look and figure out why:
the colors are inverted
why the results are sub par
I have been fiddling around with this trying a lot of various things to improve the results (I will list this in a second), but for the MNIST dataset this really shouldn't be needed.
Things I have tried (and I have also tried a host of combinations):
increasing the generator network
increasing the discriminator network
using soft labeling
using noisy labeling
batch norm after every layer in the generator
batch norm of the data
normalizing all values between -1 and 1
leaky relus in the generator
drop out layers in the generator
increased learning rate of discriminator compared to generator
decreased learning rate of i compared to generator
Please let me know if you have any ideas.
1) If you look into original dataset:
training_set = mnist["train_data"].reshape(60000, 28, 28)
plt.imshow(training_set[10,:,:], cmap='gray')
you will notice that the digits are white on a black background. So, technically speaking, your results are not inversed - they match the pattern of original images you used as a real data.
If you want to invert colors for visualization purposes, you can easily do that by changing the pallete to reversed one by adding '_r' (it works for all color palletes):
plt.imshow(img.asnumpy(), cmap='gray_r')
You also can play with ranges of colors by changing vmin and vmax parameters. They control how big the difference between colors should be. By default it is calculated automatically based on provided set.
2) "Why the results are sub par" - I think this is exactly the reason why the community started to use dcGANs. To me the results in the git repo you provided are quite noisy. Surely, they are different from what you receive, and you can achieve the same quality just by changing your activation functions from tanh to sigmoid as in the example on github:
G = nn.Sequential()
with G.name_scope():
G.add(nn.Dense(300, activation="relu"))
G.add(nn.Dense(28 * 28, activation="sigmoid"))
D = nn.Sequential()
with D.name_scope():
D.add(nn.Dense(128, activation="relu"))
D.add(nn.Dense(64, activation="relu"))
D.add(nn.Dense(32, activation="relu"))
D.add(nn.Dense(2, activation="sigmoid"))
Sigmoid never goes below zero and it works better in this scenario. Here is a sample picture I get if I train updated model for 30 epochs (the rest of the hyperparameters are same).
If you decide to explore dcGAN to get even better results, take a look here - https://mxnet.incubator.apache.org/tutorials/unsupervised_learning/gan.html It is a well explained tutorial on how to build dcGAN with Mxnet and Gluon. By using dcGAN you will get way better results than that.

Tensorflow KNN : How can we assign the K parameter for defining number of neighbors in KNN?

I have started working on a machine learning project using K-Nearest-Neighbors method on python tensorflow library. I have no experience working with tensorflow tools, so I found some code in github and modified it for my data.
My dataset is like this:
2,2,2,2,0,0,3
2,2,2,2,0,1,0
2,2,2,4,2,2,1
...
2,2,2,4,2,0,0
And this is the code which actually works fine:
import tensorflow as tf
import numpy as np
# Whole dataset => 1428 samples
dataset = 'car-eval-data-1.csv'
# samples for train, remaining for test
samples = 1300
reader = np.loadtxt(open(dataset, "rb"), delimiter=",", skiprows=1, dtype=np.int32)
train_x, train_y = reader[:samples,:5], reader[:samples,6]
test_x, test_y = reader[samples:, :5], reader[samples:, 6]
# Placeholder you can assign values in future. its kind of a variable
# v = ("variable type",[None,4]) -- you can have multidimensional values here
training_values = tf.placeholder("float",[None,len(train_x[0])])
test_values = tf.placeholder("float",[len(train_x[0])])
# MANHATTAN distance
distance = tf.abs(tf.reduce_sum(tf.square(tf.subtract(training_values,test_values)),reduction_indices=1))
prediction = tf.arg_min(distance, 0)
init = tf.global_variables_initializer()
accuracy = 0.0
with tf.Session() as sess:
sess.run(init)
# Looping through the test set to compare against the training set
for i in range (len(test_x)):
# Tensor flow method to get the prediction near to the test parameters in the training set.
index_in_trainingset = sess.run(prediction, feed_dict={training_values:train_x,test_values:test_x[i]})
print("Test %d, and the prediction is %s, the real value is %s"%(i,train_y[index_in_trainingset],test_y[i]))
if train_y[index_in_trainingset] == test_y[i]:
# if prediction is right so accuracy increases.
accuracy += 1. / len(test_x)
print('Accuracy -> ', accuracy * 100, ' %')
The only thing I do not understand is that if it's the KNN method so there has to be some K parameter which defines the number of neighbors for predicting the label for each test sample.
How can we assign the K parameter to tune the number of nearest neighbors for the code?
Is there any way to modify this code to make use of K parameter?
You're right that the example above does not have the provision to select K-Nearest neighbours. In the code below, I have added the ability to add such a parameter(knn_size) along with other corrections
import tensorflow as tf
import numpy as np
# Whole dataset => 1428 samples
dataset = 'PATH_TO_DATASET_CSV'
knn_size = 1
# samples for train, remaining for test
samples = 1300
reader = np.loadtxt(open(dataset, "rb"), delimiter=",", skiprows=1, dtype=np.int32)
train_x, train_y = reader[:samples,:6], reader[:samples,6]
test_x, test_y = reader[samples:, :6], reader[samples:, 6]
# Placeholder you can assign values in future. its kind of a variable
# v = ("variable type",[None,4]) -- you can have multidimensional values here
training_values = tf.placeholder("float",[None, len(train_x[0])])
test_values = tf.placeholder("float",[len(train_x[0])])
# MANHATTAN distance
distance = tf.abs(tf.reduce_sum(tf.square(tf.subtract(training_values,test_values)),reduction_indices=1))
# Here, we multiply the distance by -1 to reverse the magnitude of distances, i.e. the largest distance becomes the smallest distance
# tf.nn.top_k returns the top k values and their indices, here k is controlled by the parameter knn_size
k_nearest_neighbour_values, k_nearest_neighbour_indices = tf.nn.top_k(tf.scalar_mul(-1,distance),k=knn_size)
#Based on the indices we obtain from the previous step, we locate the exact class label set of the k closest matches in the training data
best_training_labels = tf.gather(train_y,k_nearest_neighbour_indices)
if knn_size==1:
prediction = tf.squeeze(best_training_labels)
else:
# Now we make our prediction based on the class label that appears most frequently
# tf.unique_with_counts() gives us all unique values that appear in a 1-D tensor along with their indices and counts
values, indices, counts = tf.unique_with_counts(best_training_labels)
# This gives us the index of the class label that has repeated the most
max_count_index = tf.argmax(counts,0)
#Retrieve the required class label
prediction = tf.gather(values,max_count_index)
init = tf.global_variables_initializer()
accuracy = 0.0
with tf.Session() as sess:
sess.run(init)
# Looping through the test set to compare against the training set
for i in range (len(test_x)):
# Tensor flow method to get the prediction near to the test parameters in the training set.
prediction_value = sess.run([prediction], feed_dict={training_values:train_x,test_values:test_x[i]})
print("Test %d, and the prediction is %s, the real value is %s"%(i,prediction_value[0],test_y[i]))
if prediction_value[0] == test_y[i]:
# if prediction is right so accuracy increases.
accuracy += 1. / len(test_x)
print('Accuracy -> ', accuracy * 100, ' %')

Resources