I am new to machine learning, and I am trying to use Keras to perform regression tasks. I have implemented this code, based on this example.
X = df[['full_sq','floor','build_year','num_room','sub_area_2','sub_area_3','state_2.0','state_3.0','state_4.0']]
y = df['price_doc']
X = np.asarray(X)
y = np.asarray(y)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=.2)
def baseline_model():
    model = Sequential()
    model.add(Dense(13, input_dim=9, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=100, batch_size=100, verbose=False)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(estimator, X_train, Y_train, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
prediction = estimator.predict(X_test)
accuracy_score(Y_test, prediction)
When I run the code I get this error:
AttributeError: 'KerasRegressor' object has no attribute 'model'
How could I correctly 'insert' the model in KerasRegressor?
You have to fit the estimator again after cross_val_score in order to evaluate it on new data:
estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=100, batch_size=100, verbose=False)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(estimator, X_train, Y_train, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
estimator.fit(X, y)
prediction = estimator.predict(X_test)
accuracy_score(Y_test, prediction)
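As a side note, accuracy_score is a classification metric; for a continuous target such as price_doc, a regression metric like mean_squared_error is usually the better fit. A minimal sketch, reusing prediction and Y_test from above:
from sklearn.metrics import mean_squared_error

# Mean squared error of the refitted estimator on the held-out test set
print("Test MSE: %.3f" % mean_squared_error(Y_test, prediction))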
Working Test version:
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_val_score, KFold
from keras.models import Sequential
from sklearn.metrics import accuracy_score
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
seed = 1
diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
def baseline_model():
    model = Sequential()
    model.add(Dense(10, input_dim=10, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=100, batch_size=100, verbose=False)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(estimator, X, y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
estimator.fit(X, y)
prediction = estimator.predict(X)
accuracy_score(y, prediction)
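Note that in newer Keras versions (an assumption about your installed version) the nb_epoch argument has been renamed to epochs, so the wrapper would be constructed as:
# nb_epoch was deprecated in Keras 2 in favour of epochs
estimator = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=100, verbose=False)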
To evaluate your system's performance, you can calculate the error as follows.
You also do not need to call KFold and cross_val_score.
import numpy as np
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_val_score, KFold
from keras.models import Sequential
from sklearn.metrics import accuracy_score
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
seed = 1
diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
def baseline_model():
    model = Sequential()
    model.add(Dense(10, input_dim=10, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=100, batch_size=100, verbose=False)
estimator.fit(X, y)
prediction = estimator.predict(X)
train_error = np.abs(y - prediction)
mean_error = np.mean(train_error)
min_error = np.min(train_error)
max_error = np.max(train_error)
std_error = np.std(train_error)
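If you prefer a single summary number, the squared-error equivalents can be computed from the same arrays; a minimal sketch:
# Mean squared error and its root, computed from the same predictions
train_mse = np.mean((y - prediction) ** 2)
train_rmse = np.sqrt(train_mse)
print("MSE: %.3f, RMSE: %.3f" % (train_mse, train_rmse))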
Instead of KerasRegressor, you can use the model itself directly.
These two snippets of the code give the exact same results:
estimator = KerasRegressor(build_fn=baseline_model)
estimator.fit(X, y, nb_epoch=100, batch_size=100, verbose=False, shuffle=False)
prediction = estimator.predict(X)
model = baseline_model()
model.fit(X, y, nb_epoch=100, batch_size=100, verbose=False, shuffle=False)
prediction = model.predict(X)
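One quick way to check that equivalence on your own setup (a sketch, assuming the reproducibility seeding described below has been applied before each of the two runs):
# The two prediction arrays should match up to floating-point noise
print(np.allclose(estimator.predict(X), model.predict(X).flatten()))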
Please note that the shuffle argument of fit() must be set to False for both KerasRegressor and the model. Moreover, to fix the initial state and obtain reproducible results, you need to add these lines at the beginning of your script:
import numpy as np
import tensorflow as tf
from keras import backend as K

session = K.get_session()
init_op = tf.group(tf.tables_initializer(), tf.global_variables_initializer(), tf.local_variables_initializer())
session.run(init_op)
np.random.seed(1)
tf.set_random_seed(1)
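The snippet above targets the TensorFlow 1.x session API; on TensorFlow 2.x (an assumption about the installed version), the equivalent seeding is roughly:
import numpy as np
import tensorflow as tf

np.random.seed(1)
tf.random.set_seed(1)  # replaces tf.set_random_seed from TF 1.x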
You should train the model on X_train and y_train; you cannot train it on all of X and y unless you have extra data held out for testing. Training should use the training set, and testing/prediction should use X_test.
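A minimal sketch of that flow, reusing the estimator and the split from the question:
# Fit only on the training split, then evaluate on the held-out test split
estimator.fit(X_train, Y_train)
prediction = estimator.predict(X_test)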
Related
I am trying to design an LSTM model for forecasting price movement.
I have issues with the results I obtain for my predictions. I did not normalize my target set y (neither train nor test), only X, because it is a classification problem (-1, 0, 1), yet the predictions I obtain are floats.
Maybe I did not normalize the right sets. My code is below:
Many thanks for your help, and feel free to comment on my other lines of code too; I am a beginner.
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from datetime import datetime as dt
from pandas_datareader import data as pdr
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM
startdate=dt(2018,3,31)
enddate=dt(2022,3,31)
tickers = ['ETH-USD']
Data=pdr.get_data_yahoo(tickers,start=startdate, end=enddate)['Adj Close']
df_change = Data.apply(lambda x: np.log(x) - np.log(x.shift(1)))
df_change.drop(index=df_change.index[0], axis=0, inplace=True)
df_change = df_change*100
pd.options.mode.chained_assignment = None  # suppress the chained-assignment warning when writing into dataframe copies
df_y = df_change.copy()
df_y.columns = ['ETH-y']
def Target(df, column, df2, column2):
    for i in range(len(df)):
        if df[column].iloc[i] > 0:
            df2[column2][i] = 1  # value is up compared to the previous day
        elif -0.5 < df[column].iloc[i] < 0.5:
            df2[column2][i] = 0  # value is steady
        else:
            df2[column2][i] = -1  # value is down
Target(df_change,'ETH-USD',df_y,'ETH-y')
print(df_y['ETH-y'].value_counts())
Data.drop(index=Data.index[0], axis=0, inplace=True) #drop first row to have same values
X = Data
y = df_y
## split my train val and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1, stratify = y)
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler().fit(X_train)
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)
#reshaping for 3D array
X_train = np.reshape(X_train,(1169,1,1))
X_test = np.reshape(X_test,(293,1,1))
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(y_train.shape[1]))
model.compile(optimizer='adam', loss='mse')
model.summary()
history = model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1, verbose=1)
pred = model.predict(X_test)
pred = sc.inverse_transform(pred)
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.legend()
How should I draw the ROC and PR curves for this NN model, which I am training with 10-fold cross-validation?
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
from numpy import loadtxt
import numpy as np
import pandas as pd
from google.colab import files
uploaded = files.upload()
dataset = loadtxt('mod_dfn.csv', delimiter=',')
X = dataset[:,0:25]
y = dataset[:,25]
acc_per_fold = []   # collect the accuracy of each fold
loss_per_fold = []  # collect the loss of each fold
kfold = KFold(n_splits=10, shuffle=True)
fold_no = 1
for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(Dense(12, input_dim=25, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    history = model.fit(X[train], y[train], batch_size=10, epochs=150, verbose=0)
    scores = model.evaluate(X[test], y[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])
    fold_no = fold_no + 1
You can use RocCurveDisplay and PrecisionRecallDisplay for this purpose.
Try adding these lines just before the last line of your code, fold_no = fold_no + 1, and see if it works for you.
from sklearn import metrics  # required import (place it at the top of your script)

pred = model.predict(X[test])
# ROC curve
fpr, tpr, thresholds = metrics.roc_curve(y[test], pred)
roc_auc = metrics.auc(fpr, tpr)
roc_display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc)
roc_display.plot()
roc_display.figure_.savefig(f'./out/ROC_curve_for_fold#{fold_no}.jpeg')
# PR Curve
precision, recall, _ = metrics.precision_recall_curve(y[test], pred)
pr_display = metrics.PrecisionRecallDisplay(precision=precision, recall=recall)
pr_display.plot()
pr_display.figure_.savefig(f'./out/PR_curve_for_fold#{fold_no}.jpeg')
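On newer scikit-learn releases (roughly 1.0 and later, an assumption about your installed version), the same plots can also be produced in one call from the raw predictions:
# Convenience constructors that compute the curve and plot it in one step
roc_display = metrics.RocCurveDisplay.from_predictions(y[test], pred)
pr_display = metrics.PrecisionRecallDisplay.from_predictions(y[test], pred)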
This original work is presented here
How should I go about plotting the confusion matrix for a CNN model?
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics
##Need to put this block of code in for cuDNN to initialize properly
import tensorflow as tf
config = tf.compat.v1.ConfigProto(gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.8)
# device_count = {'GPU': 1}
)
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(session)
#------------------------------------------------------------------------------------------------------------------
num_rows = 40
num_columns = 174
num_channels = 1
x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)
num_labels = yy.shape[1]
filter_size = 2
# Construct model
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, input_shape=(num_rows, num_columns, num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=32, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Conv2D(filters=128, kernel_size=2, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())
model.add(Dense(num_labels, activation='softmax'))
It is then trained as:
from keras.callbacks import ModelCheckpoint
from datetime import datetime
#num_epochs = 12
#num_batch_size = 128
num_epochs = 72
num_batch_size = 256
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_cnn.hdf5',
verbose=1, save_best_only=True)
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)
I have been trying a few things, one of which is:
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 4))
plot_confusion_matrix=(model(),x_test, y_test)
plt.plot(plot_confusion_matrix)
but I cannot get the confusion matrix to plot.
I also looked at tf.math.confusion_matrix(), but what are the labels and predictions as defined from the CNN model above?
The confusion matrix here is for a multi-class classification.
Is it y_true = np.argmax(y_test, 1) and y_pred = model.predict_classes(x_test)?
labels: 1-D Tensor of real labels for the classification task.
predictions: 1-D Tensor of predictions for a given classification.
As the official documentation says, labels are the ground-truth output classes and predictions are the model's outputs. Since both have to be 1-D tensors, each element of labels holds the ground truth for one instance, and the value at the same index in predictions holds the predicted class for that instance.
So what you can do is get the predictions and labels for each instance. In your code you have passed x_test and y_test, which are not what should be passed; instead, use model.predict and take the argmax of its output to turn the softmax probabilities into predicted labels.
y_predict = np.argmax(model.predict(x_test), axis=1)  # class with the highest softmax probability for each sample
y_true = np.argmax(y_test, axis=1)  # convert the one-hot test labels back to class indices
res = tf.math.confusion_matrix(y_true, y_predict)
This res is now a 2-D matrix; to plot it you can use:
plot_confusion_matrix(classifier, X_test, y_test,
display_labels=class_names,
cmap=plt.cm.Blues,
normalize=normalize)
Here put classifier = model (the fitted model object), not the call model().
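Note that plot_confusion_matrix expects a fitted scikit-learn estimator, so passing a Keras model directly may not work; a sketch that plots the already-computed matrix instead (class_names is a hypothetical list of your label names):
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Plot the matrix returned by tf.math.confusion_matrix above
# (.numpy() assumes TF 2.x eager execution; otherwise evaluate the tensor in a session)
disp = ConfusionMatrixDisplay(confusion_matrix=res.numpy(), display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)
plt.show()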
Hope this helps; here are some more resources.
Here you can see the multi-class confusion matrix technique.
Multiclass plot github function
Another custom plot function
I'm trying to capture long-term dependencies using an LSTM, by creating a unit pulse signal every 62 points.
The idea is to go back 62 time-steps and copy the value for the next time-step, so as to predict the pulse, but the LSTM is not doing this...
import sys
import os
import numpy as np
import math
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Flatten, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
import tensorflow as tf
from tensorflow.python.client import device_lib
K.clear_session()  # clear any previous sessions (so layer names restart from 0)
print(K.tensorflow_backend._get_available_gpus())
print(device_lib.list_local_devices())
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
sess = tf.Session(config=config)
K.set_session(sess)
# hyper-parameters
params = {
    "batch_size": 20,
    "epochs": 1000,
    "time_steps": 70,
}
OUTPUT_PATH = "/home/..."
TIME_STEPS = params["time_steps"]
BATCH_SIZE = params["batch_size"]
def generate_impulse(dim):
    arr = np.zeros(dim)
    frequency = 62
    for i in range(0, len(arr)):
        if i % frequency == 0:
            arr[i] = 1
    return arr
y = generate_impulse(1300)
plt.figure(figsize=(20,5))
plt.plot(y)
plt.title('unit impulse')
plt.ylabel('y')
plt.xlabel('x')
plt.show()
(figure: plot of the generated unit-impulse dataset)
def create_timeseries(arr):
    # Build a univariate time series for single-step prediction:
    # take the first TIME_STEPS values as input and predict the value at TIME_STEPS+1.
    dim_0 = len(arr) - TIME_STEPS
    x = np.zeros((dim_0, TIME_STEPS))
    y = np.zeros((dim_0,))
    for i in range(dim_0):
        x[i] = arr[i:TIME_STEPS+i]  # index TIME_STEPS+i excluded
        y[i] = arr[TIME_STEPS+i]
        #print(x[i], y[i])
    print("length of time-series i/o", x.shape, y.shape)
    return x, y
x_ts, y_ts = create_timeseries(y)
len_train = int(len(x_ts)*80/100)
len_val = int(len(x_ts)*10/100)
# TRAINING SET: 80%
x_train = x_ts[0:len_train]
y_train = y_ts[0:len_train]
# VALIDATION SET: 10%
x_val = x_ts[len_train:len_train+len_val]
y_val = y_ts[len_train:len_train+len_val]
# TEST SET: 10%
x_test = x_ts[len_train+len_val:]
y_test = y_ts[len_train+len_val:]
x_train =x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_val =x_val.reshape((x_val.shape[0], x_val.shape[1], 1))
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
def create_model():
    model = Sequential()
    model.add(LSTM(1, input_shape=(TIME_STEPS, 1)))
    model.compile(optimizer='adam', loss='mse')
    return model
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
patience=50, min_delta=0.0001)
model = create_model()
history = model.fit(x_train, y_train, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE, shuffle=False,
validation_data=(x_val, y_val), callbacks=[es])
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('MSE LOSS')
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
(figure: training and validation MSE loss)
y_pred = model.predict(x_test, batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
error = mean_squared_error(y_test, y_pred)
plt.figure(figsize=(20,5))
plt.plot(y_pred)
plt.plot(y_test)
plt.title('PREDICTION ON TEST SET')
plt.ylabel('sin(x)')
plt.xlabel('x')
plt.legend(['Prediction', 'Real'], loc='upper left')
plt.show()
(figure: prediction vs. real values on the test set)
The training set gives me the same results (it is the same signal). I tried other LSTM models with more neurons, but it does not work either.
You might consider training for more epochs. I created a simplified model and training set based on what I believe is the core of your idea:
from keras.models import Sequential
from keras.layers import LSTM
import numpy as np
TIME_STEPS=10
x_train = np.array([ [ [1],[0],[0],[0],[0],[0],[0],[0],[0],[0] ],
[ [0],[1],[0],[0],[0],[0],[0],[0],[0],[0] ],
[ [0],[0],[1],[0],[0],[0],[0],[0],[0],[0] ],
[ [0],[0],[0],[1],[0],[0],[0],[0],[0],[0] ],
[ [0],[0],[0],[0],[1],[0],[0],[0],[0],[0] ],
[ [0],[0],[0],[0],[0],[1],[0],[0],[0],[0] ],
[ [0],[0],[0],[0],[0],[0],[1],[0],[0],[0] ],
[ [0],[0],[0],[0],[0],[0],[0],[1],[0],[0] ],
[ [0],[0],[0],[0],[0],[0],[0],[0],[1],[0] ],
[ [0],[0],[0],[0],[0],[0],[0],[0],[0],[1] ]])
y_train = np.array([[1],[0],[0],[0],[0],[0],[0],[0],[0],[0]])
print(x_train.shape)
print(y_train.shape)
model = Sequential()
model.add(LSTM(1, input_shape=(TIME_STEPS,1)))
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
model.fit(x_train, y_train, epochs=10000, verbose=0)
After training, I get the following predictions:
model.predict(x_train)
array([[ 0.9870746 ],
[ 0.00665453],
[-0.00303702],
[ 0.00697759],
[-0.02432432],
[-0.00701594],
[ 0.01387464],
[ 0.02281112],
[ 0.00439195],
[-0.04109564]], dtype=float32)
I'm not sure if it solves your problem completely, but it might give you a suggested direction to investigate. I hope this helps.
I have finished a PyTorch MLP model for the MNIST dataset, but got two different results: 0.90+ accuracy when using MNIST dataset from PyTorch, but ~0.10 accuracy when using MNIST dataset from Keras.
Below is my code, with dependencies: PyTorch 0.3.0.post4, Keras 2.1.3, TensorFlow backend 1.4.1 (GPU version).
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch as pt
import torchvision as ptv
from keras.datasets import mnist
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
# training data from PyTorch
train_set = ptv.datasets.MNIST("./data/mnist/train", train=True, transform=ptv.transforms.ToTensor(), download=True)
test_set = ptv.datasets.MNIST("./data/mnist/test", train=False, transform=ptv.transforms.ToTensor(), download=True)
train_dataset = DataLoader(train_set, batch_size=100, shuffle=True)
test_dataset = DataLoader(test_set, batch_size=10000, shuffle=True)
class MLP(pt.nn.Module):
    """The Multi-layer perceptron"""

    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = pt.nn.Linear(784, 512)
        self.fc2 = pt.nn.Linear(512, 128)
        self.fc3 = pt.nn.Linear(128, 10)
        self.use_gpu = True

    def forward(self, din):
        din = din.view(-1, 28 * 28)
        dout = F.relu(self.fc1(din))
        dout = F.relu(self.fc2(dout))
        # return F.softmax(self.fc3(dout))
        return self.fc3(dout)
model = MLP().cuda()
print(model)
# loss func and optim
optimizer = pt.optim.SGD(model.parameters(), lr=1)
criterion = pt.nn.CrossEntropyLoss().cuda()
def evaluate_acc(pred, label):
    pred = pred.cpu().data.numpy()
    label = label.cpu().data.numpy()
    test_np = (np.argmax(pred, 1) == label)
    test_np = np.float32(test_np)
    return np.mean(test_np)

def evaluate_loader(loader):
    print("evaluating ...")
    accuracy_list = []
    for i, (inputs, labels) in enumerate(loader):
        inputs = pt.autograd.Variable(inputs).cuda()
        labels = pt.autograd.Variable(labels).cuda()
        outputs = model(inputs)
        accuracy_list.append(evaluate_acc(outputs, labels))
    print(sum(accuracy_list) / len(accuracy_list))

def training(d, epochs):
    for x in range(epochs):
        for i, data in enumerate(d):
            optimizer.zero_grad()
            (inputs, labels) = data
            inputs = pt.autograd.Variable(inputs).cuda()
            labels = pt.autograd.Variable(labels).cuda()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            if i % 200 == 0:
                print(i, ":", evaluate_acc(outputs, labels))
# Training MLP for 4 epochs with MNIST dataset from PyTorch
training(train_dataset, 4)
# The accuracy is ~0.96.
evaluate_loader(test_dataset)
print("###########################################################")
def load_mnist():
    (x, y), (x_test, y_test) = mnist.load_data()
    x = x.reshape((-1, 1, 28, 28)).astype(np.float32)
    x_test = x_test.reshape((-1, 1, 28, 28)).astype(np.float32)
    y = y.astype(np.int64)
    y_test = y_test.astype(np.int64)
    print("x.shape", x.shape, "y.shape", y.shape,
          "\nx_test.shape", x_test.shape, "y_test.shape", y_test.shape,
          )
    return x, y, x_test, y_test

class TMPDataset(Dataset):
    """Dataset for loading the Keras MNIST dataset."""
    def __init__(self, a, b):
        self.x = a
        self.y = b
    def __getitem__(self, item):
        return self.x[item], self.y[item]
    def __len__(self):
        return len(self.y)
x_train, y_train, x_test, y_test = load_mnist()
# Create dataloader for MNIST dataset from Keras.
test_loader = DataLoader(TMPDataset(x_test, y_test), num_workers=1, batch_size=10000)
train_loader = DataLoader(TMPDataset(x_train, y_train), shuffle=True, batch_size=100)
# Evaluate the performance of the MLP trained on the PyTorch dataset; the accuracy is ~0.96.
evaluate_loader(test_loader)
evaluate_loader(train_loader)
model = MLP().cuda()
print(model)
optimizer = pt.optim.SGD(model.parameters(), lr=1)
criterion = pt.nn.CrossEntropyLoss().cuda()
# Train now on MNIST dataset from Keras.
training(train_loader, 4)
# Evaluate the trained model on the MNIST dataset from Keras; the resulting performance is ~0.10...
evaluate_loader(test_loader)
evaluate_loader(train_loader)
I have checked some samples from the Keras MNIST dataset and found no errors.
I am wondering: what is wrong with the datasets?
The code runs without error; run it to see the results.
The MNIST data coming from Keras are not normalized; following the Keras MNIST MLP example, you should do it manually, i.e. you should include the following in your load_mnist() function:
x /= 255
x_test /= 255
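In the code from the question, that amounts to scaling inside load_mnist(); a sketch of the adjusted function:
def load_mnist():
    (x, y), (x_test, y_test) = mnist.load_data()
    x = x.reshape((-1, 1, 28, 28)).astype(np.float32) / 255  # scale pixels to [0, 1]
    x_test = x_test.reshape((-1, 1, 28, 28)).astype(np.float32) / 255
    y = y.astype(np.int64)
    y_test = y_test.astype(np.int64)
    return x, y, x_test, y_test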
Not sure about PyTorch, but it would seem that the MNIST data from their own utility functions come already normalized (as is the case with Tensorflow - see the third point in my answer here).
A 10% accuracy (i.e. equivalent to random guessing) with non-normalized input data is perfectly consistent with this.