I am new to machine learning and TensorFlow. I am trying to train a simple model to recognize gender. I use a small data set of height, weight, and shoe size. However, I have encountered a problem with evaluating the model's accuracy.
Here's the entire code:
import tflearn
import tensorflow as tf
import numpy as np
# [height, weight, shoe_size]
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40],
     [190, 90, 47], [175, 64, 39], [177, 70, 40], [159, 55, 37], [171, 75, 42],
     [181, 85, 43], [170, 52, 39]]
# 0 - for female, 1 - for male
Y = [1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0]
data = np.column_stack((X, Y))
np.random.shuffle(data)
# Split into train and test set
X_train, Y_train = data[:8, :3], data[:8, 3:]
X_test, Y_test = data[8:, :3], data[8:, 3:]
# Build neural network
net = tflearn.input_data(shape=[None, 3])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 1, activation='linear')
net = tflearn.regression(net, loss='mean_square')
# fix for tflearn with TensorFlow 12:
col = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
for x in col:
    tf.add_to_collection(tf.GraphKeys.VARIABLES, x)
# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
model.fit(X_train, Y_train, n_epoch=100, show_metric=True)
score = model.evaluate(X_test, Y_test)
print('Training test score', score)
test_male = [176, 78, 42]
test_female = [170, 52, 38]
print('Test male: ', model.predict([test_male])[0])
print('Test female:', model.predict([test_female])[0])
Even though the model's predictions are not very accurate:
Test male: [0.7158362865447998]
Test female: [0.4076206684112549]
model.evaluate(X_test, Y_test) always returns 1.0. How do I calculate the real accuracy on the test data set using TFLearn?
You want to do binary classification in this case, but your network is set up to perform linear regression.
First, transform the labels (gender) to categorical features:
from tflearn.data_utils import to_categorical
Y_train = to_categorical(Y_train, nb_classes=2)
Y_test = to_categorical(Y_test, nb_classes=2)
The output layer of your network needs two output units for the two classes you want to predict, and its activation needs to be softmax for classification. The tflearn.regression default loss is categorical cross-entropy and the default metric is accuracy, so those are already correct.
# Build neural network
net = tflearn.input_data(shape=[None, 3])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)
The output will now be a vector with the probability for each gender. For example:
[0.991, 0.009] #female
Bear in mind that you will hopelessly overfit the network with such a tiny data set. This means that the training accuracy will approach 1, while the accuracy on your test set will be quite poor.
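Putting the pieces together, here is a minimal sketch of the adjusted training and evaluation (assuming the X_train/X_test/Y_train/Y_test split from the question; the labels are flattened and cast to int before one-hot encoding because they come from a column slice):
import numpy as np
import tflearn
from tflearn.data_utils import to_categorical
# One-hot encode the gender labels (0 = female, 1 = male)
Y_train_cat = to_categorical(Y_train.flatten().astype(int), nb_classes=2)
Y_test_cat = to_categorical(Y_test.flatten().astype(int), nb_classes=2)
# Two-unit softmax output for binary classification
net = tflearn.input_data(shape=[None, 3])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)  # defaults: categorical cross-entropy loss, accuracy metric
model = tflearn.DNN(net)
model.fit(X_train, Y_train_cat, n_epoch=100, show_metric=True)
# evaluate() now reports classification accuracy on the held-out samples
print('Test accuracy:', model.evaluate(X_test, Y_test_cat))
# predict() returns one probability per class; argmax gives the predicted gender
print('Test male:  ', np.argmax(model.predict([[176, 78, 42]])[0]))
print('Test female:', np.argmax(model.predict([[170, 52, 38]])[0]))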
Related
I am trying to tune the hyperparameters of an MLP Sequential model but I am getting an error while performing this task. I have tried downgrading/upgrading the scikit-learn version and using np.asarray(X).astype(np.int) and np.asarray(X).astype(np.float32), but I am still getting the error. Could someone please help me fix it? Thanks.
Error after using np.asarray(X).astype(np.int/float32)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-184-8cee47d11b3d> in <module>
1 x_norm_train=np.asarray(x_norm_train).astype(np.float32)
2
----> 3 y_train=np.asarray(y_train).astype(np.float32)
TypeError: float() argument must be a string or a number, not 'Timestamp'
Below is the code:
def mlp_tune():

    def create_model(layers, activation, optimizer):
        model = Sequential()
        for i, nodes in enumerate(layers):
            if i == 0:
                model.add(Dense(nodes, input_dim = x_norm_train.shape[1]))
                model.add(Activation(activation))
            else:
                model.add(Dense(nodes))
                model.add(Activation(activation))
        model.add(Dense(1, activation = 'linear'))  # Note: no activation beyond this point
        model.compile(optimizer = optimizer, loss='mse')
        # optimizers.Adam(learning_rate = rate, beta_1 = 0.9, \
        #                 beta_2 = 0.999, amsgrad=False)
        return model

    model = KerasRegressor(build_fn = create_model, verbose=1)

    # specifying layer architecture
    optimizer = ['adam', 'rmsprop', 'sgd', 'adagrad', 'adadelta']
    layers = [(3,), (10,), (30,), (10, 10), (10, 20), (20, 20),
              (30, 30), (10, 10, 10), (20, 20, 20),
              (30, 30, 30), (10, 20, 30), (20, 20, 30)]
    activations = ['relu', 'tanh', 'sigmoid']
    param_grid = dict(layers=layers, optimizer = optimizer, activation=activations,
                      batch_size = [10, 50, 100], epochs=[10, 50])
    grid = GridSearchCV(estimator = model, param_grid = param_grid,
                        scoring='neg_mean_squared_error')
    grid_result = grid.fit(x_norm_train, y_train)
    [grid_result.best_score_, grid_result.best_params_]

    testPredict = grid.predict(x_norm_test)

    # model evaluation
    print()
    print(mean_squared_error(y_test, testPredict))
    print()

    # list all the data in history
    print(history.history.keys())

    # summarize history for accuracy
    plt.figure(figsize=(12, 8))
    plt.plot(grid_result.history['mean_squared_error'])
    plt.plot(grid_result.history['val_mean_squared_error'])
    plt.title('MLP Model Accuracy (After Hyperparameter tuning)', fontsize=18, y=1.03)
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='best')
    plt.savefig("4 mlp model accuracy after tuning.png", dpi=300)
    plt.show()

    # summarize history for loss
    plt.figure(figsize = (12, 8))
    plt.plot(grid_result.history['loss'])
    plt.plot(grid_result.history['val_loss'])
    plt.title('MLP Model Loss (After Hyperparameter tuning)', fontsize=18, y=1.03)
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='best')
    plt.savefig("5 mlp model loss after tuning.png", dpi=300)
    plt.show()

    # prepare data for plotting
    y = y_test[:]
    y.reset_index(inplace=True)
    y.drop(['index'], axis = 1, inplace=True)

    # plotting the results
    sns.set_context('notebook', font_scale= 1.5)
    plt.figure(figsize=(20, 10))
    plt.plot(y['surge'])
    plt.plot(testPredict, color= 'red')
    plt.legend(['Observed Surge', 'Predicted Surge'], fontsize = 14)
    plt.ylabel('Surge Height (m)')
    plt.title("Observed vs. Predicted Storm Surge Height", fontsize=20, y=1.03)
    plt.savefig("6 mlp observed vs predicted surge height (after tuning).png", dpi=300)
    plt.show()
Error
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).
The error is likely caused by faulty data preprocessing; make sure that everything is properly formatted.
The following shows what the model expects as inputs:
[print(i.shape, i.dtype) for i in model.inputs]
[print(o.shape, o.dtype) for o in model.outputs]
[print(l.name, l.input_shape, l.dtype) for l in model.layers]
Pass the data to the model in the format it expects. Thank you.
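The traceback shows that y_train still contains pandas Timestamp objects, which cannot be cast to float32. A rough sketch of how you might inspect and handle datetime columns before training (the DataFrame and column names here, df, 'time' and 'surge', are hypothetical placeholders):
import numpy as np
import pandas as pd
# Check which columns are datetime-typed
print(df.dtypes)
# Option 1: the timestamp is only an identifier/index, not a feature -> drop it
datetime_cols = df.select_dtypes(include=['datetime64[ns]']).columns
X = df.drop(columns=datetime_cols)
# Option 2: the timestamp carries signal -> convert it to a numeric value
# (seconds since the epoch) before casting to float32
df['time_numeric'] = (df['time'] - pd.Timestamp('1970-01-01')) // pd.Timedelta('1s')
# The target must be numeric too; make sure you selected the surge column,
# not the timestamp column
y = df['surge'].astype(np.float32)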
So I have a binary image classification problem with a balanced dataset for classes a and b: 307 images for each class. When I split into train and test datasets, should the train and test sets also be balanced for each class? Or is there a method to split the dataset?
You can use sklearn.model_selection.StratifiedShuffleSplit, which uses stratified random sampling (also called proportional or quota random sampling) so that each split preserves the class proportions. This will give a better distribution.
https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedShuffleSplit.html
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
# dummy dataset
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([0, 0, 0, 1, 1, 1])
sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)
sss.get_n_splits(X, y)
print(sss)
for train_index, test_index in sss.split(X, y):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
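If you only need a single stratified split rather than several, a simpler alternative is train_test_split with the stratify argument (a sketch assuming your image arrays X and labels y):
from sklearn.model_selection import train_test_split
# stratify=y keeps the class ratio (here 50/50) in both the train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0)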
307 images per class can be low for a CNN; you can also use data augmentation to increase the number of samples.
https://github.com/mdbloice/Augmentor
I'm working on a machine learning model and I have a dataframe with the data.
I normalize the data to a standard distribution:
scaler = StandardScaler()
df = scaler.fit_transform(df)
I split the dataset into target and features:
X_df = df[X_characteristics_list]
y_df = df[target]
I split into train and test sets, then I train the model:
X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size = 0.25)
forest = RandomForestRegressor()
forest.fit(X_train, y_train)
I predict on the test set to validate the model's effectiveness:
y_test_pred = forest.predict(X_test)
mse = mean_squared_error(y_test, y_test_pred)
But when it's time to use the model in real life, I need to leave it ready to predict. If I want to predict just one record, say [100, 20, 34], I can't, because the record needs to be standardized first, and transforming it with a new StandardScaler does not work because the scaling depends on the mean and standard deviation of the original dataset. What's the best way to solve this problem?
See below:
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.preprocessing import StandardScaler
# Create our input and output matrices
>>> X, y = make_classification()
# Split train-test... "test" will be production/unobserved/"real-life" data
>>> X_train, X_test, y_train, y_test = train_test_split(X, y)
# What does X_train look like?
>>> X_train
array([[-0.08930702, -2.71113991, -0.93849926, ..., 0.21650905,
0.68952722, 0.61365789],
[-0.31143977, -1.87817904, 0.08287492, ..., -0.41332943,
-0.58967179, 1.7239411 ],
[-1.62287589, 1.10691318, -0.630556 , ..., -0.35060008,
1.11270562, 0.08106694],
...,
[-0.59797041, 0.90218081, 0.89983074, ..., -0.54374315,
1.18534841, -0.03397969],
[-1.2006559 , 1.01890955, -1.21617181, ..., 1.76263322,
1.38280423, -1.0192972 ],
[ 0.11883425, 1.42952643, -1.23647358, ..., 1.02509208,
-1.14308885, 0.72096531]])
# Let's scale it
>>> scaler = StandardScaler()
>>> X_train = scaler.fit_transform(X_train)
>>> X_train
array([[ 0.08867642, -1.97950269, -1.1214106 , ..., 0.22075623,
0.57844552, 0.46487917],
[-0.10736984, -1.34896243, 0.00808597, ..., -0.37670234,
-0.6045418 , 1.57819736],
[-1.26479555, 0.91071257, -0.78086855, ..., -0.3171979 ,
0.96979563, -0.06916763],
...,
[-0.36025134, 0.7557329 , 0.91152449, ..., -0.50041152,
1.03697478, -0.18452874],
[-0.89215959, 0.84409499, -1.42847749, ..., 1.68739437,
1.21957946, -1.17253964],
[ 0.27237431, 1.15492649, -1.4509284 , ..., 0.98777012,
-1.116335 , 0.57247992]])
# Fit the model
>>> model = LogisticRegression()
>>> model.fit(X_train, y_train)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='auto', n_jobs=None, penalty='l2',
random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
warm_start=False)
# Now let's use the already-fitted StandardScaler object to simply transform
# *not fit_transform* the test data
>>> X_test = scaler.transform(X_test)
>>> model.predict(X_test)
array([1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
0, 0, 0])
Note that using joblib or pickle you can save the scaler object and re-load it for scaling in "real-time" later on.
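A minimal sketch of that persistence step, assuming a scaler fitted on the training features and the RandomForestRegressor from the question (the file names are arbitrary):
import joblib
import numpy as np
# After training: persist both the fitted scaler and the model
joblib.dump(scaler, 'scaler.joblib')
joblib.dump(forest, 'forest.joblib')
# Later, in "real life": reload them and score a single record
scaler = joblib.load('scaler.joblib')
forest = joblib.load('forest.joblib')
record = np.array([[100, 20, 34]])        # shape (1, n_features)
record_scaled = scaler.transform(record)  # reuses the training statistics
print(forest.predict(record_scaled))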
I'm using an MLPClassifier for classification of heart diseases. I used imblearn.SMOTE to balance the objects of each class. I was getting very good results (85% balanced accuracy), but I was advised not to use SMOTE on the test data, only on the train data. After I made this change, the performance of my classifier fell too much (~35% balanced accuracy) and I don't know what can be wrong.
Here is a simple benchmark with training data balanced but test data unbalanced:
And this is the code:
def makeOverSamplesSMOTE(X, y):
    from imblearn.over_sampling import SMOTE
    sm = SMOTE(sampling_strategy='all')
    X, y = sm.fit_sample(X, y)
    return X, y
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)
## Normalize data
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.fit_transform(X_test)
## SMOTE only on training data
X_train, y_train = makeOverSamplesSMOTE(X_train, y_train)
clf = MLPClassifier(hidden_layer_sizes=(20), verbose=10,
                    learning_rate_init=0.5, max_iter=2000,
                    activation='logistic', solver='sgd', shuffle=True, random_state=30)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
I'd like to know what I'm doing wrong, since this seems to be the proper way of preparing the data.
The first mistake in your code is in how you standardize the data. You only need to fit the StandardScaler once, on X_train; you shouldn't refit it on X_test. So the correct code is:
def makeOverSamplesSMOTE(X, y):
    from imblearn.over_sampling import SMOTE
    sm = SMOTE(sampling_strategy='all')
    X, y = sm.fit_sample(X, y)
    return X, y
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)
## Normalize data
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
## SMOTE only on training data
X_train, y_train = makeOverSamplesSMOTE(X_train, y_train)
clf = MLPClassifier(hidden_layer_sizes=(20), verbose=10,
                    learning_rate_init=0.5, max_iter=2000,
                    activation='logistic', solver='sgd', shuffle=True, random_state=30)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
For the machine learning model, try reducing the learning rate; it is too high (the default learning rate in sklearn is 0.001). Try changing the activation function and the number of layers. Also, not every ML model works on every dataset, so you might need to look at your data and choose the ML model accordingly.
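Also, since the test set stays imbalanced, plain accuracy is misleading there; a small sketch of computing the balanced accuracy you were tracking, plus a per-class report (assuming y_test and y_pred from the code above):
from sklearn.metrics import balanced_accuracy_score, classification_report
# Balanced accuracy averages recall over the classes, so the majority class
# cannot dominate the score on an imbalanced test set
print(balanced_accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))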
I hope you have already got a better result for your model. I tried changing a few parameters and got an accuracy of 65%; when I changed it to a 90:10 split I got an accuracy of 70%.
But accuracy can mislead, so I also calculated the F1 score, which gives you a better picture of the predictions.
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(hidden_layer_sizes=(1,), verbose=False,
                    learning_rate_init=0.001,
                    max_iter=2000,
                    activation='logistic', solver='sgd', shuffle=True, random_state=50)
clf.fit(X_train_res, y_train_res)
y_pred = clf.predict(X_test)
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
score = accuracy_score(y_test, y_pred)
print(score)
cr = classification_report(y_test, clf.predict(X_test))
print(cr)
Accuracy = 0.65
Classification report:
              precision    recall  f1-score   support

           0       0.82      0.97      0.89        33
           1       0.67      0.31      0.42        13
           2       0.00      0.00      0.00         6
           3       0.00      0.00      0.00         4
           4       0.29      0.80      0.42         5

   micro avg       0.66      0.66      0.66        61
   macro avg       0.35      0.42      0.35        61
weighted avg       0.61      0.66      0.61        61
confusion_matrix:
array([[32,  0,  0,  0,  1],
       [ 4,  4,  2,  0,  3],
       [ 1,  1,  0,  0,  4],
       [ 1,  1,  0,  0,  2],
       [ 1,  0,  0,  0,  4]], dtype=int64)
I have trained a linear classifier on the MNIST dataset with 92% accuracy. Then I fixed the weights and optimized the input image so that the softmax probability for 8 was maximized. But the softmax loss doesn't decrease below 2.302 (= -log(1/10)), which means that my training has been useless. What am I doing wrong?
Code for training the weights:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, \
                     mnist.test.images, mnist.test.labels
X = tf.placeholder("float", [None, 784])
Y = tf.placeholder("float", [None, 10])
w = tf.Variable(tf.random_normal([784, 10], stddev=0.01))
b = tf.Variable(tf.zeros([10]))
o = tf.nn.sigmoid(tf.matmul(X, w)+b)
cost= tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=o, labels=Y))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = tf.argmax(o, 1)
sess=tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(100):
    for start, end in zip(range(0, len(trX), 256), range(256, len(trX)+1, 256)):
        sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
    print(i, np.mean(np.argmax(teY, axis=1) == sess.run(predict_op, feed_dict={X: teX})))
Code for training the image for fixed weights:
#Copy trained weights into W,B and pass them as placeholders to new model
W=sess.run(w)
B=sess.run(b)
X=tf.Variable(tf.random_normal([1, 784], stddev=0.01))
Y=tf.constant([0, 0, 0, 0, 0, 0, 0, 0, 1, 0])
w=tf.placeholder("float")
b=tf.placeholder("float")
o = tf.nn.sigmoid(tf.matmul(X, w)+b)
cost= tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=o, labels=Y))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = tf.argmax(o, 1)
sess.run(tf.global_variables_initializer())
for i in range(1000):
    sess.run(train_op, feed_dict={w: W, b: B})
    if i % 50 == 0:
        sess.run(cost, feed_dict={w: W, b: B})
        print(i, sess.run(predict_op, feed_dict={w: W, b: B}))
You shouldn't call tf.nn.sigmoid on the output of your net. softmax_cross_entropy_with_logits assumes its inputs are logits, i.e. unconstrained real numbers. Using
o = tf.matmul(X, w)+b
increases your accuracy to 92.8%.
With this modification, your second training works. The cost reaches 0 although the resulting image is anything but appealing.
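If you want to inspect the optimized input, a small sketch (assuming matplotlib is available and the session and variables from the code above) that reshapes the learned 784-vector back into a 28x28 image:
import matplotlib.pyplot as plt
# X is the trainable input image (shape [1, 784]); pull its current value
# out of the session and reshape it to the original 28x28 MNIST layout
img = sess.run(X).reshape(28, 28)
plt.imshow(img, cmap='gray')
plt.title('Input optimized to maximize the "8" output')
plt.colorbar()
plt.show()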