How to implement GridSearchCV for OneVsRestClassifier of an SVC classifier - machine-learning

I want to do a grid search on a OneVsRestClassifier whose base model is an SVC, but I get the following error when running the grid search. How can I resolve it?
Code:
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

# defining parameter range
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']}

svc_model_orc = OneVsRestClassifier(SVC())
grid = GridSearchCV(svc_model_orc, param_grid, refit=True, verbose=3)

# fitting the model for grid search
grid.fit(X_train, y_train)

# svc_pred_train = grid.predict(X_train)
# svc_pred_test = grid.predict(X_valid)
# print(accuracy_score(y_train, svc_pred_train))
# print(f1_score(y_train, svc_pred_train, average='weighted'))
# print(accuracy_score(y_valid, svc_pred_test))
# print(f1_score(y_valid, svc_pred_test, average='weighted'))
Error:
ValueError: Invalid parameter C for estimator OneVsRestClassifier(estimator=SVC(C=1.0, cache_size=200, class_weight=None,
coef0=0.0, decision_function_shape='ovr',
degree=3, gamma='auto_deprecated',
kernel='rbf', max_iter=-1, probability=False,
random_state=None, shrinking=True, tol=0.001,
verbose=False),
n_jobs=None). Check the list of available parameters with `estimator.get_params().keys()`.

Since you're performing a grid search over a nested estimator (even though you have just one model, OneVsRestClassifier fits a classifier per class), you need to define the parameters with the syntax estimator__some_parameter.
For nested objects, such as pipelines, this is the syntax GridSearchCV expects for accessing the parameters of the different models, i.e. <component>__<parameter>. In a pipeline you would name each step and then set its parameters accordingly, e.g. SVC__some_parameter for an SVC parameter. In this case, however, the classifier sits under estimator; note that the actual model is accessed through the estimator attribute:
print(svc_model_orc.estimator)
SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
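You can also list the valid parameter names directly, as the error message itself suggests (a quick sanity check; the exact set of keys depends on your scikit-learn version):
print(sorted(svc_model_orc.get_params().keys()))
# ['estimator', 'estimator__C', 'estimator__cache_size', ..., 'estimator__gamma',
#  'estimator__kernel', ..., 'n_jobs']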
So in this case, you should set the parameter grid as:
param_grid = {'estimator__C': [0.1, 1, 10, 100, 1000],
              'estimator__gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'estimator__kernel': ['rbf']}
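Putting it together, a minimal sketch (it assumes X_train and y_train are already defined, as in the question):
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

param_grid = {'estimator__C': [0.1, 1, 10, 100, 1000],
              'estimator__gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'estimator__kernel': ['rbf']}

svc_model_orc = OneVsRestClassifier(SVC())
grid = GridSearchCV(svc_model_orc, param_grid, refit=True, verbose=3)

# the grid now recognises the prefixed parameter names
grid.fit(X_train, y_train)
print(grid.best_params_)
print(grid.best_score_)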

Related

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int)

I am trying to tune the hyperparameters of an MLP Sequential model but I get an error while doing so. I have tried downgrading/upgrading the scikit-learn version and using np.asarray(X).astype(np.int) and np.asarray(X).astype(np.float32), but I am still getting the error. Could someone please help me fix it? Thanks.
Error after using np.asarray(X).astype(np.int/float32)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-184-8cee47d11b3d> in <module>
1 x_norm_train=np.asarray(x_norm_train).astype(np.float32)
2
----> 3 y_train=np.asarray(y_train).astype(np.float32)
TypeError: float() argument must be a string or a number, not 'Timestamp'
Below is the code:
def mlp_tune():
    def create_model(layers, activation, optimizer):
        model = Sequential()
        for i, nodes in enumerate(layers):
            if i == 0:
                model.add(Dense(nodes, input_dim=x_norm_train.shape[1]))
                model.add(Activation(activation))
            else:
                model.add(Dense(nodes))
                model.add(Activation(activation))
        model.add(Dense(1, activation='linear'))  # Note: no activation beyond this point
        model.compile(optimizer=optimizer, loss='mse')
        # optimizers.Adam(learning_rate = rate, beta_1 = 0.9,
        #                 beta_2 = 0.999, amsgrad=False)
        return model

    model = KerasRegressor(build_fn=create_model, verbose=1)

    # specifying layer architecture
    optimizer = ['adam', 'rmsprop', 'sgd', 'adagrad', 'adadelta']
    layers = [(3,), (10,), (30,), (10, 10), (10, 20), (20, 20),
              (30, 30), (10, 10, 10), (20, 20, 20),
              (30, 30, 30), (10, 20, 30), (20, 20, 30)]
    activations = ['relu', 'tanh', 'sigmoid']
    param_grid = dict(layers=layers, optimizer=optimizer, activation=activations,
                      batch_size=[10, 50, 100], epochs=[10, 50])

    grid = GridSearchCV(estimator=model, param_grid=param_grid,
                        scoring='neg_mean_squared_error')
    grid_result = grid.fit(x_norm_train, y_train)
    [grid_result.best_score_, grid_result.best_params_]

    testPredict = grid.predict(x_norm_test)

    # model evaluation
    print()
    print(mean_squared_error(y_test, testPredict))
    print()

    # list all the data in history
    print(history.history.keys())

    # summarize history for accuracy
    plt.figure(figsize=(12, 8))
    plt.plot(grid_result.history['mean_squared_error'])
    plt.plot(grid_result.history['val_mean_squared_error'])
    plt.title('MLP Model Accuracy (After Hyperparameter tuning)', fontsize=18, y=1.03)
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='best')
    plt.savefig("4 mlp model accuracy after tuning.png", dpi=300)
    plt.show()

    # summarize history for loss
    plt.figure(figsize=(12, 8))
    plt.plot(grid_result.history['loss'])
    plt.plot(grid_result.history['val_loss'])
    plt.title('MLP Model Loss (After Hyperparameter tuning)', fontsize=18, y=1.03)
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='best')
    plt.savefig("5 mlp model loss after tuning.png", dpi=300)
    plt.show()

    # prepare data for plotting
    y = y_test[:]
    y.reset_index(inplace=True)
    y.drop(['index'], axis=1, inplace=True)

    # plotting the results
    sns.set_context('notebook', font_scale=1.5)
    plt.figure(figsize=(20, 10))
    plt.plot(y['surge'])
    plt.plot(testPredict, color='red')
    plt.legend(['Observed Surge', 'Predicted Surge'], fontsize=14)
    plt.ylabel('Surge Height (m)')
    plt.title("Observed vs. Predicted Storm Surge Height", fontsize=20, y=1.03)
    plt.savefig("6 mlp observed vs predicted surge height (after tuning).png", dpi=300)
    plt.show()
Error
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).
The error is most likely caused by faulty data preprocessing; make sure that everything is properly formatted.
The following shows what the model expects as inputs:
[print(i.shape, i.dtype) for i in model.inputs]
[print(o.shape, o.dtype) for o in model.outputs]
[print(l.name, l.input_shape, l.dtype) for l in model.layers]
Pass the data to the model in the format it expects. Thank you.
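As a rough sketch of what that could look like here: the traceback shows that y_train contains pandas Timestamp objects, so any datetime column has to be dropped or converted to a number before casting to float32. The column handling below is only illustrative (the column name 'time' is assumed); adapt it to your actual data:
import numpy as np
import pandas as pd

y_train = pd.DataFrame(y_train)

# Option 1: the timestamp is just an index/bookkeeping column -> drop it
y_train = y_train.select_dtypes(exclude=['datetime', 'datetimetz'])

# Option 2: you really want the time as a numeric feature -> convert it explicitly
# y_train['time'] = pd.to_datetime(y_train['time']).astype('int64')  # ns since epoch

y_train = np.asarray(y_train).astype(np.float32)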

dask xgboost giving different answers compared to xgboost

I am running the same piece of code on Normal XGBoost and Dask XGBoost.
I am getting different probabilities from both models.
Normal XGBoost Code
params = {'objective': 'binary:logistic', 'nround': 1000,
          'max_depth': 16, 'eta': 0.01, 'subsample': 0.5,
          'min_child_weight': 1, 'tree_method': 'hist',
          'grow_policy': 'lossguide'}
model = XGBClassifier(params=params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
Output: [screenshot: Normal XGBoost Code Output]
Dask XGBoost Code
params = {'objective': 'binary:logistic', 'nround': 1000,
          'max_depth': 16, 'eta': 0.01, 'subsample': 0.5,
          'min_child_weight': 1, 'tree_method': 'hist',
          'grow_policy': 'lossguide'}
bst = dxgb.train(client, params, X_train, y_train)
predictions2 = dxgb.predict(client, bst, X_test).persist()
Output: [screenshot: Dask XGBoost Code Output]
Can someone please help me here?

Combining Principal Component Analysis and Support Vector Machine in a pipeline

I want to combine PCA and SVM to a pipeline, to find the best combination of hyperparameters in a GridSearch.
The following code
from sklearn.svm import SVC
from sklearn import decomposition, datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
digits = datasets.load_digits()
X_train = digits.data
y_train = digits.target
#Use Principal Component Analysis to reduce dimensionality
# and improve generalization
pca = decomposition.PCA()
# Use a linear SVC
svm = SVC()
# Combine PCA and SVC to a pipeline
pipe = Pipeline(steps=[('pca', pca), ('svm', svm)])
# Check the training time for the SVC
n_components = [20, 40, 64]
svm_grid = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
estimator = GridSearchCV(pipe,
                         dict(pca__n_components=n_components,
                              svm=svm_grid))
estimator.fit(X_train, y_train)
Results in an
AttributeError: 'dict' object has no attribute 'get_params'
There is probably something wrong with the way I define and use svm_grid. How can I pass this parameter combination to GridSearchCV correctly?
The problem is that when GridSearchCV tries to hand the parameters to the estimator:
if parameters is not None:
    estimator.set_params(**parameters)
the estimator here is the Pipeline object, not the actual SVC, because of the naming in your parameter grid.
I believe it should be like this:
from sklearn.svm import SVC
from sklearn import decomposition, datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
digits = datasets.load_digits()
X_train = digits.data
y_train = digits.target
# Use Principal Component Analysis to reduce dimensionality
# and improve generalization
pca = decomposition.PCA()
# Use a linear SVC
svm = SVC()
# Combine PCA and SVC to a pipeline
pipe = Pipeline(steps=[('pca', pca), ('svm', svm)])
# Check the training time for the SVC
n_components = [20, 40, 64]
params_grid = {
    'svm__C': [1, 10, 100, 1000],
    'svm__kernel': ['linear', 'rbf'],
    'svm__gamma': [0.001, 0.0001],
    'pca__n_components': n_components,
}
estimator = GridSearchCV(pipe, params_grid)
estimator.fit(X_train, y_train)
print(estimator.best_params_, estimator.best_score_)
Output:
{'pca__n_components': 64, 'svm__C': 10, 'svm__kernel': 'rbf', 'svm__gamma': 0.001} 0.976071229827
The key is to incorporate all of your parameters in params_grid and to name them according to the pipeline's named steps (<step>__<parameter>).
Hope this helps! Good luck!
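If you would rather keep your original structure, where gamma is only searched for the rbf kernel, GridSearchCV also accepts a list of grids. A small sketch using the same step names as above:
params_grid = [
    {'svm__kernel': ['linear'], 'svm__C': [1, 10, 100, 1000],
     'pca__n_components': n_components},
    {'svm__kernel': ['rbf'], 'svm__C': [1, 10, 100, 1000],
     'svm__gamma': [0.001, 0.0001], 'pca__n_components': n_components},
]
estimator = GridSearchCV(pipe, params_grid)
estimator.fit(X_train, y_train)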

TFLearn model evaluation

I am new to machine learning and TensorFlow. I am trying to train a simple model to recognize gender. I use a small data set of height, weight, and shoe size. However, I have run into a problem with evaluating the model's accuracy.
Here's the entire code:
import tflearn
import tensorflow as tf
import numpy as np
# [height, weight, shoe_size]
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40],
[190, 90, 47], [175, 64, 39], [177, 70, 40], [159, 55, 37], [171, 75, 42],
[181, 85, 43], [170, 52, 39]]
# 0 - for female, 1 - for male
Y = [1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0]
data = np.column_stack((X, Y))
np.random.shuffle(data)
# Split into train and test set
X_train, Y_train = data[:8, :3], data[:8, 3:]
X_test, Y_test = data[8:, :3], data[8:, 3:]
# Build neural network
net = tflearn.input_data(shape=[None, 3])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 1, activation='linear')
net = tflearn.regression(net, loss='mean_square')
# fix for tflearn with TensorFlow 12:
col = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
for x in col:
    tf.add_to_collection(tf.GraphKeys.VARIABLES, x)
# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
model.fit(X_train, Y_train, n_epoch=100, show_metric=True)
score = model.evaluate(X_test, Y_test)
print('Training test score', score)
test_male = [176, 78, 42]
test_female = [170, 52, 38]
print('Test male: ', model.predict([test_male])[0])
print('Test female:', model.predict([test_female])[0])
Even though the model's predictions are not very accurate:
Test male: [0.7158362865447998]
Test female: [0.4076206684112549]
The model.evaluate(X_test, Y_test) call always returns 1.0. How do I calculate the real accuracy on the test data set using TFLearn?
You want to do binary classification in this case. Your network is set to perform linear regression.
First, transform the labels (gender) to categorical features:
from tflearn.data_utils import to_categorical
Y_train = to_categorical(Y_train, nb_classes=2)
Y_test = to_categorical(Y_test, nb_classes=2)
The output layer of your network needs two output units for the two classes you want to predict, and the activation needs to be softmax for classification. The tflearn.regression defaults are categorical cross-entropy loss and an accuracy metric, so those are already correct.
# Build neural network
net = tflearn.input_data(shape=[None, 3])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)
The output will now be a vector with the probability for each gender. For example:
[0.991, 0.009] #female
Bear in mind that you will hopelessly overfit the network with your tiny data set. This means that the accuracy will approach 1 during training, while the accuracy on your test set will be quite poor.
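For completeness, a minimal sketch of the adjusted training and evaluation flow (it assumes the same X_train/X_test split as in the question; the flatten() calls are only there to make sure to_categorical receives 1-D label arrays):
from tflearn.data_utils import to_categorical

Y_train_cat = to_categorical(Y_train.flatten(), nb_classes=2)
Y_test_cat = to_categorical(Y_test.flatten(), nb_classes=2)

model = tflearn.DNN(net)
model.fit(X_train, Y_train_cat, n_epoch=100, show_metric=True)

# evaluate() now reports classification accuracy on the held-out samples
print('Test accuracy:', model.evaluate(X_test, Y_test_cat))

# predictions are class probabilities; take the argmax to get the gender label
print('Test male:', np.argmax(model.predict([[176, 78, 42]]), axis=1))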

Cost-sensitive learning in Tensorflow

I am trying to set up cost-sensitive binary classification learning in TensorFlow, which would put different penalties on false positives and false negatives. Does anyone know how to create a loss function from a set of penalty weights $(w_1, w_2, w_3, w_4)$ for (true positive, false positive, false negative, true negative)?
I went over the standard cost functions offered, but can't figure out how to combine them to get something similar to the above.
Following @Cauchyzhou's answer below, if you have the logits and the sparse labels, as well as a cost_matrix whose shape is [L, L], where L is the number of unique labels, you can simply use the function below to calculate the loss:
def sparse_cost_sensitive_loss(logits, labels, cost_matrix):
    batch_cost_matrix = tf.nn.embedding_lookup(cost_matrix, labels)
    eps = 1e-6
    probability = tf.clip_by_value(tf.nn.softmax(logits), eps, 1 - eps)
    cost_values = tf.log(1 - probability) * batch_cost_matrix
    loss = tf.reduce_mean(-tf.reduce_sum(cost_values, axis=1))
    return loss
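A quick usage sketch (TensorFlow 1.x graph mode, since the function uses tf.log; under TensorFlow 2.x you would go through tf.compat.v1 or swap in tf.math.log). The logits, labels, and cost matrix below are made up for illustration:
logits = tf.constant([[2.0, 0.5, 0.1],
                      [0.2, 0.1, 3.0]])
labels = tf.constant([0, 2])                       # sparse integer labels
cost_matrix = tf.constant([[0., 1., 100.],
                           [1., 0., 1.],
                           [1., 20., 0.]])

loss = sparse_cost_sensitive_loss(logits, labels, cost_matrix)
with tf.Session() as sess:
    print(sess.run(loss))   # a single scalar, averaged over the batch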
I am not aware of anyone who has built a cost-sensitive neural network classifier, but Alejandro Correa Bahnsen has published academic papers on cost-sensitive logistic regression and cost-sensitive decision trees, as well as a very well documented Python cost-sensitive classification library named CostCla. CostCla is pretty easy to use if you are familiar with scikit-learn.
You should be able to use the Bayes minimum risk model in the library to minimize the cost of your neural network since it fits a cost model to output prediction probabilities of any classifier.
Note that CostCla is intended to work with potentially different costs for each sample. You give it a cost matrix for your training and test samples. However, you can just make all the rows in the cost matrix the same if that applies to your problem.
Here are a couple of additional academic papers on the subject:
The Foundations of Cost-Sensitive Learning
Optimal ROC Curve for a Combination of Classifiers
cost_matrix:
[[0,1,100],
[1,0,1],
[1,20,0]]
label:
[1,2]
y*:
[[0,1,0],
[0,0,1]]
y(prediction):
[[0.2,0.3,0.5],
[0.1,0.2,0.7]]
label,cost_matrix-->cost_embedding:
[[1,0,1],
[1,20,0]]
Obviously, the 0.3 in [0.2, 0.3, 0.5] is the probability of the correct label [0, 1, 0], so it should not contribute to the loss.
The 0.7 in [0.1, 0.2, 0.7] is the same. In other words, the positions with value 1 in y* do not contribute to the loss.
So I have (1 - y*):
[[1,0,1],
 [1,1,0]]
The cross-entropy is target*log(predict) + (1-target)*log(1-predict); for the positions where y* is 0 we need the (1-target)*log(1-predict) term, so I use (1-predict), written (1-y) below.
1-y:
[[0.8, 0.7, 0.5],
 [0.9, 0.8, 0.3]]
(the 0.7 and 0.3 entries are not actually used)
The custom loss is then
[[1,0,1], [1,20,0]] * log([[0.8,0.7,0.5],[0.9,0.8,0.3]]) * [[1,0,1],[1,1,0]]
and you can see that the (1-y*) factor can be dropped here, since cost_embedding is already 0 at the true-label positions (the cost matrix has a zero diagonal).
So the loss is -tf.reduce_mean(cost_embedding * log(1-y)); to make it numerically stable, it should be:
-tf.reduce_mean(cost_embedding * log(tf.clip_by_value(1-y, 1e-10, 1.0)))
The demo is below:
import tensorflow as tf
import numpy as np

hidden_units = 50
num_class = 3

class Model():
    def __init__(self, name_scope, is_custom):
        self.name_scope = name_scope
        self.is_custom = is_custom
        self.input_x = tf.placeholder(tf.float32, [None, hidden_units])
        self.input_y = tf.placeholder(tf.int32, [None])
        self.instantiate_weights()
        self.logits = self.inference()
        self.predictions = tf.argmax(self.logits, axis=1)
        self.losses, self.train_op = self.opitmizer()

    def instantiate_weights(self):
        with tf.variable_scope(self.name_scope + 'FC'):
            self.W = tf.get_variable('W', [hidden_units, num_class])
            self.b = tf.get_variable('b', [num_class])
            self.cost_matrix = tf.constant(
                np.array([[0, 1, 100], [1, 0, 100], [20, 5, 0]]),
                dtype=tf.float32
            )

    def inference(self):
        return tf.matmul(self.input_x, self.W) + self.b

    def opitmizer(self):
        if not self.is_custom:
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
        else:
            batch_cost_matrix = tf.nn.embedding_lookup(
                self.cost_matrix, self.input_y
            )
            loss = - tf.log(1 - tf.nn.softmax(self.logits)) \
                   * batch_cost_matrix
        train_op = tf.train.AdamOptimizer().minimize(loss)
        return loss, train_op

import random

batch_size = 128
norm_model = Model('norm', False)
custom_model = Model('cost', True)

# `datasets`, `lables` and `dataset_size` are assumed to be defined elsewhere
split_point = int(0.9 * dataset_size)
train_set = datasets[:split_point]
test_set = datasets[split_point:]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        batch_index = random.sample(range(split_point), batch_size)
        train_batch = train_set[batch_index]
        train_labels = lables[batch_index]
        _, eval_predict, eval_loss = sess.run(
            [norm_model.train_op, norm_model.predictions, norm_model.losses],
            feed_dict={
                norm_model.input_x: train_batch,
                norm_model.input_y: train_labels
            })
        _, eval_predict1, eval_loss1 = sess.run(
            [custom_model.train_op, custom_model.predictions, custom_model.losses],
            feed_dict={
                custom_model.input_x: train_batch,
                custom_model.input_y: train_labels
            })
        # print('default', eval_predict, '\ncustom', eval_predict1)
        print(np.sum(((eval_predict == train_labels) == True).astype(np.int)),
              np.sum(((eval_predict1 == train_labels) == True).astype(np.int)))
        if i % 10 == 0:
            print('default test', sess.run(norm_model.predictions,
                  feed_dict={
                      norm_model.input_x: test_set,
                      norm_model.input_y: lables[split_point:]
                  }))
            print('custom test', sess.run(custom_model.predictions,
                  feed_dict={
                      custom_model.input_x: test_set,
                      custom_model.input_y: lables[split_point:]
                  }))
Here is another solution, where you can take any TensorFlow loss and make it cost-sensitive by feeding per-sample costs in as the loss's weights. Note that, unlike in most of the cases here, you need to use a cost of '1' instead of '0' when you want to keep the loss as it is.
Some advantages of this approach are:
it extends tf.losses.Loss and satisfies the __call__ API
the reduction kwarg of the original loss remains functional, and its behaviour is propagated to CostSensitiveLoss
you can also pass your own extra weights to new loss instances; note that the internally generated weights are used by the wrapped self.loss
import numpy as np
from keras.api._v2 import keras as tk
import tensorflow as tf
from keras.utils import losses_utils
import typing as t


class CostSensitiveLoss(tk.losses.Loss):
    def __init__(
        self,
        cost_matrix: t.List, loss: tk.losses.Loss,
    ):
        super().__init__(reduction=loss.reduction, name=loss.name)
        self.loss = loss
        self.cost_matrix = cost_matrix
        self._cost_matrix = tf.constant(cost_matrix, dtype=tf.float32)

    @classmethod
    def from_config(cls, config):
        config['loss'] = tk.losses.deserialize(config['loss'])
        return cls(**config)

    def get_config(self):
        return {
            'cost_matrix': self.cost_matrix,
            'loss': tk.losses.serialize(self.loss),
            'reduction': self.reduction, 'name': self.name
        }

    def call(self, y_true, y_pred):
        # if y_true is one hot encoded then get integer indices
        if y_true.ndim == 1:
            y_true_index = y_true
        elif y_true.ndim == 2:
            y_true_index = tf.argmax(y_true, axis=1)
        else:
            raise Exception(f"`y_true.ndim` {y_true.ndim} not supported")
        # get cost for batch
        cost_for_batch = tf.nn.embedding_lookup(self._cost_matrix, y_true_index)
        cost_for_batch *= y_pred
        cost_for_batch = tf.reduce_sum(cost_for_batch, axis=1)
        # get loss
        return self.loss(y_true, y_pred, cost_for_batch)
if __name__ == '__main__':
    # for debug purposes I have kept 'none'; you can
    # safely use other options like 'sum', 'auto'
    _loss = tk.losses.MeanAbsoluteError(reduction='none')

    # some cost matrices; the first cost matrix is the case when you are
    # not using cost-sensitive weights
    _cs_loss_1 = CostSensitiveLoss(
        cost_matrix=[[1, 1, 1], [1, 1, 1], [1, 1, 1], ],
        loss=_loss
    )
    _cs_loss_2 = CostSensitiveLoss(
        cost_matrix=[[1, 2, 2], [4, 1, 4], [8, 8, 1], ],
        loss=_loss
    )
    _cs_loss_3 = CostSensitiveLoss(
        cost_matrix=[[1, 4, 8], [2, 1, 8], [2, 4, 1], ],
        loss=_loss
    )

    _y_true = np.asarray(
        [
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
        ]
    )
    _y_pred = np.asarray(
        [
            [0.8, 0.1, 0.1],
            [0.1, 0.8, 0.1],
            [0.1, 0.1, 0.8],
            [0.1, 0.8, 0.1],
            [0.1, 0.1, 0.8],
            [0.8, 0.1, 0.1],
            [0.1, 0.1, 0.8],
            [0.8, 0.1, 0.1],
            [0.1, 0.8, 0.1],
        ]
    )

    print("loss ........................")
    print(_loss(_y_true, _y_pred).numpy())
    print("cs_loss_1 ...................")
    print(_cs_loss_1(_y_true, _y_pred).numpy())
    print("cs_loss_2 ...................")
    print(_cs_loss_2(_y_true, _y_pred).numpy())
    print("cs_loss_3 ...................")
    print(_cs_loss_3(_y_true, _y_pred).numpy())
