how to let the minimize function understand my neural network model?

how to let the minimize function understand my neural network model? - scipy-optimize-minimize

I have a neural network model trained with Keras. It is a regression problem in which I predict 1 output variable from 16 input variables (features). As a next step, I would like to minimize the output and determine which configuration of these 16 inputs yields the minimal output value.
To use the trained model as the objective function of scipy's minimize function, I saved the model and loaded it again, but I keep receiving this error:
ValueError: Input 0 of layer sequential_3 is incompatible with the layer: expected axis -1 of input shape to have value 16 but received input with shape [None, 1]
import keras
import tensorflow as tf
from setuptools.sandbox import save_path
tf.random.set_seed(7)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
import seaborn as sb
sb.set(style="whitegrid")
# 2. read the csv file and add the names from another file
data = pd.read_csv("CBM.csv", names= ['Lever position', 'Ship speed',
'Gas Turbine shaft torque','Gas Turbine rate of revolutions','Gas Generator rate of revolutions',
'Starboard Propeller Torque','Port Propeller Torque','HP Turbine exit temperature','GT Compressor inlet air temperature',
'GT Compressor outlet air temperature','HP Turbine exit pressure','GT Compressor inlet air pressure',
'GT Compressor outlet air pressure','Gas Turbine exhaust gas pressure','Turbine Injecton Control',
'Fuel flow','GT Compressor decay state coefficient', 'GT Turbine decay state coefficient']
)
#-----------------------------------
#-------------------------------------------------------------------
# 3. Split the data to targets and features and transform to numpy array
# Select the target column with balanced double brackets: this yields a
# one-column DataFrame, so the resulting array is 2-D.
targets = np.array(data[['GT Turbine decay state coefficient']])
print ("targets shape is \n",targets.shape)
features = data.drop(['GT Compressor decay state coefficient','GT Turbine decay state coefficient'], axis = 1)
"""
features = data.drop(['GT Compressor decay state coefficient','GT Compressor inlet air pressure',
'Fuel flow','Turbine Injecton Control','Gas Turbine exhaust gas pressure','Starboard Propeller Torque',
'Port Propeller Torque','Ship speed','Gas Turbine shaft torque',
'HP Turbine exit temperature','HP Turbine exit pressure','Gas Turbine rate of revolutions',
'GT Compressor inlet air temperature','GT Turbine decay state coefficient'], axis = 1)
"""
feature_list = list(features.columns) # to store the columns' names in a list for further useage
features = np.array(features[feature_list])
print ("features shape is \n",features.shape)
#----------------------------
# 4. normalize the data:
# a. some normalize all the features then split to train and test sets OR,
# b. others split the data to test and train sets then normalize the features sets
# import libraries for split
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# Split first, then fit the scaler on the training rows only, so that no
# statistics of the held-out test rows leak into the preprocessing.
Xs_train, Xs_test, Y_train, Y_test = train_test_split(features, targets, test_size=0.25, random_state=1)
min_max_scaler = preprocessing.MinMaxScaler()
X_train = min_max_scaler.fit_transform(Xs_train)
# Reuse the minima/maxima learned from the training rows; never refit on test data.
X_test = min_max_scaler.transform(Xs_test)
# Kept so that any later code expecting the fully scaled feature matrix still works.
X_scale = min_max_scaler.transform(features)
#print (Y_test.shape)
#plt.hist(data['GT Compressor decay state coefficient'])
"""
# try to draw the pair plot scatter
#x_label = feature_list[0:2]
#y_label = ['GT Compressor decay state coefficient','GT Turbine decay state coefficient']
#df = data[y_label + x_label]
# Pair plot
sb.pairplot(df, markers = '.')
plt.tight_layout()
plt.show()
"""
# 5. Build the neural model from keras library
# import libraries
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.losses import mean_absolute_percentage_error
from keras.losses import mean_squared_error
from keras.losses import mean_absolute_error
# define the keras model as a function
def get_model():
    """Build and compile the regression network.

    The network takes 16 inputs, passes them through a 12-unit linear layer
    and a 13-unit sigmoid layer, and ends in a single linear output unit.
    It is compiled with mean absolute error as the loss, the Adam optimizer
    and MAPE as an additional metric.

    Returns:
        The compiled Sequential model; no training is done here.
    """
    model = Sequential()
    # First layer: fixes the number of inputs (16).
    model.add(Dense(12, activation='linear', input_dim=16, kernel_initializer='uniform'))
    # Hidden layer; add more of these as needed.
    model.add(Dense(units=13, activation='sigmoid', kernel_initializer='uniform'))
    # Output layer: a single unit, because one target variable is predicted.
    model.add(Dense(units=1, activation='linear'))
    # `learning_rate` replaces the deprecated `lr` argument of Adam.
    opt = Adam(learning_rate=0.025)
    model.compile(loss=mean_absolute_error, optimizer=opt, metrics=['mape'])
    return model
# 6. Calculate the features importance (Permutation importance)
from sklearn.model_selection import cross_val_score
from keras.wrappers.scikit_learn import KerasRegressor
import eli5
from eli5.sklearn import PermutationImportance
estimator = KerasRegressor(build_fn=get_model,validation_split = 0.2, batch_size=100, epochs=1000)
history = estimator.fit(X_train, Y_train)
perm = PermutationImportance(estimator, random_state=1).fit(X_train,Y_train)
w = eli5.show_weights(perm, feature_names = feature_list)
result = pd.read_html(w.data)[0]# this to read the object
print(result)
# 7. Predict on the test set and save the network.
# Use the Keras model that was actually trained through `estimator.fit`;
# calling get_model() again would build a fresh network with untrained weights.
model = estimator.model
Y_pred = model.predict(X_test)
#------------------------------
model.save("model")
#"""
from scipy.optimize import minimize
import scipy
my_model = keras.models.load_model("model")
def obj(params):
    """Objective for scipy.optimize.minimize: the model output for one input vector.

    Args:
        params: 1-D array holding one value per input feature (16 here);
            this is the form in which minimize passes each candidate point.

    Returns:
        The model's prediction for that input as a plain Python float.
    """
    # Keras expects a batch of shape (n_samples, 16), so wrap the single
    # candidate point into a batch of one row. Passing the flat vector
    # directly is what triggers the "received input with shape [None, 1]" error.
    batch = np.asarray(params, dtype=float).reshape(1, -1)
    # The network has one output unit, so the prediction is a (1, 1) array;
    # minimize needs a scalar objective value.
    return float(my_model.predict(batch, verbose=0)[0, 0])


s = [0]*16
# The initial guess must be a flat vector with one entry per input feature.
guess_params = np.array(s, dtype=float)
# Given a trained model, optimize the inputs to minimize the output.
# NOTE(review): the model was trained on min-max scaled features, so the
# search is presumably only meaningful inside [0, 1] per input — consider bounds.
optim_params = scipy.optimize.minimize(obj, guess_params, method='Nelder-Mead')

Related

After hyperparameter tuning accuracy remains the same

I tried to tune the hyperparameters, but afterwards the accuracy score had not changed at all. What am I doing wrong?
# Log reg
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(C=0.3326530612244898,max_iter=100,tol=0.01)
logreg.fit(X_train,y_train)
from sklearn.metrics import confusion_matrix

y_pred = logreg.predict(X_test)

print('Accuracy of log reg is: ', logreg.score(X_test,y_test))

confusion_matrix(y_test,y_pred)
# 0.9181286549707602 - accuracy before tuning
Output:
Accuracy of log reg is: 0.9181286549707602
array([[ 54, 9],
[ 5, 103]])
Here is me Using Grid Search CV:
from sklearn.model_selection import GridSearchCV
params ={'tol':[0.01,0.001,0.0001],
'max_iter':[100,150,200],
'C':np.linspace(1,20)/10}
grid_model = GridSearchCV(logreg,param_grid=params,cv=5)
grid_model_result = grid_model.fit(X_train,y_train)
print(grid_model_result.best_score_,grid_model_result.best_params_)
Output:
0.8867405063291139 {'C': 0.3326530612244898, 'max_iter': 100, 'tol': 0.01}

The problem is that in the first chunk you evaluate the model's performance on the test set, while the best_score_ reported by GridSearchCV is the mean cross-validated score computed on the training set only.
The code below shows that both procedures, when used to predict the test set labels, perform equally well in terms of accuracy (~0.93).
Note: you might want to consider a hyperparameter grid with other solvers and a larger range of max_iter, because I obtained convergence warnings.
# Load packages
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
# Load the dataset and split in X and y
df = pd.read_csv('Breast_cancer_data.csv')
X = df.iloc[:, 0:5]
y = df.iloc[:, 5]
# Perform train and test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize a model
Log = LogisticRegression(n_jobs=-1)
# Initialize a parameter grid
params = [{'tol':[0.01,0.001,0.0001],
'max_iter':[100,150,200],
'C':np.linspace(1,20)/10}]
# Perform GridSearchCV and store the best parameters
grid_model = GridSearchCV(Log,param_grid=params,cv=5)
grid_model_result = grid_model.fit(X_train,y_train)
best_param = grid_model_result.best_params_
# This step is only to prove that both procedures actually result in the same accuracy score
Log2 = LogisticRegression(C=best_param['C'], max_iter=best_param['max_iter'], tol=best_param['tol'], n_jobs=-1)
Log2.fit(X_train, y_train)
# Perform two predictions one straight from the GridSearch and the other one with manually inputting the best params
y_pred1 = grid_model_result.best_estimator_.predict(X_test)
y_pred2 = Log2.predict(X_test)
# Compare the accuracy scores and see that both are the same
print("Accuracy:",metrics.accuracy_score(y_test, y_pred1))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred2))

AttributeError: 'DecisionTreeRegressor' object has no attribute 'save' in GCS

I was trying to deploy my custom DecisionTreeRegressor for house price prediction to GCS Vertex AI. I followed a tutorial that was written for the MPG dataset.
However, when I tried to build and test the container locally using commands:
docker build ./ -t $IMAGE_URI
docker run $IMAGE_URI
The error message came out:
AttributeError: 'DecisionTreeRegressor' object has no attribute 'save'
The code I run as train.py:
# Import libraries necessary for this project
import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit
# Load the Boston housing dataset
data = pd.read_csv('trainer/housing.csv')
prices = data['MEDV']
features = data.drop('MEDV', axis = 1)
# Import 'train_test_split'
from sklearn.model_selection import train_test_split
# Shuffle and split the data into training and testing subsets
X_train, X_test, y_train, y_test = train_test_split(features, prices, test_size=0.2, random_state = 42)
#Defining model fitting and tuning functions
# Import 'make_scorer', 'DecisionTreeRegressor', and 'GridSearchCV'
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score # Import 'r2_score'
from sklearn.metrics import accuracy_score
# TODO: replace `your-gcs-bucket` with the name of the Storage bucket you created earlier
BUCKET = 'gs://gardena-dps-bucket'
def performance_metric(y_true, y_predict):
    """Score predictions against the true values with the R^2 metric.

    Args:
        y_true: the true target values.
        y_predict: the predicted target values.

    Returns:
        The value of r2_score(y_true, y_predict).
    """
    return r2_score(y_true, y_predict)
def fit_model(X, y):
    """Grid-search the 'max_depth' of a decision tree regressor on [X, y].

    Args:
        X: the training features.
        y: the training targets.

    Returns:
        The best estimator found by the grid search.
    """
    # Candidate tree depths 1 through 10.
    depth_grid = {'max_depth': list(range(1, 11))}
    # Ten shuffled splits, each holding out 20% of the data passed in.
    splitter = ShuffleSplit(n_splits=10, test_size=0.20, random_state=0)
    # Wrap 'performance_metric' so the grid search can use it for scoring.
    scorer = make_scorer(performance_metric)
    search = GridSearchCV(
        estimator=DecisionTreeRegressor(),
        param_grid=depth_grid,
        scoring=scorer,
        cv=splitter,
    )
    # fit() returns the fitted search object itself.
    return search.fit(X, y).best_estimator_
# Fit the training data to the model using grid search
reg = fit_model(X_train, y_train)
# Produce a matrix for client data
client_data = [[12, 26.3, 16.99885]] # Client data in 2D array
# Show predictions
reprice = reg.predict(client_data).astype(int)
reprice
# Export model and save to GCS
reg.save(BUCKET + '/housing/model')

Scikit-learn estimators do not provide any method to save their states directly. From the Google documentation, the best way to store a fitted model to GCS is to use joblib to locally serialize your model and then upload it to GCS.
As follow:
import datetime

# sklearn.externals.joblib has been removed from scikit-learn; import joblib directly.
import joblib
from google.cloud import storage

# Export the model to a file
# NOTE: `pipeline` stands for your fitted estimator (presumably `reg` in the question).
model = 'model.joblib'
joblib.dump(pipeline, model)
# Upload the model to GCS
# NOTE: BUCKET_NAME is presumably the bare bucket name, without a 'gs://' prefix — confirm.
bucket = storage.Client().bucket(BUCKET_NAME)
blob = bucket.blob('{}/{}'.format(
    datetime.datetime.now().strftime('model_%Y%m%d_%H%M%S'),
    model))
blob.upload_from_filename(model)

Precision recall curve when results of estimator known

I have the results of an estimator running on X, as well as the ground truth, and I want to use plot_precision_recall_curve, but that requires passing in the estimator and X - which I can't do, the estimator is very complex and resides in another system... What should I do? (it would be nice to have a version of plot_precision_recall_curve that takes in y_pred and y_true ...).

You can use precision_recall_curve, which accepts y_true and the predicted scores and returns precision, recall, and thresholds. From precision and recall you can compute the AUC and plot the curve manually; f1_score can be computed separately from the predicted class labels.
This is an example:
# precision-recall curve and f1
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from matplotlib import pyplot
# generate 2 class dataset
X, y = make_classification(n_samples=1000, n_classes=2, random_state=1)
# split into train/test sets
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.5, random_state=2)
# fit a model
model = LogisticRegression(solver='lbfgs')
model.fit(trainX, trainy)
# predict probabilities
lr_probs = model.predict_proba(testX)
# keep probabilities for the positive outcome only
lr_probs = lr_probs[:, 1]
# predict class values
yhat = model.predict(testX)
lr_precision, lr_recall, _ = precision_recall_curve(testy, lr_probs)
lr_f1, lr_auc = f1_score(testy, yhat), auc(lr_recall, lr_precision)
# summarize scores
print('Logistic: f1=%.3f auc=%.3f' % (lr_f1, lr_auc))
# plot the precision-recall curves
no_skill = len(testy[testy==1]) / len(testy)
pyplot.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
pyplot.plot(lr_recall, lr_precision, marker='.', label='Logistic')
# axis labels
pyplot.xlabel('Recall')
pyplot.ylabel('Precision')
# show the legend
pyplot.legend()
# show the plot
pyplot.show()

increase accuracy of model in sklearn

The decision tree classifier gives an accuracy of 0.52, but I want to increase it. How can I increase the accuracy using any of the classification models available in sklearn?
I have tried kNN, a decision tree, and cross-validation, but all of them give low accuracy.
Thanks
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
#read from the csv file and return a Pandas DataFrame.
nba = pd.read_csv('wine.csv')
# print the column names
original_headers = list(nba.columns.values)
print(original_headers)
# Print the first three rows.
print(nba[0:3])
# "quality" is the class attribute we are predicting.
class_column = 'quality'
# Only the columns listed in feature_columns are used as features.
feature_columns = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH','sulphates', 'alcohol']
#Pandas DataFrame allows you to select columns.
#We use column selection to split the data into features and class.
nba_feature = nba[feature_columns]
nba_class = nba[class_column]
print(nba_feature[0:3])
print(list(nba_class[0:3]))
train_feature, test_feature, train_class, test_class = \
train_test_split(nba_feature, nba_class, stratify=nba_class, \
train_size=0.75, test_size=0.25)
training_accuracy = []
test_accuracy = []
knn = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=1)
knn.fit(train_feature, train_class)
prediction = knn.predict(test_feature)
print("Test set predictions:\n{}".format(prediction))
print("Test set accuracy: {:.2f}".format(knn.score(test_feature, test_class)))
train_class_df = pd.DataFrame(train_class,columns=[class_column])
train_data_df = pd.merge(train_class_df, train_feature, left_index=True, right_index=True)
train_data_df.to_csv('train_data.csv', index=False)
temp_df = pd.DataFrame(test_class,columns=[class_column])
temp_df['Predicted Pos']=pd.Series(prediction, index=temp_df.index)
test_data_df = pd.merge(temp_df, test_feature, left_index=True, right_index=True)
test_data_df.to_csv('test_data.csv', index=False)
tree = DecisionTreeClassifier(max_depth=4, random_state=0)
tree.fit(train_feature, train_class)
print("Training set score: {:.3f}".format(tree.score(train_feature, train_class)))
print("Test set score Decision: {:.3f}".format(tree.score(test_feature, test_class)))
prediction = tree.predict(test_feature)
print("Confusion matrix:")
print(pd.crosstab(test_class, prediction, rownames=['True'], colnames=['Predicted'], margins=True))
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is its replacement.
cancer = nba.to_numpy()
# Score a fresh depth-4 tree with 10-fold cross-validation on the training split.
tree = DecisionTreeClassifier(max_depth=4, random_state=0)
scores = cross_val_score(tree, train_feature,train_class, cv=10)
print("Cross-validation scores: {}".format(scores))
print("Average cross-validation score: {:.2f}".format(scores.mean()))

Usually the next step after a decision tree (DT) is a random forest (RF) and its relatives, or XGBoost (which is not part of sklearn). Try them. Note that decision trees overfit very easily.
Remove outliers. Check the classes in your dataset: if they are imbalanced, most of the errors may come from there. In that case you need to use class weights while fitting or in the metric function (or use F1).
You can attach your confusion matrix here - it would be great to see.
A neural network (even the one from sklearn) may also give better results.

Improve your preprocessing.
Methods such as DT and kNN may be sensitive to how you preprocess your columns. For example, a DT can benefit much from well-chosen thresholds on the continuous variables.

Why different intermediate layer ouput of CNN in keras?

I am using this code to perform some experiment, I want to use intermediate layer representation of layer mainly before the fully connected layer(or last layer) of CNN.
from __future__ import print_function
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.datasets import imdb
# set parameters:
max_features = 5000
maxlen = 400
batch_size = 100
embedding_dims = 50
filters = 250
kernel_size = 3
hidden_dims = 250
epochs = 100
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('Build model...')
model = Sequential()
# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
embedding_dims,
input_length=maxlen))
model.add(Dropout(0.2))
# we add a Convolution1D, which will learn filters
# word group filters of size filter_length:
model.add(Conv1D(filters,
kernel_size,
padding='valid',
activation='relu',
strides=1))
# we use max pooling:
model.add(GlobalMaxPooling1D())
# We add a vanilla hidden layer:
model.add(Dense(hidden_dims))
model.add(Dropout(0.2))
model.add(Activation('relu'))#<======== I need output after this.
# We project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='adam', metrics=['accuracy'])
To get the intermediate layer representation of penultimate layer I used following code.
CODE1
get_layer_output = K.function([model.layers[0].input, K.learning_phase()],
[model.layers[6].output])
# output in test mode = 0
layer_output_test = get_layer_output([x_test, 0])[0]
# output in train mode = 1
layer_output_train = get_layer_output([x_train, 1])[0]
print(layer_output_train)
print(layer_output_train.shape)
CODE2
def get_activations(model, layer, X_batch):
    """Evaluate the output of layer number `layer` of `model` on `X_batch`.

    The backend function is called with a learning-phase value of 1, and
    whatever it returns is handed back unchanged.
    """
    fetch_layer_output = K.function(
        [model.layers[0].input, K.learning_phase()],
        [model.layers[layer].output],
    )
    return fetch_layer_output([X_batch, 1])
import numpy as np
X_train=np.array(get_activations(model=model,layer=6, X_batch=x_train)[0], dtype=np.float32)
print(X_train)
print(X_train.shape)
Which one is correct as I am getting/printing different output for above two codes? I want to use the above correct output to multiply by weights and optimise by custom optimiser.

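If you pass 1 as the K.learning_phase() value, the model runs in training mode, so the Dropout layers are active and you will get different results on every call. Apart from that, both pieces of code compute the same thing.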

Using a higher level approach, you can do this:
from keras.models import Model
newModel = Model(model.inputs,model.layers[6].output)
Do whatever you want with newModel. You can train it (and affect the original model), and use it to predict values.

Develop Reference

ios ruby-on-rails asp.net-mvc docker delphi jenkins grails google-sheets machine-learning dart

how to let the minimize function understand my neural network model? - scipy-optimize-minimize

Related

After hyperparameter tuning accuracy remains the same

AttributeError: 'DecisionTreeRegressor' object has no attribute 'save' in GCS

Precision recall curve when results of estimator known

increase accuracy of model in sklearn

Why different intermediate layer ouput of CNN in keras?

Categories

Resources