Pipeline deployment in Flask (python) - machine-learning

I'm trying to deploy my model, which was built using a Pipeline, via Flask, but I'm facing the following AttributeError:
Can't get attribute 'FeatureSelector' on <module '__main__' from 'app.py'>
Here is my code for model.py:
(After loading the necessary libraries and reading the data, I have defined the class for my pipeline)
class FeatureSelector(BaseEstimator, TransformerMixin):
    # Class constructor
    def __init__(self, feature_names):
        self._feature_names = feature_names

    # Return self, nothing else to do here
    def fit(self, X, y=None):
        return self

    # Method that describes what we need this transformer to do
    def transform(self, X, y=None):
        return X[self._feature_names]
LE = LabelEncoder()

class CategoricalTransformer(BaseEstimator, TransformerMixin):
    # Class constructor method that takes in a list of values as its argument
    def __init__(self, cat_cols=['Response', 'EmploymentStatus', 'Number of Open Complaints',
                                 'Number of Policies', 'Policy Type', 'Renew Offer Type',
                                 'Vehicle Class']):
        self._cat_cols = cat_cols

    # Return self, nothing else to do here
    def fit(self, X, y=None):
        return self

    # Transformer method we wrote for this transformer
    def transform(self, X, y=None):
        if self._cat_cols:
            for i in X[self._cat_cols]:
                X[i] = LE.fit_transform(X[i])
        return X.values
class NumericalTransformer(BaseEstimator, TransformerMixin):
    # Class constructor
    def __init__(self, MPA_log=True):
        self._MPA_log = MPA_log

    # Return self, nothing else to do here
    def fit(self, X, y=None):
        return self

    # Custom transform method that creates the aforementioned feature and drops the redundant one
    def transform(self, X, y=None):
        if self._MPA_log:
            X.loc[:, 'MPA_log'] = np.log(X['Monthly Premium Auto'])
            X = X.drop(['Monthly Premium Auto'], axis=1)
        return X.values
I have created separate pipelines for the numerical and categorical features. They have been combined using FeatureUnion in the full pipeline.
full_pipeline = FeatureUnion(transformer_list=[('categorical_pipeline', categorical_pipeline), ('numerical_pipeline', numerical_pipeline)])
X = df.drop('Customer Lifetime Value', axis = 1)
y = df['Customer Lifetime Value']
y = np.log(y) #Transforming the y variable
full_pipeline_RF = Pipeline( steps = [('full_pipeline', full_pipeline),('model',
RandomForestRegressor(max_depth=21, min_samples_leaf= 8, random_state=0))])
full_pipeline_RF.fit(X, y)
# Saving model to disk
pickle.dump(full_pipeline_RF, open('model.pkl','wb'))
# Loading model to compare the results
model = pickle.load(open('model.pkl','rb'))
Model has been called in app.py file with the following code:
import numpy as np
from flask import Flask, request, jsonify, render_template
import pickle
app = Flask(__name__)
model = pickle.load(open('model.pkl', 'rb'))
@app.route('/')
def home():
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    '''
    For rendering results on HTML GUI
    '''
    int_features = [float(x) for x in request.form.values()]
    final_features = [np.array(int_features)]
    prediction = model.predict(final_features)
    output = round(np.exp(prediction[0]), 2)
    return render_template('index.html', prediction_text='Customer Lifetime Value $ {}'.format(output))

if __name__ == "__main__":
    app.run(debug=True)
The code works fine in Jupyter. Even while running in Spyder, it doesn't throw any errors. Please help me with this code; I'm stuck only on the execution bit.

This was actually simple. All I had to do was make the classes created for the pipeline available in my app.py. These are new classes customised for this case, so app.py has to know about them: the same class definitions need to be present (defined or imported) in app.py before the pickle is loaded.
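For illustration, a minimal sketch of what that fix can look like at the top of app.py (only one class shown; the others follow the same pattern):
# app.py -- sketch of the fix described above
import pickle
import numpy as np
from flask import Flask, request, render_template
from sklearn.base import BaseEstimator, TransformerMixin

# The pickle stores references like __main__.FeatureSelector, so the same class
# definitions must exist in the module that loads it. Either paste them here or
# import them from a shared module that was also used when the model was pickled.
class FeatureSelector(BaseEstimator, TransformerMixin):
    def __init__(self, feature_names):
        self._feature_names = feature_names
    def fit(self, X, y=None):
        return self
    def transform(self, X, y=None):
        return X[self._feature_names]

# ... CategoricalTransformer and NumericalTransformer defined the same way ...

app = Flask(__name__)
model = pickle.load(open('model.pkl', 'rb'))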

Related

ChildProcess close with all stdio code 1

I was trying to deploy an ML model using Node.js with the help of the ChildProcess package. While running __predict(), it takes too long and ends with a code 1 error.
Here I share all the related code needed to decode the issue:
Model python code -->
import keras
import time
start = time.time()
encoder = keras.models.load_model('enc', compile = False)
decoder = keras.models.load_model('dec', compile = False)
import numpy as np
from flask import Flask, request, jsonify , render_template
import tensorflow as tf
import pickle
import string
import re
from keras_preprocessing.sequence import pad_sequences
def initialize_hidden_state():
    return tf.zeros((1, 1024))

eng_tokenizer, hin_tokenizer = pickle.load(open('tokenizer.pkl', 'rb'))

def clean(text):
    text = text.lower()
    special_char = set(string.punctuation + '।')  # Set of all special characters
    # Remove all the special characters
    text = ''.join(word for word in text if word not in special_char)
    seq = eng_tokenizer.texts_to_sequences([text])
    seq = pad_sequences(seq, maxlen=23, padding='post')
    return seq

def __predict(data):
    # Get the data from the POST request.
    # data = request.get_json(force=True)
    clean_input = clean(data)
    # Make prediction using model loaded from disk as per the data.
    hidden_enc = initialize_hidden_state()
    enc_out, enc_hidden = encoder(clean_input, hidden_enc)
    result = ''
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims(hin_tokenizer.texts_to_sequences(['<Start>'])[0], 0)
    # ------------------------------------------------------------------
    for t in range(25):
        predictions, dec_hidden, attention_weights = decoder(dec_input, dec_hidden, enc_out)
        predicted_id = tf.argmax(predictions[0]).numpy()
        x = hin_tokenizer.sequences_to_texts([[predicted_id]])[0]
        if x == 'end':
            break
        result += x + ' '
        # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)
    CLEANR = re.compile(r"([A-Za-z])", re.DOTALL)
    result = re.sub(CLEANR, '', result)
    return result
# import json
# with open('data.json', 'r') as openfile:
# json_object = json.load(openfile).get('data')
data =__predict("file")
end= time.time()
# print(start-end)
data1 = data +"abcd"
print(data1)
# print("abcd")
# dictionary = {
# "data": data,
# }
# json_object = json.dumps(dictionary, indent=2)
# with open("result.json", "w") as outfile:
# outfile.write(json_object)
When I print("abcd") or print(start-end), it gives a result and ends with code 0. But when I try to print the data, it gives no result and ends with code 1.
Here is the childProcess code -->
app.get('/', (req, res) => {
  let dataToSend
  let largeDataSet = []
  // spawn new child process to call the python script
  const python = spawn('python', ['app.py'])
  // console.log(python);
  // collect data from script
  python.stdout.on('data', function (data) {
    console.log('Pipe data from python script ...')
    // dataToSend = data;
    largeDataSet.push(data)
  })
  // in the close event we are sure that the stream from the child process is closed
  python.on('close', (code) => {
    console.log(`child process close all stdio with code ${code}`)
    // send data to browser
    // largeDataSet = []
    console.log(largeDataSet.join(''));
    res.send(largeDataSet.join(''))
  })
})
Here is the error --->
child process close all stdio with code 1
Please help; I tried to understand the problem but failed severely even at that.
Thanks in advance !!!
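One thing worth noting: the Node handler above only listens on python.stdout, while an unhandled Python exception (which is what normally makes the interpreter exit with code 1) is written to stderr and is therefore never shown. Purely as a debugging aid, a minimal sketch of wrapping the prediction step so any traceback lands on stdout, where the existing 'data' handler can capture it:
# sketch: surface Python errors on stdout so the Node 'data' handler can see them
import sys
import traceback

try:
    data = __predict("file")
    print(data + "abcd")
except Exception:
    # an unhandled exception is what makes the child process exit with code 1
    traceback.print_exc(file=sys.stdout)
    sys.stdout.flush()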

Saving an sklearn.svm.SVR model as JSON instead of pickling

I have a trained SVR model which needs to be saved in a JSON format instead of pickling.
The idea behind JSONifying the trained model is to simply capture the state of the weights and other 'fitted' attributes. Then, I can set these attributes later to make predictions. Here is an implementation of it I did:
# assume SVR has been trained
regressor = SVR()
regressor.fit(x_train, y_train)

# saving the regressor params in a JSON file for later retrieval
with open(f'saved_regressor_params.json', 'w', encoding='utf-8') as outfile:
    json.dump(regressor.get_params(), outfile)

# finding the fitted attributes of SVR()
# if an attribute is trailed by '_', it's a fitted attribute
attrs = [i for i in dir(regressor) if i.endswith('_') and not i.endswith('__')]
remove_list = ['coef_', '_repr_html_', '_repr_mimebundle_']  # unnecessary attributes
for attr in remove_list:
    if attr in attrs:
        attrs.remove(attr)

# convert NumPy arrays to lists and save trained attribute values into a JSON file
attr_dict = {i: getattr(regressor, i) for i in attrs}
for k in attr_dict:
    if isinstance(attr_dict[k], np.ndarray):
        attr_dict[k] = attr_dict[k].tolist()

# dump JSON for prediction
with open(f'saved_regressor_{index}.json', 'w', encoding='utf-8') as outfile:
    json.dump(attr_dict,
              outfile,
              separators=(',', ':'),
              sort_keys=True,
              indent=4)
This creates two separate JSON files: one called saved_regressor_params.json, which saves the required SVR parameters, and another called saved_regressor.json, which stores the fitted attributes and their trained values. Example (saved_regressor.json):
{
    "_dual_coef_": [
        [
            -1.0,
            -1.0,
            -1.0,
        ]
    ],
    "_intercept_": [
        1.323423423
    ],
    ...
    ...
    "_n_support_": [
        3
    ]
}
Later, I can create a new SVR() model and simply set these parameters and attributes on it by reading them from the JSON files we just created, then call the predict() method to predict. Like so (in a new file):
predict_svr = SVR()

# load the json from the files
obj_text = codecs.open('saved_regressor_params.json', 'r', encoding='utf-8').read()
params = json.loads(obj_text)
obj_text = codecs.open('saved_regressor.json', 'r', encoding='utf-8').read()
attributes = json.loads(obj_text)

# setting params
predict_svr.set_params(**params)

# setting attributes
for k in attributes:
    if isinstance(attributes[k], list):
        setattr(predict_svr, k, np.array(attributes[k]))
    else:
        setattr(predict_svr, k, attributes[k])

predict_svr.predict(...)
However, during this process a particular attribute, n_support_, cannot be set for some reason. And even if I ignore the n_support_ attribute, it creates additional errors. (Is my logic wrong, or am I missing something here?)
Therefore, I am looking for different ways or ingenious methods to save an SVR model as JSON.
I have tried existing third-party helper libraries like sklearn_json. These libraries tend to export perfectly for linear models but not for support vectors.
Making the reproducible example that is missing from the OP, based on the docs (version 1.1.2):
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
regressor = SVR(C=1.0, epsilon=0.2)
regressor.fit(X, y)
Then a sketch of the JSON serialization/deserialization:
import json

# serialize
serialized = json.dumps({
    k: v.tolist() if isinstance(v, np.ndarray) else v
    for k, v in regressor.__dict__.items()
})

# deserialize
regressor2 = SVR()
regressor2.__dict__ = {
    k: np.asarray(v) if isinstance(v, list) else v
    for k, v in json.loads(serialized).items()
}

# test
assert np.all(regressor.predict(X) == regressor2.predict(X))
EDIT: Serialization preserving data type
A not so elegant solution to address the first issue mentioned in a comment is to save the data type together with the data.
import json

# serialize
serialized = json.dumps({
    k: [v.tolist(), 'np.ndarray', str(v.dtype)] if isinstance(v, np.ndarray) else v
    for k, v in regressor.__dict__.items()
})

# deserialize
regressor2 = SVR()
regressor2.__dict__ = {
    k: np.asarray(v[0], dtype=v[2]) if isinstance(v, list) and v[1] == 'np.ndarray' else v
    for k, v in json.loads(serialized).items()
}

# test
assert np.all(regressor.predict(X) == regressor2.predict(X))
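A quick way to see why preserving the dtype matters, purely as a check (which attributes change will depend on the scikit-learn version; in recent versions fitted attributes such as n_support_ are int32 arrays, which a naive round-trip brings back as int64):
# sketch: list the attributes whose dtype changes under a naive JSON round-trip
import json
import numpy as np

for name, value in regressor.__dict__.items():
    if isinstance(value, np.ndarray):
        restored = np.asarray(json.loads(json.dumps(value.tolist())))
        if restored.dtype != value.dtype:
            print(f'{name}: {value.dtype} -> {restored.dtype}')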

Custom dataset Loader pytorch

I am doing COVID-19 classification. I took the dataset from Kaggle; it has a folder named dataset which contains 3 folders (normal, pneumonia and covid-19), each containing images for these classes. I am stuck on writing __getitem__ in the PyTorch custom dataloader.
The dataset has 189 covid images, but with this __getitem__ I get 920 covid images. Kindly help.
class_names = ['normal', 'viral', 'covid']
root_dir = 'COVID-19 Radiography Database'
source_dirs = ['NORMAL', 'Viral Pneumonia', 'COVID-19']
if os.path.isdir(os.path.join(root_dir, source_dirs[1])):
    os.mkdir(os.path.join(root_dir, 'test'))

    for i, d in enumerate(source_dirs):
        os.rename(os.path.join(root_dir, d), os.path.join(root_dir, class_names[i]))

    for c in class_names:
        os.mkdir(os.path.join(root_dir, 'test', c))

    for c in class_names:
        images = [x for x in os.listdir(os.path.join(root_dir, c)) if x.lower().endswith('png')]
        selected_images = random.sample(images, 30)
        for image in selected_images:
            source_path = os.path.join(root_dir, c, image)
            target_path = os.path.join(root_dir, 'test', c, image)
            shutil.move(source_path, target_path)
The above code is used to create the test dataset, which has 30 images of each class.
class ChestXRayDataset(torch.utils.data.Dataset):
    def __init__(self, image_dirs, transform):
        def get_images(class_name):
            images = [x for x in os.listdir(image_dirs[class_name]) if
                      x[-3:].lower().endswith('png')]
            print(f'Found {len(images)} {class_name} examples')
            return images

        self.images = {}
        self.class_names = ['normal', 'viral', 'covid']
        for class_name in self.class_names:
            self.images[class_name] = get_images(class_name)
        self.image_dirs = image_dirs
        self.transform = transform

    def __len__(self):
        return sum([len(self.images[class_name]) for class_name in self.class_names])

    def __getitem__(self, index):
        class_name = random.choice(self.class_names)
        index = index % len(self.images[class_name])
        image_name = self.images[class_name][index]
        image_path = os.path.join(self.image_dirs[class_name], image_name)
        image = Image.open(image_path).convert('RGB')
        return self.transform(image), self.class_names.index(class_name)
**Stuck in __getitem__ of this**
The images in the folder are arranged as follows:
The dataset is as follows:
**Code for the confusion matrix is:**
nb_classes = 3
confusion_matrix = torch.zeros(nb_classes, nb_classes)

with torch.no_grad():
    for data in tqdm_notebook(dl_train, total=len(dl_train), unit='batch'):
        img, lab = data
        print(lab)
        img, lab = img.to(device), lab.to(device)
        _, output = torch.max(model(img), 1)
        print(output)
        for t, p in zip(lab.view(-1), output.view(-1)):
            confusion_matrix[t.long(), p.long()] += 1
Output for the confusion matrix: only one class is getting trained.
confusion matrix image
Putting your images in a dictionary complicates the manipulation; rather, use a list. Also, your Dataset should not have any randomness: shuffling of the data should happen in the DataLoader, not in the Dataset.
Use something like below:
class ChestXRayDataset(torch.utils.data.Dataset):
    def __init__(self, image_dirs, transform):
        def get_images(class_name):
            images = [x for x in os.listdir(image_dirs[class_name]) if
                      x[-3:].lower().endswith('png')]
            print(f'Found {len(images)} {class_name} examples')
            return images

        self.images = []
        self.labels = []
        self.class_names = ['normal', 'viral', 'covid']
        for class_name in self.class_names:
            images = get_images(class_name)
            # This is a list containing all the images
            self.images.extend(images)
            # This is a list containing all the corresponding image labels
            self.labels.extend([class_name] * len(images))
        self.image_dirs = image_dirs
        self.transform = transform

    def __len__(self):
        return len(self.images)

    # Will return the image and its label at the position `index`
    def __getitem__(self, index):
        # image at index position of all the images
        image_name = self.images[index]
        # Its label
        class_name = self.labels[index]
        image_path = os.path.join(self.image_dirs[class_name], image_name)
        image = Image.open(image_path).convert('RGB')
        return self.transform(image), self.class_names.index(class_name)
If you iterate over it, say using:
ds = ChestXRayDataset(image_dirs, transform)
for x, y in ds:
    print(x.shape, y)
you should see all the images and labels in sequential order.
However, in a real case you would rather use a Torch DataLoader and pass it the ds object with the shuffle parameter set to True, so that the DataLoader takes care of shuffling the Dataset by calling __getitem__ with shuffled index values.
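For example, a minimal sketch of that DataLoader usage (the batch size is an arbitrary placeholder):
from torch.utils.data import DataLoader

# shuffling happens here, not inside the Dataset
dl_train = DataLoader(ds, batch_size=32, shuffle=True)

for images, labels in dl_train:
    # images: (batch, C, H, W) tensor, labels: (batch,) tensor of class indices
    ...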

PicklingError when running pipeline in sklearn

I am running the following pipeline in sklearn to perform grid search
def logistic():
    sm = SMOTE()
    poly = polynomial_transform()
    stand = StandardScaler()
    pca = PCA()
    # I need to think about how I am training this...
    logistic = LogisticRegression(max_iter=100, tol=0.01, solver='saga')  # what is my binary scorer here

    pipe = Pipeline(steps=[('smt', sm), ('poly', poly), ('standardise', stand), ('pca', pca), ('logistic', logistic)], memory=mem)

    # Parameters of pipelines can be set using '__' separated parameter names:
    param_grid = {
        #'poly__degree': [1,2],
        'pca__n_components': [50, 100],
        'logistic__C': [0.0001],
    }

    scorers = {
        'precision_score': make_scorer(precision_score),
        'recall_score': make_scorer(recall_score),
        'accuracy_score': make_scorer(accuracy_score),
        'f1_score': make_scorer(f1_score),
        'tp': make_scorer(tp),
        'tn': make_scorer(tn),
        'fp': make_scorer(fp),
        'fn': make_scorer(fn),
        'ck': make_scorer(cohen_kappa_score)
    }

    # performing grid search
    search = GridSearchCV(pipe, param_grid, n_jobs=-1, verbose=2, scoring=scorers, refit='accuracy_score')
    search.fit(X_train.to_numpy(), y_train.to_numpy().ravel())

    # results of cross validation grid search
    print("Best parameter (CV score=%0.3f):" % search.best_score_)
    print(search.best_params_)
    return search
polynomial_transform is a class I have created myself:
class polynomial_transform(BaseEstimator):
    def __init__(self, degree=None):
        self.degree = degree

    def fit(self, X, y=None):
        return self

    # Method that describes what we need this transformer to do
    def transform(self, X, y=None):
        for i in range(1, self.degree):
            X = np.hstack((X, X**i))
        return X

    def set_params(self, degree):
        self.degree = degree

    def get_params(self, deep=True):
        return {'degree': self.degree}
When setting the memory parameter in the pipeline I am getting the following error
FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
_pickle.PicklingError: ("Can't pickle <class '__main__.polynomial_transform'>: it's not found as __main__.polynomial_transform")
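This looks like the same root cause as the Flask question above: pickle can only resolve polynomial_transform if it can re-import it from the module path recorded in the pickle, and a class that lives only in a notebook's or script's __main__ (or that was redefined after the pipeline was built) cannot be found. A common workaround, sketched here under the assumption of a new file named transformers.py on the import path (the file name is an assumption), is to move the class into its own module and import it wherever it is used:
# transformers.py -- hypothetical module holding the custom transformer
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class polynomial_transform(BaseEstimator, TransformerMixin):
    def __init__(self, degree=None):
        self.degree = degree

    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        # stack X, X**1, ..., X**(degree-1) next to the original features
        for i in range(1, self.degree):
            X = np.hstack((X, X ** i))
        return X

# in the grid-search script:
# from transformers import polynomial_transform
# poly = polynomial_transform(degree=2)
Inheriting from BaseEstimator also supplies working get_params/set_params, so the hand-written versions from the question are no longer needed.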

OpenAI gym breakout-ram-v4 unable to learn

I am using Q-learning and the program should be able to play the game after some tries, but it is not learning even when the epsilon value is 0.1.
I have tried changing the batch size and the memory size. I have changed the code to give a -1 reward if the player dies.
import gym
import numpy as np
import random
import tensorflow as tf
import numpy as np
from time import time
import keyboard
import sys
import time
env = gym.make("Breakout-ram-v4")
observationSpace = env.observation_space
actionSpace= env.action_space
episode = 500
class Model_QNN:
    def __init__(self):
        self.memory = []
        self.MAX_MEMORY_TO_USE = 60_000
        self.gamma = 0.9
        self.model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(128, 1)),
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(actionSpace.n, activation="softmax")
        ])
        self.model.compile(optimizer="adam", loss="mse", metrics=["accuracy"])

    def remember(self, steps, done):
        self.memory.append([steps, done])
        if len(self.memory) >= self.MAX_MEMORY_TO_USE:
            del self.memory[0]

    def replay(self, batch_size=32):
        states, targets_f = [], []
        if len(self.memory) < batch_size:
            return
        else:
            mini = random.sample(self.memory, batch_size)
            states, targets = [], []
            for steps, done in mini:
                target = steps[2]
                if not done:
                    target = steps[2] + (self.gamma * np.amax(self.model.predict(steps[3].reshape(1, 128, 1))[0]))
                target_f = self.model.predict(steps[0].reshape(1, 128, 1))
                target_f[0][steps[1]] = target
                states.append(steps[0])
                targets.append(target_f[0])
            self.model.fit(np.array(states).reshape(len(states), 128, 1), np.array(targets), verbose=0, epochs=10)

    def act(self, state, ep):
        if random.random() < ep:
            action = actionSpace.sample()
        else:
            np.array([state]).shape
            action = self.model.predict(state.reshape(1, 128, 1))
            action = np.argmax(action)
        return action

    def saveModel(self):
        print("Saving")
        self.model.save("NEWNAMEDONE")

    def saveBackup(self, num):
        self.model.save("NEWNAME" + str(int(num)))

def main():
    agent = Model_QNN()
    epsilon = 0.9
    t_end = time.time()
    score = 0
    for e in range(2000):
        print("Working on episode : " + str(e) + " eps " + str(epsilon) + " Score " + str(score))
        preState = env.reset()
        preState, reward, done, _ = env.step(1)
        mainLife = 5
        done = False
        score = 0
        icount = 0
        render = False
        if e % 400 == 0 and not e == 0:
            render = True
        while not done:
            icount += 1
            if render:
                env.render()
            if keyboard.is_pressed('q'):
                agent.saveBackup(100)
                agent.saveModel()
                quit()
            rewrd = 0
            if _["ale.lives"] < mainLife:
                mainLife -= 1
                rewrd = -1
                action = 1
            else:
                action = agent.act(preState, epsilon)
            newState, reward, done, _ = env.step(action)
            if rewrd == -1:
                reward = -1
            agent.remember([preState / 255, action, reward, newState / 255], done)
            preState = newState
            score += reward
            if done:
                break
        agent.replay(1024)
        if epsilon >= 0.18:
            epsilon = epsilon * 0.995
        if (e + 1) % 500 == 0:
            agent.saveBackup((e + 1) / 20)
            agent.saveModel()

if __name__ == '__main__':
    main()
There is no error message; the program should learn, but it is not.
Why are you using Softmax on your output layer?
If you want to use Softmax, use cross-entropy as your loss. However, it looks like you're trying to implement a value-based learning system. The activation function on your output layer should be linear.
I suggest you try your implementation on CartPole-v0 and then LunarLander-v2 first.
Those are solved environments and a great place to sanity-check your code.
"There is no error message; the program should learn, but it is not."
Welcome to ML, where things fail silently.
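For illustration, a minimal sketch of the suggested change, keeping the rest of the network from the question and only swapping the output activation (MSE stays as the loss for the Q-value regression; actionSpace comes from the question's code):
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(128, 1)),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    # one linear Q-value output per action -- no softmax for value-based learning
    tf.keras.layers.Dense(actionSpace.n, activation="linear")
])
model.compile(optimizer="adam", loss="mse")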
