How can I use "if __name__ == '__main__':" in my code below? - Spyder

After I run my code I get a warning as:
"RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
    freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable."
My question is: how and where should I add this line to avoid this warning/error in the code below?
from scipy import stats, optimize
import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
#from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from theano import shared
np.random.seed(9)
#Load the Data
dataset = pd.read_csv('PV-PCM.csv')
X=dataset.iloc[:,[0,1,2,3,4]].values
y=dataset.iloc[:,5].values
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size = 0.2, random_state=42)
#Shapes
X.shape, y.shape, X_tr.shape, X_te.shape
#Preprocess data for Modeling
shA_X = shared(X_tr)
#Generate Model
linear_model = pm.Model()
with linear_model:
    # Priors for unknown model parameters
    alpha = pm.Normal("alpha", mu=y_tr.mean(), sd=10)
    betas = pm.Normal("betas", mu=0, sd=1000, shape=X.shape[1])
    sigma = pm.HalfNormal("sigma", sd=100)  # you could also try a HalfCauchy that has longer/fatter tails
    mu = alpha + pm.math.dot(betas, X_tr.T)
    likelihood = pm.Normal("likelihood", mu=mu, sd=sigma, observed=y_tr)
    step = pm.NUTS()
    trace = pm.sample(1000, step)
chain = trace[100:]
#pm.traceplot(chain);
#Traceplot
pm.traceplot(trace)
ppc = pm.sample_prior_predictive(samples=1000, random_seed=9)
pm.plot_posterior(trace, figsize = (12, 10))
sns.kdeplot(y_tr, alpha=0.5, lw=4, c='b')
for i in range(100):
    sns.kdeplot(ppc['likelihood'][i], alpha=0.1, c='g')
alpha_pred = chain['alpha'].mean()
betas_pred = chain['betas'].mean(axis=0)
y_pred = alpha_pred + np.dot(betas_pred, X_tr.T)
Thank you all.
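For reference, a minimal sketch (not the only way to do it) of the idiom the error message asks for: keep the imports at module level and move everything that triggers sampling into a function that is only called under the main guard. On Windows and in Spyder the worker processes are started with spawn, which re-imports the main module; without the guard that re-import runs the sampling code again, which is exactly what the RuntimeError describes.

```python
# Sketch only: wrap the executable part of the script in a main guard so that
# the worker processes spawned by pm.sample() can re-import the module safely.
import numpy as np
import pandas as pd
import pymc3 as pm
from sklearn.model_selection import train_test_split

def main():
    np.random.seed(9)
    dataset = pd.read_csv('PV-PCM.csv')
    X = dataset.iloc[:, [0, 1, 2, 3, 4]].values
    y = dataset.iloc[:, 5].values
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)

    with pm.Model() as linear_model:
        alpha = pm.Normal("alpha", mu=y_tr.mean(), sd=10)
        betas = pm.Normal("betas", mu=0, sd=1000, shape=X.shape[1])
        sigma = pm.HalfNormal("sigma", sd=100)
        mu = alpha + pm.math.dot(betas, X_tr.T)
        pm.Normal("likelihood", mu=mu, sd=sigma, observed=y_tr)
        trace = pm.sample(1000)  # sampling is what starts the child processes
    # ...the plotting / prediction code from the question would go here...

if __name__ == '__main__':
    main()
```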

Related

K Means Clustering with Python

FutureWarning: Unlike other reduction functions (e.g. skew,kurtosis), the default behavior of mode typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of keepdims will become False, the axis over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set keepdims to True or False to avoid this warning.
mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
This is my code and I am not able to get rid of this warning:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
df = pd.read_csv('Classified Data',index_col=0)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(df.drop('TARGET CLASS',axis=1))
scaled_features = scaler.transform(df.drop('TARGET CLASS',axis=1))
df_feat = pd.DataFrame(scaled_features,columns=df.columns[:-1])
from sklearn.model_selection import train_test_split
X = df_feat
y = df['TARGET CLASS']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train,y_train)
predictions = knn.predict(X_test)
The warning shows up after running the last line of code.
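The warning is raised inside scikit-learn's KNN prediction code (it calls scipy.stats.mode without keepdims), not by anything in this script. Upgrading scikit-learn to a version that handles keepdims is one route; if that is not possible and you just want to silence it, a minimal sketch (using knn and X_test from the code above):

```python
import warnings

# Suppress only the scipy.stats.mode keepdims FutureWarning raised from
# inside scikit-learn; other warnings remain visible.
warnings.filterwarnings(
    "ignore",
    message="Unlike other reduction functions",
    category=FutureWarning,
)

predictions = knn.predict(X_test)  # no longer prints the keepdims warning
```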

PyTorch - RuntimeError: Error(s) in loading state_dict for VGG:

I've trained a model using PyTorch and saved a state dict file. When I load the pre-trained model with the code below, I get the following error:
RuntimeError: Error(s) in loading state_dict for VGG:
Missing key(s) in state_dict: "features.0.weight", "features.0.bias", "features.2.weight", "features.2.bias", "features.5.weight", "features.5.bias", "features.7.weight", "features.7.bias", "features.10.weight", "features.10.bias", "features.12.weight", "features.12.bias", "features.14.weight", "features.14.bias", "features.17.weight", "features.17.bias", "features.19.weight", "features.19.bias", "features.21.weight", "features.21.bias", "features.24.weight", "features.24.bias", "features.26.weight", "features.26.bias", "features.28.weight", "features.28.bias", "classifier.0.weight", "classifier.0.bias", "classifier.3.weight", "classifier.3.bias", "classifier.6.weight", "classifier.6.bias".
Unexpected key(s) in state_dict: "state_dict", "optimizer_state_dict", "globalStep", "train_paths", "test_paths".
I am following the instructions available at: https://pytorch.org/tutorials/beginner/saving_loading_models.html#saving-loading-model-across-devices
Many thanks.
import argparse
import datetime
import glob
import os
import random
import shutil
import time
from os.path import join
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision.transforms import ToTensor
from tqdm import tqdm
import torch.optim as optim
from convnet3 import Convnet
from dataset2 import CellsDataset
from convnet3 import Convnet
from VGG import VGG
from dataset2 import CellsDataset
from torchvision import models
from Conv import Conv2d
parser = argparse.ArgumentParser('Predicting hits from pixels')
parser.add_argument('name',type=str,help='Name of experiment')
parser.add_argument('data_dir',type=str,help='Path to data directory containing images and gt.csv')
parser.add_argument('--weight_decay',type=float,default=0.0,help='Weight decay coefficient (something like 10^-5)')
parser.add_argument('--lr',type=float,default=0.0001,help='Learning rate')
args = parser.parse_args()
metadata = pd.read_csv(join(args.data_dir,'gt.csv'))
metadata.set_index('filename', inplace=True)
# create datasets:
dataset = CellsDataset(args.data_dir,transform=ToTensor(),return_filenames=True)
dataset = DataLoader(dataset,num_workers=4,pin_memory=True)
model_path = '/Users/nubstech/Documents/GitHub/CellCountingDirectCount/VGG_model_V1/checkpoints/checkpoint.pth'
class VGG(nn.Module):
    def __init__(self, pretrained=True):
        super(VGG, self).__init__()
        vgg = models.vgg16(pretrained=pretrained)
        # if pretrained:
        vgg.load_state_dict(torch.load(model_path))
        features = list(vgg.features.children())
        self.features4 = nn.Sequential(*features[0:23])
        self.de_pred = nn.Sequential(Conv2d(512, 128, 1, same_padding=True, NL='relu'),
                                     Conv2d(128, 1, 1, same_padding=True, NL='relu'))

    def forward(self, x):
        x = self.features4(x)
        x = self.de_pred(x)
        return x
model=VGG()
#model.load_state_dict(torch.load(model_path),strict=False)
model.eval()
#optimizer = torch.optim.Adam(model.parameters(),lr=args.lr,weight_decay=args.weight_decay)
for images, paths in tqdm(dataset):
    targets = torch.tensor([metadata['count'][os.path.split(path)[-1]] for path in paths])  # B
    targets = targets.float()
    # code to print training data to a csv file
    #filename=CellsDataset(args.data_dir,transform=ToTensor(),return_filenames=True)
    output = model(images)  # B x 1 x 9 x 9 (analogous to a heatmap)
    preds = output.sum(dim=[1, 2, 3])  # predicted cell counts (vector of length B)
    print(preds)
    paths_test = np.array([paths])
    names_preds = np.hstack(paths)
    print(names_preds)
    df = pd.DataFrame({'Image_Name': names_preds, 'Target': targets.detach(), 'Prediction': preds.detach()})
    print(df)
    # save image name, targets, and predictions
    df.to_csv(r'model.csv', index=False, mode='a')
Code for saving the state dict
torch.save({'state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'globalStep': global_step,
            'train_paths': dataset_train.files,
            'test_paths': dataset_test.files}, checkpoint_path)
The problem is that what is being saved is not the same as what is expected to be loaded. The code tries to load only a state_dict, but what was saved is quite a bit more than that: a state_dict nested inside another dict along with additional info. load_state_dict has no logic to look inside that outer dict.
This should work:
import torch, torchvision.models
model = torchvision.models.vgg16()
path = 'test.pth'
torch.save(model.state_dict(), path) # nothing else here
model.load_state_dict(torch.load(path))
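Alternatively, if you want to keep the richer checkpoint format from the saving code above, load the dict first and pull the nested state_dict out of it. A sketch based on the keys shown in the question (model here must be the same module class the checkpoint's state_dict was produced from):

```python
import torch

# The checkpoint is a plain dict; the model weights live under 'state_dict'.
checkpoint = torch.load(model_path, map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
model.eval()

# The other entries ('optimizer_state_dict', 'globalStep', ...) can be used
# to restore the optimizer and training progress when resuming training.
```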

Error in Machine Learning model into Flask Web Applications

I have created my machine learning model for heart disease prediction and now I want to deploy it in my web application using Flask. The dataset was acquired from Kaggle. Whenever I run the application, it fails with:
C:\Users\Surface\Desktop\Flask_app>python app.py
  File "app.py", line 42
    x_data = request.form['x_data']
    ^
IndentationError: unindent does not match any outer indentation level
Can anyone guide me? Thank you :)
from flask import Flask,render_template,url_for,request
import numpy as np
import pandas as pd
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
app = Flask(__name__)
#app.route('/')
def home():
    return render_template('home.html')

#app.route('/predict',method=['POST'])
def predict():
    df = pd.read_csv("heart.csv")
    df = df.drop(columns=['cp', 'thal', 'slope'])
    # features and labels
    y = df.target.values
    x_data = df.drop(['target'], axis=1)
    # EXTRACT features
    x = (x_data - np.min(x_data)) / (np.max(x_data) - np.min(x_data)).values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
    # Random Forest Classification
    from sklearn.ensemble import RandomForestClassifier
    rf = RandomForestClassifier(n_estimators=1000, random_state=1)
    rf.fit(x_train.T, y_train.T)
    print("Random Forest Algorithm Accuracy Score : {:.2f}%".format(rf.score(x_test.T, y_test.T)*100))
    # persist model in a standard format
    from sklearn.externals import joblib
    joblib.dump(rf, 'HAP_model.pkl')
    HAP_model = open('HAP_model.pkl', 'rb')
    rf = joblib.load(HAP_model)
    if request.method == 'POST':
        x_data = request.form['x_data']
        data = [df.drop(['target'], axis=1)]
        vect = rf.transform(data).toarray()
        my_prediction = rf.predict(vect)
    return render_template('result.html', prediction=my_prediction)

if __name__ == '__main__':
    app.run(debug=True)
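For reference, "unindent does not match any outer indentation level" almost always means a line's indentation (often a mix of tabs and spaces) does not line up with the block it belongs to. A minimal, self-contained sketch of a POST route indented consistently with four spaces throughout; the names here are illustrative, not taken from the app above:

```python
from flask import Flask, render_template, request

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    # Every line in this function body starts at the same 4-space level;
    # lines inside the if-block are indented one further level (8 spaces).
    my_prediction = None
    if request.method == 'POST':
        x_data = request.form['x_data']
        my_prediction = x_data  # placeholder for the real model call
    return render_template('result.html', prediction=my_prediction)

if __name__ == '__main__':
    app.run(debug=True)
```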

Using an autoencoder to reduce dimensionality

Here is my version of an autoencoder written using PyTorch:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import metrics
import datetime
from sklearn.preprocessing import MultiLabelBinarizer
import seaborn as sns
sns.set_style("darkgrid")
from ast import literal_eval
import numpy as np
from sklearn.preprocessing import scale
import seaborn as sns
sns.set_style("darkgrid")
import torch
%matplotlib inline
f = []
f.append(np.random.uniform(0,10,(1 , 10)).flatten())
f.append(np.random.uniform(10,20,(1 , 10)).flatten())
f.append(np.random.uniform(20,30,(1 , 10)).flatten())
x_data = torch.FloatTensor(np.array(f))
x_data
dimensions_input = 10
hidden_layer_nodes = 5
output_dimension = 10
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(dimensions_input, hidden_layer_nodes)
        self.sigmoid = torch.nn.Sigmoid()
        self.linear2 = torch.nn.Linear(hidden_layer_nodes, output_dimension)

    def forward(self, x):
        l_out1 = self.linear(x)
        l_out2 = self.sigmoid(l_out1)
        y_pred = self.linear2(l_out2)
        return y_pred

model = Model()
criterion = torch.nn.MSELoss(size_average=False)
optim = torch.optim.SGD(model.parameters(), lr=0.00001)

def train_model():
    y_data = x_data.clone()
    for i in range(150000):
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        if i % 5000 == 0:
            print(loss)
        optim.zero_grad()
        loss.backward()
        optim.step()
Using x_data.clone() I train the network to learn a feature representation of the input data.
I'm attempting to generate hidden-layer activations that match the rows of the input data, so that each vector of x_data has a corresponding encoding. But the hidden layer is a vector of size 5. How do I change this network so that it produces a matrix representing a reduced-dimensionality version of the input data?
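A minimal sketch (not from the original post) of one common way to get that matrix: treat the first Linear + Sigmoid as the encoder and read its output directly. Encoding all of x_data then gives an (n_samples, hidden_layer_nodes) matrix, one 5-dimensional code per input row:

```python
import numpy as np
import torch

class AutoEncoder(torch.nn.Module):
    def __init__(self, n_in=10, n_hidden=5):
        super().__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(n_in, n_hidden),
            torch.nn.Sigmoid(),
        )
        self.decoder = torch.nn.Linear(n_hidden, n_in)

    def encode(self, x):
        # Reduced-dimensionality representation: one n_hidden vector per row of x.
        return self.encoder(x)

    def forward(self, x):
        # Full autoencoder pass, used for the reconstruction loss during training.
        return self.decoder(self.encoder(x))

x_data = torch.FloatTensor(np.random.uniform(0, 30, (3, 10)))
model = AutoEncoder()
codes = model.encode(x_data)   # shape (3, 5): the encoded matrix
print(codes.shape)
```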

cross validation + decision trees in sklearn

Attempting to create a decision tree with cross-validation using sklearn and pandas.
My question is about the code below: the cross-validation split produces the data I then use for both training and testing. I will be attempting to find the best depth of the tree by recreating it n times with different max depths. When using cross-validation, should I instead be using k-fold CV, and if so, how would I use that within the code I have?
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn import cross_validation
features = ["fLength", "fWidth", "fSize", "fConc", "fConc1", "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist", "class"]
df = pd.read_csv('magic04.data',header=None,names=features)
df['class'] = df['class'].map({'g':0,'h':1})
x = df[features[:-1]]
y = df['class']
x_train,x_test,y_train,y_test = cross_validation.train_test_split(x,y,test_size=0.4,random_state=0)
depth = []
for i in range(3,20):
    clf = tree.DecisionTreeClassifier(max_depth=i)
    clf = clf.fit(x_train,y_train)
    depth.append((i,clf.score(x_test,y_test)))
print depth
Here is a link to the data I am using, in case it helps anyone: https://archive.ics.uci.edu/ml/datasets/MAGIC+Gamma+Telescope
In your code you are creating a static training-test split. If you want to select the best depth by cross-validation you can use sklearn.cross_validation.cross_val_score inside the for loop.
You can read sklearn's documentation for more information.
Here is an update of your code with CV:
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.cross_validation import cross_val_score
from pprint import pprint
features = ["fLength", "fWidth", "fSize", "fConc", "fConc1", "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist", "class"]
df = pd.read_csv('magic04.data',header=None,names=features)
df['class'] = df['class'].map({'g':0,'h':1})
x = df[features[:-1]]
y = df['class']
# x_train,x_test,y_train,y_test = cross_validation.train_test_split(x,y,test_size=0.4,random_state=0)
depth = []
for i in range(3,20):
    clf = tree.DecisionTreeClassifier(max_depth=i)
    # Perform 7-fold cross validation
    scores = cross_val_score(estimator=clf, X=x, y=y, cv=7, n_jobs=4)
    depth.append((i,scores.mean()))
print(depth)
Alternatively, you can use sklearn.grid_search.GridSearchCV and not write the for loop yourself, especially if you want to optimize for more than one hyper-parameter.
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.model_selection import GridSearchCV
features = ["fLength", "fWidth", "fSize", "fConc", "fConc1", "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist", "class"]
df = pd.read_csv('magic04.data',header=None,names=features)
df['class'] = df['class'].map({'g':0,'h':1})
x = df[features[:-1]]
y = df['class']
parameters = {'max_depth':range(3,20)}
clf = GridSearchCV(tree.DecisionTreeClassifier(), parameters, n_jobs=4)
clf.fit(X=x, y=y)
tree_model = clf.best_estimator_
print (clf.best_score_, clf.best_params_)
Edit: changed how GridSearchCV is imported to accommodate learn2day's comment.
