after simulating this code of federated learning for image classification, I would like to save my model so I add this two lines
ckpt_manager = FileCheckpointManager("model.h5")
ckpt_manager.save_checkpoint(ServerState.from_anon_tuple(state), round_num=2)
So here is all my code:
import collections
import time
import tensorflow as tf
tf.compat.v1.enable_v2_behavior()
import tensorflow_federated as tff
source, _ = tff.simulation.datasets.emnist.load_data()
def map_fn(example):
return collections.OrderedDict(
x=tf.reshape(example['pixels'], [-1, 784]), y=example['label'])
def client_data(n):
ds = source.create_tf_dataset_for_client(source.client_ids[n])
return ds.repeat(10).shuffle(500).batch(20).map(map_fn)
train_data = [client_data(n) for n in range(10)]
element_spec = train_data[0].element_spec
def model_fn():
model = tf.keras.models.Sequential([
tf.keras.layers.Input(shape=(784,)),
tf.keras.layers.Dense(units=10, kernel_initializer='zeros'),
tf.keras.layers.Softmax(),
])
return tff.learning.from_keras_model(
model,
input_spec=element_spec,
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
trainer = tff.learning.build_federated_averaging_process(
model_fn, client_optimizer_fn=lambda: tf.keras.optimizers.SGD(0.02))
....
NUM_ROUNDS = 11
for round_num in range(2, NUM_ROUNDS):
state, metrics = trainer.next(state, federated_train_data)
print('round {:2d}, metrics={}'.format(round_num, metrics))
ckpt_manager = FileCheckpointManager("model.h5")
ckpt_manager.save_checkpoint(ServerState.from_anon_tuple(state), round_num=9)
But this error does appear:
NameError: name 'FileCheckpointManager' is not defined
I will appreciate it if you told me how I solve this problem
Looks like the code is missing an import for the module with the checkpoint manager.
FileCheckpointManger is defined in the checkpoint_manager module here: tensorflow_federated/python/research/utils/checkpoint_manager.py.
Try adding an import at the top of the file like this (the following example assumes the tensorflow federated github repo is in the import search path):
from tensorflow_federated.python.research.utils import checkpoint_manager
# ...
ckpt_manager = checkpoint_manager.FileCheckpointManager("model.h5")
Related
my code is
import streamlit as st
import pickle
import string
from nltk.corpus import stopwords
import nltk
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()
def transform_text(text):
text = text.lower()
text = nltk.word_tokenize(text)
y = []
for i in text:
if i.isalnum():
y.append(i)
text = y[:]
y.clear()
for i in text:
if i not in stopwords.words('english') and i not in string.punctuation:
y.append(i)
text = y[:]
y.clear()
for i in text:
y.append(ps.stem(i))
return " ".join(y)
tfidf = pickle.load(open('vectorizer.pkl','rb'))
model = pickle.load(open('model.pkl','rb'))
st.title("Email/SMS Spam Classifier")
input_sms = st.text_area("Enter the message")
if st.button('Predict'):
# 1. preprocess
transformed_sms = transform_text(input_sms)
# 2. vectorize
vector_input = tfidf.transform([transformed_sms])
# 3. predict
result = model.predict(vector_input)[0]
# 4. Display
if result == 1:
st.header("Spam")
else:
st.header("Not Spam")
Code for a single raster file:
import geopandas as gpd
#import os
import rasterio
import scipy.sparse as sparse
import pandas as pd
import numpy as np
# Create an empty pandas dataframe called 'table'
table = pd.DataFrame(index = np.arange(0,1))
# Read the points shapefile using GeoPandas
stations = gpd.read_file(r'E:/anakonda/Shape files/AAQ_st1/AAQ_ST1.shp')
stations['lon'] = stations['geometry'].x
stations['lat'] = stations['geometry'].y
Matrix = pd.DataFrame()
# Iterate through the rasters and save the data as individual arrays to a Matrix
dataset = rasterio.open(r'E:/anakonda/LST_day/MOD11A1.006_LST_Day_1km_doy2019082_aid0001.tif')
data_array = dataset.read(1)
data_array_sparse = sparse.coo_matrix(data_array, shape = (351, 545))
for records_date in Matrix.columns.tolist():
a = Matrix
LST_day_value = a.loc[int(row)][int(col)]
table[records_date] = LST_day_value
transpose_mat = table.T
transpose_mat.rename(columns = {0: 'LST_Day(Kel)'}, inplace = True)
transpose_mat.to_csv(r'E:/anakonda/LST_day'+'\\'+station_name+'.csv')
Error code lines:
LST_day_value = a.loc[int(row)][int(col)]
transpose_mat.to_csv(r'E:/anakonda/LST_day'+'\'+station_name+'.csv')
Errors Shown:
Undefined Name 'row' (pyflakes E)
Undefined Name 'col' (pyflakes E)
NameError: name 'transpose_mat' is not defined
I'm using the above code for creating a Raster Time-series for Modis LST data. the code ran well till 'transposing the matrix'. the error shown is mentioned below the code. Im new to python, so kindly help me with this issue.
import os
import rasterio
import scipy.sparse as sparse
import pandas as pd
import numpy as np
# Create an empty pandas dataframe called 'table'
table = pd.DataFrame(index = np.arange(0,1))
# Read the points shapefile using GeoPandas
stations = gpd.read_file(r'E:/anakonda/Shape files/AAQ_st1/AAQ_ST1.shp')
stations['lon'] = stations['geometry'].x
stations['lat'] = stations['geometry'].y
Matrix = pd.DataFrame()
# Iterate through the rasters and save the data as individual arrays to a Matrix
for files in os.listdir(r'E:/anakonda/LST_Night'):
if files[-4: ] == '.tif':
dataset = rasterio.open(r'E:/anakonda/LST_Night'+'\\'+files)
data_array = dataset.read(1)
data_array_sparse = sparse.coo_matrix(data_array, shape = (351,545))
data = files[ :-20]
Matrix[data] = data_array_sparse.toarray().tolist()
print('Processing is done for the raster: '+ files[:-20])
# Iterate through the stations and get the corresponding row and column for the related x, y coordinates
for index, row in stations.iterrows():
station_name = str(row['Station'])
lon = float(row['lon'])
lat = float(row['lat'])
x,y = (lon, lat)
row, col = dataset.index(x, y)
print('Processing: '+ station_name)
# Pick the LST value from each stored raster array and record it into the previously created 'table'
for records_date in Matrix.columns.tolist():
a = Matrix[records_date]
LST_Night_value = a.loc[int(row)][int(col)]
table[records_date] = LST_Night_value
transpose_mat = table.T
transpose_mat.rename(columns = {0: 'LstNight(Kel)'}, inplace = True)
transpose_mat.to_csv(r'E:/anakonda/LST_Night'+'\\'+station_name+'.csv')```
This is the error shown:
```File "C:\Anaconda\envs\timeseries\lib\site-packages\pandas\core\indexes\range.py", line 357, in get_loc
raise KeyError(key) from err
KeyError: 2278```
Given a simple feature selection code below, I want to know the selected columns after the feature selection (The dataset includes a header V1 ... V20)
import pandas as pd
from sklearn.feature_selection import SelectFromModel, SelectKBest, f_regression
def feature_selection(data):
y = data['Class']
X = data.drop(['Class'], axis=1)
fs = SelectKBest(score_func=f_regression, k=10)
# Applying feature selection
X_selected = fs.fit_transform(X, y)
# TODO: determine the columns being selected
return X_selected
data = pd.read_csv("../dataset.csv")
new_data = feature_selection(data)
I appreciate any help.
I have used the iris dataset for my example but you can probably easily modify your code to match your use case.
The SelectKBest method has the scores_ attribute I used to sort the features.
Feel free to ask for any clarifications.
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectFromModel, SelectKBest, f_regression
from sklearn.datasets import load_iris
def feature_selection(data):
y = data[1]
X = data[0]
column_names = ["A", "B", "C", "D"] # Here you should use your dataframe's column names
k = 2
fs = SelectKBest(score_func=f_regression, k=k)
# Applying feature selection
X_selected = fs.fit_transform(X, y)
# Find top features
# I create a list like [[ColumnName1, Score1] , [ColumnName2, Score2], ...]
# Then I sort in descending order on the score
top_features = sorted(zip(column_names, fs.scores_), key=lambda x: x[1], reverse=True)
print(top_features[:k])
return X_selected
data = load_iris(return_X_y=True)
new_data = feature_selection(data)
I don't know the in-build method, but it can be easily coded.
n_columns_selected = X_new.shape[0]
new_columns = list(sorted(zip(fs.scores_, X.columns))[-n_columns_selected:])
# new_columns order is perturbed, we need to restore it. We use the names of the columns of X as a reference
new_columns = list(sorted(cols_new, key=lambda x: list(X.columns).index(x)))
I am using Q learning and the program should be able to play the game after some tries but it is not learning even when the epsilon value if 0.1.
I have tried changing the batch size the memory size. I have changed the code to give -1 reward if the player dies.
import gym
import numpy as np
import random
import tensorflow as tf
import numpy as np
from time import time
import keyboard
import sys
import time
env = gym.make("Breakout-ram-v4")
observationSpace = env.observation_space
actionSpace= env.action_space
episode = 500
class Model_QNN :
def __init__(self):
self.memory = []
self.MAX_MEMORY_TO_USE = 60_000
self.gamma = 0.9
self.model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=(128,1)),
tf.keras.layers.Dense(256,activation="relu"),
tf.keras.layers.Dense(64,activation="relu"),
tf.keras.layers.Dense(actionSpace.n , activation= "softmax")
])
self.model.compile(optimizer="adam",loss="mse",metrics=["accuracy"])
def remember(self, steps , done):
self.memory.append([steps,done])
if(len(self.memory) >= self.MAX_MEMORY_TO_USE):
del self.memory[0]
def replay(self,batch_size= 32):
states, targets_f = [], []
if(len(self.memory)< batch_size) :
return
else:
mini = random.sample(self.memory,batch_size)
states ,targets = [], []
for steps , done in mini :
target= steps[2] ;
if not done :
target = steps[2] + (self.gamma* np.amax(self.model.predict(steps[3].reshape(1,128,1))[0]))
target_f = self.model.predict(steps[0].reshape(1,128,1))
target_f[0][steps[1]] = target
states.append(steps[0])
targets.append(target_f[0])
self.model.fit(np.array(states).reshape(len(states),128,1), np.array(targets),verbose=0,epochs=10)
def act(self,state,ep):
if(random.random()< ep):
action = actionSpace.sample()
else :
np.array([state]).shape
action= self.model.predict(state.reshape(1,128,1))
action = np.argmax(action)
return action;
def saveModel (self):
print("Saving")
self.model.save("NEWNAMEDONE")
def saveBackup(self,num):
self.model.save("NEWNAME"+str(int(num)))
def main():
agent= Model_QNN();
epsilon=0.9
t_end = time.time()
score= 0
for e in range(2000):
print("Working on episode : "+str(e)+" eps "+str(epsilon)+" Score " + str(score))
preState = env.reset()
preState,reward,done,_ = env.step(1)
mainLife=5
done = False
score= 0
icount = 0
render=False
if e % 400 ==0 and not e==0:
render =True
while not done:
icount+=1
if render:
env.render()
if keyboard.is_pressed('q'):
agent.saveBackup(100)
agent.saveModel()
quit()
rewrd=0
if ( _["ale.lives"] < mainLife ):
mainLife-=1
rewrd=-1
action=1
else:
action = agent.act(preState,epsilon)
newState,reward,done,_ = env.step(action)
if rewrd ==-1 :
reward =-1
agent.remember([preState/255,action,reward,newState/255],done);
preState= newState;
score+=reward
if done :
break
agent.replay(1024)
if epsilon >= 0.18 :
epsilon = epsilon * 0.995;
if ((e+1)%500==0):
agent.saveBackup((e+1)/20)
agent.saveModel()
if __name__=='__main__':
main()
There is no error message the program should learn and it is not
Why are you using Softmax on your output layer?
If you want to use Softmax use Cross-Entropy as your loss. However, it looks like you're trying to implement a value based learning system. The activation function on your output layer should be linear.
I suggest you try your implementation on Cartpole-v0 then LunarLanding-v2 first.
Those are solved environments and a great place to sanity check your code.
"There is no error message the program should learn and it is not."
Welcome to ML where things fail silently.
I have an acceptable model, but I would like to improve it by adjusting its parameters in Spark ML Pipeline with CrossValidator and ParamGridBuilder.
As an Estimator I will place the existing pipeline.
In ParamMaps I would not know what to put, I do not understand it.
As Evaluator I will use the RegressionEvaluator already created previously.
I'm going to do it for 5 folds, with a list of 10 different depth values in the tree.
How can I select and show the best model for the lowest RMSE?
ACTUAL example:
from pyspark.ml import Pipeline
from pyspark.ml.regression import DecisionTreeRegressor
from pyspark.ml.feature import VectorIndexer
from pyspark.ml.evaluation import RegressionEvaluator
dt = DecisionTreeRegressor()
dt.setPredictionCol("Predicted_PE")
dt.setMaxBins(100)
dt.setFeaturesCol("features")
dt.setLabelCol("PE")
dt.setMaxDepth(8)
pipeline = Pipeline(stages=[vectorizer, dt])
model = pipeline.fit(trainingSetDF)
regEval = RegressionEvaluator(predictionCol = "Predicted_XX", labelCol = "XX", metricName = "rmse")
rmse = regEval.evaluate(predictions)
print("Root Mean Squared Error: %.2f" % rmse)
(1) Spark Jobs
(2) Root Mean Squared Error: 3.60
NEED:
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
dt2 = DecisionTreeRegressor()
dt2.setPredictionCol("Predicted_PE")
dt2.setMaxBins(100)
dt2.setFeaturesCol("features")
dt2.setLabelCol("PE")
dt2.setMaxDepth(10)
pipeline2 = Pipeline(stages=[vectorizer, dt2])
model2 = pipeline2.fit(trainingSetDF)
regEval2 = RegressionEvaluator(predictionCol = "Predicted_PE", labelCol = "PE", metricName = "rmse")
paramGrid = ParamGridBuilder().build() # ??????
crossval = CrossValidator(estimator = pipeline2, estimatorParamMaps = paramGrid, evaluator=regEval2, numFolds = 5) # ?????
rmse2 = regEval2.evaluate(predictions)
#bestPipeline = ????
#bestLRModel = ????
#bestParams = ????
print("Root Mean Squared Error: %.2f" % rmse2)
(1) Spark Jobs
(2) Root Mean Squared Error: 3.60 # the same ¿?
You need to call .fit() with your training data on the crossval object to create the cv model. That will do the cross validation. Then you get the best model (according to your evaluator metric) from that. Eg.
cvModel = crossval.fit(trainingData)
myBestModel = cvModel.bestModel