OpenAI gym breakout-ram-v4 unable to learn - machine-learning

I am using Q learning and the program should be able to play the game after some tries but it is not learning even when the epsilon value if 0.1.
I have tried changing the batch size the memory size. I have changed the code to give -1 reward if the player dies.
import gym
import numpy as np
import random
import tensorflow as tf
import numpy as np
from time import time
import keyboard
import sys
import time
env = gym.make("Breakout-ram-v4")
observationSpace = env.observation_space
actionSpace= env.action_space
episode = 500
class Model_QNN :
def __init__(self):
self.memory = []
self.MAX_MEMORY_TO_USE = 60_000
self.gamma = 0.9
self.model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=(128,1)),
tf.keras.layers.Dense(256,activation="relu"),
tf.keras.layers.Dense(64,activation="relu"),
tf.keras.layers.Dense(actionSpace.n , activation= "softmax")
])
self.model.compile(optimizer="adam",loss="mse",metrics=["accuracy"])
def remember(self, steps , done):
self.memory.append([steps,done])
if(len(self.memory) >= self.MAX_MEMORY_TO_USE):
del self.memory[0]
def replay(self,batch_size= 32):
states, targets_f = [], []
if(len(self.memory)< batch_size) :
return
else:
mini = random.sample(self.memory,batch_size)
states ,targets = [], []
for steps , done in mini :
target= steps[2] ;
if not done :
target = steps[2] + (self.gamma* np.amax(self.model.predict(steps[3].reshape(1,128,1))[0]))
target_f = self.model.predict(steps[0].reshape(1,128,1))
target_f[0][steps[1]] = target
states.append(steps[0])
targets.append(target_f[0])
self.model.fit(np.array(states).reshape(len(states),128,1), np.array(targets),verbose=0,epochs=10)
def act(self,state,ep):
if(random.random()< ep):
action = actionSpace.sample()
else :
np.array([state]).shape
action= self.model.predict(state.reshape(1,128,1))
action = np.argmax(action)
return action;
def saveModel (self):
print("Saving")
self.model.save("NEWNAMEDONE")
def saveBackup(self,num):
self.model.save("NEWNAME"+str(int(num)))
def main():
agent= Model_QNN();
epsilon=0.9
t_end = time.time()
score= 0
for e in range(2000):
print("Working on episode : "+str(e)+" eps "+str(epsilon)+" Score " + str(score))
preState = env.reset()
preState,reward,done,_ = env.step(1)
mainLife=5
done = False
score= 0
icount = 0
render=False
if e % 400 ==0 and not e==0:
render =True
while not done:
icount+=1
if render:
env.render()
if keyboard.is_pressed('q'):
agent.saveBackup(100)
agent.saveModel()
quit()
rewrd=0
if ( _["ale.lives"] < mainLife ):
mainLife-=1
rewrd=-1
action=1
else:
action = agent.act(preState,epsilon)
newState,reward,done,_ = env.step(action)
if rewrd ==-1 :
reward =-1
agent.remember([preState/255,action,reward,newState/255],done);
preState= newState;
score+=reward
if done :
break
agent.replay(1024)
if epsilon >= 0.18 :
epsilon = epsilon * 0.995;
if ((e+1)%500==0):
agent.saveBackup((e+1)/20)
agent.saveModel()
if __name__=='__main__':
main()
There is no error message the program should learn and it is not

Why are you using Softmax on your output layer?
If you want to use Softmax use Cross-Entropy as your loss. However, it looks like you're trying to implement a value based learning system. The activation function on your output layer should be linear.
I suggest you try your implementation on Cartpole-v0 then LunarLanding-v2 first.
Those are solved environments and a great place to sanity check your code.
"There is no error message the program should learn and it is not."
Welcome to ML where things fail silently.

Related

am getting NotFittedError: This MultinomialNB instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator

my code is
import streamlit as st
import pickle
import string
from nltk.corpus import stopwords
import nltk
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()
def transform_text(text):
text = text.lower()
text = nltk.word_tokenize(text)
y = []
for i in text:
if i.isalnum():
y.append(i)
text = y[:]
y.clear()
for i in text:
if i not in stopwords.words('english') and i not in string.punctuation:
y.append(i)
text = y[:]
y.clear()
for i in text:
y.append(ps.stem(i))
return " ".join(y)
tfidf = pickle.load(open('vectorizer.pkl','rb'))
model = pickle.load(open('model.pkl','rb'))
st.title("Email/SMS Spam Classifier")
input_sms = st.text_area("Enter the message")
if st.button('Predict'):
# 1. preprocess
transformed_sms = transform_text(input_sms)
# 2. vectorize
vector_input = tfidf.transform([transformed_sms])
# 3. predict
result = model.predict(vector_input)[0]
# 4. Display
if result == 1:
st.header("Spam")
else:
st.header("Not Spam")

PicklingError when running pipeline in sklearn

I am running the following pipeline in sklearn to perform grid search
def logistic():
sm = SMOTE()
poly = polynomial_transform()
stand = StandardScaler()
pca = PCA()
#I need to think about how I am training this...
logistic = LogisticRegression(max_iter=100, tol=0.01,solver = 'saga') #what is my binary scorer here
pipe = Pipeline(steps=[('smt', sm),('poly', poly),('standardise', stand),('pca', pca), ('logistic', logistic)],memory = mem)
# Parameters of pipelines can be set using ‘__’ separated parameter names:
param_grid = {
#'poly__degree': [1,2],
'pca__n_components':[50,100],
'logistic__C': [0.0001],
}
scorers = {
'precision_score': make_scorer(precision_score),
'recall_score': make_scorer(recall_score),
'accuracy_score': make_scorer(accuracy_score),
'f1_score': make_scorer(f1_score),
'tp': make_scorer(tp),
'tn': make_scorer(tn),
'fp': make_scorer(fp),
'fn': make_scorer(fn),
'ck': make_scorer(cohen_kappa_score)
}
#performing grid search
search = GridSearchCV(pipe, param_grid, n_jobs=-1,verbose=2,scoring= scorers,refit='accuracy_score')
search.fit(X_train.to_numpy(), y_train.to_numpy().ravel())
#results of cross validation grid search
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)
return search
polynomial transform is a class I have created myself
class polynomial_transform(BaseEstimator):
def __init__(self,degree=None):
self.degree=degree
def fit(self,X,y=None):
return self
#Method that describes what we need this transformer to do
def transform( self, X, y = None ):
for i in range(1,self.degree):
X = np.hstack((X, X**i))
return X
def set_params(self, degree):
self.degree = degree
def get_params(self,deep=True):
return {'degree':self.degree}
When setting the memory parameter in the pipeline I am getting the following error
FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
_pickle.PicklingError: ("Can't pickle : it's not found as main.polynomial_transform"

Find the best pipeline model using CrossValidator and ParamGridBuilder

I have an acceptable model, but I would like to improve it by adjusting its parameters in Spark ML Pipeline with CrossValidator and ParamGridBuilder.
As an Estimator I will place the existing pipeline.
In ParamMaps I would not know what to put, I do not understand it.
As Evaluator I will use the RegressionEvaluator already created previously.
I'm going to do it for 5 folds, with a list of 10 different depth values in the tree.
How can I select and show the best model for the lowest RMSE?
ACTUAL example:
from pyspark.ml import Pipeline
from pyspark.ml.regression import DecisionTreeRegressor
from pyspark.ml.feature import VectorIndexer
from pyspark.ml.evaluation import RegressionEvaluator
dt = DecisionTreeRegressor()
dt.setPredictionCol("Predicted_PE")
dt.setMaxBins(100)
dt.setFeaturesCol("features")
dt.setLabelCol("PE")
dt.setMaxDepth(8)
pipeline = Pipeline(stages=[vectorizer, dt])
model = pipeline.fit(trainingSetDF)
regEval = RegressionEvaluator(predictionCol = "Predicted_XX", labelCol = "XX", metricName = "rmse")
rmse = regEval.evaluate(predictions)
print("Root Mean Squared Error: %.2f" % rmse)
(1) Spark Jobs
(2) Root Mean Squared Error: 3.60
NEED:
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
dt2 = DecisionTreeRegressor()
dt2.setPredictionCol("Predicted_PE")
dt2.setMaxBins(100)
dt2.setFeaturesCol("features")
dt2.setLabelCol("PE")
dt2.setMaxDepth(10)
pipeline2 = Pipeline(stages=[vectorizer, dt2])
model2 = pipeline2.fit(trainingSetDF)
regEval2 = RegressionEvaluator(predictionCol = "Predicted_PE", labelCol = "PE", metricName = "rmse")
paramGrid = ParamGridBuilder().build() # ??????
crossval = CrossValidator(estimator = pipeline2, estimatorParamMaps = paramGrid, evaluator=regEval2, numFolds = 5) # ?????
rmse2 = regEval2.evaluate(predictions)
#bestPipeline = ????
#bestLRModel = ????
#bestParams = ????
print("Root Mean Squared Error: %.2f" % rmse2)
(1) Spark Jobs
(2) Root Mean Squared Error: 3.60 # the same ¿?
You need to call .fit() with your training data on the crossval object to create the cv model. That will do the cross validation. Then you get the best model (according to your evaluator metric) from that. Eg.
cvModel = crossval.fit(trainingData)
myBestModel = cvModel.bestModel

Can someone explain the train function in cifar10_train.py from cifar10 tutorials in tensorflow

I am following cifar10 tutorials from https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10.
In this project, there are 6 classes. After searching the internet I understood cifar10.py and cifar10_input.py classes. But I can't understand train function in cifar10_train.py. Here is the train function in cifar10_train.py class.
def train():
with tf.Graph().as_default():
global_step = tf.contrib.framework.get_or_create_global_step()
# get images and labels for cifar 10
# Force input pipeline to CPU:0 to avoid operations sometime ending on
# GPU and resulting in a slow down
with tf.device('/cpu:0'):
images, labels = cifar10.distorted_inputs()
logits = cifar10.inference(images)
loss = cifar10.loss(logits, labels)
train_op = cifar10.train(loss, global_step)
class _LoggerHook(tf.train.SessionRunHook):
def begin(self):
self._step = -1
self._start_time = time.time()
def before_run(self, run_context):
self._step += 1
return tf.train.SessionRunArgs(loss)
def after_run(self, run_context, run_values):
if self._step % FLAGS.log_frequency == 0:
current_time = time.time()
duration = current_time - self._start_time
self._start_time = current_time
loss_value = run_values.results
examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
sec_per_batch = float(duration / FLAGS.log_frequency)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print(format_str % (datetime.now(), self._step, loss_value,
examples_per_sec, sec_per_batch))
with tf.train.MonitoredTrainingSession(
checkpoint_dir=FLAGS.train_dir,
hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
tf.train.NanTensorHook(loss),
_LoggerHook()],
config=tf.ConfigProto(
log_device_placement=FLAGS.log_device_placement)) as mon_sess:
while not mon_sess.should_stop():
mon_sess.run(train_op)
Can someone please explain what is happening in _LoggerHook class?
It uses MonitoredSession and SessionRunHook for logging the loss when training.
_LoggerHook is an implementation of SessionRunHook that runs in an order described below:
call hooks.begin()
sess = tf.Session()
call hooks.after_create_session()
while not stop is requested:
call hooks.before_run()
try:
results = sess.run(merged_fetches, feed_dict=merged_feeds)
except (errors.OutOfRangeError, StopIteration):
break
call hooks.after_run()
call hooks.end()
sess.close()
It's from here.
It collects loss data before the session.run then outputs loss with a predefined format.
A tutorial: https://www.tensorflow.org/tutorials/layers
Hope this hopes.

Learning2Search (vowpal-wabbit) for NER gives weird results

We are trying to use Learning2Search from vowpal-wabbit for NER
We are using ATIS dataset.
In ATIS there are 127 Entities (including Others category)
Training set has 4978 and test has 893 sentences.
How ever when we run it on test set it is mapping everything either class 1(Airline name) or class 2(Airport code)
Which is wired.
We tried another dataset (https://github.com/glample/tagger/tree/master/dataset), same behavior.
Looks like I am not using it the right way. Any pointers will be of great help.
Code snippet :
with open("/tweetsdb/ner/datasets/atis.pkl") as f:
train, test, dicts = cPickle.load(f)
idx2words = {v: k for k, v in dicts['words2idx'].iteritems()}
idx2labels = {v: k for k, v in dicts['labels2idx'].iteritems()}
idx2tables = {v: k for k, v in dicts['tables2idx'].iteritems()}
#Convert the dataset into a format compatible with Vowpal Wabbit
training_set = []
for i in xrange(len(train[0])):
zip_label_ent_idx = zip(train[2][i], train[0][i])
label_ent_actual = [(int(i[0]), idx2words[i[1]]) for i in zip_label_ent_idx]
training_set.append(label_ent_actual)
# Do like wise to get test chunk
class SequenceLabeler(pyvw.SearchTask):
def __init__(self, vw, sch, num_actions):
pyvw.SearchTask.__init__(self, vw, sch, num_actions)
sch.set_options( sch.AUTO_HAMMING_LOSS | sch.AUTO_CONDITION_FEATURES )
def _run(self, sentence):
output = []
for n in range(len(sentence)):
pos,word = sentence[n]
with self.vw.example({'w': [word]}) as ex:
pred = self.sch.predict(examples=ex, my_tag=n+1, oracle=pos, condition=[(n,'p'), (n-1, 'q')])
output.append(pred)
return output
vw = pyvw.vw("--search 3 --search_task hook --ring_size 1024")
Code for training the model:
#Training
sequenceLabeler = vw.init_search_task(SequenceLabeler)
for i in xrange(3):
sequenceLabeler.learn(training_set[:10])
Code for Prediction:
pred = []
for i in random.sample(xrange(len(test_set)), 10):
test_example = [ (999, word[1]) for word in test_set[i] ]
test_labels = [ label[0] for label in test_set[i] ]
print 'input sentence:', ' '.join([word[1] for word in test_set[i]])
print 'actual labels:', ' '.join([str(label) for label in test_labels])
print 'predicted labels:', ' '.join([str(pred) for pred in sequenceLabeler.predict(test_example)])
To see the full code, pls refer to this notebook:
https://github.com/nsanthanam/ner/blob/master/vowpal_wabbit_atis.ipynb
I am also new to this algorithm, but did some pilot studies recently.
To your problem, the answer is that you set a wrong parameter in
vw = pyvw.vw("--search 3 --search_task hook --ring_size 1024")
Here, the search should be set as '127', and in this way, vw will use your 127 tags.
vw = pyvw.vw("--search 127 --search_task hook --ring_size 1024")
Also, my feeling is that vw doesn't work really well with so many tags. I might be wrong, please let me know your result :)

Resources