I am trying to solve the problem statement Jigsaw-multilingual-toxic-comment-classification but when I tried to preprocess the entire file ,I got the [Errorno 30].
I also tried to solve it by:
!cp -r /kaggle/input/jigsaw-multilingual-toxic-comment-classification /kaggle/working
Even then I am not able to solve the error.
Can someone please help me out solve this error?
The first code block defines the global variables.
!cp -r /kaggle/input/jigsaw-multilingual-toxic-comment-classification /kaggle/working
SEQUENCE_LENGTH = 128
DATA_PATH = "/kaggle/working/jigsaw-multilingual-toxic-comment-classification"
BERT_PATH = "../input/bert-multi"
BERT_PATH_SAVEDMODEL = os.path.join(BERT_PATH, "/kaggle/input/bert-multi/bert_multi_from_tfhub")
OUTPUT_PATH = "/kaggle/working"
The below code block is the block afer which I am getting the error
def preprocess_and_save_dataset(unprocessed_filename, text_label='comment_text',
seq_length=SEQUENCE_LENGTH, verbose=True):
"""Preprocess a CSV to the expected TF Dataset form for multilingual BERT,
and save the result."""
dataframe = pd.read_csv(os.path.join(DATA_PATH, unprocessed_filename),
index_col='id')
processed_filename = (unprocessed_filename.rstrip('.csv') +
"-processed-seqlen{}.csv".format(SEQUENCE_LENGTH))
pos = 0
start = time.time()
while pos < len(dataframe):
processed_df = dataframe[pos:pos + 10000].copy()
processed_df['input_word_ids'], processed_df['input_mask'], processed_df['all_segment_id'] = (
zip(*processed_df[text_label].apply(preprocess_sentence)))
if pos == 0:
processed_df.to_csv(processed_filename, index_label='id', mode='w')
else:
processed_df.to_csv(processed_filename, index_label='id', mode='a',
header=False)
if verbose:
print('Processed {} examples in {}'.format(
pos + 10000, time.time() - start))
pos += 10000
return
# Process the training dataset.
preprocess_and_save_dataset(wiki_toxic_comment_data)
I tried to move the directory to the Kaggle working folder.
Related
I am using Q learning and the program should be able to play the game after some tries but it is not learning even when the epsilon value if 0.1.
I have tried changing the batch size the memory size. I have changed the code to give -1 reward if the player dies.
import gym
import numpy as np
import random
import tensorflow as tf
import numpy as np
from time import time
import keyboard
import sys
import time
env = gym.make("Breakout-ram-v4")
observationSpace = env.observation_space
actionSpace= env.action_space
episode = 500
class Model_QNN :
def __init__(self):
self.memory = []
self.MAX_MEMORY_TO_USE = 60_000
self.gamma = 0.9
self.model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=(128,1)),
tf.keras.layers.Dense(256,activation="relu"),
tf.keras.layers.Dense(64,activation="relu"),
tf.keras.layers.Dense(actionSpace.n , activation= "softmax")
])
self.model.compile(optimizer="adam",loss="mse",metrics=["accuracy"])
def remember(self, steps , done):
self.memory.append([steps,done])
if(len(self.memory) >= self.MAX_MEMORY_TO_USE):
del self.memory[0]
def replay(self,batch_size= 32):
states, targets_f = [], []
if(len(self.memory)< batch_size) :
return
else:
mini = random.sample(self.memory,batch_size)
states ,targets = [], []
for steps , done in mini :
target= steps[2] ;
if not done :
target = steps[2] + (self.gamma* np.amax(self.model.predict(steps[3].reshape(1,128,1))[0]))
target_f = self.model.predict(steps[0].reshape(1,128,1))
target_f[0][steps[1]] = target
states.append(steps[0])
targets.append(target_f[0])
self.model.fit(np.array(states).reshape(len(states),128,1), np.array(targets),verbose=0,epochs=10)
def act(self,state,ep):
if(random.random()< ep):
action = actionSpace.sample()
else :
np.array([state]).shape
action= self.model.predict(state.reshape(1,128,1))
action = np.argmax(action)
return action;
def saveModel (self):
print("Saving")
self.model.save("NEWNAMEDONE")
def saveBackup(self,num):
self.model.save("NEWNAME"+str(int(num)))
def main():
agent= Model_QNN();
epsilon=0.9
t_end = time.time()
score= 0
for e in range(2000):
print("Working on episode : "+str(e)+" eps "+str(epsilon)+" Score " + str(score))
preState = env.reset()
preState,reward,done,_ = env.step(1)
mainLife=5
done = False
score= 0
icount = 0
render=False
if e % 400 ==0 and not e==0:
render =True
while not done:
icount+=1
if render:
env.render()
if keyboard.is_pressed('q'):
agent.saveBackup(100)
agent.saveModel()
quit()
rewrd=0
if ( _["ale.lives"] < mainLife ):
mainLife-=1
rewrd=-1
action=1
else:
action = agent.act(preState,epsilon)
newState,reward,done,_ = env.step(action)
if rewrd ==-1 :
reward =-1
agent.remember([preState/255,action,reward,newState/255],done);
preState= newState;
score+=reward
if done :
break
agent.replay(1024)
if epsilon >= 0.18 :
epsilon = epsilon * 0.995;
if ((e+1)%500==0):
agent.saveBackup((e+1)/20)
agent.saveModel()
if __name__=='__main__':
main()
There is no error message the program should learn and it is not
Why are you using Softmax on your output layer?
If you want to use Softmax use Cross-Entropy as your loss. However, it looks like you're trying to implement a value based learning system. The activation function on your output layer should be linear.
I suggest you try your implementation on Cartpole-v0 then LunarLanding-v2 first.
Those are solved environments and a great place to sanity check your code.
"There is no error message the program should learn and it is not."
Welcome to ML where things fail silently.
I have a facedetection training code. It gives me some issues and i have no clue why.
I am using a MAC and seems like there is missing something. Can you please advise what should i do?
Thank you in advance
OpenCV(3.4.1) Error: Assertion failed (!empty()) in detectMultiScale, file /tmp/opencv-20180426-73279-16a912g/opencv-3.4.1/modules/objdetect/src/cascadedetect.cpp, line 1698
Traceback (most recent call last):
File "/Users/Desktop/OpenCV-Python-Series-master/src/faces-train.py", line 36, in <module>
faces = face_cascade.detectMultiScale(image_array, scaleFactor=1.5, minNeighbors=5)
cv2.error: OpenCV(3.4.1) /tmp/opencv-20180426-73279-16a912g/opencv-3.4.1/modules/objdetect/src/cascadedetect.cpp:1698: error: (-215) !empty() in function detectMultiScale
[Finished in 0.421s]
And my code is below.
import cv2
import os
import numpy as np
from PIL import Image
import pickle
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
image_dir = os.path.join(BASE_DIR, "images")
face_cascade = cv2.CascadeClassifier('cascades/data/haarcascade_frontalface_alt2.xml')
recognizer = cv2.face.LBPHFaceRecognizer_create()
current_id = 0
label_ids = {}
y_labels = []
x_train = []
for root, dirs, files in os.walk(image_dir):
for file in files:
if file.endswith("png") or file.endswith("jpg"):
path = os.path.join(root, file)
label = os.path.basename(root).replace(" ", "-").lower()
#print(label, path)
if not label in label_ids:
label_ids[label] = current_id
current_id += 1
id_ = label_ids[label]
#print(label_ids)
#y_labels.append(label) # some number
#x_train.append(path) # verify this image, turn into a NUMPY arrray, GRAY
pil_image = Image.open(path).convert("L") # grayscale
size = (550, 550)
final_image = pil_image.resize(size, Image.ANTIALIAS)
image_array = np.array(final_image, "uint8")
#print(image_array)
faces = face_cascade.detectMultiScale(image_array, scaleFactor=1.5, minNeighbors=5)
for (x,y,w,h) in faces:
roi = image_array[y:y+h, x:x+w]
x_train.append(roi)
y_labels.append(id_)
#print(y_labels)
#print(x_train)
with open("pickles/face-labels.pickle", 'wb') as f:
pickle.dump(label_ids, f)
recognizer.train(x_train, np.array(y_labels))
recognizer.save("recognizers/face-trainner.yml")
The assertion which fails indicates that your cascade is not loaded correctly. You can verify it by calling face_cascade.empty() just after the constructor. Please make sure that the path you provided ('cascades/data/haarcascade_frontalface_alt2.xml') is correct. When it points to a not existing file then there is no exception thrown by the constructor so you can easily miss it without calling empty() explicitly.
I am following cifar10 tutorials from https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10.
In this project, there are 6 classes. After searching the internet I understood cifar10.py and cifar10_input.py classes. But I can't understand train function in cifar10_train.py. Here is the train function in cifar10_train.py class.
def train():
with tf.Graph().as_default():
global_step = tf.contrib.framework.get_or_create_global_step()
# get images and labels for cifar 10
# Force input pipeline to CPU:0 to avoid operations sometime ending on
# GPU and resulting in a slow down
with tf.device('/cpu:0'):
images, labels = cifar10.distorted_inputs()
logits = cifar10.inference(images)
loss = cifar10.loss(logits, labels)
train_op = cifar10.train(loss, global_step)
class _LoggerHook(tf.train.SessionRunHook):
def begin(self):
self._step = -1
self._start_time = time.time()
def before_run(self, run_context):
self._step += 1
return tf.train.SessionRunArgs(loss)
def after_run(self, run_context, run_values):
if self._step % FLAGS.log_frequency == 0:
current_time = time.time()
duration = current_time - self._start_time
self._start_time = current_time
loss_value = run_values.results
examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
sec_per_batch = float(duration / FLAGS.log_frequency)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print(format_str % (datetime.now(), self._step, loss_value,
examples_per_sec, sec_per_batch))
with tf.train.MonitoredTrainingSession(
checkpoint_dir=FLAGS.train_dir,
hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
tf.train.NanTensorHook(loss),
_LoggerHook()],
config=tf.ConfigProto(
log_device_placement=FLAGS.log_device_placement)) as mon_sess:
while not mon_sess.should_stop():
mon_sess.run(train_op)
Can someone please explain what is happening in _LoggerHook class?
It uses MonitoredSession and SessionRunHook for logging the loss when training.
_LoggerHook is an implementation of SessionRunHook that runs in an order described below:
call hooks.begin()
sess = tf.Session()
call hooks.after_create_session()
while not stop is requested:
call hooks.before_run()
try:
results = sess.run(merged_fetches, feed_dict=merged_feeds)
except (errors.OutOfRangeError, StopIteration):
break
call hooks.after_run()
call hooks.end()
sess.close()
It's from here.
It collects loss data before the session.run then outputs loss with a predefined format.
A tutorial: https://www.tensorflow.org/tutorials/layers
Hope this hopes.
Working with a relatively new R package called "eegAnalysis" through rpy2 and getting an error for a time-series object required by the FeatureEEG function.
table_query = R_DBI.dbGetQuery(DBI_Connection, "SELECT * FROM {0}".format(a_table))
where table_query is returned from a PostgreSQL database, a small portion of the data looks like
'data.frame': 52000 obs. of 68 variables:
lfg1 : num 205 200 185 183 175 ...
lfg10: num -135.1 -124.1 -127.1 -116.1 -80.1 ...
lfg11: num -132 -109 -101 -103 -116 ...
lfg12: num -259 -246 -232 -196 -203 ...
soon there-after a time-series object is created using table_query from above
ts = R_ts.timeSeries(table_query, start = 1, end = table_query.nrow, frequency = 1)
several lines below used for the important line even further below
n_columns = ts.ncol
col_middle = n_columns / 2
if round(col_middle) != col_middle:
col_middle = col_middle + 0.5
group_1 = int(col_middle)
group_2 = int(n_columns - group_1)
classification = R_base.c(R_base.rep(1, group_1), R_base.rep(2, group_2))
the important line is here
featureEEG = R_EEGA.FeatureEEG(ts, rec_Id = ts.nrow, classes_Id = classification)
FeatureEEG is a function from the R eegAnalysis package. After processing for a few moments the
following error is returned
Traceback (most recent call last): File "D:\Projects\Kaggle\UPenn_and_Mayo_Clinic_Seizure_Detection\Scripts\py_test01.py", line 135, in featureEEG = R_EEGA.FeatureEEG(ts, rec_Id = ts.nrow, classes_Id = classification) File "C:\Python33\lib\site-packages\rpy2-2.3.8-py3.3-win32.egg\rpy2\robjects\functions.py", line 86, in call return super(SignatureTranslatedFunction, self).call(*args, **kwargs) File "C:\Python33\lib\site-packages\rpy2-2.3.8-py3.3-win32.egg\rpy2\robjects\functions.py", line 35, in call res = super(Function, self).call(*new_args, **new_kwargs) rpy2.rinterface.RRuntimeError: Error in UseMethod("months") : no applicable method for 'months' applied to an object of class "c('integer', 'numeric')"*
From what I can tell, the line creating featureEEG is processed, and it is in the processing where
the error occurs. Most of the message is understandable except for the part starting at the
RRuntimeError. Could someone explain what it is saying? Or where can I find out?
Thank You.
The RRuntimeError is reporting an error message generated by R.
Here it appears to try calling a generic months(). I am suspecting that ts in your code is not of the right type. The authors of eegAnalysis might be able to help.
As part of debugging a problem that might be related to my UIVIews, I want to write a python script to run from LLDB. I had thought to extract all settings for a view in a breakpoint and all view children, to allow me to compare states. I checked out the WWDC video on the topic and then spent time reading things at lldb.llvm.org/scripting.html, and didn't find them very helpful. A web search for examples led to nothing substantially different from those.
My problem is that I'm trying to figure out how to access iOS variables at my breakpoint. The examples I've seen do things like convert numbers and mimic shell commands. Interesting stuff but not useful for my purposes. I've been reading my way through the help info with "script help(lldb.SBValue)" and the like, but it is slow going as the results are huge and it is not clear what the use patterns are. I feel like one decent example of how to traverse a few iOS objects would help me understand the system. Does anyone know of one or can share a snippet of code?
UPDATE:
I wrote this to help me track down a bug in my UIView use. I want to do a bit more work to refine this to see if I could show the whole view tree, but this was sufficient to solve my problem, so I'll put it here to save others some time.
import lldb
max_depth = 6
filters = {'_view':'UIView *', '_layer':'CALayer *', '_viewFlags':'struct'}
def print_value(var, depth, prefix):
""" print values and recurse """
global max_depth
local_depth = max_depth - depth
pad = ' ' * local_depth
name = var.GetName()
typ = str(var.GetType()).split('\n')[0].split('{')[0].split(':')[0].strip()
found = name in filters.keys() # only visit filter items children
if found:
found = (filters.get(name) == typ)
value = var.GetValue()
if value is None or str(value) == '0x00000000':
value = ''
else:
value = ' Val: %s' % value
if var.GetNumChildren() == 0 and var.IsInScope():
path = lldb.SBStream()
var.GetExpressionPath(path)
path = ' pathData: %s' % path.GetData()
else:
path = ''
print '^' * local_depth, prefix, ' Adr:', var.GetAddress(), ' Name:', name, ' Type:', typ, value, path
if var.GetNumChildren() > 0:
if local_depth < 2 or found:
print pad, var.GetNumChildren(), 'children, to depth', local_depth + 1
counter = 0
for subvar in var:
subprefix = '%d/%d' % (counter, var.GetNumChildren())
print_value(subvar, depth - 1, subprefix)
counter += 1
def printvh (debugger, command_line, result, dict):
""" print view hierarchy """
global max_depth
args = command_line.split()
if len(args) > 0:
var = lldb.frame.FindVariable(args[0])
depth = max_depth
if len(args) > 1:
depth = int(args[1])
max_depth = depth
print_value(var, depth, 'ROOT')
else:
print 'pass a variable name and optional depth'
And I added the following to my .lldbinit :
script import os, sys
# So that files in my dir takes precedence.
script sys.path[:0] = [os.path.expanduser("~/lldbpy")]
script import views
command script add -f views.printvh printvh
so that I can just type "printvh self 3" at the LLDB prompt.
Maybe this will help. Here's an example of how to dump simple local variables when a breakpoint is hit. I'm not displaying char* arrays correctly, I'm not sure how I should get the data for these to display it like "frame variable" would display it but I'll figure that out later when I have a free minute.
struct datastore {
int val1;
int val2;
struct {
int val3;
} subdata;
char *name;
};
int main (int argc, char **argv)
{
struct datastore data = {1, 5, {3}, "a string"};
return data.val2;
}
Current executable set to 'a.out' (x86_64).
(lldb) br se -l 13
Breakpoint created: 1: file ='a.c', line = 13, locations = 1
(lldb) br comm add -s python
Enter your Python command(s). Type 'DONE' to end.
> def printvar_or_children(var):
> if var.GetNumChildren() == 0 and var.IsInScope():
> path = lldb.SBStream()
> var.GetExpressionPath(path)
> print '%s: %s' % (path.GetData(), var.GetValue())
> else:
> for subvar in var:
> printvar_or_children(subvar)
>
> print 'variables visible at breakpoint %s' % bp_loc
> for var in frame.arguments:
> printvar_or_children(var)
> for var in frame.locals:
> printvar_or_children(var)
>
> DONE
(lldb) r
variables visible at breakpoint 1.1: where = a.out`main + 51 at a.c:13, address = 0x0000000100000f33, resolved, hit count = 1
argc: 1
*(*(argv)): '/'
data.val1: 1
data.val2: 5
data.subdata.val3: 3
*(data.name): 'a'
Process 84865 stopped
* thread #1: tid = 0x1f03, 0x0000000100000f33 a.out`main + 51 at a.c:13, stop reason = breakpoint 1.1
frame #0: 0x0000000100000f33 a.out`main + 51 at a.c:13
10 int main (int argc, char **argv)
11 {
12 struct datastore data = {1, 5, {3}, "a string"};
-> 13 return data.val2;
(lldb)
Tip - for sanity's sake I worked on the python over in a side text editor and pasted it into lldb as I experimented.
If you use the frame variable command in lldb to explore your variables at a given stop location, that's the same basic way that you can access them via the SBFrame that is provided to your breakpoint python command in the 'frame' object.
Hope that helps to get you started.
Did you try looking at the python LLDB formatting templates stored in:
XCode.app/Contents/SharedFrameworks/LLDB.framework/Resources/Python/lldb/formatters/objc