LogisticRegression fit() function is throwing this error - machine-learning

I'm following the DataCamp PySpark tutorial series, and in Chapter 4 (Model tuning and selection) I get this error when fitting the model with this line:
best_lr = lr.fit(training)
Error
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-102-88042cb88c20> in <module>()
----> 1 best_lr = lr.fit(training)
/usr/hdp/current/spark2-client/python/lib/pyspark.zip/pyspark/ml/base.py in fit(self, dataset, params)
130 return self.copy(params)._fit(dataset)
131 else:
--> 132 return self._fit(dataset)
133 else:
134 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
/usr/hdp/current/spark2-client/python/lib/pyspark.zip/pyspark/ml/wrapper.py in _fit(self, dataset)
286
287 def _fit(self, dataset):
--> 288 java_model = self._fit_java(dataset)
289 model = self._create_model(java_model)
290 return self._copyValues(model)
/usr/hdp/current/spark2-client/python/lib/pyspark.zip/pyspark/ml/wrapper.py in _fit_java(self, dataset)
283 """
284 self._transfer_params_to_java()
--> 285 return self._java_obj.fit(dataset._jdf)
286
287 def _fit(self, dataset):
/usr/hdp/current/spark2-client/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py in __call__(self, *args)
1158 answer = self.gateway_client.send_command(command)
1159 return_value = get_return_value(
-> 1160 answer, self.gateway_client, self.target_id, self.name)
1161
1162 for temp_arg in temp_args:
/usr/hdp/current/spark2-client/python/lib/pyspark.zip/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
/usr/hdp/current/spark2-client/python/lib/py4j-0.10.6-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
318 raise Py4JJavaError(
319 "An error occurred while calling {0}{1}{2}.\n".
--> 320 format(target_id, ".", name), value)
321 else:
322 raise Py4JError(
Py4JJavaError: An error occurred while calling o596.fit.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 60.0 failed 1 times, most recent failure: Lost task 2.0 in stage 60.0 (TID 86, localhost, executor driver): org.apache.spark.SparkException: Failed to execute user defined function($anonfun$3: (struct<month_double_VectorAssembler_42f79ae7f99735f04859:double,air_time_double_VectorAssembler_42f79ae7f99735f04859:double,carrier_fact:vector,dest_fact:vector,plane_age_double_VectorAssembler_42f79ae7f99735f04859:double>) => vector)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.sort_addToSorter$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:216)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.spark.SparkException: Values to assemble cannot be null.
at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:163)
at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:146)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at org.apache.spark.ml.feature.VectorAssembler$.assemble(VectorAssembler.scala:146)
at org.apache.spark.ml.feature.VectorAssembler$$anonfun$3.apply(VectorAssembler.scala:99)
at org.apache.spark.ml.feature.VectorAssembler$$anonfun$3.apply(VectorAssembler.scala:98)
... 24 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2131)
at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1092)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.fold(RDD.scala:1086)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1.apply(RDD.scala:1155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1131)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:518)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:488)
at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:278)
at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$3: (struct<month_double_VectorAssembler_42f79ae7f99735f04859:double,air_time_double_VectorAssembler_42f79ae7f99735f04859:double,carrier_fact:vector,dest_fact:vector,plane_age_double_VectorAssembler_42f79ae7f99735f04859:double>) => vector)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.sort_addToSorter$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:216)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
... 1 more
Caused by: org.apache.spark.SparkException: Values to assemble cannot be null.
at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:163)
at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:146)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at org.apache.spark.ml.feature.VectorAssembler$.assemble(VectorAssembler.scala:146)
at org.apache.spark.ml.feature.VectorAssembler$$anonfun$3.apply(VectorAssembler.scala:99)
at org.apache.spark.ml.feature.VectorAssembler$$anonfun$3.apply(VectorAssembler.scala:98)
... 24 more
Tools
I'm using an online PySpark cluster from Cloudxlabs.com (trial version).

There may be some NULL values in the dataset; you'll have to take care of those first, as the error itself explains: "Values to assemble cannot be null."
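In PySpark, one way is to drop those rows before assembling the features. A minimal sketch, assuming the column names from the DataCamp tutorial (adjust them to your pipeline):
# drop any row where one of the assembler's input columns is null
training = training.dropna(subset=["month", "air_time", "carrier", "dest", "plane_age"])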

Apart from removing rows with missing values, you can impute them, e.g. replace them with the mean or median.
A second option is to use XGBoost for regression, which handles missing values automatically.

import pandas as pd

df = pd.DataFrame({'Last_Name': ['Smith', None, 'Brown'],
                   'First_Name': ['John', 'Mike', 'Bill'],
                   'Age': [35, 45, None]})
print(df)
  Last_Name First_Name   Age
0     Smith       John  35.0
1      None       Mike  45.0
2     Brown       Bill   NaN

df2 = df.dropna()
print(df2)
  Last_Name First_Name   Age
0     Smith       John  35.0
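Missing values can also be imputed rather than dropped; a small sketch continuing the example above, filling Age with its column mean:
df3 = df.copy()
# replace the missing age with the column mean instead of dropping the row
df3['Age'] = df3['Age'].fillna(df3['Age'].mean())
print(df3)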
XGBoost can be applied as shown in this tutorial:
https://www.datacamp.com/community/tutorials/xgboost-in-python

Related

Merlin two tower model data training error (ValueError: high is out of bounds for int32)

model.compile(optimizer="adam", run_eagerly=False, metrics=[mm.RecallAt(10), mm.NDCGAt(10)])
model.fit(train, validation_data=valid, batch_size=4096, epochs=3)
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\merlin\models\loader\backend.py in _shuffle_indices(self)
326 #annotate("_shuffle_indices", color="darkgreen", domain="nvt_python")
327 def _shuffle_indices(self):
--> 328 generate_local_seed(self.global_rank, self.global_size)
329 if self.seed_fn:
330 new_seed = self.seed_fn()
C:\ProgramData\Anaconda3\lib\site-packages\merlin\core\dispatch.py in generate_local_seed(global_rank, global_size)
647 cp.random.seed(seeds[global_rank].get())
648 else:
--> 649 seeds = random_state.randint(0, 2**32, size=global_size)
650 return seeds[global_rank]
651
mtrand.pyx in numpy.random.mtrand.RandomState.randint()
_bounded_integers.pyx in numpy.random._bounded_integers._rand_int32()
ValueError: high is out of bounds for int32
I was trying to train the model using NVIDIA Merlin.
I was expecting a model iteration step.
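The failing call is seeds = random_state.randint(0, 2**32, size=global_size) inside Merlin. On platforms where NumPy's default integer type is 32-bit (notably Windows, as in this traceback), high=2**32 exceeds the int32 range. A minimal sketch of the behavior, not Merlin code:
import numpy as np

rs = np.random.RandomState(42)
# rs.randint(0, 2**32, size=4) raises "ValueError: high is out of bounds
# for int32" when the default integer type is 32-bit (e.g. on Windows);
# requesting a 64-bit dtype explicitly avoids the overflow
seeds = rs.randint(0, 2**32, size=4, dtype="int64")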

InvalidArgumentError when building sequential model using Keras

I want to develop a sequential model using optimal hyperparameters derived from Keras Tuner. My code raised an "Invalid argument: assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:]" error.
I tried adding +1 or +10 to the entire X_train but still get the same error.
Build the sequential model:
def model_builder(hp):
    model = Sequential()
    # Tune the number of units in the first Dense layer
    # Choose an optimal value between 32-512
    hp_units1 = hp.Int('units1', min_value=32, max_value=512, step=32)
    hp_units2 = hp.Int('units2', min_value=32, max_value=512, step=32)
    hp_units3 = hp.Int('units3', min_value=32, max_value=512, step=32)
    model.add(Dense(units=hp_units1, activation='relu'))
    model.add(tf.keras.layers.Dense(units=hp_units2, activation='relu'))
    model.add(tf.keras.layers.Dense(units=hp_units3, activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='linear'))  # output layer
    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    # Performance visualization callback
    performance_viz_cbk = PerformanceVisualizationCallback(
        model=model,
        test_data=X_test,
        image_dir='c:\perorfmance_charts')
    # Model summary
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss=SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy', tf.keras.metrics.AUC(), MulticlassTruePositives()])
    return model
Hyperparameter tuning
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='intro_to_kt')
tuner.search(X_train, y_train, epochs=10, validation_split=0.2)
Traceback:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/tmp/ipykernel_17/1976310635.py in <module>
----> 1 tuner.search(X_train, y_train, epochs=10, validation_split=0.2)
/opt/conda/lib/python3.7/site-packages/keras_tuner/engine/base_tuner.py in search(self, *fit_args, **fit_kwargs)
177
178 self.on_trial_begin(trial)
--> 179 results = self.run_trial(trial, *fit_args, **fit_kwargs)
180 # `results` is None indicates user updated oracle in `run_trial()`.
181 if results is None:
/opt/conda/lib/python3.7/site-packages/keras_tuner/tuners/hyperband.py in run_trial(self, trial, *fit_args, **fit_kwargs)
382 fit_kwargs["epochs"] = hp.values["tuner/epochs"]
383 fit_kwargs["initial_epoch"] = hp.values["tuner/initial_epoch"]
--> 384 return super(Hyperband, self).run_trial(trial, *fit_args, **fit_kwargs)
385
386 def _build_model(self, hp):
/opt/conda/lib/python3.7/site-packages/keras_tuner/engine/tuner.py in run_trial(self, trial, *args, **kwargs)
292 callbacks.append(model_checkpoint)
293 copied_kwargs["callbacks"] = callbacks
--> 294 obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
295
296 histories.append(obj_value)
/opt/conda/lib/python3.7/site-packages/keras_tuner/engine/tuner.py in _build_and_fit_model(self, trial, *args, **kwargs)
220 hp = trial.hyperparameters
221 model = self._try_build(hp)
--> 222 results = self.hypermodel.fit(hp, model, *args, **kwargs)
223 return tuner_utils.convert_to_metrics_dict(
224 results, self.oracle.objective, "HyperModel.fit()"
/opt/conda/lib/python3.7/site-packages/keras_tuner/engine/hypermodel.py in fit(self, hp, model, *args, **kwargs)
135 If return a float, it should be the `objective` value.
136 """
--> 137 return model.fit(*args, **kwargs)
138
139
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
948 # Lifting succeeded, so variables are initialized and we can run the
949 # stateless function.
--> 950 return self._stateless_fn(*args, **kwds)
951 else:
952 _, _, _, filtered_flat_args = \
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
3038 filtered_flat_args) = self._maybe_define_function(args, kwargs)
3039 return graph_function._call_flat(
-> 3040 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
3041
3042 #property
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1962 # No tape is watching; skip to running the function.
1963 return self._build_call_outputs(self._inference_function.call(
-> 1964 ctx, args, cancellation_manager=cancellation_manager))
1965 forward_backward = self._select_forward_and_backward_functions(
1966 args,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
594 inputs=args,
595 attrs=attrs,
--> 596 ctx=ctx)
597 else:
598 outputs = execute.execute_with_cancellation(
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (sequential/dense_3/BiasAdd:0) = ] [[-1.17921221][-1.64039445][-1.71617472]...] [y (Cast_8/x:0) = ] [0]
[[{{node assert_greater_equal/Assert/AssertGuard/else/_1/assert_greater_equal/Assert/AssertGuard/Assert}}]]
(1) Invalid argument: assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (sequential/dense_3/BiasAdd:0) = ] [[-1.17921221][-1.64039445][-1.71617472]...] [y (Cast_8/x:0) = ] [0]
[[{{node assert_greater_equal/Assert/AssertGuard/else/_1/assert_greater_equal/Assert/AssertGuard/Assert}}]]
[[assert_less_equal/Assert/AssertGuard/pivot_f/_13/_41]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_7509]
Function call stack:
train_function -> train_function
Sample data:
X_train
array([[7.97469882, 3.5471509 , 5.67233186, ..., 5.40341065, 3.16356072,
4.62657322],
[4.5390077 , 5.19247562, 5.21946796, ..., 4.12139197, 5.62828788,
4.31792717],
[5.83576307, 6.42946189, 4.85759085, ..., 5.26563348, 5.48022682,
5.29933021],
[4.58240518, 5.02457724, 6.1146384 , ..., 5.47847468, 6.62666587,
5.77328054],
[4.51602713, 5.17875946, 4.57102455, ..., 4.15622882, 5.48888824,
4.88704 ]])
y_train
array([[1],
[3],
[1],
[0],
[3]])
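One detail that may explain the assertion: the sample y_train holds integer class labels (0-3), yet the network ends in Dense(1, activation='linear'), and tf.keras.metrics.AUC asserts that predictions lie in [0, 1]. A sketch of an internally consistent head, assuming 4 classes as the sample labels suggest (not the original poster's code):
# one output unit per class; softmax keeps predictions in [0, 1], so
# metrics that assume probabilities (e.g. AUC) no longer see negatives
model.add(Dense(4, activation='softmax'))
model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
              loss=SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])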

load_from_checkpoint fails after transfer learning a LightningModule

I'm trying to transfer-learn a LightningModule. The relevant part of the code is this:
class DeepFilteringTransferLearning(pl.LightningModule):
    def __init__(self, chk_path=None):
        super().__init__()
        # init class members
        self.prediction = []
        self.label = []
        self.loss = MSELoss()
        # init pretrained model
        self.chk_path = chk_path
        model = DeepFiltering.load_from_checkpoint(chk_path)
        backbone = model.sequential
        layers = list(backbone.children())[:-1]
        self.groundModel = Sequential(*layers)
        # use the pretrained model the same way to regress Lshall and neq
        self.regressor = nn.Linear(64, 2)

    def forward(self, x):
        self.groundModel.eval()
        with torch.no_grad():
            groundOut = self.groundModel(x)
        yPred = self.regressor(groundOut)
        return yPred
I save my model in a different main file, the relevant part of which is:
# callbacks
callbacks = [
    ModelCheckpoint(
        dirpath="checkpoints/maxPooling16StandardizedL2RegularizedReproduceableSeeded42Ampl1ConvTransferLearned",
        save_top_k=5,
        monitor="val_loss",
    ),
]
# trainer
trainer = pl.Trainer(gpus=[1, 2], strategy="dp", max_epochs=150, logger=wandb_logger,
                     callbacks=callbacks, precision=32, deterministic=True)
trainer.fit(model, train_dataloaders=trainDl, val_dataloaders=valDl)
Afterwards I try to load the model from the checkpoint like this:
chk_patH = "path/to/transfer_learned/model"
standardizedL2RegularizedL1 = DeepFilteringTransferLearning("path/to/model/trying/to/use/for/transfer_learning").load_from_checkpoint(chk_patH)
I got the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in _check_seekable(f)
307 try:
--> 308 f.seek(f.tell())
309 return True
AttributeError: 'NoneType' object has no attribute 'seek'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-6-13f5fd0c7b85> in <module>
1 chk_patH = "checkpoints/maxPooling16StandardizedL2RegularizedReproduceableSeeded42Ampl1/epoch=4-step=349.ckpt"
----> 2 standardizedL2RegularizedL1 = DeepFilteringTransferLearning("checkpoints/maxPooling16StandardizedL2RegularizedReproduceableSeeded42Ampl2/epoch=145-step=10219.ckpt").load_from_checkpoint(chk_patH)
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/pytorch_lightning/core/saving.py in load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
154 checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].update(kwargs)
155
--> 156 model = cls._load_model_state(checkpoint, strict=strict, **kwargs)
157 return model
158
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/pytorch_lightning/core/saving.py in _load_model_state(cls, checkpoint, strict, **cls_kwargs_new)
196 _cls_kwargs = {k: v for k, v in _cls_kwargs.items() if k in cls_init_args_name}
197
--> 198 model = cls(**_cls_kwargs)
199
200 # give model a chance to load something
~/whistlerProject/gitHub/whistler/mathe/gwInspired/deepFilteringTransferLearning.py in __init__(self, chk_path)
34 #init pretrained model
35 self.chk_path = chk_path
---> 36 model = DeepFiltering.load_from_checkpoint(chk_path)
37 backbone = model.sequential
38 layers = list(backbone.children())[:-1]
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/pytorch_lightning/core/saving.py in load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
132 checkpoint = pl_load(checkpoint_path, map_location=map_location)
133 else:
--> 134 checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)
135
136 if hparams_file is not None:
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/pytorch_lightning/utilities/cloud_io.py in load(path_or_url, map_location)
31 if not isinstance(path_or_url, (str, Path)):
32 # any sort of BytesIO or similiar
---> 33 return torch.load(path_or_url, map_location=map_location)
34 if str(path_or_url).startswith("http"):
35 return torch.hub.load_state_dict_from_url(str(path_or_url), map_location=map_location)
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
579 pickle_load_args['encoding'] = 'utf-8'
580
--> 581 with _open_file_like(f, 'rb') as opened_file:
582 if _is_zipfile(opened_file):
583 # The zipfile reader is going to advance the current file position.
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in _open_file_like(name_or_buffer, mode)
233 return _open_buffer_writer(name_or_buffer)
234 elif 'r' in mode:
--> 235 return _open_buffer_reader(name_or_buffer)
236 else:
237 raise RuntimeError(f"Expected 'r' or 'w' in mode but got {mode}")
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in __init__(self, buffer)
218 def __init__(self, buffer):
219 super(_open_buffer_reader, self).__init__(buffer)
--> 220 _check_seekable(buffer)
221
222
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in _check_seekable(f)
309 return True
310 except (io.UnsupportedOperation, AttributeError) as e:
--> 311 raise_err_msg(["seek", "tell"], e)
312 return False
313
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in raise_err_msg(patterns, e)
302 + " Please pre-load the data into a buffer like io.BytesIO and"
303 + " try to load from it instead.")
--> 304 raise type(e)(msg)
305 raise e
306
AttributeError: 'NoneType' object has no attribute 'seek'. You can only torch.load from a file that is seekable. Please pre-load the data into a buffer like io.BytesIO and try to load from it instead.
which I can't resolve.
I tried to do this according to the available tutorials on the official PyTorch Lightning page here, but I can't figure out what I'm missing.
Could somebody point me in the right direction?
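Note that load_from_checkpoint is a classmethod: even when called on an instance, Lightning rebuilds the model via cls(**kwargs), so __init__ runs with chk_path=None unless that argument was persisted, and DeepFiltering.load_from_checkpoint(None) then reaches torch.load(None), which is exactly the "'NoneType' object has no attribute 'seek'" failure above. A sketch of one way around it (hypothetical paths), passing the extra __init__ argument through load_from_checkpoint:
# extra kwargs are forwarded to __init__; alternatively, call
# self.save_hyperparameters() in __init__ so chk_path is stored in the checkpoint
standardizedL2RegularizedL1 = DeepFilteringTransferLearning.load_from_checkpoint(
    chk_patH,
    chk_path="path/to/model/trying/to/use/for/transfer_learning",
)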

DeepChem GraphConvModel (GNN) training TypeError

I am a beginner with GNNs, and I was trying out code for predicting drug toxicity using DeepChem's Tox21 dataset. It has a training set of 12 thousand compounds and a test set of 650 compounds. I need help debugging and rectifying this error, which I get at the end: "TypeError: 'NoneType' object is not subscriptable".
Here is the code snippet:
model = GraphConvModel(len(tox21_tasks),
                       batch_size=32,
                       mode='classification')
print("Fitting the model")
model.fit(train_dataset, nb_epoch=10)
And here is my error:
TypeError Traceback (most recent call last)
<ipython-input-5-8088249b7fd6> in <module>
4 mode='classification')
5 print("Fitting the model")
----> 6 model.fit(train_dataset, nb_epoch=10)
~\anaconda3\lib\site-packages\deepchem\models\keras_model.py in fit(self, dataset, nb_epoch, max_checkpoints_to_keep, checkpoint_interval, deterministic, restore, variables, loss, callbacks, all_losses)
322 dataset, epochs=nb_epoch,
323 deterministic=deterministic), max_checkpoints_to_keep,
--> 324 checkpoint_interval, restore, variables, loss, callbacks, all_losses)
325
326 def fit_generator(self,
~\anaconda3\lib\site-packages\deepchem\models\keras_model.py in fit_generator(self, generator, max_checkpoints_to_keep, checkpoint_interval, restore, variables, loss, callbacks, all_losses)
407 inputs = inputs[0]
408
--> 409 batch_loss = apply_gradient_for_batch(inputs, labels, weights, loss)
410 current_step = self._global_step.numpy()
411
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
501 # This is the first call of __call__, so we have to initialize.
502 initializer_map = object_identity.ObjectIdentityDictionary()
--> 503 self._initialize(args, kwds, add_initializers_to=initializer_map)
504 finally:
505 # At this point we know that the initialization is complete (or less
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
406 self._concrete_stateful_fn = (
407 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 408 *args, **kwds))
409
410 def invalid_creator_scope(*unused_args, **unused_kwds):
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1846 if self.input_signature:
1847 args, kwargs = None, None
-> 1848 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1849 return graph_function
1850
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
2148 graph_function = self._function_cache.primary.get(cache_key, None)
2149 if graph_function is None:
-> 2150 graph_function = self._create_graph_function(args, kwargs)
2151 self._function_cache.primary[cache_key] = graph_function
2152 return graph_function, args, kwargs
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2039 arg_names=arg_names,
2040 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2041 capture_by_value=self._capture_by_value),
2042 self._function_attributes,
2043 # Tell the ConcreteFunction to clean up its graph once it goes out of
~\anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
913 converted_func)
914
--> 915 func_outputs = python_func(*func_args, **func_kwargs)
916
917 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds)
356 # __wrapped__ allows AutoGraph to swap in a converted function. We give
357 # the function a weak reference to itself to avoid a reference cycle.
--> 358 return weak_wrapped_fn().__wrapped__(*args, **kwds)
359 weak_wrapped_fn = weakref.ref(wrapped_fn)
360
~\anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py in wrapper(*args, **kwargs)
903 except Exception as e: # pylint:disable=broad-except
904 if hasattr(e, "ag_error_metadata"):
--> 905 raise e.ag_error_metadata.to_exception(e)
906 else:
907 raise
TypeError: in converted code:
relative to C:\Users\Madiha\anaconda3\lib\site-packages:
deepchem\models\keras_model.py:474 apply_gradient_for_batch *
grads = tape.gradient(batch_loss, vars)
tensorflow_core\python\eager\backprop.py:1014 gradient
unconnected_gradients=unconnected_gradients)
tensorflow_core\python\eager\imperative_grad.py:76 imperative_grad
compat.as_str(unconnected_gradients.value))
tensorflow_core\python\eager\backprop.py:138 _gradient_function
return grad_fn(mock_op, *out_grads)
tensorflow_core\python\ops\math_grad.py:455 _UnsortedSegmentMaxGrad
return _UnsortedSegmentMinOrMaxGrad(op, grad)
tensorflow_core\python\ops\math_grad.py:432 _UnsortedSegmentMinOrMaxGrad
_GatherDropNegatives(op.outputs[0], op.inputs[1])
TypeError: 'NoneType' object is not subscriptable
My advice: check the examples on the DeepChem website. Here is code which should work:
import deepchem as dc

tasks, datasets, transformers = dc.molnet.load_tox21(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = datasets
model = dc.models.GraphConvModel(len(tasks),
                                 batch_size=32,
                                 mode='classification')
print("Fitting the model")
model.fit(train_dataset)
Hope it works for you!

NoneType error on saving model in Keras

Error:
TypeError: Fetch argument None has invalid type
I think the error occurs when the model is saved by the ModelCheckpoint callback. On searching for the error, this came up, but I cannot use that answer because I am using Keras and thus don't explicitly call sess.run() in TensorFlow. Also, each epoch trains flawlessly; the error only pops up when the model is being saved.
Code:
The complete model is in a kaggle notebook linked here: https://www.kaggle.com/aevinq/cnn-batchnormalization-0-1646/
The relevant code where the error pops up is:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min')
mcp_save = ModelCheckpoint('md.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, epsilon=1e-4, mode='min')
history = model.fit(train_X, train_y, batch_size=32, epochs=20, verbose=1, validation_split=0.25, callbacks=[early_stopping, reduce_lr_loss, mcp_save])
Error:
Train on 4413 samples, validate on 1471 samples
Epoch 1/20
4384/4413 [============================>.] - ETA: 1s - loss: 0.5157 - acc: 0.7696
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-11-97f0757a1e9c> in <module>()
2 mcp_save = ModelCheckpoint('md.hdf5', save_best_only=True, monitor='val_loss', mode='min')
3 reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, epsilon=1e-4, mode='min')
----> 4 history = model.fit(train_X, train_y, batch_size=32, epochs=20, verbose=1, validation_split=0.25, callbacks=[early_stopping, reduce_lr_loss, mcp_save])
/opt/conda/lib/python3.6/site-packages/Keras-2.1.2-py3.6.egg/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
970 initial_epoch=initial_epoch,
971 steps_per_epoch=steps_per_epoch,
--> 972 validation_steps=validation_steps)
973
974 def evaluate(self, x=None, y=None,
/opt/conda/lib/python3.6/site-packages/Keras-2.1.2-py3.6.egg/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1655 initial_epoch=initial_epoch,
1656 steps_per_epoch=steps_per_epoch,
-> 1657 validation_steps=validation_steps)
1658
1659 def evaluate(self, x=None, y=None,
/opt/conda/lib/python3.6/site-packages/Keras-2.1.2-py3.6.egg/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
1231 for l, o in zip(out_labels, val_outs):
1232 epoch_logs['val_' + l] = o
-> 1233 callbacks.on_epoch_end(epoch, epoch_logs)
1234 if callback_model.stop_training:
1235 break
/opt/conda/lib/python3.6/site-packages/Keras-2.1.2-py3.6.egg/keras/callbacks.py in on_epoch_end(self, epoch, logs)
71 logs = logs or {}
72 for callback in self.callbacks:
---> 73 callback.on_epoch_end(epoch, logs)
74
75 def on_batch_begin(self, batch, logs=None):
/opt/conda/lib/python3.6/site-packages/Keras-2.1.2-py3.6.egg/keras/callbacks.py in on_epoch_end(self, epoch, logs)
413 self.model.save_weights(filepath, overwrite=True)
414 else:
--> 415 self.model.save(filepath, overwrite=True)
416 else:
417 if self.verbose > 0:
/opt/conda/lib/python3.6/site-packages/Keras-2.1.2-py3.6.egg/keras/engine/topology.py in save(self, filepath, overwrite, include_optimizer)
2563 """
2564 from ..models import save_model
-> 2565 save_model(self, filepath, overwrite, include_optimizer)
2566
2567 def save_weights(self, filepath, overwrite=True):
/opt/conda/lib/python3.6/site-packages/Keras-2.1.2-py3.6.egg/keras/models.py in save_model(model, filepath, overwrite, include_optimizer)
145 if symbolic_weights:
146 optimizer_weights_group = f.create_group('optimizer_weights')
--> 147 weight_values = K.batch_get_value(symbolic_weights)
148 weight_names = []
149 for i, (w, val) in enumerate(zip(symbolic_weights,
/opt/conda/lib/python3.6/site-packages/Keras-2.1.2-py3.6.egg/keras/backend/tensorflow_backend.py in batch_get_value(ops)
2208 """
2209 if ops:
-> 2210 return get_session().run(ops)
2211 else:
2212 return []
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
887 try:
888 result = self._run(None, fetches, feed_dict, options_ptr,
--> 889 run_metadata_ptr)
890 if run_metadata:
891 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1103 # Create a fetch handler to take care of the structure of fetches.
1104 fetch_handler = _FetchHandler(
-> 1105 self._graph, fetches, feed_dict_tensor, feed_handles=feed_handles)
1106
1107 # Run request and get response.
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in __init__(self, graph, fetches, feeds, feed_handles)
412 """
413 with graph.as_default():
--> 414 self._fetch_mapper = _FetchMapper.for_fetch(fetches)
415 self._fetches = []
416 self._targets = []
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in for_fetch(fetch)
232 elif isinstance(fetch, (list, tuple)):
233 # NOTE(touts): This is also the code path for namedtuples.
--> 234 return _ListFetchMapper(fetch)
235 elif isinstance(fetch, dict):
236 return _DictFetchMapper(fetch)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in __init__(self, fetches)
339 """
340 self._fetch_type = type(fetches)
--> 341 self._mappers = [_FetchMapper.for_fetch(fetch) for fetch in fetches]
342 self._unique_fetches, self._value_indices = _uniquify_fetches(self._mappers)
343
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in <listcomp>(.0)
339 """
340 self._fetch_type = type(fetches)
--> 341 self._mappers = [_FetchMapper.for_fetch(fetch) for fetch in fetches]
342 self._unique_fetches, self._value_indices = _uniquify_fetches(self._mappers)
343
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in for_fetch(fetch)
229 if fetch is None:
230 raise TypeError('Fetch argument %r has invalid type %r' %
--> 231 (fetch, type(fetch)))
232 elif isinstance(fetch, (list, tuple)):
233 # NOTE(touts): This is also the code path for namedtuples.
TypeError: Fetch argument None has invalid type <class 'NoneType'>
It's a bug in Keras: after a recent update there are None values in model.optimizer.weights, which leads to an error when K.batch_get_value is called during model saving.
I've opened a PR to fix it and it has been merged. You can install the latest Keras from GitHub to get the fix.
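For example (assuming pip and git are available):
pip install git+https://github.com/keras-team/keras.git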
