Problem with evaluation function in TensorFlow Federated

I was trying to reimplement the GitHub tutorial with my own CNN-based Keras model, but I got an error when evaluating. Here is my code:
from __future__ import absolute_import, division, print_function

import collections
from six.moves import range
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.optimizer_v2 import gradient_descent
from tensorflow_federated import python as tff

# `nest` is used below but was not defined in the original snippet;
# the TF 1.x tutorial this is based on aliases it from tf.contrib.
nest = tf.contrib.framework.nest

emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data()

example_dataset = emnist_train.create_tf_dataset_for_client(
    emnist_train.client_ids[0])

NUM_EPOCHS = 10
BATCH_SIZE = 20
SHUFFLE_BUFFER = 500


def preprocess(dataset):

    def element_fn(element):
        return collections.OrderedDict([
            ('x', tf.reshape(element['pixels'], [-1])),
            ('y', tf.reshape(element['label'], [1])),
        ])

    return dataset.repeat(NUM_EPOCHS).map(element_fn).shuffle(
        SHUFFLE_BUFFER).batch(BATCH_SIZE)


preprocessed_example_dataset = preprocess(example_dataset)

sample_batch = nest.map_structure(
    lambda x: x.numpy(), iter(preprocessed_example_dataset).next())
def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x))
            for x in client_ids]


NUM_CLIENTS = 3
sample_clients = emnist_train.client_ids[0:NUM_CLIENTS]
federated_train_data = make_federated_data(emnist_train, sample_clients)
len(federated_train_data), federated_train_data[0]


def create_compiled_keras_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Reshape((28, 28, 1), input_shape=(784,)),
        tf.keras.layers.Conv2D(32, kernel_size=(5, 5), activation="relu",
                               padding="same", strides=1),
        tf.keras.layers.MaxPooling2D(pool_size=2, strides=2, padding='valid'),
        tf.keras.layers.Conv2D(64, kernel_size=(5, 5), activation="relu",
                               padding="same", strides=1),
        tf.keras.layers.MaxPooling2D(pool_size=2, strides=2, padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation="relu"),
        tf.keras.layers.Dense(10, activation="softmax"),
    ])

    def loss_fn(y_true, y_pred):
        return tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(
            y_true, y_pred))

    model.compile(
        loss=loss_fn,
        optimizer=gradient_descent.SGD(learning_rate=0.02),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    return model


def model_fn():
    keras_model = create_compiled_keras_model()
    return tff.learning.from_compiled_keras_model(keras_model, sample_batch)


iterative_process = tff.learning.build_federated_averaging_process(model_fn)
state = iterative_process.initialize()

for round_num in range(1, 10):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))

## Evaluation of the model
# This function doesn't work
evaluation = tff.learning.build_federated_evaluation(model_fn)
federated_test_data = make_federated_data(emnist_test, sample_clients)
test_metrics = evaluation(state.model, federated_test_data)
I expect evaluation metrics on the test data, but instead I get the following error:
---------------------------------------------------------------------------
_FallbackException Traceback (most recent call last)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/ops/gen_functional_ops.py in stateful_partitioned_call(args, Tout, f, config, config_proto, executor_type, name)
482 "Tout", Tout, "f", f, "config", config, "config_proto", config_proto,
--> 483 "executor_type", executor_type)
484 return _result
_FallbackException: This function does not handle the case of the path where all inputs are not already EagerTensors.
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-23-6e9c77f70201> in <module>()
----> 1 evaluation = tff.learning.build_federated_evaluation(model_fn)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/learning/federated_evaluation.py in build_federated_evaluation(model_fn)
83 @tff.federated_computation(
84 tff.FederatedType(model_weights_type, tff.SERVER, all_equal=True),
---> 85 tff.FederatedType(tff.SequenceType(batch_type), tff.CLIENTS))
86 def server_eval(server_model_weights, federated_dataset):
87 client_outputs = tff.federated_map(
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/computation_wrapper.py in <lambda>(fn)
406 args = (args,)
407 arg_type = computation_types.to_type(args[0])
--> 408 return lambda fn: _wrap(fn, arg_type, self._wrapper_fn)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/computation_wrapper.py in _wrap(fn, parameter_type, wrapper_fn)
94 function_utils.wrap_as_zero_or_one_arg_callable(fn, parameter_type),
95 parameter_type,
---> 96 name=fn_name)
97 py_typecheck.check_type(concrete_fn, function_utils.ConcreteFunction,
98 'value returned by the wrapper')
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/computation_wrapper_instances.py in _federated_computation_wrapper_fn(target_fn, parameter_type, name)
52 parameter_type,
53 ctx_stack,
---> 54 suggested_name=name))
55 return computation_impl.ComputationImpl(target_lambda.proto, ctx_stack)
56
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/federated_computation_utils.py in zero_or_one_arg_fn_to_building_block(fn, parameter_name, parameter_type, context_stack, suggested_name)
73 value_impl.ValueImpl(
74 computation_building_blocks.Reference(
---> 75 parameter_name, parameter_type), context_stack))
76 else:
77 result = fn()
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/function_utils.py in <lambda>(arg)
551 # and to force any parameter bindings to be resolved now.
552 # pylint: disable=unnecessary-lambda,undefined-variable
--> 553 return (lambda fn, at, kt: lambda arg: _unpack_and_call(fn, at, kt, arg))(
554 fn, arg_types, kwarg_types)
555 # pylint: enable=unnecessary-lambda,undefined-variable
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/function_utils.py in _unpack_and_call(fn, arg_types, kwarg_types, arg)
545 name, str(expected_type), str(actual_type)))
546 kwargs[name] = element_value
--> 547 return fn(*args, **kwargs)
548
549 # Deliberate wrapping to isolate the caller from the underlying function
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/learning/federated_evaluation.py in server_eval(server_model_weights, federated_dataset)
88 client_eval,
89 [tff.federated_broadcast(server_model_weights), federated_dataset])
---> 90 return model.federated_output_computation(client_outputs.local_outputs)
91
92 return server_eval
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/learning/model_utils.py in federated_output_computation(self)
531 @property
532 def federated_output_computation(self):
--> 533 return self._model.federated_output_computation
534
535
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow_federated/python/learning/model_utils.py in federated_output_computation(self)
406 def federated_output_computation(self):
407 metric_variable_type_dict = nest.map_structure(tf.TensorSpec.from_tensor,
--> 408 self.report_local_outputs())
409 federated_local_outputs_type = tff.FederatedType(
410 metric_variable_type_dict, tff.CLIENTS, all_equal=False)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
314 if not self._created_variables:
315 # If we did not create any variables the trace we have is good enough.
--> 316 return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds) # pylint: disable=protected-access
317
318 def fn_with_cond(*inner_args, **inner_kwds):
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/eager/function.py in _filtered_call(self, args, kwargs)
382 """
383 return self._call_flat(
--> 384 (t for t in nest.flatten((args, kwargs))
385 if isinstance(
386 t, (ops.Tensor, resource_variable_ops.ResourceVariable))))
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args)
431 # Only need to override the gradient in graph mode and when we have outputs.
432 if context.executing_eagerly() or not self.outputs:
--> 433 outputs = self._inference_function.call(ctx, args)
434 else:
435 if not self._gradient_name:
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args)
267 executing_eagerly=executing_eagerly,
268 config=function_call_options.config_proto_serialized,
--> 269 executor_type=function_call_options.executor_type)
270
271 if executing_eagerly:
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/ops/functional_ops.py in partitioned_call(args, f, tout, executing_eagerly, config, executor_type)
1081 outputs = gen_functional_ops.stateful_partitioned_call(
1082 args=args, Tout=tout, f=f, config_proto=config,
-> 1083 executor_type=executor_type)
1084 else:
1085 outputs = gen_functional_ops.partitioned_call(
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/ops/gen_functional_ops.py in stateful_partitioned_call(args, Tout, f, config, config_proto, executor_type, name)
487 return stateful_partitioned_call_eager_fallback(
488 args, Tout=Tout, f=f, config=config, config_proto=config_proto,
--> 489 executor_type=executor_type, name=name, ctx=_ctx)
490 except _core._SymbolicException:
491 pass # Add nodes to the TensorFlow graph.
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/ops/gen_functional_ops.py in stateful_partitioned_call_eager_fallback(args, Tout, f, config, config_proto, executor_type, name, ctx)
548 executor_type = ""
549 executor_type = _execute.make_str(executor_type, "executor_type")
--> 550 _attr_Tin, args = _execute.convert_to_mixed_eager_tensors(args, _ctx)
551 _inputs_flat = list(args)
552 _attrs = ("Tin", _attr_Tin, "Tout", Tout, "f", f, "config", config,
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/eager/execute.py in convert_to_mixed_eager_tensors(values, ctx)
207 def convert_to_mixed_eager_tensors(values, ctx):
208 v = [ops.internal_convert_to_tensor(t, ctx=ctx) for t in values]
--> 209 types = [t._datatype_enum() for t in v] # pylint: disable=protected-access
210 return types, v
211
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/eager/execute.py in <listcomp>(.0)
207 def convert_to_mixed_eager_tensors(values, ctx):
208 v = [ops.internal_convert_to_tensor(t, ctx=ctx) for t in values]
--> 209 types = [t._datatype_enum() for t in v] # pylint: disable=protected-access
210 return types, v
211
AttributeError: 'Tensor' object has no attribute '_datatype_enum'

Nuria: this should have just been fixed earlier today. If you do not want to wait for the next release (coming soon), I would recommend simply building a local pip package from source. You can find instructions in the install guide.

As a follow-up here: TFF 0.4.0 has just been released, and it contains this bug fix.
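If you want to confirm that your environment actually picked up the fixed release, here is a minimal sketch (assuming, per the above, that the fix shipped in 0.4.0):
# Quick version check; assumes the fix is included in tensorflow_federated 0.4.0 and later.
import pkg_resources

tff_version = pkg_resources.get_distribution("tensorflow_federated").version
print("tensorflow_federated", tff_version)  # expect 0.4.0 or newer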

Related

Error using BayesSearchCV from skopt on RandomForestClassifier

This is the code to reproduce the error:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from scipy.stats import loguniform
from skopt import BayesSearchCV
from sklearn.datasets import load_iris
import numpy as np

X, y = load_iris(return_X_y=True)

grid = {
    'LogisticRegression': {
        'C': loguniform.rvs(0.1, 10000, size=50),
        'solver': ['lbfgs', 'saga'],
        'penalty': ['l2'],
        'warm_start': [False, True],
        'class_weight': [None, 'balanced'],
        'max_iter': [100, 1000],
        'n_jobs': [10]
    },
    'RandomForestClassifier': {
        'n_estimators': np.random.randint(5, 200, size=10),
        'criterion': ['gini', 'entropy'],
        'max_depth': np.random.randint(5, 50, size=10),
        'min_samples_split': np.random.randint(5, 50, size=10),
        'min_samples_leaf': np.random.randint(5, 50, size=10),
        'max_features': loguniform.rvs(0.2, 1.0, size=5),
        'n_jobs': [10]
    }
}

tuner_params = {
    'cv': 2,
    'n_jobs': 10,
    'scoring': 'roc_auc_ovr',
    'return_train_score': True,
    'refit': True,
    'n_iter': 3
}

clf = 'LogisticRegression'
search_cv = BayesSearchCV(estimator=eval(clf)(), search_spaces=grid[clf], **tuner_params)
search_cv.fit(X, y)

clf = 'RandomForestClassifier'
search_cv = BayesSearchCV(estimator=eval(clf)(), search_spaces=grid[clf], **tuner_params)
search_cv.fit(X, y)
Using BayesSearchCV with LogisticRegression as the classifier gives no error, while with RandomForestClassifier it raises the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Input In [8], in <cell line: 2>()
1 search_cv = BayesSearchCV( estimator = eval(clf)(), search_spaces = grid[clf], **tuner_params)
----> 2 search_cv.fit(X,y)
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/searchcv.py:466, in BayesSearchCV.fit(self, X, y, groups, callback, **fit_params)
463 else:
464 self.optimizer_kwargs_ = dict(self.optimizer_kwargs)
--> 466 super().fit(X=X, y=y, groups=groups, **fit_params)
468 # BaseSearchCV never ranked train scores,
469 # but apparently we used to ship this (back-compat)
470 if self.return_train_score:
File ~/.conda/envs/meth/lib/python3.9/site-packages/sklearn/model_selection/_search.py:875, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
869 results = self._format_results(
870 all_candidate_params, n_splits, all_out, all_more_results
871 )
873 return results
--> 875 self._run_search(evaluate_candidates)
877 # multimetric is determined here because in the case of a callable
878 # self.scoring the return type is only known after calling
879 first_test_score = all_out[0]["test_scores"]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/searchcv.py:512, in BayesSearchCV._run_search(self, evaluate_candidates)
508 while n_iter > 0:
509 # when n_iter < n_points points left for evaluation
510 n_points_adjusted = min(n_iter, n_points)
--> 512 optim_result = self._step(
513 search_space, optimizer,
514 evaluate_candidates, n_points=n_points_adjusted
515 )
516 n_iter -= n_points
518 if eval_callbacks(callbacks, optim_result):
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/searchcv.py:400, in BayesSearchCV._step(self, search_space, optimizer, evaluate_candidates, n_points)
397 """Generate n_jobs parameters and evaluate them in parallel.
398 """
399 # get parameter values to evaluate
--> 400 params = optimizer.ask(n_points=n_points)
402 # convert parameters to python native types
403 params = [[np.array(v).item() for v in p] for p in params]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/optimizer/optimizer.py:395, in Optimizer.ask(self, n_points, strategy)
393 X = []
394 for i in range(n_points):
--> 395 x = opt.ask()
396 X.append(x)
398 ti_available = "ps" in self.acq_func and len(opt.yi) > 0
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/optimizer/optimizer.py:367, in Optimizer.ask(self, n_points, strategy)
336 """Query point or multiple points at which objective should be evaluated.
337
338 n_points : int or None, default: None
(...)
364
365 """
366 if n_points is None:
--> 367 return self._ask()
369 supported_strategies = ["cl_min", "cl_mean", "cl_max"]
371 if not (isinstance(n_points, int) and n_points > 0):
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/optimizer/optimizer.py:434, in Optimizer._ask(self)
430 if self._n_initial_points > 0 or self.base_estimator_ is None:
431 # this will not make a copy of `self.rng` and hence keep advancing
432 # our random state.
433 if self._initial_samples is None:
--> 434 return self.space.rvs(random_state=self.rng)[0]
435 else:
436 # The samples are evaluated starting form initial_samples[0]
437 return self._initial_samples[
438 len(self._initial_samples) - self._n_initial_points]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/space.py:900, in Space.rvs(self, n_samples, random_state)
897 columns = []
899 for dim in self.dimensions:
--> 900 columns.append(dim.rvs(n_samples=n_samples, random_state=rng))
902 # Transpose
903 return _transpose_list_array(columns)
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/space.py:698, in Categorical.rvs(self, n_samples, random_state)
696 return self.inverse_transform([(choices)])
697 elif self.transform_ == "normalize":
--> 698 return self.inverse_transform(list(choices))
699 else:
700 return [self.categories[c] for c in choices]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/space.py:685, in Categorical.inverse_transform(self, Xt)
680 """Inverse transform samples from the warped space back into the
681 original space.
682 """
683 # The concatenation of all transformed dimensions makes Xt to be
684 # of type float, hence the required cast back to int.
--> 685 inv_transform = super(Categorical, self).inverse_transform(Xt)
686 if isinstance(inv_transform, list):
687 inv_transform = np.array(inv_transform)
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/space.py:168, in Dimension.inverse_transform(self, Xt)
164 def inverse_transform(self, Xt):
165 """Inverse transform samples from the warped space back into the
166 original space.
167 """
--> 168 return self.transformer.inverse_transform(Xt)
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/transformers.py:309, in Pipeline.inverse_transform(self, X)
307 def inverse_transform(self, X):
308 for transformer in self.transformers[::-1]:
--> 309 X = transformer.inverse_transform(X)
310 return X
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/transformers.py:216, in LabelEncoder.inverse_transform(self, Xt)
214 else:
215 Xt = np.asarray(Xt)
--> 216 return [
217 self.inverse_mapping_[int(np.round(i))] for i in Xt
218 ]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/transformers.py:217, in <listcomp>(.0)
214 else:
215 Xt = np.asarray(Xt)
216 return [
--> 217 self.inverse_mapping_[int(np.round(i))] for i in Xt
218 ]
KeyError: 9
My versions:
python: 3.9.12
sklearn: 1.1.1
skopt: 0.9.0
The same error happens when using XGBClassifier or GradientBoostingClassifier, while there is no error with SVC or KNeighborsClassifier.
I believe this is related to how skopt encodes the hyperparameter space: it seems that duplicate points generated by your random lists are required to trigger the error, though sometimes the fit succeeds regardless. Either there are collisions in the encoding, or the duplicates cause the search space to be processed incorrectly.
At least the issue stopped reproducing for me after changing all the random lists to list(range(...)).
Might be worth a bug report.
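For illustration, the workaround looked roughly like this (a sketch only; the specific step sizes are arbitrary, the point being that every candidate list is deterministic and free of duplicates):
# Sketch of the workaround: replace the randomly sampled candidate lists with
# explicit, duplicate-free values so skopt's categorical encoding has no collisions.
grid['RandomForestClassifier'] = {
    'n_estimators': list(range(5, 200, 20)),
    'criterion': ['gini', 'entropy'],
    'max_depth': list(range(5, 50, 5)),
    'min_samples_split': list(range(5, 50, 5)),
    'min_samples_leaf': list(range(5, 50, 5)),
    'max_features': [0.2, 0.4, 0.6, 0.8, 1.0],  # fixed grid instead of loguniform.rvs
    'n_jobs': [10],
}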

Training the BERT model with pytorch

I am unable to figure out why my BERT model doesn't get past the training command. I am using pytorch-lightning. I am running the code on AWS EC2 (p3.2xlarge); it does show me the available GPU, but I can't make sense of the device-side error. Could someone please point me in the right direction? I really appreciate your time and consideration.
PS: The results below are after setting CUDA_LAUNCH_BLOCKING=1.
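(For reference, this is how that flag is typically set; a sketch only, and it must run before anything touches CUDA, e.g. at the very top of the script:)
# Set before CUDA is initialised so kernel launches are synchronous
# and the failing op is reported at the right place in the stack trace.
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"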
trainer = pl.Trainer(
    logger=logger,
    checkpoint_callback=checkpoint_callback,
    callbacks=[early_stopping_callback],
    max_epochs=N_EPOCHS,
    gpus=1,
    progress_bar_refresh_rate=30,
)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
In [155]:
trainer.fit(model, data_module)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-155-7b6b8391c42e> in <module>
----> 1 trainer.fit(model, data_module)
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloaders, val_dataloaders, datamodule, train_dataloader, ckpt_path)
739 train_dataloaders = train_dataloader
740 self._call_and_handle_interrupt(
--> 741 self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
742 )
743
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in _call_and_handle_interrupt(self, trainer_fn, *args, **kwargs)
683 """
684 try:
--> 685 return trainer_fn(*args, **kwargs)
686 # TODO: treat KeyboardInterrupt as BaseException (delete the code below) in v1.7
687 except KeyboardInterrupt as exception:
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in _fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
775 # TODO: ckpt_path only in v1.7
776 ckpt_path = ckpt_path or self.resume_from_checkpoint
--> 777 self._run(model, ckpt_path=ckpt_path)
778
779 assert self.state.stopped
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in _run(self, model, ckpt_path)
1143
1144 self._call_configure_sharded_model() # allow user to setup in model sharded environment
-> 1145 self.accelerator.setup(self)
1146
1147 # ----------------------------
~/.local/lib/python3.6/site-packages/pytorch_lightning/accelerators/gpu.py in setup(self, trainer)
44 def setup(self, trainer: "pl.Trainer") -> None:
45 self.set_nvidia_flags(trainer.local_rank)
---> 46 return super().setup(trainer)
47
48 def on_train_start(self) -> None:
~/.local/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in setup(self, trainer)
89 trainer: the trainer instance
90 """
---> 91 self.setup_training_type_plugin()
92 if not self.training_type_plugin.setup_optimizers_in_pre_dispatch:
93 self.setup_optimizers(trainer)
~/.local/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in setup_training_type_plugin(self)
361 def setup_training_type_plugin(self) -> None:
362 """Attaches the training type plugin to the accelerator."""
--> 363 self.training_type_plugin.setup()
364
365 def setup_precision_plugin(self) -> None:
~/.local/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/single_device.py in setup(self)
69
70 def setup(self) -> None:
---> 71 self.model_to_device()
72
73 @property
~/.local/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/single_device.py in model_to_device(self)
66
67 def model_to_device(self) -> None:
---> 68 self._model.to(self.root_device)
69
70 def setup(self) -> None:
~/.local/lib/python3.6/site-packages/pytorch_lightning/core/mixins/device_dtype_mixin.py in to(self, *args, **kwargs)
109 out = torch._C._nn._parse_to(*args, **kwargs)
110 self.__update_properties(device=out[0], dtype=out[1])
--> 111 return super().to(*args, **kwargs)
112
113 def cuda(self, device: Optional[Union[torch.device, int]] = None) -> "DeviceDtypeModuleMixin":
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in to(self, *args, **kwargs)
897 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
898
--> 899 return self._apply(convert)
900
901 def register_backward_hook(
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in _apply(self, fn)
568 def _apply(self, fn):
569 for module in self.children():
--> 570 module._apply(fn)
571
572 def compute_should_use_set_data(tensor, tensor_applied):
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in _apply(self, fn)
568 def _apply(self, fn):
569 for module in self.children():
--> 570 module._apply(fn)
571
572 def compute_should_use_set_data(tensor, tensor_applied):
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in _apply(self, fn)
568 def _apply(self, fn):
569 for module in self.children():
--> 570 module._apply(fn)
571
572 def compute_should_use_set_data(tensor, tensor_applied):
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in _apply(self, fn)
591 # `with torch.no_grad():`
592 with torch.no_grad():
--> 593 param_applied = fn(param)
594 should_use_set_data = compute_should_use_set_data(param, param_applied)
595 if should_use_set_data:
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in convert(t)
895 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
896 non_blocking, memory_format=convert_to_format)
--> 897 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
898
899 return self._apply(convert)
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Restarting the machine returned this:
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Missing logger folder: lightning_logs/nara-comments
| Name | Type | Params
-----------------------------------------
0 | bert | BertModel | 108 M
1 | classifier | Linear | 288 K
2 | criterion | BCELoss | 0
-----------------------------------------
108 M Trainable params
0 Non-trainable params
108 M Total params
434.395 Total estimated model params size (MB)
/home/ubuntu/.local/lib/python3.6/site-packages/pytorch_lightning/utilities/data.py:60: UserWarning: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 4540. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
"Trying to infer the `batch_size` from an ambiguous collection. The batch size we"
/home/ubuntu/.local/lib/python3.6/site-packages/pytorch_lightning/utilities/data.py:60: UserWarning: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 4374. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
"Trying to infer the `batch_size` from an ambiguous collection. The batch size we"
Global seed set to 42
Epoch 0: 0%
0/397 [00:00<?, ?it/s]
/home/ubuntu/.local/lib/python3.6/site-packages/pytorch_lightning/loops/optimization/closure.py:36: LightningDeprecationWarning: One of the returned values {'predictions', 'labels'} has a `grad_fn`. We will detach it automatically but this behaviour will change in v1.6. Please detach it manually: `return {'loss': ..., 'something': something.detach()}`
f"One of the returned values {set(extra.keys())} has a `grad_fn`. We will detach it automatically"
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-48-7b6b8391c42e> in <module>
----> 1 trainer.fit(model, data_module)
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloaders, val_dataloaders, datamodule, train_dataloader, ckpt_path)
739 train_dataloaders = train_dataloader
740 self._call_and_handle_interrupt(
--> 741 self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
742 )
743
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in _call_and_handle_interrupt(self, trainer_fn, *args, **kwargs)
683 """
684 try:
--> 685 return trainer_fn(*args, **kwargs)
686 # TODO: treat KeyboardInterrupt as BaseException (delete the code below) in v1.7
687 except KeyboardInterrupt as exception:
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in _fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
775 # TODO: ckpt_path only in v1.7
776 ckpt_path = ckpt_path or self.resume_from_checkpoint
--> 777 self._run(model, ckpt_path=ckpt_path)
778
779 assert self.state.stopped
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in _run(self, model, ckpt_path)
1197
1198 # dispatch `start_training` or `start_evaluating` or `start_predicting`
-> 1199 self._dispatch()
1200
1201 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in _dispatch(self)
1277 self.training_type_plugin.start_predicting(self)
1278 else:
-> 1279 self.training_type_plugin.start_training(self)
1280
1281 def run_stage(self):
~/.local/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
200 def start_training(self, trainer: "pl.Trainer") -> None:
201 # double dispatch to initiate the training loop
--> 202 self._results = trainer.run_stage()
203
204 def start_evaluating(self, trainer: "pl.Trainer") -> None:
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_stage(self)
1287 if self.predicting:
1288 return self._run_predict()
-> 1289 return self._run_train()
1290
1291 def _pre_training_routine(self):
~/.local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in _run_train(self)
1317 self.fit_loop.trainer = self
1318 with torch.autograd.set_detect_anomaly(self._detect_anomaly):
-> 1319 self.fit_loop.run()
1320
1321 def _run_evaluate(self) -> _EVALUATE_OUTPUT:
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/base.py in run(self, *args, **kwargs)
143 try:
144 self.on_advance_start(*args, **kwargs)
--> 145 self.advance(*args, **kwargs)
146 self.on_advance_end()
147 self.restarting = False
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/fit_loop.py in advance(self)
232
233 with self.trainer.profiler.profile("run_training_epoch"):
--> 234 self.epoch_loop.run(data_fetcher)
235
236 # the global step is manually decreased here due to backwards compatibility with existing loggers
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/base.py in run(self, *args, **kwargs)
143 try:
144 self.on_advance_start(*args, **kwargs)
--> 145 self.advance(*args, **kwargs)
146 self.on_advance_end()
147 self.restarting = False
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py in advance(self, *args, **kwargs)
191
192 with self.trainer.profiler.profile("run_training_batch"):
--> 193 batch_output = self.batch_loop.run(batch, batch_idx)
194
195 self.batch_progress.increment_processed()
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/base.py in run(self, *args, **kwargs)
143 try:
144 self.on_advance_start(*args, **kwargs)
--> 145 self.advance(*args, **kwargs)
146 self.on_advance_end()
147 self.restarting = False
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py in advance(self, batch, batch_idx)
86 if self.trainer.lightning_module.automatic_optimization:
87 optimizers = _get_active_optimizers(self.trainer.optimizers, self.trainer.optimizer_frequencies, batch_idx)
---> 88 outputs = self.optimizer_loop.run(split_batch, optimizers, batch_idx)
89 else:
90 outputs = self.manual_loop.run(split_batch, batch_idx)
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/base.py in run(self, *args, **kwargs)
143 try:
144 self.on_advance_start(*args, **kwargs)
--> 145 self.advance(*args, **kwargs)
146 self.on_advance_end()
147 self.restarting = False
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py in advance(self, batch, *args, **kwargs)
217 self._batch_idx,
218 self._optimizers[self.optim_progress.optimizer_position],
--> 219 self.optimizer_idx,
220 )
221 if result.loss is not None:
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py in _run_optimization(self, split_batch, batch_idx, optimizer, opt_idx)
264 # gradient update with accumulated gradients
265 else:
--> 266 self._optimizer_step(optimizer, opt_idx, batch_idx, closure)
267
268 result = closure.consume_result()
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py in _optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
384 on_tpu=(self.trainer._device_type == DeviceType.TPU and _TPU_AVAILABLE),
385 using_native_amp=(self.trainer.amp_backend is not None and self.trainer.amp_backend == AMPType.NATIVE),
--> 386 using_lbfgs=is_lbfgs,
387 )
388
~/.local/lib/python3.6/site-packages/pytorch_lightning/core/lightning.py in optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, optimizer_closure, on_tpu, using_native_amp, using_lbfgs)
1650
1651 """
-> 1652 optimizer.step(closure=optimizer_closure)
1653
1654 def optimizer_zero_grad(self, epoch: int, batch_idx: int, optimizer: Optimizer, optimizer_idx: int):
~/.local/lib/python3.6/site-packages/pytorch_lightning/core/optimizer.py in step(self, closure, **kwargs)
162 assert trainer is not None
163 with trainer.profiler.profile(profiler_action):
--> 164 trainer.accelerator.optimizer_step(self._optimizer, self._optimizer_idx, closure, **kwargs)
~/.local/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in optimizer_step(self, optimizer, opt_idx, closure, model, **kwargs)
337 """
338 model = model or self.lightning_module
--> 339 self.precision_plugin.optimizer_step(model, optimizer, opt_idx, closure, **kwargs)
340
341 def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: Optimizer, opt_idx: int) -> None:
~/.local/lib/python3.6/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py in optimizer_step(self, model, optimizer, optimizer_idx, closure, **kwargs)
161 if isinstance(model, pl.LightningModule):
162 closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure)
--> 163 optimizer.step(closure=closure, **kwargs)
164
165 def _track_grad_norm(self, trainer: "pl.Trainer") -> None:
~/.local/lib/python3.6/site-packages/torch/optim/lr_scheduler.py in wrapper(*args, **kwargs)
63 instance._step_count += 1
64 wrapped = func.__get__(instance, cls)
---> 65 return wrapped(*args, **kwargs)
66
67 # Note that the returned function here is no longer a bound method,
~/.local/lib/python3.6/site-packages/torch/optim/optimizer.py in wrapper(*args, **kwargs)
86 profile_name = "Optimizer.step#{}.step".format(obj.__class__.__name__)
87 with torch.autograd.profiler.record_function(profile_name):
---> 88 return func(*args, **kwargs)
89 return wrapper
90
~/.local/lib/python3.6/site-packages/transformers/optimization.py in step(self, closure)
330 loss = None
331 if closure is not None:
--> 332 loss = closure()
333
334 for group in self.param_groups:
~/.local/lib/python3.6/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py in _wrap_closure(self, model, optimizer, optimizer_idx, closure)
146 consistent with the ``PrecisionPlugin`` subclasses that cannot pass ``optimizer.step(closure)`` directly.
147 """
--> 148 closure_result = closure()
149 self._after_closure(model, optimizer, optimizer_idx)
150 return closure_result
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py in __call__(self, *args, **kwargs)
158
159 def __call__(self, *args: Any, **kwargs: Any) -> Optional[Tensor]:
--> 160 self._result = self.closure(*args, **kwargs)
161 return self._result.loss
162
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py in closure(self, *args, **kwargs)
153 if self._backward_fn is not None and step_output.closure_loss is not None:
154 with self._profiler.profile("backward"):
--> 155 self._backward_fn(step_output.closure_loss)
156
157 return step_output
~/.local/lib/python3.6/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py in backward_fn(loss)
325
326 def backward_fn(loss: Tensor) -> None:
--> 327 self.trainer.accelerator.backward(loss, optimizer, opt_idx)
328
329 # check if model weights are nan
~/.local/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in backward(self, closure_loss, *args, **kwargs)
312 closure_loss = self.precision_plugin.pre_backward(self.lightning_module, closure_loss)
313
--> 314 self.precision_plugin.backward(self.lightning_module, closure_loss, *args, **kwargs)
315
316 closure_loss = self.precision_plugin.post_backward(self.lightning_module, closure_loss)
~/.local/lib/python3.6/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py in backward(self, model, closure_loss, optimizer, *args, **kwargs)
89 # do backward pass
90 if model is not None and isinstance(model, pl.LightningModule):
---> 91 model.backward(closure_loss, optimizer, *args, **kwargs)
92 else:
93 self._run_backward(closure_loss, *args, **kwargs)
~/.local/lib/python3.6/site-packages/pytorch_lightning/core/lightning.py in backward(self, loss, optimizer, optimizer_idx, *args, **kwargs)
1432 loss.backward()
1433 """
-> 1434 loss.backward(*args, **kwargs)
1435
1436 def toggle_optimizer(self, optimizer: Union[Optimizer, LightningOptimizer], optimizer_idx: int) -> None:
~/.local/lib/python3.6/site-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
305 create_graph=create_graph,
306 inputs=inputs)
--> 307 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
308
309 def register_hook(self, hook):
~/.local/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
154 Variable._execution_engine.run_backward(
155 tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 156 allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
157
158
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

load_from_checkpoint fails after transfer learning a LightningModule

I am trying to transfer-learn a LightningModule. The relevant part of the code is this:
class DeepFilteringTransferLearning(pl.LightningModule):
    def __init__(self, chk_path=None):
        super().__init__()

        # init class members
        self.prediction = []
        self.label = []
        self.loss = MSELoss()

        # init pretrained model
        self.chk_path = chk_path
        model = DeepFiltering.load_from_checkpoint(chk_path)
        backbone = model.sequential
        layers = list(backbone.children())[:-1]
        self.groundModel = Sequential(*layers)

        # use the pretrained model the same way to regress Lshall and neq
        self.regressor = nn.Linear(64, 2)

    def forward(self, x):
        self.groundModel.eval()
        with torch.no_grad():
            groundOut = self.groundModel(x)
        yPred = self.regressor(groundOut)
        return yPred
I save my model in a different main file, the relevant part of which is:
# callbacks
callbacks = [
    ModelCheckpoint(
        dirpath="checkpoints/maxPooling16StandardizedL2RegularizedReproduceableSeeded42Ampl1ConvTransferLearned",
        save_top_k=5,
        monitor="val_loss",
    ),
]

# trainer
trainer = pl.Trainer(gpus=[1, 2], strategy="dp", max_epochs=150, logger=wandb_logger,
                     callbacks=callbacks, precision=32, deterministic=True)
trainer.fit(model, train_dataloaders=trainDl, val_dataloaders=valDl)
Afterwards I try to load the model from the checkpoint like this:
chk_patH = "path/to/transfer_learned/model"
standardizedL2RegularizedL1 = DeepFilteringTransferLearning("path/to/model/trying/to/use/for/transfer_learning").load_from_checkpoint(chk_patH)
I got the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in _check_seekable(f)
307 try:
--> 308 f.seek(f.tell())
309 return True
AttributeError: 'NoneType' object has no attribute 'seek'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-6-13f5fd0c7b85> in <module>
1 chk_patH = "checkpoints/maxPooling16StandardizedL2RegularizedReproduceableSeeded42Ampl1/epoch=4-step=349.ckpt"
----> 2 standardizedL2RegularizedL1 = DeepFilteringTransferLearning("checkpoints/maxPooling16StandardizedL2RegularizedReproduceableSeeded42Ampl2/epoch=145-step=10219.ckpt").load_from_checkpoint(chk_patH)
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/pytorch_lightning/core/saving.py in load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
154 checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].update(kwargs)
155
--> 156 model = cls._load_model_state(checkpoint, strict=strict, **kwargs)
157 return model
158
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/pytorch_lightning/core/saving.py in _load_model_state(cls, checkpoint, strict, **cls_kwargs_new)
196 _cls_kwargs = {k: v for k, v in _cls_kwargs.items() if k in cls_init_args_name}
197
--> 198 model = cls(**_cls_kwargs)
199
200 # give model a chance to load something
~/whistlerProject/gitHub/whistler/mathe/gwInspired/deepFilteringTransferLearning.py in __init__(self, chk_path)
34 #init pretrained model
35 self.chk_path = chk_path
---> 36 model = DeepFiltering.load_from_checkpoint(chk_path)
37 backbone = model.sequential
38 layers = list(backbone.children())[:-1]
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/pytorch_lightning/core/saving.py in load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
132 checkpoint = pl_load(checkpoint_path, map_location=map_location)
133 else:
--> 134 checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)
135
136 if hparams_file is not None:
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/pytorch_lightning/utilities/cloud_io.py in load(path_or_url, map_location)
31 if not isinstance(path_or_url, (str, Path)):
32 # any sort of BytesIO or similiar
---> 33 return torch.load(path_or_url, map_location=map_location)
34 if str(path_or_url).startswith("http"):
35 return torch.hub.load_state_dict_from_url(str(path_or_url), map_location=map_location)
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
579 pickle_load_args['encoding'] = 'utf-8'
580
--> 581 with _open_file_like(f, 'rb') as opened_file:
582 if _is_zipfile(opened_file):
583 # The zipfile reader is going to advance the current file position.
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in _open_file_like(name_or_buffer, mode)
233 return _open_buffer_writer(name_or_buffer)
234 elif 'r' in mode:
--> 235 return _open_buffer_reader(name_or_buffer)
236 else:
237 raise RuntimeError(f"Expected 'r' or 'w' in mode but got {mode}")
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in __init__(self, buffer)
218 def __init__(self, buffer):
219 super(_open_buffer_reader, self).__init__(buffer)
--> 220 _check_seekable(buffer)
221
222
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in _check_seekable(f)
309 return True
310 except (io.UnsupportedOperation, AttributeError) as e:
--> 311 raise_err_msg(["seek", "tell"], e)
312 return False
313
~/anaconda3/envs/skimageTrial/lib/python3.6/site-packages/torch/serialization.py in raise_err_msg(patterns, e)
302 + " Please pre-load the data into a buffer like io.BytesIO and"
303 + " try to load from it instead.")
--> 304 raise type(e)(msg)
305 raise e
306
AttributeError: 'NoneType' object has no attribute 'seek'. You can only torch.load from a file that is seekable. Please pre-load the data into a buffer like io.BytesIO and try to load from it instead.
which I can't resolve.
I tried to do this according to the available tutorials on the official PyTorch Lightning page here. I can't figure out what I am missing.
Could somebody point me in the right direction?

FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan when using greater int values

I've recently watched a YouTube (DataSchool) video where the guy used only 3 columns from the Titanic dataset and made a pipeline. I wanted to add more columns to get better accuracy so I added Age and Fare.
I think it's probably because of the values of Age and Fare that I'm getting this error when I run cross_val_score:
columns_trans = make_column_transformer(
    (OneHotEncoder(), ['Sex', 'Embarked']),
    remainder='passthrough')

logreg = LogisticRegression(solver='lbfgs')
pipe = make_pipeline(columns_trans, logreg)

cross_val_score(pipe, X, y, cv=5, scoring='accuracy').mean()
/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_validation.py:552: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan.
If I remove Age and Fare, everything works fine. I was wondering if the ColumnTransformer or make_pipeline has a problem with values like these.
I also tried scaling the values of Fare and Age; that made cross_val_score return a result, but pipe.predict() then failed with this error:
ValueError: Input contains NaN, infinity or a value too large for dtype('float64')
Traceback:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/tmp/ipykernel_119/4279568460.py in <module>
----> 1 cross_val_score(pipe, X, y, cv=5, scoring='accuracy', error_score="raise").mean()
/opt/conda/lib/python3.7/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
404 fit_params=fit_params,
405 pre_dispatch=pre_dispatch,
--> 406 error_score=error_score)
407 return cv_results['test_score']
408
/opt/conda/lib/python3.7/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
246 return_times=True, return_estimator=return_estimator,
247 error_score=error_score)
--> 248 for train, test in cv.split(X, y, groups))
249
250 zipped_scores = list(zip(*scores))
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
1039 # remaining jobs.
1040 self._iterating = False
-> 1041 if self.dispatch_one_batch(iterator):
1042 self._iterating = self._original_iterator is not None
1043
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
857 return False
858 else:
--> 859 self._dispatch(tasks)
860 return True
861
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
775 with self._lock:
776 job_idx = len(self._jobs)
--> 777 job = self._backend.apply_async(batch, callback=cb)
778 # A job can complete so quickly than its callback is
779 # called before we get here, causing self._jobs to
/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
262 return [func(*args, **kwargs)
--> 263 for func, args, kwargs in self.items]
264
265 def __reduce__(self):
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
262 return [func(*args, **kwargs)
--> 263 for func, args, kwargs in self.items]
264
265 def __reduce__(self):
/opt/conda/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
529 estimator.fit(X_train, **fit_params)
530 else:
--> 531 estimator.fit(X_train, y_train, **fit_params)
532
533 except Exception as e:
/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
333 if self._final_estimator != 'passthrough':
334 fit_params_last_step = fit_params_steps[self.steps[-1][0]]
--> 335 self._final_estimator.fit(Xt, y, **fit_params_last_step)
336
337 return self
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py in fit(self, X, y, sample_weight)
1415 penalty=penalty, max_squared_sum=max_squared_sum,
1416 sample_weight=sample_weight)
-> 1417 for class_, warm_start_coef_ in zip(classes_, warm_start_coef))
1418
1419 fold_coefs_, _, n_iter_ = zip(*fold_coefs_)
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
1039 # remaining jobs.
1040 self._iterating = False
-> 1041 if self.dispatch_one_batch(iterator):
1042 self._iterating = self._original_iterator is not None
1043
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
857 return False
858 else:
--> 859 self._dispatch(tasks)
860 return True
861
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
775 with self._lock:
776 job_idx = len(self._jobs)
--> 777 job = self._backend.apply_async(batch, callback=cb)
778 # A job can complete so quickly than its callback is
779 # called before we get here, causing self._jobs to
/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
262 return [func(*args, **kwargs)
--> 263 for func, args, kwargs in self.items]
264
265 def __reduce__(self):
/opt/conda/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
262 return [func(*args, **kwargs)
--> 263 for func, args, kwargs in self.items]
264
265 def __reduce__(self):
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py in _logistic_regression_path(X, y, pos_class, Cs, fit_intercept, max_iter, tol, verbose, solver, coef, class_weight, dual, penalty, intercept_scaling, multi_class, random_state, check_input, max_squared_sum, sample_weight, l1_ratio)
762 n_iter_i = _check_optimize_result(
763 solver, opt_res, max_iter,
--> 764 extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
765 w0, loss = opt_res.x, opt_res.fun
766 elif solver == 'newton-cg':
/opt/conda/lib/python3.7/site-packages/sklearn/utils/optimize.py in _check_optimize_result(solver, result, max_iter, extra_warning_msg)
241 " https://scikit-learn.org/stable/modules/"
242 "preprocessing.html"
--> 243 ).format(solver, result.status, result.message.decode("latin1"))
244 if extra_warning_msg is not None:
245 warning_msg += "\n" + extra_warning_msg
AttributeError: 'str' object has no attribute 'decode'
I solved this error by changing solver='lbfgs' to solver='liblinear' in LogisticRegression(), i.e. from
logreg = LogisticRegression(solver='lbfgs')
to
logreg = LogisticRegression(solver='liblinear')
As for the following error:
ValueError: Input contains NaN, infinity or a value too large for dtype('float64')
it's best to check whether your test data contains any null values or strings.
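For example, a quick check along these lines (a minimal sketch, assuming the features live in a pandas DataFrame X with the Titanic columns):
# Sketch: look for missing values and leftover string columns before fitting/predicting.
# In the Titanic data the 'Age' column is known to contain NaNs.
print(X.isnull().sum())   # missing values per column
print(X.dtypes)           # object columns may still hold raw strings

# One common fix is to impute inside the pipeline, for example:
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

columns_trans = make_column_transformer(
    (OneHotEncoder(), ['Sex', 'Embarked']),
    (SimpleImputer(strategy='median'), ['Age', 'Fare']),
    remainder='passthrough')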

neo4django with Neo4j 2.0

I'm just starting out using Neo4j and I'd like to use 2.0 (I have 2.0.1 community installed). I see that neo4django was only tested against neo4j 1.8.2-1.9.4, but have people gotten it working with 2.x? I installed the gremlin plugin but can't create or query through neo4django.
create:
In [8]: NeoProfile.objects.create(profile_id=1234)
[INFO] requests.packages.urllib3.connectionpool#214: Resetting dropped connection: localhost
---------------------------------------------------------------------------
StatusException Traceback (most recent call last)
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/django/core/management/commands/shell.pyc in <module>()
----> 1 NeoProfile.objects.create(profile_id=1234)
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/manager.pyc in create(self, **kwargs)
41
42 def create(self, **kwargs):
---> 43 return self.get_query_set().create(**kwargs)
44
45 def filter(self, *args, **kwargs):
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/query.pyc in create(self, **kwargs)
1295 if 'id' in kwargs or 'pk' in kwargs:
1296 raise FieldError("Neo4j doesn't allow node ids to be assigned.")
-> 1297 return super(NodeQuerySet, self).create(**kwargs)
1298
1299 #TODO would be awesome if this were transactional
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/django/db/models/query.pyc in create(self, **kwargs)
375 obj = self.model(**kwargs)
376 self._for_write = True
--> 377 obj.save(force_insert=True, using=self.db)
378 return obj
379
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/base.pyc in save(self, using, **kwargs)
315
316 def save(self, using=DEFAULT_DB_ALIAS, **kwargs):
--> 317 return super(NodeModel, self).save(using=using, **kwargs)
318
319 #alters_data
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/django/db/models/base.pyc in save(self, force_insert, force_update, using)
461 if force_insert and force_update:
462 raise ValueError("Cannot force both insert and updating in model saving.")
--> 463 self.save_base(using=using, force_insert=force_insert, force_update=force_update)
464
465 save.alters_data = True
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/base.pyc in save_base(self, raw, cls, origin, force_insert, force_update, using, *args, **kwargs)
331
332 is_new = self.id is None
--> 333 self._save_neo4j_node(using)
334 self._save_properties(self, self.__node, is_new)
335 self._save_neo4j_relationships(self, self.__node)
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/base.pyc in _save_neo4j_node(self, using)
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/base.pyc in trans_method(func, *args, **kw)
95 #TODO this is where generalized transaction support will go,
96 #when it's ready in neo4jrestclient
---> 97 ret = func(*args, **kw)
98 #tx.commit()
99 return ret
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/base.pyc in _save_neo4j_node(self, using)
359 self.__node = conn.gremlin_tx(script, types=type_hier_props,
360 indexName=self.index_name(),
--> 361 typesToIndex=type_names_to_index)
362 return self.__node
363
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/neo4jclient.pyc in gremlin_tx(self, script, **params)
177 will be wrapped in a transaction.
178 """
--> 179 return self.gremlin(script, tx=True, **params)
180
181 def cypher(self, query, **params):
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/neo4jclient.pyc in gremlin(self, script, tx, raw, **params)
166 try:
167 return send_script(include_unloaded_libraries(lib_script),
--> 168 params)
169 except LibraryCouldNotLoad:
170 if i == 0:
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/neo4jclient.pyc in send_script(s, params)
151 if raw:
152 execute_kwargs['returns'] = RETURNS_RAW
--> 153 script_rv = ext.execute_script(s, params=params, **execute_kwargs)
154 if isinstance(script_rv, basestring):
155 if LIBRARY_ERROR_REGEX.match(script_rv):
/Users/atomos/workspace/Project-Vitamin/src/neo4j-rest-client/neo4jrestclient/client.py in __call__(self, *args, **kwargs)
2313 except (ValueError, AttributeError, KeyError, TypeError):
2314 pass
-> 2315 raise StatusException(response.status_code, msg)
2316
2317 def __repr__(self):
StatusException: Code [400]: Bad Request. Bad request syntax or unsupported method.
Invalid data sent: javax.script.ScriptException: groovy.lang.MissingMethodException: No signature of method: groovy.lang.MissingMethodException.setMaxBufferSize() is applicable for argument types: () values: []
query:
In [9]: NeoProfile.objects.filter(profile_id=1234)
Out[9]: ---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/django/core/management/commands/shell.pyc in <module>()
----> 1 NeoProfile.objects.filter(profile_id=1234)
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/IPython/core/displayhook.pyc in __call__(self, result)
236 self.start_displayhook()
237 self.write_output_prompt()
--> 238 format_dict = self.compute_format_data(result)
239 self.write_format_data(format_dict)
240 self.update_user_ns(result)
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/IPython/core/displayhook.pyc in compute_format_data(self, result)
148 MIME type representation of the object.
149 """
--> 150 return self.shell.display_formatter.format(result)
151
152 def write_format_data(self, format_dict):
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/IPython/core/formatters.pyc in format(self, obj, include, exclude)
124 continue
125 try:
--> 126 data = formatter(obj)
127 except:
128 # FIXME: log the exception
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj)
445 type_pprinters=self.type_printers,
446 deferred_pprinters=self.deferred_printers)
--> 447 printer.pretty(obj)
448 printer.flush()
449 return stream.getvalue()
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
358 if callable(meth):
359 return meth(obj, self, cycle)
--> 360 return _default_pprint(obj, self, cycle)
361 finally:
362 self.end_group()
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
478 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
479 # A user-provided repr.
--> 480 p.text(repr(obj))
481 return
482 p.begin_group(1, '<')
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/django/db/models/query.pyc in __repr__(self)
70
71 def __repr__(self):
---> 72 data = list(self[:REPR_OUTPUT_SIZE + 1])
73 if len(data) > REPR_OUTPUT_SIZE:
74 data[-1] = "...(remaining elements truncated)..."
/Users/atomos/workspace/Project-Vitamin/lib/python2.7/site-packages/django/db/models/query.pyc in __len__(self)
85 self._result_cache = list(self.iterator())
86 elif self._iter:
---> 87 self._result_cache.extend(self._iter)
88 if self._prefetch_related_lookups and not self._prefetch_done:
89 self._prefetch_related_objects()
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/query.pyc in iterator(self)
1274 using = self.db
1275 if not self.query.can_filter():
-> 1276 for model in self.query.execute(using):
1277 yield model
1278 else:
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/query.pyc in execute(self, using)
1161 conn = connections[using]
1162
-> 1163 groovy, params = self.as_groovy(using)
1164
1165 raw_result_set = conn.gremlin_tx(groovy, **params) if groovy is not None else []
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/query.pyc in as_groovy(self, using)
925 # add the typeNodeId param, either for type verification or initial
926 # type tree traversal
--> 927 cypher_params['typeNodeId'] = self.model._type_node(using).id
928
929 type_restriction_expr = """
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/base.pyc in _type_node(cls, using)
411 return cls.__type_node_memoized(using)
412 else:
--> 413 return cls.__type_node_classmethod(using)
414
415 @classmethod
/Users/atomos/workspace/Project-Vitamin/src/neo4django/neo4django/db/models/base.pyc in __type_node(cls, using)
394 script_rv = conn.gremlin_tx(script, types=type_hier_props)
395 except Exception, e:
--> 396 raise RuntimeError(error_message, e)
397 if not hasattr(script_rv, 'properties'):
398 raise RuntimeError(error_message + '\n\n%s' % script_rv)
RuntimeError: ('The type node for class NeoProfile could not be created in the database.', StatusException())
My model is incredibly complex:
class NeoProfile(neomodels.NodeModel):
    profile_id = neomodels.IntegerProperty(indexed=True)
