Add random numbers to dask dataframe using data from the dataframe to set limits - dask

I would like to add random numbers to a dask dataframe that uses a column intensity of the original dataframe to set the limits of the random numbers for each row. The code works with pandas and numpy.random, but not with dask and dask.array.
import dask.array as da
import dask.dataframe as dd
from dask.distributed import Client
client = Client()
fns = [list-of-filenames]
df = dd.read_parquet(fns)
# dataframe has a column called intensity of type float
# and no missing values
df['separation_dimension_1'] = da.random.uniform(size=N, low=-noise_level/df.intensity, high=noise_level/df.intensity)
The error is:
ValueError: shape mismatch: objects cannot be broadcast to a single shape. Mismatch is between arg 0 with shape (0,) and arg 1 with shape (33276691,).
Seems the syntax of numpy.random.uniform is a bit different than dask_array.random.uniform?
Full traceback
Cell In[21], line 7
5 df['mz_'] = df.mz * 1000000000
6 df['rt_'] = df.scan_time*10
----> 7 df['separation_dimension_1'] = da.random.uniform(size=N, low=-noise_level/df.intensity, high=noise_level/df.intensity)
8 #df['separation_dimension_2'] = da.random.uniform(size=N, low=-noise_level/df.intensity, high=noise_level/df.intensity)
9 #df['separation_dimension_3'] = da.random.uniform(size=N, low=-noise_level/df.intensity, high=noise_level/df.intensity)
11 df = df[df.intensity > 1e5][['rt_', 'mz_', 'logint']]
File ~/miniconda3/envs/dask/lib/python3.9/site-packages/dask/array/random.py:465, in _make_api.<locals>.wrapper(*args, **kwargs)
462 if backend not in _cached_random_states:
463 # Cache the default RandomState object for this backend
464 _cached_random_states[backend] = RandomState()
--> 465 return getattr(
466 _cached_random_states[backend],
467 attr,
468 )(*args, **kwargs)
File ~/miniconda3/envs/dask/lib/python3.9/site-packages/dask/array/random.py:423, in RandomState.uniform(self, low, high, size, chunks, **kwargs)
421 #derived_from(np.random.RandomState, skipblocks=1)
422 def uniform(self, low=0.0, high=1.0, size=None, chunks="auto", **kwargs):
--> 423 return self._wrap("uniform", low, high, size=size, chunks=chunks, **kwargs)
File ~/miniconda3/envs/dask/lib/python3.9/site-packages/dask/array/random.py:170, in RandomState._wrap(self, funcname, size, chunks, extra_chunks, *args, **kwargs)
165 kwrg[k] = (getitem, lookup[k], slc)
166 vals.append(
167 (_apply_random, self._RandomState, funcname, seed, size, arg, kwrg)
168 )
--> 170 meta = _apply_random(
171 self._RandomState,
172 funcname,
173 seed,
174 (0,) * len(size),
175 small_args,
176 small_kwargs,
177 )
179 dsk.update(dict(zip(keys, vals)))
181 graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)
File ~/miniconda3/envs/dask/lib/python3.9/site-packages/dask/array/random.py:453, in _apply_random(RandomState, funcname, state_data, size, args, kwargs)
451 state = RandomState(state_data)
452 func = getattr(state, funcname)
--> 453 return func(*args, size=size, **kwargs)
File mtrand.pyx:1134, in numpy.random.mtrand.RandomState.uniform()
File _common.pyx:600, in numpy.random._common.cont()
File _common.pyx:517, in numpy.random._common.cont_broadcast_2()
File __init__.pxd:741, in numpy.PyArray_MultiIterNew3()
ValueError: shape mismatch: objects cannot be broadcast to a single shape. Mismatch is between arg 0 with shape (0,) and arg 1 with shape (6249365,).

As is often the case, you will be able to do this using map_partitions, which applies the operation you are after on each component real pandas dataframe
def op(df):
df['separation_dimension_1'] = np.random.uniform(size=N, low=-noise_level/df.intensity, high=noise_level/df.intensity)
return df
df2 = df.map_partitions(op)

Related

Dtype issue using transformer function from sklearn

I've been receiving the following error after running the following line:
transformer = preprocessing.FunctionTransformer(func=np.log1p, inverse_func=np.expm1)
scaler = preprocessing.StandardScaler()
X1_t = transformer.fit_transform(X_t)
Error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In [103], line 3
1 transformer = preprocessing.FunctionTransformer(func=np.log1p, inverse_func=np.expm1)
2 scaler = preprocessing.StandardScaler()
----> 3 X1_t = transformer.fit_transform(X_t)
4 X2_t = scaler.fit_transform(X1_t)
5 print(X2_t.shape)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sklearn/base.py:867, in TransformerMixin.fit_transform(self, X, y, **fit_params)
863 # non-optimized default implementation; override when a better
864 # method is possible for a given clustering algorithm
865 if y is None:
866 # fit method of arity 1 (unsupervised transformation)
--> 867 return self.fit(X, **fit_params).transform(X)
868 else:
869 # fit method of arity 2 (supervised transformation)
870 return self.fit(X, y, **fit_params).transform(X)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sklearn/preprocessing/_function_transformer.py:195, in FunctionTransformer.fit(self, X, y)
193 X = self._check_input(X, reset=True)
194 if self.check_inverse and not (self.func is None or self.inverse_func is None):
--> 195 self._check_inverse_transform(X)
196 return self
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sklearn/preprocessing/_function_transformer.py:160, in FunctionTransformer._check_inverse_transform(self, X)
157 idx_selected = slice(None, None, max(1, X.shape[0] // 100))
158 X_round_trip = self.inverse_transform(self.transform(X[idx_selected]))
--> 160 if not np.issubdtype(X.dtype, np.number):
161 raise ValueError(
162 "'check_inverse' is only supported when all the elements in `X` is"
163 " numerical."
164 )
166 if not _allclose_dense_sparse(X[idx_selected], X_round_trip):
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/generic.py:5575, in NDFrame.__getattr__(self, name)
5568 if (
5569 name not in self._internal_names_set
5570 and name not in self._metadata
5571 and name not in self._accessors
5572 and self._info_axis._can_hold_identifiers_and_holds_name(name)
5573 ):
5574 return self[name]
-> 5575 return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'dtype'
I was able to run this code before, but had to reinstall Jupyter notebook and when reinstalling and downloading all libraries, started getting this issue. My hypothesis is that it is related to combinations of versions of Jupyter + libraries (pandas, sklearn), but don't remember the versions I previously had.
Any idea?

Error using BayesSearchCV from skopt on RandomForestClassifier

this is the code to reproduce the error:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from scipy.stats import loguniform
from skopt import BayesSearchCV
from sklearn.datasets import load_iris
import numpy as np
X, y = load_iris(return_X_y=True)
grid = {
'LogisticRegression' : {
'C': loguniform.rvs(0.1, 10000, size = 50),
'solver': ['lbfgs','saga'],
'penalty': ['l2'],
'warm_start': [False, True],
'class_weight' : [None, 'balanced'],
'max_iter': [100, 1000],
'n_jobs': [ 10 ]
},
'RandomForestClassifier' : {
'n_estimators': np.random.randint(5, 200, size=10),
'criterion' : [ 'gini', 'entropy' ],
'max_depth' : np.random.randint(5, 50, size=10),
'min_samples_split': np.random.randint(5, 50, size=10),
'min_samples_leaf': np.random.randint(5, 50, size=10),
'max_features' : loguniform.rvs(0.2, 1.0, size=5),
'n_jobs' : [ 10 ]
}
}
tuner_params = {
'cv': 2,
'n_jobs': 10,
'scoring': 'roc_auc_ovr',
'return_train_score': True,
'refit': True,
'n_iter':3
}
clf = 'LogisticRegression'
search_cv = BayesSearchCV( estimator = eval(clf)(), search_spaces = grid[clf], **tuner_params)
search_cv.fit(X,y)
clf = 'RandomForestClassifier'
search_cv = BayesSearchCV( estimator = eval(clf)(), search_spaces = grid[clf], **tuner_params)
search_cv.fit(X,y)
Using BayesSearchCV on LogisticRegression as classifier gives no error, while using RandomForestClassifier it gives the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Input In [8], in <cell line: 2>()
1 search_cv = BayesSearchCV( estimator = eval(clf)(), search_spaces = grid[clf], **tuner_params)
----> 2 search_cv.fit(X,y)
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/searchcv.py:466, in BayesSearchCV.fit(self, X, y, groups, callback, **fit_params)
463 else:
464 self.optimizer_kwargs_ = dict(self.optimizer_kwargs)
--> 466 super().fit(X=X, y=y, groups=groups, **fit_params)
468 # BaseSearchCV never ranked train scores,
469 # but apparently we used to ship this (back-compat)
470 if self.return_train_score:
File ~/.conda/envs/meth/lib/python3.9/site-packages/sklearn/model_selection/_search.py:875, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
869 results = self._format_results(
870 all_candidate_params, n_splits, all_out, all_more_results
871 )
873 return results
--> 875 self._run_search(evaluate_candidates)
877 # multimetric is determined here because in the case of a callable
878 # self.scoring the return type is only known after calling
879 first_test_score = all_out[0]["test_scores"]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/searchcv.py:512, in BayesSearchCV._run_search(self, evaluate_candidates)
508 while n_iter > 0:
509 # when n_iter < n_points points left for evaluation
510 n_points_adjusted = min(n_iter, n_points)
--> 512 optim_result = self._step(
513 search_space, optimizer,
514 evaluate_candidates, n_points=n_points_adjusted
515 )
516 n_iter -= n_points
518 if eval_callbacks(callbacks, optim_result):
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/searchcv.py:400, in BayesSearchCV._step(self, search_space, optimizer, evaluate_candidates, n_points)
397 """Generate n_jobs parameters and evaluate them in parallel.
398 """
399 # get parameter values to evaluate
--> 400 params = optimizer.ask(n_points=n_points)
402 # convert parameters to python native types
403 params = [[np.array(v).item() for v in p] for p in params]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/optimizer/optimizer.py:395, in Optimizer.ask(self, n_points, strategy)
393 X = []
394 for i in range(n_points):
--> 395 x = opt.ask()
396 X.append(x)
398 ti_available = "ps" in self.acq_func and len(opt.yi) > 0
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/optimizer/optimizer.py:367, in Optimizer.ask(self, n_points, strategy)
336 """Query point or multiple points at which objective should be evaluated.
337
338 n_points : int or None, default: None
(...)
364
365 """
366 if n_points is None:
--> 367 return self._ask()
369 supported_strategies = ["cl_min", "cl_mean", "cl_max"]
371 if not (isinstance(n_points, int) and n_points > 0):
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/optimizer/optimizer.py:434, in Optimizer._ask(self)
430 if self._n_initial_points > 0 or self.base_estimator_ is None:
431 # this will not make a copy of `self.rng` and hence keep advancing
432 # our random state.
433 if self._initial_samples is None:
--> 434 return self.space.rvs(random_state=self.rng)[0]
435 else:
436 # The samples are evaluated starting form initial_samples[0]
437 return self._initial_samples[
438 len(self._initial_samples) - self._n_initial_points]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/space.py:900, in Space.rvs(self, n_samples, random_state)
897 columns = []
899 for dim in self.dimensions:
--> 900 columns.append(dim.rvs(n_samples=n_samples, random_state=rng))
902 # Transpose
903 return _transpose_list_array(columns)
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/space.py:698, in Categorical.rvs(self, n_samples, random_state)
696 return self.inverse_transform([(choices)])
697 elif self.transform_ == "normalize":
--> 698 return self.inverse_transform(list(choices))
699 else:
700 return [self.categories[c] for c in choices]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/space.py:685, in Categorical.inverse_transform(self, Xt)
680 """Inverse transform samples from the warped space back into the
681 original space.
682 """
683 # The concatenation of all transformed dimensions makes Xt to be
684 # of type float, hence the required cast back to int.
--> 685 inv_transform = super(Categorical, self).inverse_transform(Xt)
686 if isinstance(inv_transform, list):
687 inv_transform = np.array(inv_transform)
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/space.py:168, in Dimension.inverse_transform(self, Xt)
164 def inverse_transform(self, Xt):
165 """Inverse transform samples from the warped space back into the
166 original space.
167 """
--> 168 return self.transformer.inverse_transform(Xt)
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/transformers.py:309, in Pipeline.inverse_transform(self, X)
307 def inverse_transform(self, X):
308 for transformer in self.transformers[::-1]:
--> 309 X = transformer.inverse_transform(X)
310 return X
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/transformers.py:216, in LabelEncoder.inverse_transform(self, Xt)
214 else:
215 Xt = np.asarray(Xt)
--> 216 return [
217 self.inverse_mapping_[int(np.round(i))] for i in Xt
218 ]
File ~/.conda/envs/meth/lib/python3.9/site-packages/skopt/space/transformers.py:217, in <listcomp>(.0)
214 else:
215 Xt = np.asarray(Xt)
216 return [
--> 217 self.inverse_mapping_[int(np.round(i))] for i in Xt
218 ]
KeyError: 9
My versions:
python: 3.9.12
sklearn: 1.1.1
skopt: 0.9.0
The same error happen when using XGBClassifier or GradientBoostingClassifier, while there is no error using SVC or KNeighborsClassifier.
I believe that's related to how skopt encodes the hyperparameter space: it seems having identical points generated by your random lists are required to trigger the error, though sometimes it fits regardless. Either there are collisions or it makes the grid to be processed erroneously.
At least the issue stopped reproducing for me after changing all random lists to list(range(...)).
Might be worth a bug report.

Cleaning Data in CSV file for ML Model

I'm trying to clean my data in jupyterlab by watching several tutorials, but I keep getting one or the other error every time. So I thought I'd come on stack overflow and ask if someone can help me.
This is the csv file I want to clean: https://1drv.ms/u/s!AvOXB8kb-IHBgjaveis044GVoPpk
I'm building a machine learning model so I want to convert all the object values, but I don't know how to.
EDIT: I tried cleaning the data from scratch.
My code input:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
criminal_data = pd.read_csv('database2.csv')
X = criminal_data.drop(columns=['Agency Type', 'City', 'State',
'Crime Solved'])
y = criminal_data['City']
model = DecisionTreeClassifier()
model.fit(X, y)
criminal_data
The error message:
ValueError Traceback (most recent call
last)
<ipython-input-117-4b6968f9994f> in <module>
6 y = criminal_data['City']
7 model = DecisionTreeClassifier()
----> 8 model.fit(X, y)
9 criminal_data
~\anaconda3\lib\site-packages\sklearn\tree\_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
896 """
897
--> 898 super().fit(
899 X, y,
900 sample_weight=sample_weight,
~\anaconda3\lib\site-packages\sklearn\tree\_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
154 check_X_params = dict(dtype=DTYPE, accept_sparse="csc")
155 check_y_params = dict(ensure_2d=False, dtype=None)
--> 156 X, y = self._validate_data(X, y,
157 validate_separately=(check_X_params,
158 check_y_params))
~\anaconda3\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
428 # :(
429 check_X_params, check_y_params =
validate_separately
--> 430 X = check_array(X, **check_X_params)
431 y = check_array(y, **check_y_params)
432 else:
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in
check_array(array, accept_sparse, accept_large_sparse, dtype, order,
copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
614 array = array.astype(dtype, casting="unsafe",
copy=False)
615 else:
--> 616 array = np.asarray(array, order=order, dtype=dtype)
617 except ComplexWarning as complex_warning:
618 raise ValueError("Complex data not supported\n"
~\anaconda3\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order, like)
100 return _asarray_with_like(a, dtype=dtype, order=order,
like=like)
101
--> 102 return array(a, dtype, copy=False, order=order)
103
104
~\anaconda3\lib\site-packages\pandas\core\generic.py in __array__(self, dtype)
1897
1898 def __array__(self, dtype=None) -> np.ndarray:
-> 1899 return np.asarray(self._values, dtype=dtype)
1900
1901 def __array_wrap__(
~\anaconda3\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype,
order, like)
100 return _asarray_with_like(a, dtype=dtype, order=order,
like=like)
101
--> 102 return array(a, dtype, copy=False, order=order)
103
104
ValueError: could not convert string to float: 'Anchorage'
​
You are trying to train your model with some data that is not numerical. Before using the model, you need to do encoding. You can try LabelEncoder for that.
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
for column_name in X.columns:
if X[column_name].dtype == object:
X[column_name] = le.fit_transform(X[column_name])
else:
pass
If you have a combination of different data types in a row. Try below:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
for column_name in X.columns:
X[column_name] = X[column_name].replace(np.nan, 'none', regex=True)
X[column_name] = le.fit_transform(X[column_name].astype(str))

DeepChem GraphConvodel (GNN) training TypeError

I am a beginner to GNNs and I was trying out a code for predicting drug toxicity using DeepChem's Tox21 dataset. It is a dataset with a training set of 12 thousand compounds and test set of 650 compounds. I need in help in debugging and rectifying this error:"TypeError: 'NoneType' object is not subscriptable", which I get at the end.
Here is the code snippet:
model = GraphConvModel(len(tox21_tasks),
batch_size=32,
mode='classification')
print("Fitting the model")
model.fit(train_dataset, nb_epoch=10)
And here is my error:
TypeError Traceback (most recent call last)
<ipython-input-5-8088249b7fd6> in <module>
4 mode='classification')
5 print("Fitting the model")
----> 6 model.fit(train_dataset, nb_epoch=10)
~\anaconda3\lib\site-packages\deepchem\models\keras_model.py in fit(self, dataset, nb_epoch, max_checkpoints_to_keep, checkpoint_interval, deterministic, restore, variables, loss, callbacks, all_losses)
322 dataset, epochs=nb_epoch,
323 deterministic=deterministic), max_checkpoints_to_keep,
--> 324 checkpoint_interval, restore, variables, loss, callbacks, all_losses)
325
326 def fit_generator(self,
~\anaconda3\lib\site-packages\deepchem\models\keras_model.py in fit_generator(self, generator, max_checkpoints_to_keep, checkpoint_interval, restore, variables, loss, callbacks, all_losses)
407 inputs = inputs[0]
408
--> 409 batch_loss = apply_gradient_for_batch(inputs, labels, weights, loss)
410 current_step = self._global_step.numpy()
411
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
501 # This is the first call of __call__, so we have to initialize.
502 initializer_map = object_identity.ObjectIdentityDictionary()
--> 503 self._initialize(args, kwds, add_initializers_to=initializer_map)
504 finally:
505 # At this point we know that the initialization is complete (or less
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
406 self._concrete_stateful_fn = (
407 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 408 *args, **kwds))
409
410 def invalid_creator_scope(*unused_args, **unused_kwds):
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1846 if self.input_signature:
1847 args, kwargs = None, None
-> 1848 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1849 return graph_function
1850
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
2148 graph_function = self._function_cache.primary.get(cache_key, None)
2149 if graph_function is None:
-> 2150 graph_function = self._create_graph_function(args, kwargs)
2151 self._function_cache.primary[cache_key] = graph_function
2152 return graph_function, args, kwargs
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2039 arg_names=arg_names,
2040 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2041 capture_by_value=self._capture_by_value),
2042 self._function_attributes,
2043 # Tell the ConcreteFunction to clean up its graph once it goes out of
~\anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
913 converted_func)
914
--> 915 func_outputs = python_func(*func_args, **func_kwargs)
916
917 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds)
356 # __wrapped__ allows AutoGraph to swap in a converted function. We give
357 # the function a weak reference to itself to avoid a reference cycle.
--> 358 return weak_wrapped_fn().__wrapped__(*args, **kwds)
359 weak_wrapped_fn = weakref.ref(wrapped_fn)
360
~\anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py in wrapper(*args, **kwargs)
903 except Exception as e: # pylint:disable=broad-except
904 if hasattr(e, "ag_error_metadata"):
--> 905 raise e.ag_error_metadata.to_exception(e)
906 else:
907 raise
TypeError: in converted code:
relative to C:\Users\Madiha\anaconda3\lib\site-packages:
deepchem\models\keras_model.py:474 apply_gradient_for_batch *
grads = tape.gradient(batch_loss, vars)
tensorflow_core\python\eager\backprop.py:1014 gradient
unconnected_gradients=unconnected_gradients)
tensorflow_core\python\eager\imperative_grad.py:76 imperative_grad
compat.as_str(unconnected_gradients.value))
tensorflow_core\python\eager\backprop.py:138 _gradient_function
return grad_fn(mock_op, *out_grads)
tensorflow_core\python\ops\math_grad.py:455 _UnsortedSegmentMaxGrad
return _UnsortedSegmentMinOrMaxGrad(op, grad)
tensorflow_core\python\ops\math_grad.py:432 _UnsortedSegmentMinOrMaxGrad
_GatherDropNegatives(op.outputs[0], op.inputs[1])
TypeError: 'NoneType' object is not subscriptable
As an advise, check some examples on the DeepChem website. Here is a code which will work:
tasks, datasets, transformers = dc.molnet.load_tox21(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = datasets
model = dc.models.GraphConvModel(len(tasks),
batch_size=32,
mode='classification')
print("Fitting the model")
model.fit(train_dataset)
Hope is work for you!

TypeError: add(): argument 'other' (position 1) must be Tensor, not numpy.ndarray

I am testing a ResNet-34 trained_model using Pytorch and fastai on a linux system with the latest anaconda3. To run it as a batch job, I commented out the gui related lines. It started to run for a few hrs, then stopped in the Validation step, the error message is as below.
...
^M100%|█████████▉| 452/453 [1:07:07<00:08, 8.75s/it,
loss=1.23]^[[A^[[A^[[A
^MValidation: 0%| | 0/40 [00:00<?, ?it/s]^[[A^[[A^[[ATraceback
(most recent call last):
File "./resnet34_pretrained_PNG_nogui_2.py", line 279, in <module>
learner.fit(lr,1,callbacks=[f1_callback])
File "/project/6000192/jemmyhu/resnet_png/fastai/learner.py", line 302,
in fit
return self.fit_gen(self.model, self.data, layer_opt, n_cycle,
**kwargs)
File "/project/6000192/jemmyhu/resnet_png/fastai/learner.py", line 249,
in fit_gen
swa_eval_freq=swa_eval_freq, **kwargs)
File "/project/6000192/jemmyhu/resnet_png/fastai/model.py", line 162, in
fit
vals = validate(model_stepper, cur_data.val_dl, metrics, epoch,
seq_first=seq_first, validate_skip = validate_skip)
File "/project/6000192/jemmyhu/resnet_png/fastai/model.py", line 241, in
validate
res.append([to_np(f(datafy(preds), datafy(y))) for f in metrics])
File "/project/6000192/jemmyhu/resnet_png/fastai/model.py", line 241, in
<listcomp>
res.append([to_np(f(datafy(preds), datafy(y))) for f in metrics])
File "./resnet34_pretrained_PNG_nogui_2.py", line 237, in __call__
self.TP += (preds*targs).float().sum(dim=0)
TypeError: add(): argument 'other' (position 1) must be Tensor, not
numpy.ndarray
The link for the original code is
https://www.kaggle.com/iafoss/pretrained-resnet34-with-rgby-0-460-public-lb
lines 279 and 237 in my copy are shown below:
226 class F1:
227 __name__ = 'F1 macro'
228 def __init__(self,n=28):
229 self.n = n
230 self.TP = np.zeros(self.n)
231 self.FP = np.zeros(self.n)
232 self.FN = np.zeros(self.n)
233
234 def __call__(self,preds,targs,th=0.0):
235 preds = (preds > th).int()
236 targs = targs.int()
237 self.TP += (preds*targs).float().sum(dim=0)
238 self.FP += (preds > targs).float().sum(dim=0)
239 self.FN += (preds < targs).float().sum(dim=0)
240 score = (2.0*self.TP/(2.0*self.TP + self.FP + self.FN + 1e-6)).mean()
241 return score
276 lr = 0.5e-2
277 with warnings.catch_warnings():
278 warnings.simplefilter("ignore")
279 learner.fit(lr,1,callbacks=[f1_callback])
Could anyone have a clue for the issue?
Many thanks,
Jemmy
Ok, the error seems be for the latest pytorch-1.0.0, when degrade pytorch to pytorch-0.4.1, the code seems work (passed the error lines at this point). Still have no idea to make the code work with pytorch-1.0.0
I have had the same issue with this Kaggle kernel. My workarounds are the following:
1st option: In the F1 __call__ method convert preds and targs from pytorch tensors to numpy arrays;
2nd option: Initialise TP/FP/FN with pytorch tensors instead of numpy arrays, i.e. replace np.zeros(self.n) with torch.zeros(1, self.n).
Basically, the main idea - all variables should be of the same type.
Check your input data parameter.
Make sure it is in 123123 not in [123123]

Resources