Dask distributed LocalCluster fails with "TypeError: can't pickle _thread._local objects" when using dask.array.store to hdf5 file

I'm running on one machine with 16 cores and 64GB RAM and want to use dask with LocalCluster, since I need the profiling tool for optimization.
I set up the LocalCluster as explained here. Still it gives me the following error:
Traceback (most recent call last):
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/protocol/pickle.py", line 38, in dumps
result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
TypeError: can't pickle _thread._local objects
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/myusername/remote_code/trials/minimal_reproducible_example.py", line 61, in <module>
File "/home/myusername/remote_code/trials/minimal_reproducible_example.py", line 55, in create_matrix
da.store(w, d_set, dtype="float32")
File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/array/core.py", line 916, in store
File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/base.py", line 175, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/base.py", line 446, in compute
results = schedule(dsk, keys, **kwargs)
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/client.py", line 2499, in get
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/client.py", line 2426, in _graph_to_futures
"tasks": valmap(dumps_task, dsk3),
File "cytoolz/dicttoolz.pyx", line 179, in cytoolz.dicttoolz.valmap
File "cytoolz/dicttoolz.pyx", line 204, in cytoolz.dicttoolz.valmap
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/worker.py", line 3186, in dumps_task
return {"function": dumps_function(task[0]), "args": warn_dumps(task[1:])}
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/worker.py", line 3195, in warn_dumps
b = dumps(obj)
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/protocol/pickle.py", line 51, in dumps
return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
File "/data/myusername/anaconda3/lib/python3.7/site-packages/cloudpickle/cloudpickle.py", line 1108, in dumps
File "/data/myusername/anaconda3/lib/python3.7/site-packages/cloudpickle/cloudpickle.py", line 473, in dump
return Pickler.dump(self, obj)
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 437, in dump
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 786, in save_tuple
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 662, in save_reduce
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 524, in save
rv = reduce(self.proto)
TypeError: can't pickle _thread._local objects
As far as I know, I am using the latest versions of all the required packages:
python 3.7.3 with anaconda3 on ubuntu 18.04 LTS
dask: 2.3.0
distributed: 2.3.0
bokeh: 1.3.4
cytoolz: 0.10.0
h5py: 2.9.0
Here is the minimal reproducible example:
import os
import dask.array as da
import h5py
import numpy as np
from dask.distributed import Client
MY_USER_NAME = "myusername"
# Sphere radius used to scale central angles into distances
# (presumably kilometres -- TODO confirm against the original post).
EARTH_RADIUS = 6372.795
# NOTE(review): CHUNK_SIZE is referenced below but its definition was lost
# when the post was extracted; 5000 is an assumed placeholder block edge.
CHUNK_SIZE = 5000
N = 20000


def create_matrix():
    """Fill an N x N HDF5 dataset with pairwise great-circle distances.

    Random latitudes/longitudes are drawn for N points, an HDF5 dataset
    ``/data`` is created in ``matrix.hdf5`` under the user's home directory,
    and a dask array built over that dataset is mapped block-wise to the
    distance values and stored back into the same dataset.
    """
    lat_vec = np.random.random(N) * 90
    lon_vec = np.random.random(N) * 180
    lat_vec = np.radians(lat_vec)
    lon_vec = np.radians(lon_vec)
    sin_lat_vec = np.sin(lat_vec)
    cos_lat_vec = np.cos(lat_vec)

    def _blocked_calculate_great_circle_distance(block, block_info=None):
        """Return the distance sub-matrix for the block's array location.

        The contents of ``block`` are ignored; only ``block_info`` is used to
        find which row/column ranges of the full matrix this block covers.
        """
        loc = block_info[0]['array-location']
        (row_start, row_stop) = loc[0]
        (col_start, col_stop) = loc[1]
        # see https://en.wikipedia.org/wiki/Great-circle_distance
        # and https://github.com/ulope/geopy/blob/master/geopy/distance.py
        row_lon = lon_vec[row_start:row_stop]
        col_lon = lon_vec[col_start:col_stop]
        # Broadcasting a column vector against a row vector yields the full
        # (rows x cols) grid of longitude differences for this block.
        delta_lon = row_lon[:, np.newaxis] - col_lon
        cos_delta_lon = np.cos(delta_lon)
        central_angle = np.arccos(sin_lat_vec[row_start:row_stop, np.newaxis] * sin_lat_vec[col_start:col_stop] +
                                  cos_lat_vec[row_start:row_stop, np.newaxis] * cos_lat_vec[col_start:col_stop]
                                  * cos_delta_lon)
        return EARTH_RADIUS * central_angle

    dir_path = "/home/" + MY_USER_NAME + "/minimum_reproducible_example/"
    if not os.path.exists(dir_path):
        # NOTE(review): body lost in extraction; creating the directory is the
        # assumed original intent.
        os.makedirs(dir_path)
    file_path = os.path.join(dir_path, "matrix.hdf5")
    if os.path.exists(file_path):
        # NOTE(review): body lost in extraction; removing a stale file is the
        # assumed original intent (create_dataset below needs '/data' absent).
        os.remove(file_path)
    # No mode is passed, so this relies on the h5py default for the installed
    # version. NOTE(review): newer h5py releases default to read-only.
    with h5py.File(file_path) as f:
        d_set = f.create_dataset('/data', shape=(N, N), dtype='f4', fillvalue=0)
        w = da.from_array(d_set, chunks=(CHUNK_SIZE, CHUNK_SIZE))
        w = w.map_blocks(_blocked_calculate_great_circle_distance, chunks=(CHUNK_SIZE, CHUNK_SIZE), dtype='f4')
        # This is the call the traceback in the post points at.
        da.store(w, d_set, dtype="float32")


if __name__ == '__main__':
    client = Client(processes=False)
    # Restored from the traceback, which shows create_matrix being called at
    # module level; the line itself was lost in extraction.
    create_matrix()
Can anybody help me with this?


StyleGAN2 ADA PyTorch - Error message: assert C in [1, 3] AssertionError

I just started learning about StyleGAN2 ADA PyTorch and I'm trying to train a model on a customised dataset. I'm using Colab at the moment, but I'm facing this error and can't work out the reason for it:
Setting up augmentation...
Distributing across 1 GPUs...
Setting up training phases...
Exporting sample images...
Traceback (most recent call last):
File "train.py", line 582, in <module>
main() # pylint: disable=no-value-for-parameter
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/click/decorators.py", line 21, in new_func
return f(get_current_context(), *args, **kwargs)
File "train.py", line 575, in main
subprocess_fn(rank=0, args=args, temp_dir=temp_dir)
File "train.py", line 422, in subprocess_fn
training_loop.training_loop(rank=rank, **args)
File "/content/drive/MyDrive/colab-sg2-ada-pytorch/stylegan2-ada-pytorch/training/training_loop.py", line 227, in training_loop
save_image_grid(images, os.path.join(run_dir, 'reals.jpg'), drange=[0,255], grid_size=grid_size)
File "/content/drive/MyDrive/colab-sg2-ada-pytorch/stylegan2-ada-pytorch/training/training_loop.py", line 80, in save_image_grid
assert C in [1, 3]
This is my training Code
#required: definitely edit these!
dataset_path = '/content/drive/MyDrive/images.zip'
aug_strength = 0.0
train_count = 0
mirror_x = True
mirror_y = False
#optional: you might not need to edit these
gamma_value = 50.0
augs = 'bg'
config = '11gb-gpu'
snapshot_count = 4
!python train.py --gpus=1 --cfg=$config --metrics=None --outdir=./results --data=$dataset_path --snap=$snapshot_count --augpipe=$augs --initstrength=$aug_strength --gamma=$gamma_value --mirror=$mirror_x --mirrory=False --nkimg=$train_count
Any Ideas?
I tried to train on my dataset using StyleGAN2 ADA PyTorch, but I ran into an AssertionError.

Pytorch ResNet152 Model Not Predicting

I have a Pytorch resnet152 model, initialized with the following:
# Build a ResNet-152 with no arguments passed to the constructor.
# NOTE(review): the post mentions "resnet152_weights.pth", but no line that
# loads it is visible in this snippet -- it may have been lost in extraction.
model = torchvision.models.resnet152()
# Turn off gradient tracking for every parameter that exists at this point.
for parameter in model.parameters():
    parameter.requires_grad = False
# Replace the final layer with a new Linear(2048, 10); it is created after the
# loop above, so its parameters are not touched by it.
model.fc = torch.nn.Linear(2048, 10)
And "resnet152_weights.pth" contains the weights of the model, which is the exact same as torchvision.models.ResNet152_Weights.IMAGENET1K_V2. I downloaded it because my IDE (Pycharm) could not find the URL.
When I train my model, the line output = model(images) raises the following error:
Traceback (most recent call last):
File "deep_learning_model.py", line 184, in <module>
File "deep_learning_model.py", line 168, in main
model = train(model, 2)
File "deep_learning_model.py", line 141, in train
output = model(images)
File "torch\nn\modules\module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "torchvision\models\resnet.py", line 285, in forward
return self._forward_impl(x)
File "torchvision\models\resnet.py", line 268, in _forward_impl
x = self.conv1(x)
File "torch\nn\modules\module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "torch\nn\modules\conv.py", line 463, in forward
return self._conv_forward(input, self.weight, self.bias)
File "torch\nn\modules\conv.py", line 459, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: expected scalar type Byte but found Float
Can you please help me fix this bug (if you want me to send more code, please specify which block).

Neural Network Dense Layer Error in Shape attribute

I have created a feed-forward neural network, but it is giving a TypeError despite changing the datatype of the parameter. I am really new to Keras and machine learning, so I would appreciate as detailed help as possible. I am attaching the code snippet and the error log below. Code:
# Input width and number of output classes are read from the second axis of
# the training arrays.
num_of_features = X_train.shape[1]
nb_classes = Y_train.shape[1]


def baseline_model():
    """Build and compile the feed-forward classifier described in the post.

    Returns:
        The compiled Keras ``Model`` mapping ``main_input`` to a softmax
        output with ``nb_classes`` units.
    """

    def branch2(x):
        """Stack three sigmoid Dense layers, each followed by Dropout."""
        # Layer widths are derived from the feature count (50x, 20x, 1x).
        # This first Dense call is the line the traceback in the post points at.
        x = Dense(np.floor(num_of_features*50), activation='sigmoid')(x)
        x = Dropout(0.75)(x)
        x = Dense(np.floor(num_of_features*20), activation='sigmoid')(x)
        x = Dropout(0.5)(x)
        x = Dense(np.floor(num_of_features), activation='sigmoid')(x)
        x = Dropout(0.1)(x)
        return x

    main_input = Input(shape=(num_of_features,), name='main_input')
    x = main_input
    x = branch2(x)
    main_output = Dense(nb_classes, activation='softmax')(x)
    model = Model(input=main_input, output=main_output)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'categorical_crossentropy'])
    return model


model = baseline_model()
Traceback (most recent call last):
File "h2_fit_neural.py", line 143, in <module>
model = baseline_model()
File "h2_fit_neural.py", line 137, in baseline_model
x = branch2(x)
File "h2_fit_neural.py", line 124, in branch2
x = Dense(np.floor(num_of_features*50), activation='sigmoid')(x)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/engine/base_layer.py", line 432, in __call__
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/layers/core.py", line 872, in build
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/engine/base_layer.py", line 249, in add_weight
weight = K.variable(initializer(shape),
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/initializers.py", line 218, in __call__
dtype=dtype, seed=self.seed)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 4077, in random_uniform
dtype=dtype, seed=seed)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/random_ops.py", line 242, in random_uniform
rnd = gen_random_ops.random_uniform(shape, dtype, seed=seed1, seed2=seed2)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_random_ops.py", line 674, in random_uniform
File "/home/shashank/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 609, in _apply_op_helper
File "/home/shashank/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 60, in _SatisfiesTypeConstraint
", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'shape' has DataType float32 not in list of allowed values: int32, int64
Why are you using np.floor for the shape in your Dense layers? This will produce a float, you need an int there. Removing np.floor should solve your problem.

distributed.utils - ERROR - Existing exports of data: object cannot be re-sized

I am running a dask-scheduler on one node and my dask-worker is running on another node, and I submit a task to the dask-scheduler from a third node.
It sometimes throws:
distributed.utils - ERROR - Existing exports of data: object cannot be re-sized
I am using python 2.7, tornado 4.5.2, tensorflow 1.3.0
INFO:tensorflow:Restoring parameters from /home/mapr/mano/slim_data/flowers/model/inception/inception_v3.ckpt
distributed.utils - ERROR - Existing exports of data: object cannot be re-sized
Traceback (most recent call last):
File "/usr/lib/python2.7/site-packages/distributed/utils.py", line 238, in f
result[0] = yield make_coro()
File "/usr/lib64/python2.7/site-packages/tornado/gen.py", line 1055, in run
value = future.result()
File "/usr/lib64/python2.7/site-packages/tornado/concurrent.py", line 238, in result
File "/usr/lib64/python2.7/site-packages/tornado/gen.py", line 1063, in run
yielded = self.gen.throw(*exc_info)
File "/usr/lib/python2.7/site-packages/distributed/variable.py", line 179, in _get
File "/usr/lib64/python2.7/site-packages/tornado/gen.py", line 1055, in run
value = future.result()
File "/usr/lib64/python2.7/site-packages/tornado/concurrent.py", line 238, in result
File "/usr/lib64/python2.7/site-packages/tornado/gen.py", line 1063, in run
yielded = self.gen.throw(*exc_info)
File "/usr/lib/python2.7/site-packages/distributed/core.py", line 464, in send_recv_from_rpc
result = yield send_recv(comm=comm, op=key, **kwargs)
File "/usr/lib64/python2.7/site-packages/tornado/gen.py", line 1055, in run
value = future.result()
File "/usr/lib64/python2.7/site-packages/tornado/concurrent.py", line 238, in result
File "/usr/lib64/python2.7/site-packages/tornado/gen.py", line 1063, in run
yielded = self.gen.throw(*exc_info)
File "/usr/lib/python2.7/site-packages/distributed/core.py", line 348, in send_recv
yield comm.write(msg)
File "/usr/lib64/python2.7/site-packages/tornado/gen.py", line 1055, in run
value = future.result()
File "/usr/lib64/python2.7/site-packages/tornado/concurrent.py", line 238, in result
File "/usr/lib64/python2.7/site-packages/tornado/gen.py", line 1069, in run
yielded = self.gen.send(value)
File "/usr/lib/python2.7/site-packages/distributed/comm/tcp.py", line 218, in write
future = stream.write(frame)
File "/usr/lib64/python2.7/site-packages/tornado/iostream.py", line 406, in write
File "/usr/lib64/python2.7/site-packages/tornado/iostream.py", line 872, in _handle_write
del self._write_buffer[:self._write_buffer_pos]
BufferError: Existing exports of data: object cannot be re-sized
distributed.worker - WARNING - Compute Failed
Function: my_task
args: ({'upper': '1.4', 'trainable_scopes': 'InceptionV3/Logits,InceptionV3/AuxLogits', 'checkpoint_path': '/home/mapr/mano/slim_data/flowers/model/inception/inception_v3.ckpt', 'log_every_n_steps': '1', 'dataset_split_name': 'train', 'learning_rate': '0.01', 'train_dir': '/home/mapr/mano/slim_data/flowers/train_dir/train_outs_19', 'clone_on_cpu': 'True', 'batch_size': '32', 'resize_method': '3', 'hue_max_delta': '0.3', 'lower': '0.6', 'trace_every_n_steps': '1', 'script_name': 'train_image_classifier.py', 'checkpoint_exclude_scopes': 'InceptionV3/Logits,InceptionV3/AuxLogits', 'dataset_dir': '/home/mapr/mano/slim_data/flowers/slim_data_dir', 'max_number_of_steps': '4', 'model_name': 'inception_v3', 'dataset_name': 'flowers'})
kwargs: {}
Exception: BufferError('Existing exports of data: object cannot be re-sized',)
INFO:tensorflow:Starting Session.
INFO:tensorflow:Saving checkpoint to path /home/mapr/mano/slim_data/flowers/train_dir/train_outs_19/model.ckpt
INFO:tensorflow:Starting Queues.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:global step 1: loss = 2.6281 (19.799 sec/step)
INFO:tensorflow:Recording summary at step 1.
INFO:tensorflow:global step 2: loss = nan (7.406 sec/step)
INFO:tensorflow:global step 3: loss = nan (6.953 sec/step)
INFO:tensorflow:global step 4: loss = nan (6.840 sec/step)
INFO:tensorflow:Stopping Training.
INFO:tensorflow:Finished training! Saving model to disk.
I am pretty sure that this is related to dask.

Dimension mismatch error with scikit pipeline FeatureUnion

This is my first post. I've been trying to combine features with FeatureUnion and Pipeline, but when I add a tf-idf + SVD pipeline, the test fails with a 'dimension mismatch' error. My simple task is to create a regression model to predict search relevance. The code and errors are reported below. Is there something wrong in my code?
# Load and tokenize the data, then hold out 20% of the rows for testing.
df = read_tsv_data(input_file)
df = tokenize(df)
df_train, df_test = train_test_split(df, test_size = 0.2, random_state=2016)
x_train = df_train['sq'].values
y_train = df_train['relevance'].values
x_test = df_test['sq'].values
y_test = df_test['relevance'].values
# char ngrams
char_ngrams = CountVectorizer(ngram_range=(2,5), analyzer='char_wb', encoding='utf-8')
# TFIDF word ngrams
tfidf_word_ngrams = TfidfVectorizer(ngram_range=(1, 4), analyzer='word', encoding='utf-8')
svd = TruncatedSVD(n_components=100, random_state = 2016)
svr_lin = SVR(kernel='linear', C=0.01)
pipeline = Pipeline([
    # NOTE(review): the FeatureUnion wrapper line and the closing brackets were
    # lost when the post was extracted. The text after this snippet confirms a
    # FeatureUnion; the step name 'feature_union' is an assumed placeholder.
    ('feature_union', FeatureUnion(
        transformer_list = [
            ('char_ngrams', char_ngrams),
            ('char_ngrams_svd_pipeline', make_pipeline(char_ngrams, svd)),
            ('tfidf_word_ngrams', tfidf_word_ngrams),
            # The same `svd` object is passed to both SVD branches; this is
            # the entry the post reports as triggering the error.
            ('tfidf_word_ngrams_svd', make_pipeline(tfidf_word_ngrams, svd))
        ]
    )),
    ('svr_lin', svr_lin)
])
model = pipeline.fit(x_train, y_train)
y_pred = model.predict(x_test)
When adding the pipeline below to the FeatureUnion list:
('tfidf_word_ngrams_svd', make_pipeline(tfidf_word_ngrams, svd))
The exception below is generated:
2016-07-31 10:34:08,712 : Testing ... Test Shape: (400,) - Training Shape: (1600,)
Traceback (most recent call last):
File "src/model/end_to_end_pipeline.py", line 236, in <module>
File "src/model/end_to_end_pipeline.py", line 233, in main
process_data(input_file, output_file)
File "src/model/end_to_end_pipeline.py", line 175, in process_data
y_pred = model.predict(x_test)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/utils/metaestimators.py", line 37, in <lambda>
out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/pipeline.py", line 203, in predict
Xt = transform.transform(Xt)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/pipeline.py", line 523, in transform
for name, trans in self.transformer_list)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 800, in __call__
while self.dispatch_one_batch(iterator):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 658, in dispatch_one_batch
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 566, in _dispatch
job = ImmediateComputeBatch(batch)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 180, in __init__
self.results = batch()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 72, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/pipeline.py", line 399, in _transform_one
return transformer.transform(X)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/utils/metaestimators.py", line 37, in <lambda>
out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/pipeline.py", line 291, in transform
Xt = transform.transform(Xt)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/decomposition/truncated_svd.py", line 201, in transform
return safe_sparse_dot(X, self.components_.T)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/utils/extmath.py", line 179, in safe_sparse_dot
ret = a * b
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scipy/sparse/base.py", line 389, in __mul__
raise ValueError('dimension mismatch')
ValueError: dimension mismatch
What if you change the second svd usage to a new svd instance?
transformer_list = [
    ('char_ngrams', char_ngrams),
    ('char_ngrams_svd_pipeline', make_pipeline(char_ngrams, svd)),
    ('tfidf_word_ngrams', tfidf_word_ngrams),
    # clone(svd) gives this branch its own separate TruncatedSVD instead of
    # sharing the `svd` instance used by the char-ngram branch above.
    # `clone` is presumably sklearn.base.clone -- confirm the import.
    ('tfidf_word_ngrams_svd', make_pipeline(tfidf_word_ngrams, clone(svd)))
]
It seems your problem occurs because you're using the same object twice. It is fitted the first time on CountVectorizer's output and the second time on TfidfVectorizer's output (or vice versa), and after you call predict on the whole pipeline this svd object cannot understand the output of CountVectorizer, because it was last fitted on TfidfVectorizer's output (or, again, vice versa).
