Dataflow: Override the Coder used by Shuffler - google-cloud-dataflow

We're using Dataflow to run a set of operations over Python objects. Because our objects aren't pickle-able, we're using the DillCoder, which generally works well. We can either override the _fallback_coder of the coder registry or supply the coder to the Reader or Writer - either works.
However, when our objects need to be shuffled, we get an error from the PickleCoder that our object can't be pickled.
I think the Dataflow Worker Shuffler doesn't take the coder registry into account and instead always uses the PickleCoder. Is that right? Is there a way to override that?
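For reference, a third route we've considered is registering the coder against a concrete type, so that type-hint-driven coder lookup picks it up. A minimal, untested sketch - MyObject is a hypothetical stand-in for our un-pickle-able class:
import apache_beam as beam
from apache_beam.coders.coders import DillCoder

class MyObject(object):
    # hypothetical stand-in for our un-pickle-able class
    pass

# register_coder takes the coder class, not an instance; coder lookup is
# driven by type hints, so steps producing MyObject values need matching
# output type hints, e.g. .with_output_types(beam.typehints.KV[int, MyObject])
# ahead of a GroupByKey
beam.coders.registry.register_coder(MyObject, DillCoder)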
Full stack trace below. Job-ID is 2018-01-25_15_21_50-3249437741466877997.
Thanks
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/batchworker.py", line 582, in do_work
work_executor.execute()
File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/executor.py", line 167, in execute
op.start()
File "dataflow_worker/shuffle_operations.py", line 49, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
def start(self):
File "dataflow_worker/shuffle_operations.py", line 50, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
with self.scoped_start_state:
File "dataflow_worker/shuffle_operations.py", line 65, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
with self.shuffle_source.reader() as reader:
File "dataflow_worker/shuffle_operations.py", line 69, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
self.output(windowed_value)
File "apache_beam/runners/worker/operations.py", line 154, in apache_beam.runners.worker.operations.Operation.output
cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 86, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "dataflow_worker/shuffle_operations.py", line 233, in dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process
self.output(wvalue.with_value((k, wvalue.value)))
File "apache_beam/runners/worker/operations.py", line 154, in apache_beam.runners.worker.operations.Operation.output
cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 86, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 339, in apache_beam.runners.worker.operations.DoOperation.process
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 340, in apache_beam.runners.worker.operations.DoOperation.process
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 382, in apache_beam.runners.common.DoFnRunner.receive
self.process(windowed_value)
File "apache_beam/runners/common.py", line 390, in apache_beam.runners.common.DoFnRunner.process
self._reraise_augmented(exn)
File "apache_beam/runners/common.py", line 415, in apache_beam.runners.common.DoFnRunner._reraise_augmented
raise
File "apache_beam/runners/common.py", line 388, in apache_beam.runners.common.DoFnRunner.process
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 281, in apache_beam.runners.common.PerWindowInvoker.invoke_process
self._invoke_per_window(windowed_value)
File "apache_beam/runners/common.py", line 306, in apache_beam.runners.common.PerWindowInvoker._invoke_per_window
self.output_processor.process_outputs(
File "apache_beam/runners/common.py", line 480, in apache_beam.runners.common._OutputProcessor.process_outputs
self.main_receivers.receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 86, in apache_beam.runners.worker.operations.ConsumerSet.receive
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 339, in apache_beam.runners.worker.operations.DoOperation.process
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 340, in apache_beam.runners.worker.operations.DoOperation.process
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 382, in apache_beam.runners.common.DoFnRunner.receive
self.process(windowed_value)
File "apache_beam/runners/common.py", line 390, in apache_beam.runners.common.DoFnRunner.process
self._reraise_augmented(exn)
File "apache_beam/runners/common.py", line 431, in apache_beam.runners.common.DoFnRunner._reraise_augmented
raise new_exn, None, original_traceback
File "apache_beam/runners/common.py", line 388, in apache_beam.runners.common.DoFnRunner.process
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 189, in apache_beam.runners.common.SimpleInvoker.invoke_process
self.output_processor.process_outputs(
File "apache_beam/runners/common.py", line 480, in apache_beam.runners.common._OutputProcessor.process_outputs
self.main_receivers.receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 84, in apache_beam.runners.worker.operations.ConsumerSet.receive
self.update_counters_start(windowed_value)
File "apache_beam/runners/worker/operations.py", line 90, in apache_beam.runners.worker.operations.ConsumerSet.update_counters_start
self.opcounter.update_from(windowed_value)
File "apache_beam/runners/worker/opcounters.py", line 63, in apache_beam.runners.worker.opcounters.OperationCounters.update_from
self.do_sample(windowed_value)
File "apache_beam/runners/worker/opcounters.py", line 81, in apache_beam.runners.worker.opcounters.OperationCounters.do_sample
self.coder_impl.get_estimated_size_and_observables(windowed_value))
File "apache_beam/coders/coder_impl.py", line 730, in apache_beam.coders.coder_impl.WindowedValueCoderImpl.get_estimated_size_and_observables
def get_estimated_size_and_observables(self, value, nested=False):
File "apache_beam/coders/coder_impl.py", line 739, in apache_beam.coders.coder_impl.WindowedValueCoderImpl.get_estimated_size_and_observables
self._value_coder.get_estimated_size_and_observables(
File "apache_beam/coders/coder_impl.py", line 260, in apache_beam.coders.coder_impl.FastPrimitivesCoderImpl.get_estimated_size_and_observables
self.encode_to_stream(value, out, nested)
File "apache_beam/coders/coder_impl.py", line 298, in apache_beam.coders.coder_impl.FastPrimitivesCoderImpl.encode_to_stream
self.fallback_coder_impl.encode_to_stream(value, stream, nested)
File "apache_beam/coders/coder_impl.py", line 154, in apache_beam.coders.coder_impl.CallbackCoderImpl.encode_to_stream
return stream.write(self._encoder(value), nested)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/coders/coders.py", line 497, in <lambda>
lambda x: dumps(x, HIGHEST_PROTOCOL), pickle.loads)
PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed [while running 'run reversion/FlatMap(run_reversion)']
EDIT:
Here's a reproducible example. Job-ID was 2018-01-30_11_53_22-14709945294406059840
import zlib
import argparse
from itertools import chain

import apache_beam as beam
import apache_beam.coders.coders as coders
from apache_beam.coders.coders import (DillCoder, coder_impl, maybe_dill_dumps,
                                       maybe_dill_loads)
from apache_beam.coders.typecoders import CoderRegistry, FirstOf
from apache_beam.options.pipeline_options import PipelineOptions
class ObjectCoder(DillCoder):
    """
    Coder that allows multi-line pickles to be read, and compresses the output.

    After an object is pickled, the bytes are encoded as `unicode_escape`,
    meaning newline characters (`\n`) aren't in the string.
    # but now we're doing compression, do we need to do the newlines?

    Previously, the presence of newline characters confused the Dataflow
    reader, as it couldn't discriminate between a new object and a new line
    within a pickle string.
    """

    def _create_impl(self):
        return coder_impl.CallbackCoderImpl(
            dill_compress_dumps, dill_compress_loads)
def dill_compress_dumps(obj):
    # in Py3 this needs to be `unicode_escape`
    return zlib.compress(maybe_dill_dumps(obj)).encode('string_escape')


def dill_compress_loads(data):
    # in Py3 this needs to be `unicode_escape`
    decoded = data.decode('string_escape')
    try:
        decompressed = zlib.decompress(decoded)
    except zlib.error:
        decompressed = data
    return maybe_dill_loads(decompressed)
def add_obj_to_coder_registry(coder_registry):
    """
    Add Dill to the middle of the coder registry - after the deterministic
    coders but before the Pickle coder.

    Use like: `add_obj_to_coder_registry(beam.coders.registry)`
    """
    assert isinstance(coder_registry, CoderRegistry)
    primitives_coder = coders.FastPrimitivesCoder(fallback_coder=ObjectCoder())
    # https://github.com/apache/beam/blob/master/sdks/python/apache_beam/coders/typecoders.py#L93  # noqa
    fallback_coders = [coders.ProtoCoder, primitives_coder]
    coder_registry._fallback_coder = FirstOf(fallback_coders)
known_args, unknown_args = argparse.ArgumentParser().parse_known_args()

default_options = dict(
    runner='DataflowRunner',
    project='PROJECT_ID',
    temp_location='gs://BUCKET_NAME/dataflow/temp/',
    staging_location='gs://BUCKET_NAME/dataflow/staging/',
    max_num_workers='100')
# from https://github.com/apache/incubator-airflow/blob/master/airflow/contrib/hooks/gcp_dataflow_hook.py  # noqa
default_options_args = ['--{}={}'.format(attr, value)
                        for attr, value in default_options.items()]
default_args = chain(
    default_options_args,
    ['--save_main_session'])
args = chain(unknown_args, default_args)

add_obj_to_coder_registry(beam.coders.typecoders.registry)
def produce_unpicklable():
    def double(x):
        return x * 2
    return double

p = beam.Pipeline(options=PipelineOptions(list(args)))
items = p | beam.Create([x for x in range(10)])
unpickleable = items | beam.Map(lambda x: (x, produce_unpicklable()))
shuffle = unpickleable | 'use shuffler' >> beam.GroupByKey()
r = p.run()
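To confirm the objects themselves are the issue: under Python 2, the nested function that dill handles is exactly what pickle rejects. A quick standalone check:
import dill
import pickle

def produce_unpicklable():
    def double(x):
        return x * 2
    return double

fn = produce_unpicklable()
dill.dumps(fn)    # works: dill serialises nested functions
pickle.dumps(fn)  # raises PicklingError: Can't pickle <type 'function'>:
                  # attribute lookup __builtin__.function failed (as in the job)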

Related

How to upgrade Docker Odoo CE to EE

I'm trying to upgrade my local Odoo CE 14.0 to Enterprise. I followed this video tutorial https://www.youtube.com/watch?v=-eCHJAq1QdY and the official steps https://www.odoo.com/documentation/14.0/administration/maintain/enterprise.html
But when I try to install the web_enterprise module I get this error:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/odoo/addons/base/models/ir_http.py", line 237, in _dispatch
result = request.dispatch()
File "/usr/lib/python3/dist-packages/odoo/http.py", line 683, in dispatch
result = self._call_function(**self.params)
File "/usr/lib/python3/dist-packages/odoo/http.py", line 359, in _call_function
return checked_call(self.db, *args, **kwargs)
File "/usr/lib/python3/dist-packages/odoo/service/model.py", line 94, in wrapper
return f(dbname, *args, **kwargs)
File "/usr/lib/python3/dist-packages/odoo/http.py", line 347, in checked_call
result = self.endpoint(*a, **kw)
File "/usr/lib/python3/dist-packages/odoo/http.py", line 912, in __call__
return self.method(*args, **kw)
File "/usr/lib/python3/dist-packages/odoo/http.py", line 531, in response_wrap
response = f(*args, **kw)
File "/usr/lib/python3/dist-packages/odoo/addons/web/controllers/main.py", line 1398, in call_button
action = self._call_kw(model, method, args, kwargs)
File "/usr/lib/python3/dist-packages/odoo/addons/web/controllers/main.py", line 1386, in _call_kw
return call_kw(request.env[model], method, args, kwargs)
File "/usr/lib/python3/dist-packages/odoo/api.py", line 399, in call_kw
result = _call_kw_multi(method, model, args, kwargs)
File "/usr/lib/python3/dist-packages/odoo/api.py", line 386, in _call_kw_multi
result = method(recs, *args, **kwargs)
File "<decorator-gen-71>", line 2, in button_immediate_install
File "/usr/lib/python3/dist-packages/odoo/addons/base/models/ir_module.py", line 74, in check_and_log
return method(self, *args, **kwargs)
File "/usr/lib/python3/dist-packages/odoo/addons/base/models/ir_module.py", line 475, in button_immediate_install
return self._button_immediate_function(type(self).button_install)
File "/usr/lib/python3/dist-packages/odoo/addons/base/models/ir_module.py", line 593, in _button_immediate_function
modules.registry.Registry.new(self._cr.dbname, update_module=True)
File "/usr/lib/python3/dist-packages/odoo/modules/registry.py", line 89, in new
odoo.modules.load_modules(registry._db, force_demo, status, update_module)
File "/usr/lib/python3/dist-packages/odoo/modules/loading.py", line 461, in load_modules
loaded_modules, update_module, models_to_check)
File "/usr/lib/python3/dist-packages/odoo/modules/loading.py", line 349, in load_marked_modules
perform_checks=perform_checks, models_to_check=models_to_check
File "/usr/lib/python3/dist-packages/odoo/modules/loading.py", line 198, in load_module_graph
registry.setup_models(cr)
File "/usr/lib/python3/dist-packages/odoo/modules/registry.py", line 276, in setup_models
model._setup_fields()
File "/usr/lib/python3/dist-packages/odoo/models.py", line 2845, in _setup_fields
field.setup_full(self)
File "/usr/lib/python3/dist-packages/odoo/fields.py", line 401, in setup_full
self._setup_related_full(model)
File "/usr/lib/python3/dist-packages/odoo/fields.py", line 458, in _setup_related_full
field = model.pool[model_name]._fields[name]
Exception
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/odoo/http.py", line 639, in _handle_exception
return super(JsonRequest, self)._handle_exception(exception)
File "/usr/lib/python3/dist-packages/odoo/http.py", line 315, in _handle_exception
raise exception.with_traceback(None) from new_cause
KeyError: 'avatar_128'
odoo.conf:
addons_path = /usr/lib/python3/dist-packages/odoo/addons,/mnt/extra-addons/enterprise,/mnt/extra-addons/custom
Could anybody help me, please?

AWS cdk running locally using sam

I have a CDK app for which I generate a template.yml file using:
cdk synth --no-staging > ./template.yml
and then run SAM locally:
sam local start-api
but I get the following error:
Traceback (most recent call last):
File "/usr/local/bin/sam", line 8, in <module>
sys.exit(cli())
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/click/decorators.py", line 73, in new_func
return ctx.invoke(f, obj, *args, **kwargs)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/lib/telemetry/metric.py", line 166, in wrapped
raise exception # pylint: disable=raising-bad-type
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/lib/telemetry/metric.py", line 124, in wrapped
return_value = func(*args, **kwargs)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/lib/utils/version_checker.py", line 41, in wrapped
actual_result = func(*args, **kwargs)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/cli/main.py", line 87, in wrapper
return func(*args, **kwargs)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/commands/local/start_api/cli.py", line 94, in cli
do_cli(
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/commands/local/start_api/cli.py", line 192, in do_cli
service = LocalApiService(lambda_invoke_context=invoke_context, port=port, host=host, static_dir=static_dir)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/commands/local/lib/local_api_service.py", line 37, in __init__
self.api_provider = ApiProvider(lambda_invoke_context.stacks, cwd=self.cwd)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/lib/providers/api_provider.py", line 37, in __init__
self.api = self._extract_api()
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/lib/providers/api_provider.py", line 64, in _extract_api
provider.extract_resources(self.stacks, collector, cwd=self.cwd)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/lib/providers/cfn_api_provider.py", line 73, in extract_resources
self._extract_cfn_gateway_v2_route(stack.stack_path, resources, logical_id, resource, collector)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/lib/providers/cfn_api_provider.py", line 315, in _extract_cfn_gateway_v2_route
method, path = self._parse_route_key(route_key)
File "/usr/local/Cellar/aws-sam-cli/1.37.0/libexec/lib/python3.8/site-packages/samcli/lib/providers/cfn_api_provider.py", line 488, in _parse_route_key
[method, path] = route_key.split()
ValueError: not enough values to unpack (expected 2, got 1)
Any ideas what the issue is?
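The failing line, _parse_route_key, expects an API Gateway v2 route key of the form "METHOD /path", so a one-token route key such as the catch-all $default (which HTTP APIs support) would trip it. A minimal sketch of that failure:
# what samcli's _parse_route_key effectively does
route_key = "GET /items"
method, path = route_key.split()   # fine: two tokens

route_key = "$default"             # catch-all route on an HTTP API
method, path = route_key.split()   # ValueError: not enough values to unpack (expected 2, got 1)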

Running papermill I get a FileNotFound Error

I'm running papermill 2.1.0 in a newly created virtual environment on macOS.
When I execute a simple notebook, I get a FileNotFoundError referring to an old file that no longer exists.
papermill 59848931.ipynb 59848931-out.ipynb
I get the following:
Input Notebook: 59848931.ipynb
Output Notebook: 59848931-out.ipynb
Executing: 0%| | 0/2 [00:00<?, ?cell/s]Failed to run command:
['/Users/user/Documents/Development/python/virtual_environments/udemy_tensorflow_venv/bin/python3', '-m', 'ipykernel_launcher', '-f', '/var/folders/p2/jh8vcbv51ks2gzvfx3dw1bd000_wjb/T/tmp3cf56dkh.json', '--HistoryManager.hist_file=:memory:']
PATH='/Users/user/Documents/notebooks/venv/bin:/Users/user/google-cloud-sdk/bin:/Users/user/Downloads/google-cloud-sdk/bin:/anaconda3/bin:/anaconda/bin:/Users/user/homebrew/bin:/Users/user/bin:/Library/Frameworks/R.framework/Versions/Current/Resources/:/usr/local/git/current/bin:/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:/usr/local/go/bin:/usr/local/share/dotnet:/opt/X11/bin:~/.dotnet/tools:/Applications/Wireshark.app/Contents/MacOS:/Users/user/Documents/Development'
with kwargs:
{'stdin': -1, 'stdout': None, 'stderr': None, 'cwd': None, 'start_new_session': True}
Executing: 0%| | 0/2 [00:01<?, ?cell/s]
Traceback (most recent call last):
File "/Users/user/Documents/notebooks/venv/bin/papermill", line 10, in <module>
sys.exit(papermill())
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/papermill/cli.py", line 235, in papermill
execution_timeout=execution_timeout,
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/papermill/execute.py", line 104, in execute_notebook
**engine_kwargs
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/papermill/engines.py", line 49, in execute_notebook_with_engine
return self.get_engine(engine_name).execute_notebook(nb, kernel_name, **kwargs)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/papermill/engines.py", line 343, in execute_notebook
cls.execute_managed_notebook(nb_man, kernel_name, log_output=log_output, **kwargs)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/papermill/engines.py", line 402, in execute_managed_notebook
return PapermillNotebookClient(nb_man, **final_kwargs).execute()
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/papermill/clientwrap.py", line 36, in execute
with self.setup_kernel(**kwargs):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/nbclient/client.py", line 404, in setup_kernel
self.start_new_kernel_client(**kwargs)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/nbclient/util.py", line 37, in wrapped
result = loop.run_until_complete(coro(self, *args, **kwargs))
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 467, in run_until_complete
return future.result()
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/nbclient/client.py", line 375, in async_start_new_kernel_client
await ensure_async(self.km.start_kernel(extra_arguments=self.extra_arguments, **kwargs))
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/nbclient/util.py", line 57, in ensure_async
result = await obj
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/jupyter_client/manager.py", line 542, in start_kernel
self.kernel = await self._launch_kernel(kernel_cmd, **kw)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/jupyter_client/manager.py", line 523, in _launch_kernel
res = launch_kernel(kernel_cmd, **kw)
File "/Users/user/Documents/notebooks/venv/lib/python3.6/site-packages/jupyter_client/launcher.py", line 135, in launch_kernel
proc = Popen(cmd, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/subprocess.py", line 709, in __init__
restore_signals, start_new_session)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/subprocess.py", line 1344, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: '/Users/user/Documents/Development/python/virtual_environments/udemy_tensorflow_venv/bin/python3': '/Users/user/Documents/Development/python/virtual_environments/udemy_tensorflow_venv/bin/python3'
I reinstalled Python 3, papermill, and virtualenv - same issue.
I also reinstalled my IPython kernel:
python3 -m pip install ipykernel
python3 -m ipykernel install --user
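The stale path in the error is the interpreter recorded in a kernelspec's kernel.json, so it's worth checking which kernelspec still points at the old venv - a sketch using jupyter_client's kernelspec listing:
import json
import subprocess

# list installed kernelspecs and the interpreter each one launches
out = subprocess.check_output(['jupyter', 'kernelspec', 'list', '--json'])
for name, info in json.loads(out)['kernelspecs'].items():
    print(name, '->', info['spec']['argv'][0])

# a kernelspec whose argv[0] is the old udemy_tensorflow_venv path would
# explain the FileNotFoundError; `python3 -m ipykernel install --user` run
# from the new venv overwrites it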

Python error when shapefile reading PySal

I'm currently working with the PySAL library, using the queen_from_shapefile() function. Python returns an error for some shapefiles and works perfectly for the others. All shapefiles were created in the same way; they are area shapefiles.
Here is the error:
Traceback (most recent call last):
File "<pyshell#7>", line 1, in <module>
graph(1850,117)
File "C:\Users\jbeverag\Desktop\graph_queen_fonction.py", line 37, in graph
qW = ps.queen_from_shapefile(str(planche)+".shp")
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\weights\user.py", line 67, in queen_from_shapefile
w = Queen.from_shapefile(shapefile, idVariable=idVariable)
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\weights\Contiguity.py", line 255, in from_shapefile
w = cls(iterable, ids=ids, id_order=id_order, **kwargs)
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\weights\Contiguity.py", line 199, in __init__
criterion=criterion, method=method)
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\weights\Contiguity.py", line 383, in _build
neighbor_data = ContiguityWeightsPolygons(polygons, wttype=wttype).w
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\weights\_contW_binning.py", line 68, in __init__
self.do_weights()
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\weights\_contW_binning.py", line 98, in do_weights
shpObj = self.collection[i]
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\core\FileIO.py", line 162, in __getitem__
return self.by_row.__getitem__(key)
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\core\FileIO.py", line 145, in __getitem__
return self.p.get(key)
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\core\FileIO.py", line 269, in get
obj = self.__read()
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\core\FileIO.py", line 312, in __read
row = self._read()
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\core\IOHandlers\pyShpIO.py", line 142, in _read
rec = self.dataObj.get_shape(self.pos)
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\core\util\shapefile.py", line 362, in get_shape
return self.shape.unpack(bufferIO(self.fileObj.read(byts)))
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\core\util\shapefile.py", line 633, in unpack
record = _unpackDict(cls.USTRUCT, dat)
File "C:\Users\jbeverag\AppData\Local\Programs\Python\Python36\lib\site-packages\pysal\core\util\shapefile.py", line 136, in _unpackDict
fileObj.read(struct['size']))
struct.error: unpack requires a buffer of 44 bytes
Thanks for your help,
Lacafed
EDIT: Rebuilding the shp file fixed the problem, but I don't know what the problem's origin was.
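A struct.error like this usually means a record's on-disk payload is shorter than its header claims. A sketch to locate the offending record before building weights, assuming the PySAL 1.x file API:
import pysal as ps

shp = ps.open("planche.shp")   # hypothetical filename
count = 0
try:
    for shape in shp:          # read records one at a time
        count += 1
except Exception as exc:       # struct.error surfaces here for a truncated record
    print("record", count, "is unreadable:", exc)
shp.close()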

Load dask dataframe from CSV lazily (inside delayed)

While using dask.distributed, I'm trying to load a dask dataframe from a CSV on S3 inside a delayed function, like this:
@delayed
def func1():
    ...
    return df.read_csv(s3_url, ...)
read_csv() does not need interaction with the distributed client, so I assumed this would be possible. Then on the client machine I compute the delayed object returned by func1:
res = func1()
future = client.compute(res)
progress(future)
frame = client.gather(future)
Up to that point it looks good; printing the result gives:
Dask DataFrame Structure:
                 COL1    COL2
npartitions=9
               object  object
                  ...     ...
...               ...     ...
                  ...     ...
                  ...     ...
Dask Name: from-delayed, 27 tasks
However, it fails with Failed to serialize (<dask.bytes.core.OpenFile object at ...>, ..., ..., '\n'). Exception: can't pickle thread.lock objects when I try to process it further, e.g.
client.compute(frame)
Is there a way to get this scheme to work, or am I missing some more fundamental limitation here?
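One pattern that avoids shipping a dask graph as a pickled value is to have each delayed task return a concrete pandas frame and assemble them on the client with dd.from_delayed - a sketch with hypothetical URLs (reading from S3 via pandas needs s3fs):
import pandas as pd
import dask.dataframe as dd
from dask import delayed

@delayed
def load_one(url):
    # return a concrete pandas DataFrame, not another lazy dask graph
    return pd.read_csv(url)

urls = ['s3://bucket/part-0.csv', 's3://bucket/part-1.csv']  # hypothetical
frame = dd.from_delayed([load_one(u) for u in urls])
future = client.compute(frame)   # client as in the question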
P.S. The error log I'm getting:
.pickle - Failed to serialize (<dask.bytes.core.OpenFile object at ...>, 20971520, 10485760, '\n'). Exception: can't pickle thread.lock objects
ERROR:2017-11-10 15:31:31:root:Exception while executing graph: can't pickle thread.lock objects
Traceback (most recent call last):
...
client.compute(res.data)
File ".../python2.7/site-packages/distributed/client.py", line 2089, in compute
resources=resources)
File ".../python2.7/site-packages/distributed/client.py", line 1906, in _graph_to_futures
'tasks': valmap(dumps_task, dsk3),
File ".../python2.7/site-packages/toolz-0.8.2-py2.7.egg/toolz/dicttoolz.py", line 84, in valmap
rv.update(zip(iterkeys(d), map(func, itervalues(d))))
File ".../python2.7/site-packages/distributed/worker.py", line 731, in dumps_task
'args': pickle.dumps(task[1:])}
File ".../python2.7/site-packages/distributed/protocol/pickle.py", line 51, in dumps
return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
File ".../python2.7/site-packages/cloudpickle/cloudpickle.py", line 829, in dumps
cp.dump(obj)
File ".../python2.7/site-packages/cloudpickle/cloudpickle.py", line 233, in dump
return Pickler.dump(self, obj)
File "...python2.7/pickle.py", line 224, in dump
self.save(obj)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "...python2.7/pickle.py", line 568, in save_tuple
save(element)
File "...python2.7/pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File ".../python2.7/site-packages/cloudpickle/cloudpickle.py", line 727, in save_reduce
save(state)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "...python2.7/pickle.py", line 655, in save_dict
self._batch_setitems(obj.iteritems())
File "...python2.7/pickle.py", line 687, in _batch_setitems
save(v)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File ".../python2.7/site-packages/cloudpickle/cloudpickle.py", line 564, in save_instancemethod
obj=obj)
File ".../python2.7/site-packages/cloudpickle/cloudpickle.py", line 709, in save_reduce
save(args)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "...python2.7/pickle.py", line 554, in save_tuple
save(element)
File "...python2.7/pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File ".../python2.7/site-packages/cloudpickle/cloudpickle.py", line 727, in save_reduce
save(state)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "...python2.7/pickle.py", line 655, in save_dict
self._batch_setitems(obj.iteritems())
File "...python2.7/pickle.py", line 687, in _batch_setitems
save(v)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "...python2.7/pickle.py", line 655, in save_dict
self._batch_setitems(obj.iteritems())
File "...python2.7/pickle.py", line 692, in _batch_setitems
save(v)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "...python2.7/pickle.py", line 554, in save_tuple
save(element)
File "...python2.7/pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File ".../python2.7/site-packages/cloudpickle/cloudpickle.py", line 727, in save_reduce
save(state)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "...python2.7/pickle.py", line 655, in save_dict
self._batch_setitems(obj.iteritems())
File "...python2.7/pickle.py", line 687, in _batch_setitems
save(v)
File "...python2.7/pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File ".../python2.7/site-packages/cloudpickle/cloudpickle.py", line 727, in save_reduce
save(state)
File "...python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "...python2.7/pickle.py", line 655, in save_dict
self._batch_setitems(obj.iteritems())
File "...python2.7/pickle.py", line 687, in _batch_setitems
save(v)
File "...python2.7/pickle.py", line 306, in save
rv = reduce(self.proto)
TypeError: can't pickle thread.lock objects
