Recently, I have been trying to parallelize some Sage code (Sage 9.4 on a MacBook Pro running macOS 11.2.3) using Dask. The problem I run into is that while I can run Dask inside Sage, it breaks whenever I include any code that isn't "pure Python." In particular, it keeps throwing an ImportError. Here is a basic example of what I am running into:
import time
from time import sleep

from dask import delayed
from dask.distributed import Client

client = Client(n_workers=4)

def Hello():
    # this line breaks things by adding a Sage operation;
    # if I remove it, the code runs fine
    1 + 1
    return 'Hello World'

z = delayed(Hello)()
z.compute()
This code throws the following error:
Traceback
ImportError Traceback (most recent call last)
<timed eval> in <module>
~/.sage/local/lib/python3.9/site-packages/dask/base.py in compute(self, **kwargs)
284 dask.base.compute
285 """
--> 286 (result,) = compute(self, traverse=False, **kwargs)
287 return result
288
~/.sage/local/lib/python3.9/site-packages/dask/base.py in compute(*args, **kwargs)
566 postcomputes.append(x.__dask_postcompute__())
567
--> 568 results = schedule(dsk, keys, **kwargs)
569 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
570
~/.sage/local/lib/python3.9/site-packages/distributed/client.py in get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2669 should_rejoin = False
2670 try:
-> 2671 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
2672 finally:
2673 for f in futures.values():
~/.sage/local/lib/python3.9/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
1946 else:
1947 local_worker = None
-> 1948 return self.sync(
1949 self._gather,
1950 futures,
~/.sage/local/lib/python3.9/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
843 return future
844 else:
--> 845 return sync(
846 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
847 )
~/.sage/local/lib/python3.9/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
324 if error[0]:
325 typ, exc, tb = error[0]
--> 326 raise exc.with_traceback(tb)
327 else:
328 return result[0]
~/.sage/local/lib/python3.9/site-packages/distributed/utils.py in f()
307 if callback_timeout is not None:
308 future = asyncio.wait_for(future, callback_timeout)
--> 309 result[0] = yield future
310 except Exception:
311 error[0] = sys.exc_info()
/var/tmp/sage-9.4-current/local/lib/python3.9/site-packages/tornado/gen.py in run(self)
733
734 try:
--> 735 value = future.result()
736 except Exception:
737 exc_info = sys.exc_info()
~/.sage/local/lib/python3.9/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1811 exc = CancelledError(key)
1812 else:
-> 1813 raise exception.with_traceback(traceback)
1814 raise exc
1815 if errors == "skip":
~/.sage/local/lib/python3.9/site-packages/distributed/protocol/pickle.py in loads()
73 return pickle.loads(x, buffers=buffers)
74 else:
---> 75 return pickle.loads(x)
76 except Exception:
77 logger.info("Failed to deserialize %s", x[:10000], exc_info=True)
/var/tmp/sage-9.4-current/local/lib/python3.9/site-packages/sage/rings/integer.pyx in init sage.rings.integer (build/cythonized/sage/rings/integer.c:54201)()
----> 1 r"""
2 Elements of the ring `\ZZ` of integers
3
4 Sage has highly optimized and extensive functionality for arithmetic with integers
5 and the ring of integers.
/var/tmp/sage-9.4-current/local/lib/python3.9/site-packages/sage/rings/rational.pyx in init sage.rings.rational (build/cythonized/sage/rings/rational.cpp:40442)()
98
99
--> 100 import sage.rings.real_mpfr
101 import sage.rings.real_double
102 from libc.stdint cimport uint64_t
/var/tmp/sage-9.4-current/local/lib/python3.9/site-packages/sage/rings/real_mpfr.pyx in init sage.rings.real_mpfr (build/cythonized/sage/rings/real_mpfr.c:46795)()
----> 1 r"""
2 Arbitrary Precision Real Numbers
3
4 AUTHORS:
5
/var/tmp/sage-9.4-current/local/lib/python3.9/site-packages/sage/libs/mpmath/utils.pyx in init sage.libs.mpmath.utils (build/cythonized/sage/libs/mpmath/utils.c:9062)()
----> 1 """
2 Utilities for Sage-mpmath interaction
3
4 Also patches some mpmath functions for speed
5 """
/var/tmp/sage-9.4-current/local/lib/python3.9/site-packages/sage/rings/complex_mpfr.pyx in init sage.rings.complex_mpfr (build/cythonized/sage/rings/complex_mpfr.c:34594)()
----> 1 """
2 Arbitrary Precision Floating Point Complex Numbers
3
4 AUTHORS:
5
/var/tmp/sage-9.4-current/local/lib/python3.9/site-packages/sage/rings/complex_double.pyx in init sage.rings.complex_double (build/cythonized/sage/rings/complex_double.c:25284)()
96 from cypari2.convert cimport new_gen_from_double, new_t_COMPLEX_from_double
97
---> 98 from . import complex_mpfr
99
100 from .complex_mpfr import ComplexField
ImportError: cannot import name complex_mpfr
The only other time I have seen an ImportError like this is when I have been running Sage inside Python and did not include a from sage.all import *, so I am wondering whether Dask is trying to run my code in plain Python. I'm also not sure whether this qualifies as a Sage problem or a Dask problem. Any help would be greatly appreciated!
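To test that hypothesis, here is a minimal sketch that forces every Dask worker process to import Sage's global namespace before any Sage objects are shipped to it. The client.run call is standard dask.distributed API; whether importing sage.all on the workers actually resolves the unpickling failure is an assumption on my part.
import sage.all  # make sure Sage is set up in the client process
from dask.distributed import Client

client = Client(n_workers=4)

def import_sage_on_worker():
    # force the worker process to set up Sage's namespace so that
    # pickled Sage objects (e.g. Integers) can be deserialized there
    import sage.all  # noqa: F401
    return 'sage.all imported'

# runs the function once on every worker and returns a dict of results;
# if this raises, the workers are plain Python processes without Sage set up
print(client.run(import_sage_on_worker))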
Related
I need to use MNE-Python for my master's degree project, but I'm new to Python programming. I'm trying to open a single EEG EDF file; I have tried several pieces of code and none of them worked. The last code I tried:
import mne
from google.colab import drive
drive.mount('/gdrive')
cd /gdrive/drive/MyDrive/EDFfiles
fname = 'chb01_01.edf'
raw = mne.io.read_raw_edf(fname, preload=True)
and I get this message:
Extracting EDF parameters from /content/chb01_01.edf...
EDF file detected
FileNotFoundError Traceback (most recent call last)
in ()
----> 1 raw = mne.io.read_raw_edf(fname, preload=True)
4 frames
/usr/local/lib/python3.7/dist-packages/mne/io/edf/edf.py in read_raw_edf(input_fname, eog, misc, stim_channel, exclude, infer_types, preload, verbose)
1331 return RawEDF(input_fname=input_fname, eog=eog, misc=misc,
1332 stim_channel=stim_channel, exclude=exclude,
-> 1333 infer_types=infer_types, preload=preload, verbose=verbose)
1334
1335
in init(self, input_fname, eog, misc, stim_channel, exclude, infer_types, preload, verbose)
/usr/local/lib/python3.7/dist-packages/mne/io/edf/edf.py in init(self, input_fname, eog, misc, stim_channel, exclude, infer_types, preload, verbose)
132 info, edf_info, orig_units = _get_info(input_fname, stim_channel, eog,
133 misc, exclude, infer_types,
--> 134 preload)
135 logger.info('Creating raw.info structure...')
136
/usr/local/lib/python3.7/dist-packages/mne/io/edf/edf.py in _get_info(fname, stim_channel, eog, misc, exclude, infer_types, preload)
374 misc = misc if misc is not None else []
375
--> 376 edf_info, orig_units = _read_header(fname, exclude, infer_types)
377
378 # XXX: tal_ch_names to pass to _check_stim_channel should be computed
/usr/local/lib/python3.7/dist-packages/mne/io/edf/edf.py in _read_header(fname, exclude, infer_types)
361 logger.info('%s file detected' % ext.upper())
362 if ext in ('bdf', 'edf'):
--> 363 return _read_edf_header(fname, exclude, infer_types)
364 elif ext == 'gdf':
365 return _read_gdf_header(fname, exclude), None
/usr/local/lib/python3.7/dist-packages/mne/io/edf/edf.py in _read_edf_header(fname, exclude, infer_types)
578 edf_info = {'events': []}
579
--> 580 with open(fname, 'rb') as fid:
581
582 fid.read(8) # version (unused here)
FileNotFoundError: [Errno 2] No such file or directory: '/content/chb01_01.edf'
Just a suggestion:
First try placing 'chb01_01.edf' in the Python working directory so that Python can find it, or use a short absolute path such as c:\temp_edf\chb01_01.edf, which is easier to locate.
Best
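Alternatively, a minimal sketch that avoids relying on the notebook's working directory at all, by passing an absolute path straight to read_raw_edf. The Drive path below is the one used in the question and is assumed to be where the file actually lives; if the cd in the original snippet failed silently, this is the simplest way to check.
import mne
from google.colab import drive

drive.mount('/gdrive')

# pass the full path so the working directory no longer matters
fname = '/gdrive/drive/MyDrive/EDFfiles/chb01_01.edf'  # path as given in the question
raw = mne.io.read_raw_edf(fname, preload=True)
print(raw.info)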
First of all, thank you for providing Dask with all its functionality, which is highly appreciated!
However, when using dask.distributed to run an SVD on a rasterized dataset, the computation seems to fail when individual chunks consist only of NaN values, even though most of the dataset contains valid values.
I read the dataset using xarray.open_mfdataset(chunks={...}) and try to set the chunk size so that the SVD computation (dask.array.linalg) used by the eofs.xarray package makes use of the cores our cluster provides, via a dask.distributed client.
<xarray.Dataset>
Dimensions: (time: 8760, x: 1000, y: 840)
Coordinates:
* x (x) float64 2.452e+06 2.458e+06 2.462e+06 ... 7.442e+06 7.448e+06
* y (y) float64 1.352e+06 1.358e+06 1.362e+06 ... 5.542e+06 5.548e+06
* time (time) datetime64[ns] 2005-01-01 ... 2005-12-31T23:00:00
Data variables:
capacity (y, x) float64 dask.array<shape=(840, 1000), chunksize=(840, 840)>
capfac (time, y, x) float32 dask.array<shape=(8760, 840, 1000), chunksize=(876, 840, 840)>
However, when I run the computation, it fails with the error message below.
ValueError: error encountered in SVD, check that missing values are in the same places at each time and that all the values are not missing
See complete error message:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/.conda/envs/spagat_py36/lib/python3.6/site-packages/eofs/standard.py in __init__(self, dataset, weights, center, ddof)
164 # Use the parallel Dask algorithm
--> 165 dsvd = dask.array.linalg.svd(dataNoMissing)
166 A, Lh, E = (x.compute() for x in dsvd)
~/.conda/envs/spagat_py36/lib/python3.6/site-packages/dask/array/linalg.py in svd(a)
803 """
--> 804 return tsqr(a, compute_svd=True)
805
~/.conda/envs/spagat_py36/lib/python3.6/site-packages/dask/array/linalg.py in tsqr(data, compute_svd, _max_vchunk_size)
116 "Current shape: {},\nCurrent chunksize: {}".format(
--> 117 data.shape, data.chunksize
118 )
ValueError: Input must have the following properties:
1. Have two dimensions
2. Have only one column of blocks
Note: This function (tsqr) supports QR decomposition in the case of
tall-and-skinny matrices (single column chunk/block; see qr)Current shape: (8760, nan),
Current chunksize: (876, nan)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-17-f60250fedf8b> in <module>
----> 1 pca.analyze()
~/code/tsa_lib/tsa_lib/time_tools.py in f(*args, **kwargs)
8 def f(*args, **kwargs):
9 before = time.perf_counter() # maybe exchange with time.process_time()
---> 10 rv = func(*args, **kwargs)
11 after = time.perf_counter()
12 print('elapsed time for {.__name__}: {:.2f} minutes'.format(func, (after - before)/60))
~/code/playground/playground/PCA.py in analyze(self)
145 print('PCA completed. Weights used.')
146 else:
--> 147 self.eofs, self.pcs, self.solver = eof_analysis(self.data_variability, n_eofs=None, xarray=True)
148 print('PCA completed. No weights used.')
149
~/code/tsa_lib/tsa_lib/time_tools.py in f(*args, **kwargs)
8 def f(*args, **kwargs):
9 before = time.perf_counter() # maybe exchange with time.process_time()
---> 10 rv = func(*args, **kwargs)
11 after = time.perf_counter()
12 print('elapsed time for {.__name__}: {:.2f} minutes'.format(func, (after - before)/60))
~/code/playground/playground/PCA.py in eof_analysis(data, n_eofs, xarray, wgts, lats)
36 solver = xEof(data, weights=wgts)
37 else:
---> 38 solver = xEof(data)
39
40 eofs = solver.eofsAsCovariance(neofs=n_eofs)
~/.conda/envs/spagat_py36/lib/python3.6/site-packages/eofs/xarray.py in __init__(self, array, weights, center, ddof)
131 weights=wtarray,
132 center=center,
--> 133 ddof=ddof)
134 # Name of the input DataArray.
135 self._name = array.name
~/.conda/envs/spagat_py36/lib/python3.6/site-packages/eofs/standard.py in __init__(self, dataset, weights, center, ddof)
175
176 except (np.linalg.LinAlgError, ValueError):
--> 177 raise ValueError('error encountered in SVD, check that missing '
178 'values are in the same places at each time and '
179 'that all the values are not missing')
ValueError: error encountered in SVD, check that missing values are in the same places at each time and that all the values are not missing
When applying the SVD to the rasterized dataset, the error above is raised. Is it possible that it is raised because individual chunks contain only NaN values?
If so, this could be considered a bug in dask.distributed, because the SVD works fine when applied without chunking. The SVD should not fail just because some chunks contain only NaN values while other chunks contain valid values, should it?
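For what it's worth, the inner ValueError from tsqr reports a shape of (8760, nan), i.e. unknown chunk sizes, which eofs can produce when it drops missing values via boolean indexing. Below is a minimal sketch of the chunking layout that dask.array.linalg.svd does accept; the array and chunk sizes are illustrative, not taken from the dataset above. In newer Dask versions, compute_chunk_sizes() can also resolve unknown chunk sizes before the rechunk, though whether it is available in the environment shown here is an assumption.
import dask.array as da

# illustrative tall-and-skinny array: 8760 time steps x 1000 grid points
data = da.random.random((8760, 1000), chunks=(876, 500))

# the tsqr-based SVD requires a single column of blocks, so collapse axis 1
data_single_col = data.rechunk({0: 876, 1: -1})

u, s, v = da.linalg.svd(data_single_col)
print(u.shape, s.shape, v.shape)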
I am running my code on Google Colab, but my Sequential model is not compiling. I could not create a new Sequential model from a pre-existing model and compile it. I have called the compile function on the model, but I have no idea why the error pops up or where I messed up. I am actually doing something different, but this is simplified code intended to isolate the error.
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

train_path = '/content/drive/My Drive/ML/train'
model_custom = load_model("/content/drive/My Drive/ML/model_after_vgg_back_up.h5")

# copy the layers of the loaded model into a fresh Sequential model
model = Sequential()
for layer in model_custom.layers:
    model.add(layer)
model.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

train_batches = ImageDataGenerator().flow_from_directory(train_path, target_size=[224,224], classes=['mom','nanu','prabesh','sanu'], batch_size=15)
valid_path = '/content/drive/My Drive/ML/test1'
valid_batches = ImageDataGenerator().flow_from_directory(valid_path, target_size=[224,224], classes=['mom','nanu','prabesh','sanu'], batch_size=15)
model.fit_generator(train_batches, validation_data=valid_batches, validation_steps=1, steps_per_epoch=35, verbose=2, epochs=3)
The traceback is as follows:
Found 569 images belonging to 4 classes.
Found 12 images belonging to 4 classes.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-16-c160c47bcd7c> in <module>()
7 valid_path = '/content/drive/My Drive/ML/test1'
8 valid_batches = ImageDataGenerator().flow_from_directory(valid_path,target_size=[224,224],classes=['mom','nanu','prabesh','sanu'],batch_size=15)
----> 9 model.fit_generator(train_batches,validation_data=valid_batches,validation_steps=1,steps_per_epoch=35,verbose=2,epochs=3)
/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/usr/local/lib/python3.6/dist-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1416 use_multiprocessing=use_multiprocessing,
1417 shuffle=shuffle,
-> 1418 initial_epoch=initial_epoch)
1419
1420 #interfaces.legacy_generator_methods_support
/usr/local/lib/python3.6/dist-packages/keras/engine/training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
38
39 do_validation = bool(validation_data)
---> 40 model._make_train_function()
41 if do_validation:
42 model._make_test_function()
/usr/local/lib/python3.6/dist-packages/keras/engine/training.py in _make_train_function(self)
494 def _make_train_function(self):
495 if not hasattr(self, 'train_function'):
--> 496 raise RuntimeError('You must compile your model before using it.')
497 self._check_trainable_weights_consistency()
498 if self.train_function is None:
RuntimeError: You must compile your model before using it.
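One hedged way to sidestep the layer-copying step entirely, assuming the saved model already has the architecture you want, is to recompile the loaded model directly. This is only a sketch of an alternative; whether it avoids the RuntimeError in this exact setup is an assumption, since the root cause of the failed _make_train_function is not visible from the traceback alone.
from keras.models import load_model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

# load and recompile the saved model directly instead of copying its layers
model = load_model("/content/drive/My Drive/ML/model_after_vgg_back_up.h5")
model.compile(optimizer=Adam(lr=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

train_batches = ImageDataGenerator().flow_from_directory(
    '/content/drive/My Drive/ML/train', target_size=[224, 224],
    classes=['mom', 'nanu', 'prabesh', 'sanu'], batch_size=15)
model.fit_generator(train_batches, steps_per_epoch=35, epochs=3, verbose=2)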
I am storing a pickle of an xgboost classifier model to disk using joblib:
from sklearn.externals import joblib
joblib.dump(clf, 'model.pkl')
Then, I am loading the model and trying to predict on a test dataset
from sklearn.externals import joblib
model=joblib.load('model.pkl')
model.predict(X_test)
I am getting the TypeError: 'str' object is not callable when I execute the predict method on the loaded classifier object.
Any help on how to resolve this?
Here is the traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-72-0da70f003ba0> in <module>()
----> 1 model.predict(X_test)
C:\Users\Sriram\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in <lambda>(*args, **kwargs)
113
114 # lambda, but not partial, allows help() to work with update_wrapper
--> 115 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
116 # update the docstring of the returned function
117 update_wrapper(out, self.fn)
C:\Users\Sriram\Anaconda3\lib\site-packages\sklearn\pipeline.py in predict(self, X)
314 if transform is not None:
315 Xt = transform.transform(Xt)
--> 316 return self.steps[-1][-1].predict(Xt)
317
318 #if_delegate_has_method(delegate='_final_estimator')
C:\Users\Sriram\Anaconda3\lib\site-packages\xgboost\sklearn.py in predict(self, data, output_margin, ntree_limit)
461 def predict(self, data, output_margin=False, ntree_limit=0):
462 test_dmatrix = DMatrix(data, missing=self.missing)
--> 463 class_probs = self.booster().predict(test_dmatrix,
464 output_margin=output_margin,
465 ntree_limit=ntree_limit)
TypeError: 'str' object is not callable
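A common cause of this particular error is an xgboost version mismatch between the environment that pickled the model and the one loading it (booster changed from a method to a plain string attribute across versions). One hedged workaround is to persist the underlying booster in xgboost's native format instead of pickling the sklearn wrapper. The sketch below assumes clf is the trained XGBClassifier itself (the traceback suggests it may actually sit at the end of a sklearn Pipeline, in which case the final pipeline step is the estimator to save) and that the installed xgboost is recent enough to provide get_booster().
import xgboost as xgb

# save the trained model in xgboost's native format rather than pickling it
clf.get_booster().save_model('model.xgb')

# later / in another environment: load it back and predict on a DMatrix
booster = xgb.Booster()
booster.load_model('model.xgb')
preds = booster.predict(xgb.DMatrix(X_test))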
I am doing the Machine Learning course from Coursera by the University of Washington, in which I am using GraphLab from IPython. During practice, when I execute the command below:
sales = graphlab.SFrame('home_data.gl/')
I am getting the error:
InvalidProductKey Traceback (most recent call last)
<ipython-input-3-c5971b60b216> in <module>()
----> 1 sales=graphlab.SFrame('home_data.gl/')
/opt/conda/lib/python2.7/site-packages/graphlab/data_structures/sframe.pyc in __init__(self, data, format, _proxy)
865 self.__proxy__ = _proxy
866 else:
--> 867 self.__proxy__ = UnitySFrameProxy(glconnect.get_client())
868 _format = None
869 if (format == 'auto'):
/opt/conda/lib/python2.7/site-packages/graphlab/connect/main.pyc in get_client()
138 """
139 if not is_connected():
--> 140 launch()
141 assert is_connected(), ENGINE_START_ERROR_MESSAGE
142 return __CLIENT__
/opt/conda/lib/python2.7/site-packages/graphlab/connect/main.pyc in launch(server_addr, server_bin, server_log, auth_token, server_public_key)
90 if server:
91 server.try_stop()
---> 92 raise e
93 server.set_log_progress(True)
94 # start the client
InvalidProductKey: Product key not found.
(Note: the IPython notebook and home_data.gl are in the same folder.)
You will need to set the product key for GraphLab using the command:
graphlab.product_key.set_product_key('PRODUCT KEY HERE')
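A minimal usage sketch, assuming the product key has been obtained from the GraphLab Create registration email (the key string below is a placeholder, and the set_product_key call only needs to run once per environment):
import graphlab

graphlab.product_key.set_product_key('YOUR-PRODUCT-KEY-HERE')  # placeholder key

# once the key is registered, loading the SFrame should work as before
sales = graphlab.SFrame('home_data.gl/')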