Non-lazy cubes throw an empty AssertionError
We touched upon this during the last stand-up. I now came across it again while working with some station data — cubes created from CSV via pandas. Simply adding `cube.data = cube.lazy_data()`
solves it. But maybe we should add our own, more informative error, or just make sure that the data is lazy? Full traceback below.
AssertionError Traceback (most recent call last)
Cell In [122], line 1
----> 1 index([cube], client=client)
File ~/dev/climix/climix/index.py:52, in Index.__call__(self, cubes, client, sliced_mode)
50 self.index_function.prepare(cube_mapping)
51 logging.debug("Setting up aggregation")
---> 52 aggregated = multicube_aggregated_by(
53 cube_mapping,
54 coord_name,
55 self.aggregator,
56 period=self.period,
57 client=client,
58 sliced_mode=sliced_mode,
59 output_metadata=self.metadata.output,
60 )
61 aggregated.attributes["frequency"] = self.period.label
62 return aggregated
File ~/dev/climix/climix/iris.py:148, in multicube_aggregated_by(cubes, coords, aggregator, **kwargs)
145 aggregateby_cube.add_aux_coord(coord.copy(), ref_cube.coord_dims(coord))
147 # Attach the aggregate-by data into the aggregate-by cube.
--> 148 aggregateby_cube = aggregator.post_process(
149 aggregateby_cube, aggregateby_data, coords, **kwargs
150 )
152 return aggregateby_cube
File ~/dev/climix/climix/aggregators.py:65, in PointLocalAggregator.post_process(self, cube, data, coords, client, sliced_mode, **kwargs)
64 def post_process(self, cube, data, coords, client, sliced_mode, **kwargs):
---> 65 data = self.compute_pre_result(data, client, sliced_mode)
66 try:
67 post_processor = self.index_function.post_process
File ~/dev/climix/climix/aggregators.py:59, in PointLocalAggregator.compute_pre_result(self, data, client, sliced_mode)
57 logging.debug("Setting up pre-result in aggregate mode")
58 start = time.time()
---> 59 data = client.persist(data)
60 end = time.time()
61 logging.debug(f"Setup completed in {end - start:4.0f}")
File ~/miniconda3/lib/python3.10/site-packages/distributed/client.py:3437, in Client.persist(self, collections, optimize_graph, workers, allow_other_workers, resources, retries, priority, fifo_timeout, actors, **kwargs)
3434 singleton = True
3435 collections = [collections]
-> 3437 assert all(map(dask.is_dask_collection, collections))
3439 dsk = self.collections_to_dsk(collections, optimize_graph, **kwargs)
3441 names = {k for c in collections for k in flatten(c.__dask_keys__())}
AssertionError: