Hi all,
When using a large initial dataset and performing some aggregation, I want to cache the results of the aggregation using `DataCube.save_result` with format="gtiff" and then sending a batch job (id = vito-1d885703-4970-4cac-a1a8-961f6e0296a3). My backend is the openEO Platform.
Then I want to load the cached results using `DataCube.load_result`. This works, but produces a warning:
/opt/conda/lib/python3.9/site-packages/openeo/metadata.py:240: UserWarning: No cube:dimensions metadata
complain("No cube:dimensions metadata")
A job that directly downloads this data (again as a batch job, id = vito-816e931b-aadd-4430-9760-70fc2eee8f31) gives the following error:
error processing batch job Traceback (most recent call last): File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/utils.py", line 28, in memory_logging_wrapper from spark_memlogger import memlogger ModuleNotFoundError: No module named 'spark_memlogger' During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/job_registry.py", line 267, in _read data, stat = self._zk.get(path) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/kazoo/client.py", line 1165, in get return self.get_async(path, watch=watch).get() File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/kazoo/handlers/utils.py", line 75, in get raise self._exception kazoo.exceptions.NoNodeError During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/job_registry.py", line 273, in _read data, stat = self._zk.get(path) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/kazoo/client.py", line 1165, in get return self.get_async(path, watch=watch).get() File 
"/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/kazoo/handlers/utils.py", line 75, in get raise self._exception kazoo.exceptions.NoNodeError During handling of the above exception, another exception occurred: Traceback (most recent call last): File "batch_job.py", line 307, in main run_driver() File "batch_job.py", line 281, in run_driver run_job( File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/utils.py", line 30, in memory_logging_wrapper return function(*args, **kwargs) File "batch_job.py", line 334, in run_job result = ProcessGraphDeserializer.evaluate(process_graph, env=env, do_dry_run=tracer) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 262, in evaluate return convert_node(result_node, env=env) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 268, in convert_node return apply_process( File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1075, in apply_process args = {name: convert_node(expr, env=env) for (name, expr) in sorted(args.items())} File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1075, in <dictcomp> args = {name: convert_node(expr, env=env) for (name, 
expr) in sorted(args.items())} File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 273, in convert_node return convert_node(processGraph['node'], env=env) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 268, in convert_node return apply_process( File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1186, in apply_process return process_function(args=args, env=env) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1466, in load_result return env.backend_implementation.load_result(job_id=job_id, user=user, load_params=load_params, env=env) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/backend.py", line 498, in load_result for _, asset in self.batch_jobs.get_results(job_id=job_id, user_id=user.user_id).items() File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/backend.py", line 1355, in get_results job_info = self._get_job_info(job_id=job_id, user_id=user_id) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/backend.py", line 708, 
in _get_job_info job_info = registry.get_job(job_id, user_id) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/job_registry.py", line 186, in get_job job_info, _ = self._read(job_id, user_id, include_done=True) File "/data2/hadoop/yarn/local/usercache/jaapel/appcache/application_1643116788003_0783/container_e5013_1643116788003_0783_01_000002/venv/lib/python3.8/site-packages/openeogeotrellis/job_registry.py", line 275, in _read raise JobNotFoundException(job_id) openeo_driver.errors.JobNotFoundException: The batch job 'vito-1d885703-4970-4cac-a1a8-961f6e0296a3' does not exist.
I know I can download the results directly with `job.get_results`, but for further processing I need `load_result` to work. Any ideas why it cannot find my previous job? And why does the warning say the cube has no dimensions metadata?
Thanks,
Jaap