Hi,
when experimenting with Sentinel-1 data products, we came across some unexpected errors.
First, the Terrascope/VITO Sentinel-1 collections “S1_GRD_SIGMA0_ASCENDING” and “S1_GRD_SIGMA0_DESCENDING” produce an error even for a simple load_collection request:
library(openeo)
ext = list(11.00204, 49.66901, 11.28197, 49.85029)
names(ext) = c("west", "south", "east", "north")
dir = tempdir() # directory for downloaded results
# uncomment and fill in your credentials to log in
# con = openeo::connect("https://openeo.cloud")
# login(
# login_type = "oidc"
# , provider = "egi"
# , config = list(
# client_id = "<client_id>"
# , secret = "<secret>"
# )
# )
procs = openeo::processes()
# VITO Sentinel-1 collection
SAR_cube_sigma0 = procs$load_collection(
id = "S1_GRD_SIGMA0_ASCENDING"
, spatial_extent = ext
, temporal_extent = c("2017-04-01", "2017-04-05")
, bands = c("VV", "VH")
)
# create and start job
job = openeo::create_job(SAR_cube_sigma0)
openeo::start_job(job)
id = as.character(job$id)
jobs = openeo::list_jobs()
jobs[[id]] # check job status
openeo::log_job("vito-ed24e155-2387-40b5-9ab3-eed1cbd31210")
# [ERROR] error processing batch job
# Traceback (most recent call last):
# File "batch_job.py", line 305, in main
# run_driver()
# File "batch_job.py", line 279, in run_driver
# run_job(
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeogeotrellis/utils.py", line 40, in memory_logging_wrapper
# return function(*args, **kwargs)
# File "batch_job.py", line 332, in run_job
# result = ProcessGraphDeserializer.evaluate(process_graph, env=env, do_dry_run=tracer)
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 262, in evaluate
# return convert_node(result_node, env=env)
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 268, in convert_node
# return apply_process(
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 1225, in apply_process
# return process_function(args=args, env=env)
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeo_driver/ProcessGraphDeserializer.py", line 433, in load_collection
# return env.backend_implementation.catalog.load_collection(collection_id, load_params=load_params, env=env)
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeo/util.py", line 363, in wrapper
# return f(*args, **kwargs)
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeogeotrellis/layercatalog.py", line 446, in load_collection
# pyramid = file_s2_pyramid()
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeogeotrellis/layercatalog.py", line 227, in file_s2_pyramid
# return file_pyramid(lambda opensearch_endpoint, opensearch_collection_id, opensearch_link_titles, root_path:
# File "/data3/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10754/container_e5013_1643116788003_10754_01_000001/venv/lib/python3.8/site-packages/openeogeotrellis/layercatalog.py", line 272, in file_pyramid
# return factory.datacube_seq(projected_polygons_native_crs, from_date, to_date, metadata_properties(),
# File "/opt/spark3_2_0/python/lib/py4j-0.10.9.2-src.zip/py4j/java_gateway.py", line 1309, in __call__
# return_value = get_return_value(
# File "/opt/spark3_2_0/python/lib/py4j-0.10.9.2-src.zip/py4j/protocol.py", line 326, in get_return_value
# raise Py4JJavaError(
# py4j.protocol.Py4JJavaError: An error occurred while calling o818.datacube_seq.
# : java.lang.IllegalArgumentException: Could not find data for your load_collection request with catalog ID urn:eop:VITO:CGS_S1_GRD_SIGMA0_L1. The catalog query had id eea74825-cede-4cf4-b5af-922dcfda89bd and returned 0 results.
# at org.openeo.geotrellis.layers.FileLayerProvider.loadRasterSourceRDD(FileLayerProvider.scala:602)
# at org.openeo.geotrellis.layers.FileLayerProvider.readMultibandTileLayer(FileLayerProvider.scala:476)
# at org.openeo.geotrellis.file.Sentinel2PyramidFactory.datacube(Sentinel2PyramidFactory.scala:150)
# at org.openeo.geotrellis.file.Sentinel2PyramidFactory.datacube_seq(Sentinel2PyramidFactory.scala:129)
# at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
# at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
# at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
# at java.base/java.lang.reflect.Method.invoke(Method.java:566)
# at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
# at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
# at py4j.Gateway.invoke(Gateway.java:282)
# at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
# at py4j.commands.CallCommand.execute(CallCommand.java:79)
# at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
# at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
# at java.base/java.lang.Thread.run(Thread.java:834)
From this error message (the catalog query for urn:eop:VITO:CGS_S1_GRD_SIGMA0_L1 returned 0 results), it seems the data for this collection cannot be found on the back-end.
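To rule out a typo on our side, here is a quick sanity check with the openeo R client (a minimal sketch; we assume list_collections() returns a list named by collection ID, and describe_collection() fetches the collection metadata):
# list the collection IDs advertised by the back-end
colls = openeo::list_collections()
"S1_GRD_SIGMA0_ASCENDING" %in% names(colls)
# inspect the collection metadata (spatio-temporal extent, bands)
openeo::describe_collection("S1_GRD_SIGMA0_ASCENDING")
If the ID is listed there, the problem is presumably on the data-access side rather than a wrong collection ID in the request.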
Second, and this is more of a remark than a problem: querying the Sentinel Hub collection “SENTINEL1_GRD” without specifying bands also results in an error:
SAR_cube = procs$load_collection(
id = "SENTINEL1_GRD"
, spatial_extent = ext
, temporal_extent = c("2017-04-01", "2017-04-05")
)
# create and start job
job = openeo::create_job(SAR_cube)
openeo::start_job(job)
id = as.character(job$id)
jobs = openeo::list_jobs()
jobs[[id]] # check job status
openeo::log_job("vito-78f3e13e-0a8a-4951-bff0-3e5efc23d9b5")
# [ERROR] error processing batch job
# Traceback (most recent call last):
# File "batch_job.py", line 305, in main
# run_driver()
# File "batch_job.py", line 279, in run_driver
# run_job(
# File "/data1/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10751/container_e5013_1643116788003_10751_01_000001/venv/lib/python3.8/site-packages/openeogeotrellis/utils.py", line 40, in memory_logging_wrapper
# return function(*args, **kwargs)
# File "batch_job.py", line 367, in run_job
# assets_metadata = result.write_assets(str(output_file))
# File "/data1/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10751/container_e5013_1643116788003_10751_01_000001/venv/lib/python3.8/site-packages/openeo_driver/save_result.py", line 87, in write_assets
# return self.cube.write_assets(filename=directory, format=self.format, format_options=self.options)
# File "/data1/hadoop/yarn/local/usercache/hendrik.wagenseil/appcache/application_1643116788003_10751/container_e5013_1643116788003_10751_01_000001/venv/lib/python3.8/site-packages/openeogeotrellis/geopysparkdatacube.py", line 1437, in write_assets
# timestamped_paths = self._get_jvm().org.openeo.geotrellis.geotiff.package.saveRDDTemporal(
# File "/opt/spark3_2_0/python/lib/py4j-0.10.9.2-src.zip/py4j/java_gateway.py", line 1309, in __call__
# return_value = get_return_value(
# File "/opt/spark3_2_0/python/lib/py4j-0.10.9.2-src.zip/py4j/protocol.py", line 326, in get_return_value
# raise Py4JJavaError(
# py4j.protocol.Py4JJavaError: An error occurred while calling z:org.openeo.geotrellis.geotiff.package.saveRDDTemporal.
# : org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 1.0 failed 4 times, most recent failure: Lost task 2.3 in stage 1.0 (TID 18) (epod072.vgt.vito.be executor 6): org.openeo.geotrellissentinelhub.SentinelHubException: Sentinel Hub returned an error
# response: HTTP/1.1 500 Internal Server Error with body: {"error":{"status":500,"reason":"Internal Server Error","message":"Illegal request to https://sentinel-s1-l1c.s3.amazonaws.com/GRD/2017/4/1/IW/DV/S1B_IW_GRDH_1SDV_20170401T165833_20170401T165858_004970_008B07_B03C/measurement/iw-hv.tiff. HTTP Status: 404.","code":"RENDERER_EXCEPTION"}}
# request: POST https://services.sentinel-hub.com/api/v1/process with body: {
# "input": {
# "bounds": {
# "bbox": [664390.0, 5506570.0, 666950.0, 5509130.0],
# "properties": {
# "crs": "http://www.opengis.net/def/crs/EPSG/0/32632"
# }
# },
# "data": [
# {
# "type": "sentinel-1-grd",
# "dataFilter": {"timeRange":{"from":"2017-04-01T00:00:00Z","to":"2017-04-02T00:00:00Z"}},
# "processing": {"backCoeff":"GAMMA0_TERRAIN","orthorectify":true}
# }
# ]
# },
# "output": {
# "width": 256,
# "height": 256,
# "responses": [
# {
# "identifier": "default",
# "format": {
# "type": "image/tiff"
# }
# }
# ]
# },
# "evalscript": "//VERSION=3\nfunction setup() {\n return {\n input: [{\n \"bands\": [\"VV\", \"VH\", \"HV\", \"HH\"]\n }],\n output: {\n bands: 4,\n sampleType: \"FLOAT32\",\n }\n };\n}\n\nfunction evaluatePixel(sample) {\n return [sample.VV, sample.VH, sample.HV, sample.HH];\n}"
# }
# at org.openeo.geotrellissentinelhub.SentinelHubException$.apply(SentinelHubException.scala:19)
# at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$8(ProcessApi.scala:127)
# at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$8$adapted(ProcessApi.scala:121)
# at scalaj.http.HttpRequest.$anonfun$toResponse$17(Http.scala:422)
# at scala.Option.getOrElse(Option.scala:189)
# at scalaj.http.HttpRequest.$anonfun$toResponse$14(Http.scala:414)
# at scala.Option.getOrElse(Option.scala:189)
# at scalaj.http.HttpRequest.toResponse(Http.scala:414)
# at scalaj.http.HttpRequest.doConnection(Http.scala:368)
# at scalaj.http.HttpRequest.exec(Http.scala:343)
# at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$7(ProcessApi.scala:121)
# at org.openeo.geotrellissentinelhub.package$$anon$2.get(package.scala:55)
# at net.jodah.failsafe.Functions.lambda$get$0(Functions.java:46)
# at net.jodah.failsafe.RetryPolicyExecutor.lambda$supply$0(RetryPolicyExecutor.java:65)
# at net.jodah.failsafe.Execution.executeSync(Execution.java:128)
# at net.jodah.failsafe.FailsafeExecutor.call(FailsafeExecutor.java:378)
# at net.jodah.failsafe.FailsafeExecutor.get(FailsafeExecutor.java:68)
# at org.openeo.geotrellissentinelhub.package$.withRetries(package.scala:54)
# at org.openeo.geotrellissentinelhub.DefaultProcessApi.getTile(ProcessApi.scala:120)
# at org.openeo.geotrellissentinelhub.PyramidFactory.$anonfun$datacube_seq$7(PyramidFactory.scala:213)
# at org.openeo.geotrellissentinelhub.package$.authorized(package.scala:63)
# at org.openeo.geotrellissentinelhub.PyramidFactory.authorized(PyramidFactory.scala:68)
# at org.openeo.geotrellissentinelhub.PyramidFactory.org$openeo$geotrellissentinelhub$PyramidFactory$$getTile$1(PyramidFactory.scala:211)
# at org.openeo.geotrellissentinelhub.PyramidFactory.org$openeo$geotrellissentinelhub$PyramidFactory$$dataTile$1(PyramidFactory.scala:219)
# at org.openeo.geotrellissentinelhub.PyramidFactory.loadMasked$1(PyramidFactory.scala:244)
# at org.openeo.geotrellissentinelhub.PyramidFactory.$anonfun$datacube_seq$13(PyramidFactory.scala:267)
# at scala.collection.Iterator$$anon$10.next(Iterator.scala:459)
# at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:512)
# at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
# at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:511)
# at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
# at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
# at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
# at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
# at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
# at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
# at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
# at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
# at org.apache.spark.scheduler.Task.run(Task.scala:131)
# at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
# at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
# at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
# at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
# at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
# at java.base/java.lang.Thread.run(Thread.java:834)
#
# Driver stacktrace:
# at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2403)
# at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2352)
# at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2351)
# at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
# at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
# at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
# at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2351)
# at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1109)
# at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1109)
# at scala.Option.foreach(Option.scala:407)
# at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1109)
# at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2591)
# at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2533)
# at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
# at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
# at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:898)
# at org.apache.spark.SparkContext.runJob(SparkContext.scala:2214)
# at org.apache.spark.SparkContext.runJob(SparkContext.scala:2235)
# at org.apache.spark.SparkContext.runJob(SparkContext.scala:2254)
# at org.apache.spark.SparkContext.runJob(SparkContext.scala:2279)
# at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
# at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
# at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
# at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
# at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
# at org.openeo.geotrellis.geotiff.package$.saveRDDTemporal(package.scala:138)
# at org.openeo.geotrellis.geotiff.package.saveRDDTemporal(package.scala)
# at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
# at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
# at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
# at java.base/java.lang.reflect.Method.invoke(Method.java:566)
# at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
# at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
# at py4j.Gateway.invoke(Gateway.java:282)
# at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
# at py4j.commands.CallCommand.execute(CallCommand.java:79)
# at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
# at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
# at java.base/java.lang.Thread.run(Thread.java:834)
# Caused by: org.openeo.geotrellissentinelhub.SentinelHubException: Sentinel Hub returned an error
# response: HTTP/1.1 500 Internal Server Error with body: {"error":{"status":500,"reason":"Internal Server Error","message":"Illegal request to https://sentinel-s1-l1c.s3.amazonaws.com/GRD/2017/4/1/IW/DV/S1B_IW_GRDH_1SDV_20170401T165833_20170401T165858_004970_008B07_B03C/measurement/iw-hv.tiff. HTTP Status: 404.","code":"RENDERER_EXCEPTION"}}
# request: POST https://services.sentinel-hub.com/api/v1/process with body: {
# "input": {
# "bounds": {
# "bbox": [664390.0, 5506570.0, 666950.0, 5509130.0],
# "properties": {
# "crs": "http://www.opengis.net/def/crs/EPSG/0/32632"
# }
# },
# "data": [
# {
# "type": "sentinel-1-grd",
# "dataFilter": {"timeRange":{"from":"2017-04-01T00:00:00Z","to":"2017-04-02T00:00:00Z"}},
# "processing": {"backCoeff":"GAMMA0_TERRAIN","orthorectify":true}
# }
# ]
# },
# "output": {
# "width": 256,
# "height": 256,
# "responses": [
# {
# "identifier": "default",
# "format": {
# "type": "image/tiff"
# }
# }
# ]
# },
# "evalscript": "//VERSION=3\nfunction setup() {\n return {\n input: [{\n \"bands\": [\"VV\", \"VH\", \"HV\", \"HH\"]\n }],\n output: {\n bands: 4,\n sampleType: \"FLOAT32\",\n }\n };\n}\n\nfunction evaluatePixel(sample) {\n return [sample.VV, sample.VH, sample.HV, sample.HH];\n}"
# }
# at org.openeo.geotrellissentinelhub.SentinelHubException$.apply(SentinelHubException.scala:19)
# at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$8(ProcessApi.scala:127)
# at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$8$adapted(ProcessApi.scala:121)
# at scalaj.http.HttpRequest.$anonfun$toResponse$17(Http.scala:422)
# at scala.Option.getOrElse(Option.scala:189)
# at scalaj.http.HttpRequest.$anonfun$toResponse$14(Http.scala:414)
# at scala.Option.getOrElse(Option.scala:189)
# at scalaj.http.HttpRequest.toResponse(Http.scala:414)
# at scalaj.http.HttpRequest.doConnection(Http.scala:368)
# at scalaj.http.HttpRequest.exec(Http.scala:343)
# at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$7(ProcessApi.scala:121)
# at org.openeo.geotrellissentinelhub.package$$anon$2.get(package.scala:55)
# at net.jodah.failsafe.Functions.lambda$get$0(Functions.java:46)
# at net.jodah.failsafe.RetryPolicyExecutor.lambda$supply$0(RetryPolicyExecutor.java:65)
# at net.jodah.failsafe.Execution.executeSync(Execution.java:128)
# at net.jodah.failsafe.FailsafeExecutor.call(FailsafeExecutor.java:378)
# at net.jodah.failsafe.FailsafeExecutor.get(FailsafeExecutor.java:68)
# at org.openeo.geotrellissentinelhub.package$.withRetries(package.scala:54)
# at org.openeo.geotrellissentinelhub.DefaultProcessApi.getTile(ProcessApi.scala:120)
# at org.openeo.geotrellissentinelhub.PyramidFactory.$anonfun$datacube_seq$7(PyramidFactory.scala:213)
# at org.openeo.geotrellissentinelhub.package$.authorized(package.scala:63)
# at org.openeo.geotrellissentinelhub.PyramidFactory.authorized(PyramidFactory.scala:68)
# at org.openeo.geotrellissentinelhub.PyramidFactory.org$openeo$geotrellissentinelhub$PyramidFactory$$getTile$1(PyramidFactory.scala:211)
# at org.openeo.geotrellissentinelhub.PyramidFactory.org$openeo$geotrellissentinelhub$PyramidFactory$$dataTile$1(PyramidFactory.scala:219)
# at org.openeo.geotrellissentinelhub.PyramidFactory.loadMasked$1(PyramidFactory.scala:244)
# at org.openeo.geotrellissentinelhub.PyramidFactory.$anonfun$datacube_seq$13(PyramidFactory.scala:267)
# at scala.collection.Iterator$$anon$10.next(Iterator.scala:459)
# at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:512)
# at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
# at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:511)
# at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
# at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
# at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
# at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
# at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
# at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
# at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
# at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
# at org.apache.spark.scheduler.Task.run(Task.scala:131)
# at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
# at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
# at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
# at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
# at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
# ... 1 more
When specifying bands explicitly, this error does not occur. However, since bands is usually an optional argument to load_collection(), this caused some confusion. Judging from the request body in the log, the back-end defaults to requesting all four polarizations (VV, VH, HV, HH), while the scene in question (S1B_IW_GRDH_1SDV_…) is a dual-polarization product that only contains VV and VH, hence the 404 for iw-hv.tiff.
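For the record, the workaround is simply to restrict the request to the polarizations that the dual-pol products actually contain (a sketch, analogous to the first example above):
# explicitly request only the dual-pol bands to avoid the missing-band 404
SAR_cube = procs$load_collection(
id = "SENTINEL1_GRD"
, spatial_extent = ext
, temporal_extent = c("2017-04-01", "2017-04-05")
, bands = c("VV", "VH")
)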
As always, we are thankful for any ideas, tips, or fixes!
Best!