import ray
import lance_namespace as ln
from lance_ray import read_lance, write_lance

# Fully qualified table identifier within the namespace:
# [catalog, schema, table].
TABLE_ID = ["lance_minio_catalog", "schema", "my_table32"]

# Object-store credentials/endpoint for MinIO (S3-compatible).
# NOTE(review): values are redacted placeholders — substitute real credentials.
# Defined ONCE and reused for both write and read: passing different (or
# missing) credentials on read makes Lance fall back to the default AWS
# credential chain and fail with "Failed to get AWS credentials".
STORAGE_OPTIONS = {
    "lance.storage.access_key_id": "x",
    "lance.storage.secret_access_key": "x",
    "lance.storage.endpoint": "http://minio:9000",
    # MinIO is served over plain HTTP here, so allow non-TLS connections.
    "lance.storage.allow_http": "true",
}

# Initialize Ray.
ray.init()

# Connect to a metadata catalog over REST.
# BUG in the original snippet: `{xxx}` is a *set* containing the undefined
# name `xxx`, which raises NameError; ln.connect expects a dict of
# connection properties. Adjust the URI to your REST namespace endpoint.
namespace = ln.connect("rest", {"uri": "http://localhost:8080"})

# Create a Ray dataset of 1000 rows: {"id": i, "value": 2 * i}.
data = ray.data.range(1000).map(
    lambda row: {"id": row["id"], "value": row["id"] * 2}
)

# Write to Lance format using the metadata catalog.
write_lance(
    data,
    namespace=namespace,
    table_id=TABLE_ID,
    storage_options=STORAGE_OPTIONS,
)

# Read the Lance dataset back using the metadata catalog.
ray_dataset = read_lance(
    namespace=namespace,
    table_id=TABLE_ID,
    storage_options=STORAGE_OPTIONS,
)

# Perform a distributed filter + count.
result = ray_dataset.filter(lambda row: row["value"] < 100).count()
print(f"Filtered count: {result}")
>>> write_lance(data, namespace=namespace, table_id=["lance_minio_catalog", "schema","my_table32"], storage_options = {'lance.storage.access_key_id' : 'xx','lance.storage.endpoint' : 'http://minio:9000','lance.storage.secret_access_key' : 'xxx','lance.storage.allow_http' : 'true'})
2026-01-06 15:19:28,163 INFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2026-01-05_16-05-53_740074_85505/logs/ray-data
2026-01-06 15:19:28,164 INFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[ReadRange->Map(<lambda>)->Write]
(ReadRange->Map(<lambda>)->Write pid=85885) [2026-01-06T07:19:35Z WARN lance::dataset::write::insert] No existing dataset at s3://bucket1/lance_minio_catalog/schema/my_table32/, it will be created [repeated 20x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)
(ReadRange->Map(<lambda>)->Write pid=85886) [2026-01-06T07:19:41Z WARN lance::dataset::write::insert] No existing dataset at s3://bucket1/lance_minio_catalog/schema/my_table32/, it will be created [repeated 4x across cluster]
(ReadRange->Map(<lambda>)->Write pid=85887) [2026-01-06T07:19:46Z WARN lance::dataset::write::insert] No existing dataset at s3://bucket1/lance_minio_catalog/schema/my_table32/, it will be created [repeated 17x across cluster]
(ReadRange->Map(<lambda>)->Write pid=85694) [2026-01-06T07:19:52Z WARN lance::dataset::write::insert] No existing dataset at s3://bucket1/lance_minio_catalog/schema/my_table32/, it will be created [repeated 16x across cluster]
2026-01-06 15:19:53,973 INFO datasink.py:103 -- Write operation succeeded. Aggregated write results:
- num_rows: 1000
- size_bytes: 16000
>>> ray_dataset = read_lance(namespace=namespace, table_id=["lance_minio_catalog", "schema", "my_table32"], storage_options = {'lance.storage.access_key_id' : 'xx','lance.storage.endpoint' : 'http://minio:9000','lance.storage.secret_access_key' : 'xx','lance.storage.allow_http' : 'true'})
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/yuqi/project/lance-ray/lance_ray/io.py", line 114, in read_lance
return read_datasource(
File "/Users/yuqi/.pyenv/versions/3.10.13/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
return fn(*args, **kwargs)
File "/Users/yuqi/.pyenv/versions/3.10.13/lib/python3.10/site-packages/ray/data/read_api.py", line 387, in read_datasource
requested_parallelism, _, inmemory_size = _autodetect_parallelism(
File "/Users/yuqi/.pyenv/versions/3.10.13/lib/python3.10/site-packages/ray/data/_internal/util.py", line 176, in _autodetect_parallelism
mem_size = datasource_or_legacy_reader.estimate_inmemory_data_size()
File "/Users/yuqi/project/lance-ray/lance_ray/datasource.py", line 150, in estimate_inmemory_data_size
if not self.fragments:
File "/Users/yuqi/project/lance-ray/lance_ray/datasource.py", line 85, in fragments
self._fragments = self.lance_dataset.get_fragments() or []
File "/Users/yuqi/project/lance-ray/lance_ray/datasource.py", line 79, in lance_dataset
self._lance_ds = lance.dataset(**dataset_options)
File "/Users/yuqi/.pyenv/versions/3.10.13/lib/python3.10/site-packages/lance/__init__.py", line 237, in dataset
ds = LanceDataset(
File "/Users/yuqi/.pyenv/versions/3.10.13/lib/python3.10/site-packages/lance/dataset.py", line 443, in __init__
self._ds = _Dataset(
ValueError: Dataset at path lance_minio_catalog/schema/my_table32 was not found: LanceError(IO): Generic N/A error: Encountered internal error. Please file a bug report at https://github.com/lance-format/lance/issues. Failed to get AWS credentials: CredentialsNotLoaded(CredentialsNotLoaded { source: Some("no providers in chain provided credentials") }), /Users/runner/work/lance/lance/rust/lance-io/src/object_store/providers/aws.rs:439:31, /Users/runner/work/lance/lance/rust/lance-io/src/object_store.rs:670:92, /Users/runner/work/lance/lance/rust/lance/src/dataset/builder.rs:628:35
Based on the logs, it seems the data was written to the Lance table successfully, but reading it back fails. However, when I checked the reported location in MinIO, that location does not exist.
To summarize: when I run the code above, the write logs report success ("Write operation succeeded", 1000 rows written), yet the subsequent read raises "Dataset at path ... was not found" together with "Failed to get AWS credentials". Based on the logs, it seems the data was written to the Lance table, but it fails to load — and when I checked the reported location in MinIO, that location does not exist.