diff --git a/lance_ray/compaction.py b/lance_ray/compaction.py index b5f624e1..b8ebbdb3 100644 --- a/lance_ray/compaction.py +++ b/lance_ray/compaction.py @@ -132,7 +132,9 @@ def compact_files( if describe_response.storage_options: merged_storage_options.update(describe_response.storage_options) - namespace_kwargs = get_namespace_kwargs(namespace_impl, namespace_properties, table_id) + namespace_kwargs = get_namespace_kwargs( + namespace_impl, namespace_properties, table_id + ) # Load dataset dataset = lance.LanceDataset( @@ -261,9 +263,7 @@ def compact_database( if not database: raise ValueError("'database' must be a non-empty list of path segments.") if not namespace_impl: - raise ValueError( - "'namespace_impl' is required when using compact_database." - ) + raise ValueError("'namespace_impl' is required when using compact_database.") from lance_namespace import ListTablesRequest @@ -314,8 +314,6 @@ def compact_database( results.append({"table_id": table_id, "metrics": metrics}) except Exception as e: logger.exception("Compaction failed for table %s: %s", table_id, e) - raise RuntimeError( - f"Compaction failed for table {table_id}: {e}" - ) from e + raise RuntimeError(f"Compaction failed for table {table_id}: {e}") from e return results diff --git a/lance_ray/datasource.py b/lance_ray/datasource.py index 1ecb07b4..58c582be 100644 --- a/lance_ray/datasource.py +++ b/lance_ray/datasource.py @@ -154,7 +154,16 @@ def get_read_tasks(self, parallelism: int, **kwargs) -> list[ReadTask]: ) read_task = ReadTask( - lambda fids=fragment_ids, uri=dataset_uri, version=dataset_version, storage_options=dataset_storage_options, manifest=serialized_manifest, ns_impl=namespace_impl, ns_props=namespace_properties, tbl_id=table_id, scanner_options=self._scanner_options, retry_params=self._retry_params: ( + lambda fids=fragment_ids, + uri=dataset_uri, + version=dataset_version, + storage_options=dataset_storage_options, + manifest=serialized_manifest, + ns_impl=namespace_impl, + ns_props=namespace_properties, + tbl_id=table_id, + scanner_options=self._scanner_options, + retry_params=self._retry_params: ( _read_fragments_with_retry( fids, uri, diff --git a/lance_ray/fragment.py b/lance_ray/fragment.py index 60b25ad9..4debeb29 100644 --- a/lance_ray/fragment.py +++ b/lance_ray/fragment.py @@ -107,7 +107,10 @@ def record_batch_converter(): except (AttributeError, ValueError): # pragma: no cover write_sig = None - if write_sig is not None and "allow_external_blob_outside_bases" in write_sig.parameters: + if ( + write_sig is not None + and "allow_external_blob_outside_bases" in write_sig.parameters + ): write_kwargs.setdefault("allow_external_blob_outside_bases", True) tbl_first = pd_to_arrow(first, schema) diff --git a/lance_ray/index.py b/lance_ray/index.py index 6955d5de..1207e70b 100755 --- a/lance_ray/index.py +++ b/lance_ray/index.py @@ -371,7 +371,9 @@ def create_scalar_index( if describe_response.storage_options: merged_storage_options.update(describe_response.storage_options) - namespace_kwargs = get_namespace_kwargs(namespace_impl, namespace_properties, table_id) + namespace_kwargs = get_namespace_kwargs( + namespace_impl, namespace_properties, table_id + ) # Load dataset dataset = LanceDataset( @@ -441,7 +443,9 @@ def create_scalar_index( existing_indices = dataset.list_indices() existing_names = {idx["name"] for idx in existing_indices} index_exists = name in existing_names - except Exception: # pragma: no cover - list_indices() not available in older lance versions + except ( + Exception + ): # pragma: no cover - list_indices() not available in older lance versions pass if index_exists: raise ValueError( @@ -830,7 +834,9 @@ def create_index( dataset_obj = uri dataset_uri = dataset_obj.uri if not merged_storage_options: - merged_storage_options = getattr(dataset_obj, "_storage_options", None) or {} + merged_storage_options = ( + getattr(dataset_obj, "_storage_options", None) or {} + ) namespace_kwargs = {} try: @@ -850,7 +856,9 @@ def create_index( existing_indices = dataset_obj.list_indices() existing_names = {idx["name"] for idx in existing_indices} index_exists = name in existing_names - except Exception: # pragma: no cover - list_indices() not available in older lance versions + except ( + Exception + ): # pragma: no cover - list_indices() not available in older lance versions pass if index_exists: raise ValueError( @@ -1107,7 +1115,9 @@ def optimize_indices( uri, ) - namespace_kwargs = get_namespace_kwargs(namespace_impl, namespace_properties, table_id) + namespace_kwargs = get_namespace_kwargs( + namespace_impl, namespace_properties, table_id + ) dataset = LanceDataset( uri, diff --git a/lance_ray/io.py b/lance_ray/io.py index 10e122b2..a8bcadb8 100644 --- a/lance_ray/io.py +++ b/lance_ray/io.py @@ -460,7 +460,9 @@ def add_columns( """ storage_options = storage_options or {} - namespace_kwargs = get_namespace_kwargs(namespace_impl, namespace_properties, table_id) + namespace_kwargs = get_namespace_kwargs( + namespace_impl, namespace_properties, table_id + ) lance_ds = LanceDataset( uri=uri, diff --git a/lance_ray/utils.py b/lance_ray/utils.py index 06d96765..00b02930 100644 --- a/lance_ray/utils.py +++ b/lance_ray/utils.py @@ -129,7 +129,9 @@ def _create_storage_options_provider( if not hasattr(lance, "LanceNamespaceStorageOptionsProvider"): return None - return lance.LanceNamespaceStorageOptionsProvider(namespace=namespace, table_id=table_id) + return lance.LanceNamespaceStorageOptionsProvider( + namespace=namespace, table_id=table_id + ) def get_namespace_kwargs( @@ -185,7 +187,9 @@ def get_write_fragments_kwargs( return {} return {"namespace_client": namespace, "table_id": table_id} - provider = _create_storage_options_provider(namespace_impl, namespace_properties, table_id) + provider = _create_storage_options_provider( + namespace_impl, namespace_properties, table_id + ) if provider is None: return {} return {"storage_options_provider": provider} diff --git a/pyproject.toml b/pyproject.toml index 5dd2f9f3..f0f26bee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ dependencies = [ "ray[data]>=2.41.0", - "pylance>=4.0.0", + "pylance>=6.0.0b1", "lance-namespace", "packaging", "pyarrow>=17.0.0", diff --git a/tests/test_blob.py b/tests/test_blob.py index b7a811e8..c4dcb629 100755 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -344,10 +344,7 @@ def test_stream_copy_basic_local(temp_dir): assert src.schema == dst.schema src_df = ( - ray.data.from_arrow(table) - .to_pandas() - .sort_values("id") - .reset_index(drop=True) + ray.data.from_arrow(table).to_pandas().sort_values("id").reset_index(drop=True) ) dst_df = ( lr.read_lance(str(dst_path)) @@ -367,7 +364,9 @@ def test_stream_copy_resume_local(temp_dir): # Legacy blob schema schema = pa.schema( [ - pa.field("blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}), + pa.field( + "blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"} + ), pa.field("id", pa.int64()), pa.field("name", pa.string()), pa.field("val", pa.float64()), @@ -402,10 +401,7 @@ def test_stream_copy_resume_local(temp_dir): ) src_df = ( - ray.data.from_arrow(table) - .to_pandas() - .sort_values("id") - .reset_index(drop=True) + ray.data.from_arrow(table).to_pandas().sort_values("id").reset_index(drop=True) ) dst_df = ( lr.read_lance(str(dst_path)) diff --git a/tests/test_distributed_indexing.py b/tests/test_distributed_indexing.py index a900bba3..5c4c751f 100755 --- a/tests/test_distributed_indexing.py +++ b/tests/test_distributed_indexing.py @@ -465,9 +465,13 @@ def test_scalar_index_on_mixed_schema_list_indices(self, temp_dir): ) indices = updated_dataset.list_indices() - assert len(indices) >= 1, "list_indices should return at least the new scalar index" + assert len(indices) >= 1, ( + "list_indices should return at least the new scalar index" + ) names = [idx["name"] for idx in indices] - assert index_name in names, f"Expected index name {index_name!r} in list_indices: {names}" + assert index_name in names, ( + f"Expected index name {index_name!r} in list_indices: {names}" + ) label_index = next(idx for idx in indices if idx["name"] == index_name) assert label_index["type"] == "BTree", ( @@ -1018,7 +1022,9 @@ def test_optimize_indices_success_with_uri(self, multi_fragment_lance_dataset): assert result.count_rows() == lance.LanceDataset(dataset_uri).count_rows() indices = result.list_indices() - assert len(indices) >= 1, "list_indices should include at least the existing index" + assert len(indices) >= 1, ( + "list_indices should include at least the existing index" + ) names = [idx["name"] for idx in indices] assert "text_idx" in names, f"Expected 'text_idx' in list_indices: {names}" @@ -1029,9 +1035,10 @@ def test_optimize_indices_runtime_error_when_api_missing(self, temp_dir): lr.write_lance(ray.data.from_pandas(df), str(path)) ds = lance.LanceDataset(str(path)) - if getattr(ds, "optimize_indices", None) is not None or getattr( - ds, "optimize", None - ) is not None: + if ( + getattr(ds, "optimize_indices", None) is not None + or getattr(ds, "optimize", None) is not None + ): pytest.skip( "This lance version exposes optimize_indices/optimize; " "cannot test RuntimeError path." diff --git a/uv.lock b/uv.lock index 5cbbf34b..8d890830 100644 --- a/uv.lock +++ b/uv.lock @@ -358,6 +358,7 @@ source = { editable = "." } dependencies = [ { name = "lance-namespace" }, { name = "more-itertools", marker = "python_full_version < '3.12'" }, + { name = "packaging" }, { name = "pyarrow" }, { name = "pylance" }, { name = "pytest" }, @@ -384,8 +385,9 @@ requires-dist = [ { name = "mkdocs-awesome-pages-plugin", marker = "extra == 'docs'", specifier = ">=2.9.0" }, { name = "mkdocs-material", marker = "extra == 'docs'", specifier = ">=9.0.0" }, { name = "more-itertools", marker = "python_full_version < '3.12'", specifier = ">=2.6.0" }, + { name = "packaging" }, { name = "pyarrow", specifier = ">=17.0.0" }, - { name = "pylance", specifier = ">=4.0.0" }, + { name = "pylance", specifier = ">=6.0.0b1" }, { name = "pytest", specifier = ">=8.4.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=1.0.0" }, { name = "pytest-cov", specifier = ">=5.0.0" }, @@ -1022,8 +1024,8 @@ wheels = [ [[package]] name = "pylance" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } +version = "6.0.0b1" +source = { registry = "https://pypi.fury.io/lance-format" } dependencies = [ { name = "lance-namespace" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -1031,12 +1033,12 @@ dependencies = [ { name = "pyarrow" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/19/29/5152da1261a628c293876917b6185538bd68f4cf1420da6265b5be79d09b/pylance-4.0.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:7310892f3089eeddb1af1fe5c398b71cc483a3015646caceaa2f62fc92b227b2", size = 54420876, upload-time = "2026-03-30T18:18:37.525Z" }, - { url = "https://files.pythonhosted.org/packages/99/ae/7edbbfc18c3be43eedb886e74a17826c09fdf35588b35912f2733779ea43/pylance-4.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57f6a521b1b4b77a62d791850213a854093719c7d76b9641e8abcd445eb73e56", size = 56752552, upload-time = "2026-03-30T18:24:21.331Z" }, - { url = "https://files.pythonhosted.org/packages/ef/88/6d8bda83224bac52806f09d3e211d8886b81500384948a753c4b24c11f35/pylance-4.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e433d6bddd66de99c58e472bc3e8ed1590c7ff4ff7948479254c1c2111a601a8", size = 60305704, upload-time = "2026-03-30T18:35:23.425Z" }, - { url = "https://files.pythonhosted.org/packages/52/f3/8d8369c756c4173ea070f6964213f9b622ac278bd04a058c48d00a549177/pylance-4.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f36dce83c11cd5d598cb0f64bad7c51fc21ed43df868b9029184a385c6bf4d84", size = 56771233, upload-time = "2026-03-30T18:25:40.012Z" }, - { url = "https://files.pythonhosted.org/packages/66/e6/53e0713440685b1c76e20d72755eca2e531cc182ea9a612b4cb6a15abe50/pylance-4.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9ca03f97f22e0b75f06378c4006d587aba26408122fd066f0e43e2b7a019c67e", size = 60260813, upload-time = "2026-03-30T18:36:07.976Z" }, - { url = "https://files.pythonhosted.org/packages/1e/04/5f22b88c8965d3982f68f67bfe24d756e7b788e10392d2bec6f97f5eb0e3/pylance-4.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:9261c32d3bd6aaab33025a45b20c2f2554804e1bc2a1ec2bfcb06f0c9d2e59b9", size = 65137830, upload-time = "2026-03-30T18:37:33.048Z" }, + { url = "https://pypi.fury.io/lance-format/-/ver_2i0Mk7/pylance-6.0.0b1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:e9d6eb1cce9d04fd407dfdf977acc1825587a660ade863d414daabd3e8524c55" }, + { url = "https://pypi.fury.io/lance-format/-/ver_rPGwG/pylance-6.0.0b1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:039a5f1fc9522500ea602128f6f9dea83bfb8f0451c664d1163ec4ba1e9fa754" }, + { url = "https://pypi.fury.io/lance-format/-/ver_1zvKTu/pylance-6.0.0b1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c05e89e34be0f97b66b0c4f4690917562794e3037346caf443b3568a9a4bbde1" }, + { url = "https://pypi.fury.io/lance-format/-/ver_ZOnel/pylance-6.0.0b1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:a70e8c967263550a02ee8c937a81a128114dc33ba5894980156a3632f3d53a90" }, + { url = "https://pypi.fury.io/lance-format/-/ver_26XaWT/pylance-6.0.0b1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9474c78a06230162c87a789c82c8dd2bba6c2c543d3b08df52a5462f86b9c608" }, + { url = "https://pypi.fury.io/lance-format/-/ver_LQGLW/pylance-6.0.0b1-cp39-abi3-win_amd64.whl", hash = "sha256:4b59e050658ffc2a0e07bbd9fd5ba9d351949ee77f2c1411b90e7c84c9d18080" }, ] [[package]]