From a367cc06db50f14328166f213a37c4b639c751c6 Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 12 May 2026 23:02:42 +0800 Subject: [PATCH 01/21] feat: upgrade lance to 4.0.1 and lance-namespace to 0.7.5 - Bump lance-core from 2.0.1 to 4.0.1 in gradle/libs.versions.toml - Bump lance-namespace-core from 0.4.5 to 0.7.5 in gradle/libs.versions.toml API changes addressed: - Transaction -> SourcedTransaction in LancePartitionStatisticStorage (Transaction.commit() removed; newTransactionBuilder() now returns SourcedTransaction.Builder) - CreateEmptyTableRequest/Response -> DeclareTableRequest/Response (createEmptyTable() removed from LanceNamespace interface) - TableApi.createTable() gains two new String params (properties, storageOptions); pass null for both since Gravitino reads from headers - AlterColumnsEntry.rename is now JsonNullable; register JsonNullableModule via new JsonNullableMapperProvider for correct Jackson serialization in the lance-rest-server Testability fix: - Introduce DatasetHolder wrapper in LancePartitionStatisticStorage so that the cached Dataset is mockable in tests; lance 4.0.1 Dataset has many JNI native methods that prevent Mockito's inline mock maker from instrumenting the class directly Closes #10983 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../LancePartitionStatisticStorage.java | 60 ++++++++++++++----- .../TestLancePartitionStatisticStorage.java | 14 ++--- gradle/libs.versions.toml | 4 +- .../common/ops/LanceTableOperations.java | 4 +- .../GravitinoLanceTableOperations.java | 5 +- .../rest/JsonNullableMapperProvider.java | 43 +++++++++++++ .../service/rest/LanceTableOperations.java | 3 +- .../integration/test/LanceRESTServiceIT.java | 37 ++++++------ .../rest/TestLanceNamespaceOperations.java | 9 ++- 9 files changed, 124 insertions(+), 55 deletions(-) create mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/JsonNullableMapperProvider.java diff --git 
a/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java b/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java index 724e602372e..73e24e3a4f8 100644 --- a/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java +++ b/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java @@ -28,6 +28,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.io.Closeable; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -71,7 +72,7 @@ import org.lance.Fragment; import org.lance.FragmentMetadata; import org.lance.ReadOptions; -import org.lance.Transaction; +import org.lance.SourcedTransaction; import org.lance.WriteParams; import org.lance.ipc.LanceScanner; import org.lance.ipc.ScanOptions; @@ -108,7 +109,7 @@ public class LancePartitionStatisticStorage implements PartitionStatisticStorage private static final String STATISTIC_VALUE_COLUMN = "statistic_value"; private static final String AUDIT_INFO_COLUMN = "audit_info"; - private final Optional> datasetCache; + private final Optional> datasetCache; private static final Schema SCHEMA = new Schema( @@ -204,10 +205,14 @@ public LancePartitionStatisticStorage(Map properties) { .maximumSize(datasetCacheSize) .scheduler(Scheduler.forScheduledExecutorService(this.scheduler)) .evictionListener( - (RemovalListener) + (RemovalListener) (key, value, cause) -> { if (value != null) { - value.close(); + try { + value.close(); + } catch (IOException e) { + LOG.warn("Failed to close evicted Lance dataset", e); + } } }) .build()); @@ -297,7 +302,7 @@ private void appendStatisticsImpl(Long tableId, List datasetRead = getDataset(tableId); List fragmentMetas = createFragmentMetadata(tableId, updates); - Transaction appendTxn = + SourcedTransaction appendTxn = 
datasetRead .newTransactionBuilder() .operation(Append.builder().fragments(fragmentMetas).build()) @@ -306,7 +311,7 @@ private void appendStatisticsImpl(Long tableId, List newDataset = appendTxn.commit(); Dataset finalNewDataset = newDataset; - datasetCache.ifPresent(cache -> cache.put(tableId, finalNewDataset)); + datasetCache.ifPresent(cache -> cache.put(tableId, new DatasetHolder(finalNewDataset))); } finally { if (!datasetCache.isPresent()) { if (datasetRead != null) { @@ -355,10 +360,10 @@ private void dropStatisticsImpl(Long tableId, List drop @Override public void close() throws IOException { if (datasetCache.isPresent()) { - Cache cache = datasetCache.get(); - for (Dataset dataset : cache.asMap().values()) { + Cache cache = datasetCache.get(); + for (DatasetHolder holder : cache.asMap().values()) { try { - dataset.close(); + holder.close(); } catch (Exception e) { LOG.warn("Failed to close cached Lance dataset", e); } @@ -376,7 +381,7 @@ public void close() throws IOException { } @VisibleForTesting - Cache getDatasetCache() { + Cache getDatasetCache() { return datasetCache.orElse(null); } @@ -563,20 +568,20 @@ private Dataset getDataset(Long tableId) { return datasetCache .map( cache -> { - Dataset cachedDataset = + DatasetHolder holder = cache.get( tableId, id -> { newlyCreated.set(true); - return open(getFilePath(id)); + return new DatasetHolder(open(getFilePath(id))); }); // Ensure dataset uses the latest version if (!newlyCreated.get()) { - cachedDataset.checkoutLatest(); + holder.checkoutLatest(); } - return cachedDataset; + return holder.getDataset(); }) .orElse(open(getFilePath(tableId))); } @@ -609,4 +614,31 @@ private Dataset open(String fileName) { private ThreadFactory newDaemonThreadFactory(String name) { return new ThreadFactoryBuilder().setDaemon(true).setNameFormat(name + "-%d").build(); } + + /** + * Package-private wrapper around a {@link Dataset} stored in the dataset cache. 
Exists solely to + * allow test code to mock this holder (and thus verify close-ordering) without requiring Mockito + * to instrument the JNI-heavy {@link Dataset} class itself. + */ + static class DatasetHolder implements Closeable { + + private final Dataset dataset; + + DatasetHolder(Dataset dataset) { + this.dataset = dataset; + } + + Dataset getDataset() { + return dataset; + } + + void checkoutLatest() { + dataset.checkoutLatest(); + } + + @Override + public void close() throws IOException { + dataset.close(); + } + } } diff --git a/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java b/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java index cc298e9b03f..5a31847e3e4 100644 --- a/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java +++ b/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java @@ -47,9 +47,9 @@ import org.apache.gravitino.stats.PartitionStatisticsUpdate; import org.apache.gravitino.stats.StatisticValue; import org.apache.gravitino.stats.StatisticValues; +import org.apache.gravitino.stats.storage.LancePartitionStatisticStorage.DatasetHolder; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import org.lance.Dataset; import org.mockito.InOrder; public class TestLancePartitionStatisticStorage { @@ -623,10 +623,10 @@ public void testCloseReleasesCachedDatasetBeforeAllocator() throws Exception { BufferAllocator allocator = spy(new RootAllocator(Long.MAX_VALUE)); FieldUtils.writeField(storage, "allocator", allocator, true); - Cache datasetCache = storage.getDatasetCache(); + Cache datasetCache = storage.getDatasetCache(); Assertions.assertNotNull(datasetCache); - Dataset dataset = mock(Dataset.class); + DatasetHolder holder = mock(DatasetHolder.class); VarCharVector buffer = new VarCharVector("test", allocator); buffer.allocateNew(1024); @@ -635,17 +635,17 @@ 
public void testCloseReleasesCachedDatasetBeforeAllocator() throws Exception { buffer.close(); return null; }) - .when(dataset) + .when(holder) .close(); - datasetCache.put(1L, dataset); + datasetCache.put(1L, holder); storage.close(); Assertions.assertEquals(0, allocator.getAllocatedMemory()); - InOrder inOrder = inOrder(dataset, allocator); - inOrder.verify(dataset).close(); + InOrder inOrder = inOrder(holder, allocator); + inOrder.verify(holder).close(); inOrder.verify(allocator).close(); } finally { diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index cfa98b7416d..49f9ffcb1b1 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -28,8 +28,8 @@ guava = "32.1.3-jre" lombok = "1.18.20" slf4j = "2.0.16" log4j = "2.25.4" -lance = "2.0.1" -lance-namespace = "0.4.5" +lance = "4.0.1" +lance-namespace = "0.7.5" delta-kernel = "3.3.0" jetty = "9.4.58.v20250814" jersey = "2.41" diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java index dae90499580..1ba87a21365 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java @@ -20,7 +20,6 @@ import java.util.Map; import java.util.Optional; -import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateTableResponse; import org.lance.namespace.model.DeclareTableResponse; import org.lance.namespace.model.DeregisterTableResponse; @@ -83,8 +82,7 @@ DeclareTableResponse declareTable( * @deprecated Use {@link #declareTable} instead. 
*/ @Deprecated - @SuppressWarnings("deprecation") - CreateEmptyTableResponse createEmptyTable( + DeclareTableResponse createEmptyTable( String tableId, String delimiter, String tableLocation, Map tableProperties); /** diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java index a1ec2ee9bb7..d3eb52fc9fe 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java @@ -53,7 +53,6 @@ import org.lance.namespace.errors.TableNotFoundException; import org.lance.namespace.model.AlterTableAlterColumnsRequest; import org.lance.namespace.model.AlterTableDropColumnsRequest; -import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateTableResponse; import org.lance.namespace.model.DeclareTableResponse; import org.lance.namespace.model.DeregisterTableResponse; @@ -211,7 +210,7 @@ public DeclareTableResponse declareTable( @Override @SuppressWarnings("deprecation") - public CreateEmptyTableResponse createEmptyTable( + public DeclareTableResponse createEmptyTable( String tableId, String delimiter, String tableLocation, Map tableProperties) { // Empty table creation only supports CREATE mode (not EXIST_OK or OVERWRITE). 
ImmutableMap props = @@ -223,7 +222,7 @@ public CreateEmptyTableResponse createEmptyTable( CreateTableResponse response = createTable(tableId, "create", delimiter, tableLocation, props, null); - CreateEmptyTableResponse emptyTableResponse = new CreateEmptyTableResponse(); + DeclareTableResponse emptyTableResponse = new DeclareTableResponse(); emptyTableResponse.setLocation(response.getLocation()); emptyTableResponse.setStorageOptions(response.getStorageOptions()); return emptyTableResponse; diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/JsonNullableMapperProvider.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/JsonNullableMapperProvider.java new file mode 100644 index 00000000000..6ae029211f1 --- /dev/null +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/JsonNullableMapperProvider.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.lance.service.rest; + +import com.fasterxml.jackson.databind.ObjectMapper; +import javax.ws.rs.ext.ContextResolver; +import javax.ws.rs.ext.Provider; +import org.openapitools.jackson.nullable.JsonNullableModule; + +/** + * JAX-RS {@link ContextResolver} that provides an {@link ObjectMapper} with the {@link + * JsonNullableModule} registered. + * + * <p>
lance-namespace 0.7.5 models use {@code JsonNullable} for optional fields, which requires + * this module for correct Jackson serialization/deserialization. + */ +@Provider +public class JsonNullableMapperProvider implements ContextResolver { + + private static final ObjectMapper MAPPER = + new ObjectMapper().registerModule(new JsonNullableModule()); + + @Override + public ObjectMapper getContext(Class type) { + return MAPPER; + } +} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java index 667eb1b4986..1e2711098c3 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java @@ -54,7 +54,6 @@ import org.lance.namespace.model.AlterTableAlterColumnsResponse; import org.lance.namespace.model.AlterTableDropColumnsRequest; import org.lance.namespace.model.AlterTableDropColumnsResponse; -import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateTableResponse; import org.lance.namespace.model.DeclareTableRequest; import org.lance.namespace.model.DeclareTableResponse; @@ -164,7 +163,7 @@ public Response createEmptyTable( // conflict. 
props.putAll(headerProps); - CreateEmptyTableResponse response = + DeclareTableResponse response = lanceNamespace.asTableOps().createEmptyTable(tableId, delimiter, tableLocation, props); return Response.ok(response).build(); } catch (Exception e) { diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java index 3c0661971db..b71f444d654 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java @@ -64,8 +64,6 @@ import org.lance.namespace.model.AlterTableAlterColumnsResponse; import org.lance.namespace.model.AlterTableDropColumnsRequest; import org.lance.namespace.model.AlterTableDropColumnsResponse; -import org.lance.namespace.model.CreateEmptyTableRequest; -import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; import org.lance.namespace.model.CreateTableResponse; @@ -433,12 +431,12 @@ void testCreateEmptyTable() throws ApiException { catalog = createCatalog(CATALOG_NAME); createSchema(); - CreateEmptyTableRequest request = new CreateEmptyTableRequest(); + DeclareTableRequest request = new DeclareTableRequest(); String location = tempDir + "/" + "empty_table/"; request.setLocation(location); request.setId(List.of(CATALOG_NAME, SCHEMA_NAME, "empty_table")); - CreateEmptyTableResponse response = ns.createEmptyTable(request); + DeclareTableResponse response = ns.declareTable(request); Assertions.assertNotNull(response); Assertions.assertEquals(location, response.getLocation()); @@ -457,18 +455,18 @@ void testCreateEmptyTable() throws ApiException { Assertions.assertThrows( RuntimeException.class, () -> { - 
ns.createEmptyTable(request); + ns.declareTable(request); }); Assertions.assertTrue(exception.getMessage().contains("\"code\":5")); // Create an empty table with non-existent location should succeed // since storage is not touched - CreateEmptyTableRequest wrongLocationRequest = new CreateEmptyTableRequest(); + DeclareTableRequest wrongLocationRequest = new DeclareTableRequest(); wrongLocationRequest.setId(List.of(CATALOG_NAME, SCHEMA_NAME, "another_table")); String another_location = tempDir + "/" + "another_location/"; Assertions.assertFalse(new File(another_location).exists()); wrongLocationRequest.setLocation(another_location); - response = ns.createEmptyTable(wrongLocationRequest); + response = ns.declareTable(wrongLocationRequest); Assertions.assertNotNull(response); Assertions.assertEquals(another_location, response.getLocation()); // Will not touch storage, so the path should not be created. @@ -478,8 +476,8 @@ void testCreateEmptyTable() throws ApiException { String correctedLocation = tempDir + "/" + "wrong_location_table/"; wrongLocationRequest.setLocation(correctedLocation); wrongLocationRequest.setId(List.of(CATALOG_NAME, SCHEMA_NAME, "wrong_location_table")); - CreateEmptyTableResponse wrongLocationResponse = - Assertions.assertDoesNotThrow(() -> ns.createEmptyTable(wrongLocationRequest)); + DeclareTableResponse wrongLocationResponse = + Assertions.assertDoesNotThrow(() -> ns.declareTable(wrongLocationRequest)); Assertions.assertNotNull(wrongLocationResponse); Assertions.assertEquals(correctedLocation, wrongLocationResponse.getLocation()); } @@ -855,13 +853,13 @@ void testDeregisterNonExistingTable() { Assertions.assertTrue(exception.getMessage().contains("\"code\":4")); Assertions.assertTrue(exception.getMessage().contains("Table not found")); // Try to create a table and then deregister table - CreateEmptyTableRequest createEmptyTableRequest = new CreateEmptyTableRequest(); + DeclareTableRequest createEmptyTableRequest = new DeclareTableRequest(); 
String location = tempDir + "/" + "to_be_deregistered_table/"; ids = List.of(CATALOG_NAME, SCHEMA_NAME, "to_be_deregistered_table"); createEmptyTableRequest.setLocation(location); createEmptyTableRequest.setId(ids); - CreateEmptyTableResponse response = - Assertions.assertDoesNotThrow(() -> ns.createEmptyTable(createEmptyTableRequest)); + DeclareTableResponse response = + Assertions.assertDoesNotThrow(() -> ns.declareTable(createEmptyTableRequest)); Assertions.assertNotNull(response); Assertions.assertEquals(location, response.getLocation()); @@ -899,12 +897,12 @@ void testTableExists() { createSchema(); List ids = List.of(CATALOG_NAME, SCHEMA_NAME, "table_exists"); - CreateEmptyTableRequest createEmptyTableRequest = new CreateEmptyTableRequest(); + DeclareTableRequest createEmptyTableRequest = new DeclareTableRequest(); String location = tempDir + "/" + "table_exists/"; createEmptyTableRequest.setLocation(location); createEmptyTableRequest.setId(ids); - CreateEmptyTableResponse response = - Assertions.assertDoesNotThrow(() -> ns.createEmptyTable(createEmptyTableRequest)); + DeclareTableResponse response = + Assertions.assertDoesNotThrow(() -> ns.declareTable(createEmptyTableRequest)); Assertions.assertNotNull(response); Assertions.assertEquals(location, response.getLocation()); @@ -928,12 +926,12 @@ void testDropTable() { createSchema(); List ids = List.of(CATALOG_NAME, SCHEMA_NAME, "table_to_drop"); - CreateEmptyTableRequest createEmptyTableRequest = new CreateEmptyTableRequest(); + DeclareTableRequest createEmptyTableRequest = new DeclareTableRequest(); String location = tempDir + "/" + "table_to_drop/"; createEmptyTableRequest.setLocation(location); createEmptyTableRequest.setId(ids); - CreateEmptyTableResponse response = - Assertions.assertDoesNotThrow(() -> ns.createEmptyTable(createEmptyTableRequest)); + DeclareTableResponse response = + Assertions.assertDoesNotThrow(() -> ns.declareTable(createEmptyTableRequest)); Assertions.assertNotNull(response); 
Assertions.assertEquals(location, response.getLocation()); @@ -1022,7 +1020,8 @@ private CreateTableResponse createTable( try { return createTableApi() - .createTable(String.join(DELIMITER, ids), body, DELIMITER, mode, additionalHeaders); + .createTable( + String.join(DELIMITER, ids), body, DELIMITER, mode, null, null, additionalHeaders); } catch (ApiException e) { throw toLanceNamespaceException(e); } diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java index 2d8000fc1b8..c7c4e423ec7 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java @@ -60,8 +60,6 @@ import org.lance.namespace.model.AlterTableAlterColumnsResponse; import org.lance.namespace.model.AlterTableDropColumnsRequest; import org.lance.namespace.model.AlterTableDropColumnsResponse; -import org.lance.namespace.model.CreateEmptyTableRequest; -import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; import org.lance.namespace.model.CreateTableResponse; @@ -115,6 +113,7 @@ protected Application configure() { resourceConfig.property(CommonProperties.FEATURE_AUTO_DISCOVERY_DISABLE, true); resourceConfig.property(CommonProperties.MOXY_JSON_FEATURE_DISABLE, true); ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new org.openapitools.jackson.nullable.JsonNullableModule()); JacksonJaxbJsonProvider provider = new JacksonJaxbJsonProvider(); provider.setMapper(mapper); resourceConfig.register(provider); @@ -449,12 +448,12 @@ void testCreateEmptyTable() { String delimiter = "."; // Test normal - 
CreateEmptyTableResponse createTableResponse = new CreateEmptyTableResponse(); + DeclareTableResponse createTableResponse = new DeclareTableResponse(); createTableResponse.setLocation("/path/to/table"); createTableResponse.setStorageOptions(ImmutableMap.of("key", "value")); when(tableOps.createEmptyTable(any(), any(), any(), any())).thenReturn(createTableResponse); - CreateEmptyTableRequest tableRequest = new CreateEmptyTableRequest(); + DeclareTableRequest tableRequest = new DeclareTableRequest(); tableRequest.setLocation("/path/to/table"); Response resp = @@ -465,7 +464,7 @@ void testCreateEmptyTable() { Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp.getMediaType()); - CreateEmptyTableResponse response = resp.readEntity(CreateEmptyTableResponse.class); + DeclareTableResponse response = resp.readEntity(DeclareTableResponse.class); Assertions.assertEquals(createTableResponse.getLocation(), response.getLocation()); Assertions.assertEquals(createTableResponse.getStorageOptions(), response.getStorageOptions()); From e074cb138dfcf3ec8ddcfdc5c68ada655954e660 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 May 2026 09:24:41 +0800 Subject: [PATCH 02/21] fix(lance): fix CatalogGenericCatalogLanceIT for lance 4.0.1 API changes - Transaction -> SourcedTransaction (variable type) - writeParams(Map) -> transactionProperties(Map) on SourcedTransaction.Builder - dataset.commitTransaction(trans) -> trans.commit() Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../integration/test/CatalogGenericCatalogLanceIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java 
b/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java index 4a39f18ccff..f5a98885030 100644 --- a/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java +++ b/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java @@ -82,7 +82,7 @@ import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; -import org.lance.Transaction; +import org.lance.SourcedTransaction; import org.lance.WriteParams; import org.lance.ipc.LanceScanner; import org.lance.ipc.ScanOptions; @@ -374,7 +374,7 @@ void testLanceTableFormat() { } // Now try to write some data to the dataset - Transaction trans = + SourcedTransaction trans = dataset .newTransactionBuilder() .operation( @@ -388,10 +388,10 @@ void testLanceTableFormat() { new LanceDataValue(3, 300L, "third")), lanceSchema)) .build()) - .writeParams(ImmutableMap.of()) + .transactionProperties(ImmutableMap.of()) .build(); - Dataset newDataset = dataset.commitTransaction(trans); + Dataset newDataset = trans.commit(); try (LanceScanner scanner = newDataset.newScan( new ScanOptions.Builder() From 76857ee17d096b210a997abd2eb613aaa5e2f696 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 May 2026 19:55:43 +0800 Subject: [PATCH 03/21] fix --- clients/client-python/build.gradle.kts | 46 +++ .../scripts/run_lance_ray_matrix.py | 278 ++++++++++++++++++ .../tests/integration/test_lance_ray.py | 221 ++++++++++++++ docs/lance-rest-integration.md | 34 ++- lance/lance-rest-server/build.gradle.kts | 121 +++++++- 5 files changed, 680 insertions(+), 20 deletions(-) create mode 100644 clients/client-python/scripts/run_lance_ray_matrix.py create mode 100644 clients/client-python/tests/integration/test_lance_ray.py diff --git a/clients/client-python/build.gradle.kts 
b/clients/client-python/build.gradle.kts index 8c80ae04d2e..da2007a7c3d 100644 --- a/clients/client-python/build.gradle.kts +++ b/clients/client-python/build.gradle.kts @@ -239,6 +239,52 @@ tasks { finalizedBy(unitCoverageReport) } + // Run tests/integration/test_lance_ray.py against multiple lance-ray + // versions. Each version is exercised inside its own venv under + // build/lance-ray-matrix/.venv-/ (cached across runs). + // Override the matrix with `-PlanceRayVersions=0.4.2,0.4.1,0.4.0`. + register("lanceRayMatrixTest") { + group = "verification" + description = + "Run tests/integration/test_lance_ray.py against multiple lance-ray " + + "versions. Override with -PlanceRayVersions= (default: " + + "tracks docs/lance-rest-integration.md Compatibility Matrix)." + + val versions = project.findProperty("lanceRayVersions") as? String + val keepGoing = project.hasProperty("lanceRayKeepGoing") + val script = projectDir.resolve("scripts/run_lance_ray_matrix.py") + val gravitinoHome = file("${project.rootDir}/distribution/package") + + doFirst { + gravitinoServer("start") + } + doLast { + try { + val args = mutableListOf( + "python3", + script.absolutePath, + "--gravitino-home", + gravitinoHome.absolutePath, + ) + if (!versions.isNullOrBlank()) { + args += listOf("--versions", versions) + } + if (keepGoing) { + args += "--keep-going" + } + val proc = ProcessBuilder(args) + .inheritIO() + .start() + val exit = proc.waitFor() + if (exit != 0) { + throw GradleException("lance-ray matrix failed with exit code $exit") + } + } finally { + gravitinoServer("stop") + } + } + } + register("test", VenvTask::class) { val skipUTs = project.hasProperty("skipTests") val skipITs = project.hasProperty("skipITs") diff --git a/clients/client-python/scripts/run_lance_ray_matrix.py b/clients/client-python/scripts/run_lance_ray_matrix.py new file mode 100644 index 00000000000..78e25695d6a --- /dev/null +++ b/clients/client-python/scripts/run_lance_ray_matrix.py @@ -0,0 +1,278 @@ 
+#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Run ``tests/integration/test_lance_ray.py`` against multiple lance-ray +versions to validate the supported range advertised in the Compatibility +Matrix (``docs/lance-rest-integration.md``). + +For each version we provision a dedicated venv under +``clients/client-python/build/lance-ray-matrix/.venv-<version>/`` and install +``ray``, ``lance-ray==<version>``, ``lance-namespace``, ``requests``, plus the +in-tree ``apache-gravitino`` distribution (editable). The unittest itself is +launched per-version with ``python -m unittest -v +tests.integration.test_lance_ray``; results are collected into a pass/fail +table at the end. + +The caller is responsible for starting the Gravitino server (with the +auxiliary lance-rest service enabled). The Gradle wrapper task +``:clients:client-python:lanceRayMatrixTest`` handles that. 
For ad-hoc local +use:: + + distribution/package/bin/gravitino.sh start + python3 clients/client-python/scripts/run_lance_ray_matrix.py \ + --versions 0.4.2,0.4.1 \ + --gravitino-home distribution/package + distribution/package/bin/gravitino.sh stop + +Each test class will append its own metalake binding to ``gravitino.conf`` and +restart the server itself, so back-to-back runs across versions are safe. +""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import List + +# Default version set tracks what the Compatibility Matrix in +# docs/lance-rest-integration.md claims to support. Keep these in sync. +DEFAULT_VERSIONS = ["0.4.2", "0.4.1", "0.4.0", "0.3.0"] + + +REPO_ROOT = Path(__file__).resolve().parents[3] +PYTHON_CLIENT_DIR = REPO_ROOT / "clients" / "client-python" +DEFAULT_MATRIX_DIR = PYTHON_CLIENT_DIR / "build" / "lance-ray-matrix" +DEFAULT_GRAVITINO_HOME = REPO_ROOT / "distribution" / "package" + + +@dataclass +class VersionResult: + version: str + status: str # "ok", "fail", "setup-error" + details: str + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument( + "--versions", + default=",".join(DEFAULT_VERSIONS), + help="Comma-separated list of lance-ray versions to test. " + f"Default: {','.join(DEFAULT_VERSIONS)}", + ) + p.add_argument( + "--python", + default=sys.executable, + help="Path to the host python interpreter used to bootstrap each " + "version's venv. Default: %(default)s", + ) + p.add_argument( + "--matrix-dir", + default=str(DEFAULT_MATRIX_DIR), + help="Directory under which per-version venvs are created and " + "cached across runs. Default: %(default)s", + ) + p.add_argument( + "--gravitino-home", + default=str(DEFAULT_GRAVITINO_HOME), + help="Path to the built Gravitino distribution package. 
The " + "lance-rest aux service must be enabled there. Default: %(default)s", + ) + p.add_argument( + "--ray-spec", + default="ray", + help="Pip spec for ray. Pin it (e.g. 'ray==2.55.1') if you need " + "reproducibility. Default: pip picks a compatible version per " + "lance-ray.", + ) + p.add_argument( + "--lance-namespace-spec", + default="lance-namespace", + help="Pip spec for lance-namespace. Default: latest.", + ) + p.add_argument( + "--keep-going", + action="store_true", + help="Continue to the next version after a failure instead of " + "stopping on the first failed run.", + ) + return p.parse_args() + + +def run(cmd: List[str], **kwargs) -> subprocess.CompletedProcess: + print(f"[matrix] $ {' '.join(cmd)}") + return subprocess.run(cmd, check=False, **kwargs) + + +def ensure_venv(python: str, venv_dir: Path) -> Path: + """Create the venv if it doesn't already exist. Returns the venv python path.""" + venv_python = venv_dir / "bin" / "python" + if not venv_python.exists(): + venv_dir.parent.mkdir(parents=True, exist_ok=True) + rc = run([python, "-m", "venv", str(venv_dir)]).returncode + if rc != 0: + raise RuntimeError(f"Failed to create venv at {venv_dir}") + return venv_python + + +def install_deps( + venv_python: Path, + version: str, + ray_spec: str, + lance_namespace_spec: str, +) -> None: + rc = run( + [ + str(venv_python), + "-m", + "pip", + "install", + "--upgrade", + "pip", + "wheel", + ] + ).returncode + if rc != 0: + raise RuntimeError("pip upgrade failed in venv") + + rc = run( + [ + str(venv_python), + "-m", + "pip", + "install", + ray_spec, + f"lance-ray=={version}", + lance_namespace_spec, + "requests", + ] + ).returncode + if rc != 0: + raise RuntimeError(f"Failed to install lance-ray=={version} deps") + + rc = run( + [ + str(venv_python), + "-m", + "pip", + "install", + "-e", + str(PYTHON_CLIENT_DIR), + ] + ).returncode + if rc != 0: + raise RuntimeError("Failed to install apache-gravitino in editable mode") + + +def 
generate_version_ini(venv_python: Path) -> None: + # The python client reads gravitino/version.ini at runtime. It is + # produced by scripts/generate_version.py and is gitignored, so we + # regenerate it here to make the matrix runnable on fresh checkouts. + script = PYTHON_CLIENT_DIR / "scripts" / "generate_version.py" + rc = run( + [str(venv_python), str(script)], + cwd=str(PYTHON_CLIENT_DIR), + ).returncode + if rc != 0: + raise RuntimeError("Failed to generate version.ini for python client") + + +def run_unittest(venv_python: Path, gravitino_home: Path) -> int: + env = os.environ.copy() + env["PYTHONPATH"] = str(PYTHON_CLIENT_DIR) + env["GRAVITINO_HOME"] = str(gravitino_home) + env["START_EXTERNAL_GRAVITINO"] = "true" + cmd = [ + str(venv_python), + "-m", + "unittest", + "-v", + "tests.integration.test_lance_ray", + ] + print(f"[matrix] $ PYTHONPATH=... GRAVITINO_HOME=... {' '.join(cmd)}") + return subprocess.run(cmd, cwd=str(PYTHON_CLIENT_DIR), env=env).returncode + + +def main() -> int: + args = parse_args() + versions = [v.strip() for v in args.versions.split(",") if v.strip()] + if not versions: + print("--versions must contain at least one entry", file=sys.stderr) + return 2 + + matrix_dir = Path(args.matrix_dir).resolve() + gravitino_home = Path(args.gravitino_home).resolve() + if not (gravitino_home / "bin" / "gravitino.sh").exists(): + print( + f"GRAVITINO_HOME={gravitino_home} does not look like a " + "Gravitino distribution package (missing bin/gravitino.sh). 
" + "Run `./gradlew compileDistribution -PskipWeb=true -x test` first.", + file=sys.stderr, + ) + return 2 + + results: List[VersionResult] = [] + for version in versions: + print(f"\n========== lance-ray=={version} ==========") + venv_dir = matrix_dir / f".venv-{version}" + try: + venv_python = ensure_venv(args.python, venv_dir) + install_deps( + venv_python, + version, + args.ray_spec, + args.lance_namespace_spec, + ) + generate_version_ini(venv_python) + except Exception as e: # pylint: disable=broad-exception-caught + print(f"[matrix] {version}: setup failed: {e}", file=sys.stderr) + results.append(VersionResult(version, "setup-error", str(e))) + if not args.keep_going: + break + continue + + rc = run_unittest(venv_python, gravitino_home) + if rc == 0: + print(f"[matrix] {version}: PASS") + results.append(VersionResult(version, "ok", "tests passed")) + else: + print(f"[matrix] {version}: FAIL (exit={rc})") + results.append( + VersionResult(version, "fail", f"unittest exit {rc}") + ) + if not args.keep_going: + break + + print("\n========== summary ==========") + width = max(len(r.version) for r in results) if results else 0 + for r in results: + print(f" lance-ray=={r.version.ljust(width)} {r.status:11s} {r.details}") + + any_fail = any(r.status != "ok" for r in results) + return 1 if any_fail else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py new file mode 100644 index 00000000000..9e7833fdeb2 --- /dev/null +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -0,0 +1,221 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import logging +import os +import shutil +import tempfile +import time +import unittest +from random import randint +from typing import Optional + +import requests + +from gravitino import ( + Catalog, + GravitinoAdminClient, + GravitinoClient, +) +from tests.integration.integration_test_env import IntegrationTestEnv + +logger = logging.getLogger(__name__) + +LANCE_REST_PORT = 9101 +LANCE_REST_BASE_URL = f"http://localhost:{LANCE_REST_PORT}/lance" + +# The Lance REST server runs as an auxiliary service inside the main +# Gravitino process (gravitino.auxService.names = ...,lance-rest), so its +# bind metalake is configured in the *main* gravitino.conf rather than the +# standalone lance-rest conf file. +MAIN_CONF_FILE = "conf/gravitino.conf" +LANCE_REST_METALAKE_KEY = "gravitino.lance-rest.gravitino-metalake" + + +def _missing_lance_ray_deps() -> Optional[str]: + missing = [] + for mod in ("ray", "lance_ray", "lance_namespace"): + try: + __import__(mod) + except ImportError: + missing.append(mod) + return ", ".join(missing) if missing else None + + +@unittest.skipIf( + _missing_lance_ray_deps() is not None, + f"lance-ray test deps not installed: {_missing_lance_ray_deps()}. " + "Install with: pip install ray lance-ray lance-namespace", +) +class TestLanceRayIntegration(IntegrationTestEnv): + """End-to-end test for the lance-ray Python client against a Gravitino-backed + Lance REST namespace. 
Mirrors the ``ray.data`` -> ``write_lance`` -> + ``read_lance`` flow from the upstream lance-ray docs. + """ + + METALAKE_NAME: str = "lance_ray_metalake_" + str(randint(1, 100000)) + CATALOG_NAME: str = "lance_catalog" + SCHEMA_NAME: str = "schema" + TABLE_NAME: str = "lance_ray_tbl_" + str(randint(1, 100000)) + + gravitino_admin_client: Optional[GravitinoAdminClient] = None + gravitino_client: Optional[GravitinoClient] = None + temp_dir: Optional[str] = None + main_conf_path: Optional[str] = None + + @classmethod + def setUpClass(cls): + super().setUpClass() + + gravitino_home = os.environ.get("GRAVITINO_HOME") + if not gravitino_home: + raise RuntimeError( + "GRAVITINO_HOME must be set to the distribution package directory" + ) + cls.main_conf_path = os.path.join(gravitino_home, MAIN_CONF_FILE) + + # Bind the lance-rest aux service to our test metalake and restart + # so the change takes effect. ``IntegrationTestEnv.restart_server`` + # works whether ``START_EXTERNAL_GRAVITINO`` was used or not. 
+ cls._append_conf( + {LANCE_REST_METALAKE_KEY: cls.METALAKE_NAME}, cls.main_conf_path + ) + cls.restart_server() + if not cls._wait_for_lance_rest_ready(): + raise RuntimeError( + "Lance REST aux service did not become ready in time at " + + LANCE_REST_BASE_URL + ) + + cls.gravitino_admin_client = GravitinoAdminClient("http://localhost:8090") + cls.gravitino_admin_client.create_metalake( + cls.METALAKE_NAME, + comment="lance-ray IT metalake", + properties={}, + ) + cls.gravitino_client = GravitinoClient( + uri="http://localhost:8090", metalake_name=cls.METALAKE_NAME + ) + cls.temp_dir = tempfile.mkdtemp(prefix="lance_ray_it_") + cls.gravitino_client.create_catalog( + name=cls.CATALOG_NAME, + catalog_type=Catalog.Type.RELATIONAL, + provider="lakehouse-generic", + comment="lance-ray IT catalog", + properties={"location": cls.temp_dir}, + ) + catalog = cls.gravitino_client.load_catalog(cls.CATALOG_NAME) + catalog.as_schemas().create_schema( + schema_name=cls.SCHEMA_NAME, + comment="lance-ray IT schema", + properties={}, + ) + + @classmethod + def tearDownClass(cls): + failures = [] + + try: + if cls.gravitino_client is not None: + cls.gravitino_client.drop_catalog(name=cls.CATALOG_NAME, force=True) + except Exception as e: # pylint: disable=broad-exception-caught + failures.append(("drop catalog", e)) + + try: + if cls.gravitino_admin_client is not None: + cls.gravitino_admin_client.drop_metalake( + name=cls.METALAKE_NAME, force=True + ) + except Exception as e: # pylint: disable=broad-exception-caught + failures.append(("drop metalake", e)) + + try: + if cls.main_conf_path is not None: + cls._reset_conf( + {LANCE_REST_METALAKE_KEY: cls.METALAKE_NAME}, + cls.main_conf_path, + ) + except Exception as e: # pylint: disable=broad-exception-caught + failures.append(("reset lance-rest aux conf", e)) + + try: + if cls.temp_dir and os.path.exists(cls.temp_dir): + shutil.rmtree(cls.temp_dir, ignore_errors=True) + except Exception as e: # pylint: disable=broad-exception-caught 
+ failures.append(("remove temp dir", e)) + + for step, err in failures: + logger.warning("Cleanup step %s failed: %s", step, err) + + super().tearDownClass() + + @staticmethod + def _wait_for_lance_rest_ready(timeout_s: float = 60.0) -> bool: + # Probe any registered Jersey path on the lance servlet. A 4xx + # response is fine; what we want is *any* response from the lance + # mount instead of connection refused or the bare Jetty 404. + deadline = time.monotonic() + timeout_s + url = LANCE_REST_BASE_URL + "/v1/namespace/list" + while time.monotonic() < deadline: + try: + resp = requests.get(url, timeout=2) + if resp.status_code < 500: + return True + except requests.RequestException: + pass + time.sleep(0.5) + return False + + def test_write_read_filter_via_lance_ray(self): + # Imports are deferred so the skipIf decorator handles missing deps + # cleanly without import errors at module load time. + import ray + from lance_ray import read_lance, write_lance + + ns_properties = {"uri": LANCE_REST_BASE_URL} + table_id = [self.CATALOG_NAME, self.SCHEMA_NAME, self.TABLE_NAME] + + ray.init( + ignore_reinit_error=True, + num_cpus=2, + include_dashboard=False, + log_to_driver=False, + ) + try: + data = ray.data.range(1000).map( + lambda row: {"id": row["id"], "value": row["id"] * 2} + ) + + write_lance( + data, + namespace_impl="rest", + namespace_properties=ns_properties, + table_id=table_id, + ) + + ray_dataset = read_lance( + namespace_impl="rest", + namespace_properties=ns_properties, + table_id=table_id, + ) + # value = id * 2, value < 100 => id in [0, 49] => 50 rows + filtered_count = ray_dataset.filter( + lambda row: row["value"] < 100 + ).count() + self.assertEqual(50, filtered_count) + finally: + ray.shutdown() diff --git a/docs/lance-rest-integration.md b/docs/lance-rest-integration.md index 66fbaff4f37..b213c1a35c2 100644 --- a/docs/lance-rest-integration.md +++ b/docs/lance-rest-integration.md @@ -22,16 +22,42 @@ The following table outlines the tested 
compatibility between Gravitino versions | Gravitino Version (Lance REST) | Supported lance-spark Versions | Supported lance-ray Versions | |--------------------------------|--------------------------------|------------------------------| -| 1.1.1 - 1.2.1 | 0.0.10 - 0.0.15 | 0.0.6 - 0.0.8 | -| 1.3.0 | 0.1.0 - 0.2.0 | 0.0.6 - 0.2.0 | +| 1.1.1 - 1.2.1 | 0.0.10 - 0.0.15 | 0.0.6 - 0.0.8 | +| 1.3.0 | 0.1.0 - 0.4.0 | 0.3.0 - 0.4.2 | :::note - These version ranges show which versions are expected to work together. -- Not all versions in these ranges have been tested. Only some versions were tested. +- For Gravitino 1.3.0, the explicitly verified release versions are + `lance-spark` {0.1.0, 0.1.1, 0.2.0, 0.4.0} and `lance-ray` + {0.3.0, 0.4.2}. lance-ray 0.2.0 and earlier are *not* supported on 1.3.0 + because pip resolves them with an older `lance-namespace` whose request + schema is incompatible with the upgraded server-side `lance-namespace-core` + (0.7.5+). - Before using in production, please test the exact connector versions in your own environment. - The Lance ecosystem is changing quickly, so some versions may introduce breaking changes. ::: +#### Reproducing the matrix locally + +Both connectors ship with a multi-version integration test driver so the +matrix can be re-verified (and extended) without ad-hoc scripting: + +```bash +# lance-spark — runs LanceSparkRESTServiceIT once per bundle version +./gradlew :lance:lance-rest-server:lanceSparkMatrixTest \ + -PlanceSparkBundleVersions=0.1.0,0.1.1,0.2.0,0.4.0 \ + -PskipDockerTests=true +# Per-version JUnit reports land under +# lance/lance-rest-server/build/reports/lance-spark-matrix/<version>/. + +# lance-ray — provisions a venv per version under +# clients/client-python/build/lance-ray-matrix/.venv-<version>/ and runs +# tests/integration/test_lance_ray.py against each. The Gradle wrapper +# below starts / stops Gravitino automatically. 
+./gradlew :clients:client-python:lanceRayMatrixTest \ + -PlanceRayVersions=0.4.2,0.3.0 +``` + ### Why Maintain a Compatibility Matrix? The Lance ecosystem is under active development, with frequent updates to APIs and features. Gravitino's Lance REST service depends on specific connector behaviors to ensure reliable operation. Using incompatible versions may result in: @@ -79,7 +105,7 @@ logging.basicConfig(level=logging.INFO) # Replace /path/to/lance-spark-bundle-3.5_2.12-X.X.XX.jar with your actual JAR path and version; # refer to the compatibility matrix for supported lance-spark versions. os.environ["PYSPARK_SUBMIT_ARGS"] = ( - "--jars /path/to/lance-spark-bundle-3.5_2.12-0.0.15.jar " + "--jars /path/to/lance-spark-bundle-3.5_2.12-0.4.0.jar " "--conf \"spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" " "--conf \"spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED\" " "--master local[1] pyspark-shell" diff --git a/lance/lance-rest-server/build.gradle.kts b/lance/lance-rest-server/build.gradle.kts index 0198099df51..8d159c7749d 100644 --- a/lance/lance-rest-server/build.gradle.kts +++ b/lance/lance-rest-server/build.gradle.kts @@ -28,13 +28,34 @@ val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extra["defaultScalaVersion"].toString() val sparkVersion: String = libs.versions.spark35.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() -val lanceSparkBundleVersion = "0.2.0" +// Comma-separated list of lance-spark-bundle versions to test against. +// The default is the latest supported version; the integration test matrix +// (`:lance:lance-rest-server:lanceSparkMatrixTest`) covers every version in +// this list. Override via `-PlanceSparkBundleVersions=0.2.0,0.3.0,0.4.0`. +val lanceSparkBundleVersions: List = + ((project.properties["lanceSparkBundleVersions"] as? 
String) ?: "0.4.0") + .split(",").map { it.trim() }.filter { it.isNotEmpty() } +if (lanceSparkBundleVersions.isEmpty()) { + throw GradleException("lanceSparkBundleVersions must contain at least one version") +} +val primaryLanceSparkBundleVersion: String = lanceSparkBundleVersions.first() val lanceSparkBundleJarPathProperty = "gravitino.lance.spark.bundle.jar" -val lanceSparkBundleDir = layout.buildDirectory.dir("lance-spark-bundle") -val lanceSparkBundle by configurations.creating { - isCanBeConsumed = false - isCanBeResolved = true - isTransitive = false + +fun lanceSparkBundleConfigName(version: String): String = + "lanceSparkBundle_" + version.replace(".", "_").replace("-", "_") +fun lanceSparkBundleDirFor(version: String) = + layout.buildDirectory.dir("lance-spark-bundle/$version") +fun lanceSparkPrepareTaskName(version: String): String = + "prepareLanceSparkBundle_" + version.replace(".", "_").replace("-", "_") +fun lanceSparkTestTaskName(version: String): String = + "testLanceSparkBundle_" + version.replace(".", "_").replace("-", "_") + +lanceSparkBundleVersions.forEach { version -> + configurations.create(lanceSparkBundleConfigName(version)) { + isCanBeConsumed = false + isCanBeResolved = true + isTransitive = false + } } dependencies { @@ -80,10 +101,12 @@ dependencies { testImplementation(project(":integration-test-common", "testArtifacts")) testImplementation(libs.lance) - add( - lanceSparkBundle.name, - "org.lance:lance-spark-bundle-3.5_2.12:$lanceSparkBundleVersion" - ) + lanceSparkBundleVersions.forEach { version -> + add( + lanceSparkBundleConfigName(version), + "org.lance:lance-spark-bundle-3.5_2.12:$version" + ) + } testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") { @@ -119,10 +142,17 @@ tasks { from(configurations.runtimeClasspath) into("build/libs") } - val prepareLanceSparkBundle by registering(Sync::class) 
{ - from(lanceSparkBundle) - into(lanceSparkBundleDir) + // One Sync task per lance-spark-bundle version. Each task lays down its + // bundle jar under build/lance-spark-bundle// so per-version Test + // tasks pick up the right jar without colliding. + lanceSparkBundleVersions.forEach { version -> + register(lanceSparkPrepareTaskName(version)) { + from(configurations.getByName(lanceSparkBundleConfigName(version))) + into(lanceSparkBundleDirFor(version)) + } } + val primaryPrepareLanceSparkBundle = + named(lanceSparkPrepareTaskName(primaryLanceSparkBundleVersion)) jar { finalizedBy(copyDepends) @@ -153,13 +183,14 @@ tasks { } test { - dependsOn(prepareLanceSparkBundle) + dependsOn(primaryPrepareLanceSparkBundle) + val primaryBundleDir = lanceSparkBundleDirFor(primaryLanceSparkBundleVersion) doFirst { val bundleJar = - lanceSparkBundleDir.get().asFile.listFiles()?.singleOrNull { it.extension == "jar" } + primaryBundleDir.get().asFile.listFiles()?.singleOrNull { it.extension == "jar" } ?: throw GradleException( - "Expected exactly one Lance Spark bundle jar in ${lanceSparkBundleDir.get().asFile}" + "Expected exactly one Lance Spark bundle jar in ${primaryBundleDir.get().asFile}" ) systemProperty(lanceSparkBundleJarPathProperty, bundleJar.absolutePath) } @@ -169,6 +200,64 @@ tasks { dependsOn(":catalogs:catalog-lakehouse-generic:jar") } } + + // Per-version Test task that only runs LanceSparkRESTServiceIT against a + // specific lance-spark-bundle. Each task downloads its bundle through the + // matching Sync task and points the IT JVM at it via system property. + lanceSparkBundleVersions.forEach { version -> + register(lanceSparkTestTaskName(version)) { + group = "verification" + description = + "Run LanceSparkRESTServiceIT against lance-spark-bundle $version" + + dependsOn(named(lanceSparkPrepareTaskName(version))) + dependsOn(named("jar")) + val versionTestMode = project.properties["testMode"] as? 
String ?: "embedded" + if (versionTestMode == "embedded") { + dependsOn(":catalogs:catalog-lakehouse-generic:jar") + } + + testClassesDirs = sourceSets["test"].output.classesDirs + classpath = sourceSets["test"].runtimeClasspath + useJUnitPlatform() + filter { includeTestsMatching("*LanceSparkRESTServiceIT*") } + + val versionBundleDir = lanceSparkBundleDirFor(version) + doFirst { + val bundleJar = + versionBundleDir.get().asFile.listFiles()?.singleOrNull { it.extension == "jar" } + ?: throw GradleException( + "Expected exactly one Lance Spark bundle jar in " + + "${versionBundleDir.get().asFile} for version $version" + ) + systemProperty(lanceSparkBundleJarPathProperty, bundleJar.absolutePath) + println("[lance-spark-matrix] running IT against bundle $version -> ${bundleJar.name}") + } + + // Send per-version reports to a separate directory so a matrix run + // doesn't overwrite results across versions. + val versionSlug = version.replace(".", "_").replace("-", "_") + reports { + html.outputLocation.set( + layout.buildDirectory.dir("reports/lance-spark-matrix/$versionSlug") + ) + junitXml.outputLocation.set( + layout.buildDirectory.dir("test-results/lance-spark-matrix/$versionSlug") + ) + } + } + } + + register("lanceSparkMatrixTest") { + group = "verification" + description = + "Run LanceSparkRESTServiceIT against every version in -PlanceSparkBundleVersions " + + "(default: $primaryLanceSparkBundleVersion). Reports land under " + + "build/reports/lance-spark-matrix//." 
+ dependsOn( + lanceSparkBundleVersions.map { named(lanceSparkTestTaskName(it)) } + ) + } } tasks.test { From 6cb2f908fbdbe3cdc217ccb7d5ee1b05455677a1 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 May 2026 20:22:07 +0800 Subject: [PATCH 04/21] fix --- clients/client-python/requirements-dev.txt | 8 ++ .../tests/integration/test_lance_ray.py | 98 ++++++++++++------- 2 files changed, 73 insertions(+), 33 deletions(-) diff --git a/clients/client-python/requirements-dev.txt b/clients/client-python/requirements-dev.txt index e4335e2a98a..0899859abe4 100644 --- a/clients/client-python/requirements-dev.txt +++ b/clients/client-python/requirements-dev.txt @@ -33,3 +33,11 @@ jwcrypto==1.5.6 sphinx==7.1.2 furo==2024.8.6 banks==2.4.1 + +# Lance integration deps. Pinned so the default integration test runs against +# a single, known-good (server-side `lance-namespace-core` 0.7.5+) combination. +# The multi-version matrix (`:clients:client-python:lanceRayMatrixTest`) keeps +# its own per-version venvs and does not consume these pins. +ray==2.55.1 +lance-ray==0.4.2 +lance-namespace==0.7.6 diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py index 9e7833fdeb2..0910516a879 100644 --- a/clients/client-python/tests/integration/test_lance_ray.py +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -67,7 +67,12 @@ class TestLanceRayIntegration(IntegrationTestEnv): ``read_lance`` flow from the upstream lance-ray docs. """ - METALAKE_NAME: str = "lance_ray_metalake_" + str(randint(1, 100000)) + # Metalake name is fixed (not randomized) so back-to-back runs in the + # same Gravitino process can detect that the lance-rest aux service is + # already bound and skip the costly server restart. The per-test table + # name still gets a random suffix to keep individual test methods + # isolated. 
+ METALAKE_NAME: str = "lance_ray_test_metalake" CATALOG_NAME: str = "lance_catalog" SCHEMA_NAME: str = "schema" TABLE_NAME: str = "lance_ray_tbl_" + str(randint(1, 100000)) @@ -88,13 +93,16 @@ def setUpClass(cls): ) cls.main_conf_path = os.path.join(gravitino_home, MAIN_CONF_FILE) - # Bind the lance-rest aux service to our test metalake and restart - # so the change takes effect. ``IntegrationTestEnv.restart_server`` - # works whether ``START_EXTERNAL_GRAVITINO`` was used or not. - cls._append_conf( - {LANCE_REST_METALAKE_KEY: cls.METALAKE_NAME}, cls.main_conf_path - ) - cls.restart_server() + # Bind the lance-rest aux service to our test metalake. If the same + # binding is already present (e.g. an earlier run in the same Gradle + # session left it there), skip the conf write and the restart. This + # avoids restarting Gravitino in the middle of the IT suite when the + # test class is replayed, which would briefly disrupt other ITs. + if not cls._lance_metalake_already_bound(): + cls._append_conf( + {LANCE_REST_METALAKE_KEY: cls.METALAKE_NAME}, cls.main_conf_path + ) + cls.restart_server() if not cls._wait_for_lance_rest_ready(): raise RuntimeError( "Lance REST aux service did not become ready in time at " @@ -102,28 +110,43 @@ def setUpClass(cls): ) cls.gravitino_admin_client = GravitinoAdminClient("http://localhost:8090") - cls.gravitino_admin_client.create_metalake( - cls.METALAKE_NAME, - comment="lance-ray IT metalake", - properties={}, - ) + # Idempotent: tolerate a metalake left over from a prior failed run. 
+ try: + cls.gravitino_admin_client.create_metalake( + cls.METALAKE_NAME, + comment="lance-ray IT metalake", + properties={}, + ) + except Exception as e: # pylint: disable=broad-exception-caught + if "already exists" not in str(e).lower(): + raise + logger.info("Metalake %s already exists, reusing", cls.METALAKE_NAME) cls.gravitino_client = GravitinoClient( uri="http://localhost:8090", metalake_name=cls.METALAKE_NAME ) cls.temp_dir = tempfile.mkdtemp(prefix="lance_ray_it_") - cls.gravitino_client.create_catalog( - name=cls.CATALOG_NAME, - catalog_type=Catalog.Type.RELATIONAL, - provider="lakehouse-generic", - comment="lance-ray IT catalog", - properties={"location": cls.temp_dir}, - ) + # Idempotent catalog + schema creation too. + try: + cls.gravitino_client.create_catalog( + name=cls.CATALOG_NAME, + catalog_type=Catalog.Type.RELATIONAL, + provider="lakehouse-generic", + comment="lance-ray IT catalog", + properties={"location": cls.temp_dir}, + ) + except Exception as e: # pylint: disable=broad-exception-caught + if "already exists" not in str(e).lower(): + raise catalog = cls.gravitino_client.load_catalog(cls.CATALOG_NAME) - catalog.as_schemas().create_schema( - schema_name=cls.SCHEMA_NAME, - comment="lance-ray IT schema", - properties={}, - ) + try: + catalog.as_schemas().create_schema( + schema_name=cls.SCHEMA_NAME, + comment="lance-ray IT schema", + properties={}, + ) + except Exception as e: # pylint: disable=broad-exception-caught + if "already exists" not in str(e).lower(): + raise @classmethod def tearDownClass(cls): @@ -143,14 +166,12 @@ def tearDownClass(cls): except Exception as e: # pylint: disable=broad-exception-caught failures.append(("drop metalake", e)) - try: - if cls.main_conf_path is not None: - cls._reset_conf( - {LANCE_REST_METALAKE_KEY: cls.METALAKE_NAME}, - cls.main_conf_path, - ) - except Exception as e: # pylint: disable=broad-exception-caught - failures.append(("reset lance-rest aux conf", e)) + # Intentionally do NOT reset the 
lance-rest metalake binding in + # gravitino.conf. Removing it would force the next setUpClass to + # restart the server, which is disruptive when this IT is replayed + # back-to-back or alongside other ITs in the same Gradle invocation. + # The conf line is regenerated by `compileDistribution`, so it does + # not survive a fresh distribution build. try: if cls.temp_dir and os.path.exists(cls.temp_dir): @@ -163,6 +184,17 @@ def tearDownClass(cls): super().tearDownClass() + @classmethod + def _lance_metalake_already_bound(cls) -> bool: + if cls.main_conf_path is None or not os.path.exists(cls.main_conf_path): + return False + needle = f"{LANCE_REST_METALAKE_KEY} = {cls.METALAKE_NAME}" + with open(cls.main_conf_path, encoding="utf-8") as f: + for line in f: + if line.strip() == needle: + return True + return False + @staticmethod def _wait_for_lance_rest_ready(timeout_s: float = 60.0) -> bool: # Probe any registered Jersey path on the lance servlet. A 4xx From 5c5b5db33f965731adf129355fe913050f991c29 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 May 2026 20:02:22 +0800 Subject: [PATCH 05/21] fix(lance): address review comments --- .../LancePartitionStatisticStorage.java | 25 ++++++++-------- .../TestLancePartitionStatisticStorage.java | 30 +++++++++++++++++++ .../rest/TestLanceNamespaceOperations.java | 3 +- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java b/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java index 73e24e3a4f8..d3f3d426f37 100644 --- a/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java +++ b/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java @@ -204,15 +204,12 @@ public LancePartitionStatisticStorage(Map properties) { Caffeine.newBuilder() .maximumSize(datasetCacheSize) 
.scheduler(Scheduler.forScheduledExecutorService(this.scheduler)) - .evictionListener( + .executor(Runnable::run) + .removalListener( (RemovalListener) (key, value, cause) -> { if (value != null) { - try { - value.close(); - } catch (IOException e) { - LOG.warn("Failed to close evicted Lance dataset", e); - } + closeDatasetHolder(value); } }) .build()); @@ -361,14 +358,8 @@ private void dropStatisticsImpl(Long tableId, List drop public void close() throws IOException { if (datasetCache.isPresent()) { Cache cache = datasetCache.get(); - for (DatasetHolder holder : cache.asMap().values()) { - try { - holder.close(); - } catch (Exception e) { - LOG.warn("Failed to close cached Lance dataset", e); - } - } cache.invalidateAll(); + cache.cleanUp(); } if (allocator != null) { @@ -615,6 +606,14 @@ private ThreadFactory newDaemonThreadFactory(String name) { return new ThreadFactoryBuilder().setDaemon(true).setNameFormat(name + "-%d").build(); } + private static void closeDatasetHolder(DatasetHolder holder) { + try { + holder.close(); + } catch (IOException | RuntimeException e) { + LOG.warn("Failed to close cached Lance dataset", e); + } + } + /** * Package-private wrapper around a {@link Dataset} stored in the dataset cache. 
Exists solely to * allow test code to mock this holder (and thus verify close-ordering) without requiring Mockito diff --git a/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java b/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java index 5a31847e3e4..3a1ab82cf82 100644 --- a/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java +++ b/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java @@ -23,6 +23,7 @@ import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.github.benmanes.caffeine.cache.Cache; @@ -652,4 +653,33 @@ public void testCloseReleasesCachedDatasetBeforeAllocator() throws Exception { FileUtils.deleteDirectory(new File(location)); } } + + @Test + public void testDatasetCacheClosesPreviousHolderOnReplacement() throws Exception { + String location = Files.createTempDirectory("lance_stats_replace_cache").toString(); + Map properties = Maps.newHashMap(); + properties.put("location", location); + properties.put("datasetCacheSize", "10"); + + LancePartitionStatisticStorage storage = new LancePartitionStatisticStorage(properties); + + try { + Cache datasetCache = storage.getDatasetCache(); + Assertions.assertNotNull(datasetCache); + + DatasetHolder previousHolder = mock(DatasetHolder.class); + DatasetHolder newHolder = mock(DatasetHolder.class); + + datasetCache.put(1L, previousHolder); + datasetCache.put(1L, newHolder); + + verify(previousHolder).close(); + + storage.close(); + + verify(newHolder).close(); + } finally { + FileUtils.deleteDirectory(new File(location)); + } + } } diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java 
b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java index c7c4e423ec7..9f6d355e9f8 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java @@ -78,6 +78,7 @@ import org.lance.namespace.model.RegisterTableRequest; import org.lance.namespace.model.RegisterTableResponse; import org.mockito.Mockito; +import org.openapitools.jackson.nullable.JsonNullableModule; @SuppressWarnings("deprecation") public class TestLanceNamespaceOperations extends JerseyTest { @@ -113,7 +114,7 @@ protected Application configure() { resourceConfig.property(CommonProperties.FEATURE_AUTO_DISCOVERY_DISABLE, true); resourceConfig.property(CommonProperties.MOXY_JSON_FEATURE_DISABLE, true); ObjectMapper mapper = new ObjectMapper(); - mapper.registerModule(new org.openapitools.jackson.nullable.JsonNullableModule()); + mapper.registerModule(new JsonNullableModule()); JacksonJaxbJsonProvider provider = new JacksonJaxbJsonProvider(); provider.setMapper(mapper); resourceConfig.register(provider); From 21a1c430dad2345dd4b47699f5e18d93b3434709 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 May 2026 21:10:37 +0800 Subject: [PATCH 06/21] fix --- .../tests/integration/test_lance_ray.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py index 0910516a879..d7c37cd949a 100644 --- a/clients/client-python/tests/integration/test_lance_ray.py +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -109,6 +109,16 @@ def setUpClass(cls): + LANCE_REST_BASE_URL ) + # Probe whether the server-side `lance-namespace-core` is new enough + # to deserialize requests from the installed PyPI `lance-namespace`. 
+ # We skip cleanly (rather than fail) when the server is older — this + # happens on branches that haven't merged the `lance-namespace-core` + # upgrade yet. The probe runs *before* metalake/catalog/schema setup + # so a skipped run leaves no fixtures behind. + skip_reason = cls._check_lance_namespace_compat() + if skip_reason is not None: + raise unittest.SkipTest(skip_reason) + cls.gravitino_admin_client = GravitinoAdminClient("http://localhost:8090") # Idempotent: tolerate a metalake left over from a prior failed run. try: @@ -184,6 +194,60 @@ def tearDownClass(cls): super().tearDownClass() + @classmethod + def _check_lance_namespace_compat(cls) -> Optional[str]: + """Detect server/client schema drift on lance-namespace. + + The lance-namespace REST model evolves: newer client versions add + request fields (e.g. ``check_declared``) that older + ``lance-namespace-core`` builds on the server side reject with + Jackson's "Unrecognized field ... not marked as ignorable". This + helper sends a harmless ``describe_table`` for a bogus table id so + we can observe the schema-validation error without doing any real + work. Returns a skip reason on incompatibility, or ``None`` if the + server understands the request shape. + """ + try: + import lance_namespace + from lance_namespace import DescribeTableRequest + except ImportError: + # `@unittest.skipIf` on the class already handles this case; if + # we reach here something odd is going on but it's not our job + # to recover from it. + return None + + try: + ns = lance_namespace.connect("rest", {"uri": LANCE_REST_BASE_URL}) + except Exception as e: # pylint: disable=broad-exception-caught + return f"unable to connect to lance-rest aux service: {e}" + + probe = DescribeTableRequest( + id=["__probe__", "__probe__", "__probe__"] + ) + try: + ns.describe_table(probe) + # Unlikely but not impossible: the probe table actually exists + # in a leftover metalake. That's still a compatible server. 
+ return None + except Exception as e: # pylint: disable=broad-exception-caught + msg = str(e) + if ( + "Unrecognized field" in msg + or "not marked as ignorable" in msg + ): + short = msg.splitlines()[0][:200] + return ( + "lance-rest server's lance-namespace-core is older than " + "the client's lance-namespace (request schema mismatch). " + f"Server reported: {short}. To run this test, upgrade " + "the server (e.g. via PR #11060 -> lance-namespace 0.7.5) " + "or roll the client back to a matching version." + ) + # Any other error (table-not-found, metalake-not-found, etc.) + # means the server *did* deserialize the request — i.e. schemas + # are compatible. + return None + @classmethod def _lance_metalake_already_bound(cls) -> bool: if cls.main_conf_path is None or not os.path.exists(cls.main_conf_path): From 3376bdd438de4d36f0b2ec7b8784d14a4373424e Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 May 2026 20:46:56 +0800 Subject: [PATCH 07/21] fix(python): address lance ray lint issues --- clients/client-python/scripts/run_lance_ray_matrix.py | 8 ++++---- clients/client-python/tests/integration/test_lance_ray.py | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/clients/client-python/scripts/run_lance_ray_matrix.py b/clients/client-python/scripts/run_lance_ray_matrix.py index 78e25695d6a..3692bf2f6c7 100644 --- a/clients/client-python/scripts/run_lance_ray_matrix.py +++ b/clients/client-python/scripts/run_lance_ray_matrix.py @@ -212,7 +212,9 @@ def run_unittest(venv_python: Path, gravitino_home: Path) -> int: "tests.integration.test_lance_ray", ] print(f"[matrix] $ PYTHONPATH=... GRAVITINO_HOME=... 
{' '.join(cmd)}") - return subprocess.run(cmd, cwd=str(PYTHON_CLIENT_DIR), env=env).returncode + return subprocess.run( + cmd, cwd=str(PYTHON_CLIENT_DIR), env=env, check=False + ).returncode def main() -> int: @@ -259,9 +261,7 @@ def main() -> int: results.append(VersionResult(version, "ok", "tests passed")) else: print(f"[matrix] {version}: FAIL (exit={rc})") - results.append( - VersionResult(version, "fail", f"unittest exit {rc}") - ) + results.append(VersionResult(version, "fail", f"unittest exit {rc}")) if not args.keep_going: break diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py index d7c37cd949a..86f10399f0d 100644 --- a/clients/client-python/tests/integration/test_lance_ray.py +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -279,9 +279,12 @@ def _wait_for_lance_rest_ready(timeout_s: float = 60.0) -> bool: def test_write_read_filter_via_lance_ray(self): # Imports are deferred so the skipIf decorator handles missing deps # cleanly without import errors at module load time. 
+ # pylint: disable=import-outside-toplevel import ray from lance_ray import read_lance, write_lance + # pylint: enable=import-outside-toplevel + ns_properties = {"uri": LANCE_REST_BASE_URL} table_id = [self.CATALOG_NAME, self.SCHEMA_NAME, self.TABLE_NAME] @@ -309,9 +312,7 @@ def test_write_read_filter_via_lance_ray(self): table_id=table_id, ) # value = id * 2, value < 100 => id in [0, 49] => 50 rows - filtered_count = ray_dataset.filter( - lambda row: row["value"] < 100 - ).count() + filtered_count = ray_dataset.filter(lambda row: row["value"] < 100).count() self.assertEqual(50, filtered_count) finally: ray.shutdown() From 2129830de9326d55606587c2531bc6e5707c1025 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 13 May 2026 21:31:12 +0800 Subject: [PATCH 08/21] fix(python): suppress lance namespace deferred imports --- .../tests/integration/test_lance_ray.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py index 86f10399f0d..2efda305349 100644 --- a/clients/client-python/tests/integration/test_lance_ray.py +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -208,8 +208,11 @@ def _check_lance_namespace_compat(cls) -> Optional[str]: server understands the request shape. 
""" try: + # pylint: disable=import-outside-toplevel import lance_namespace from lance_namespace import DescribeTableRequest + + # pylint: enable=import-outside-toplevel except ImportError: # `@unittest.skipIf` on the class already handles this case; if # we reach here something odd is going on but it's not our job @@ -221,9 +224,7 @@ def _check_lance_namespace_compat(cls) -> Optional[str]: except Exception as e: # pylint: disable=broad-exception-caught return f"unable to connect to lance-rest aux service: {e}" - probe = DescribeTableRequest( - id=["__probe__", "__probe__", "__probe__"] - ) + probe = DescribeTableRequest(id=["__probe__", "__probe__", "__probe__"]) try: ns.describe_table(probe) # Unlikely but not impossible: the probe table actually exists @@ -231,10 +232,7 @@ def _check_lance_namespace_compat(cls) -> Optional[str]: return None except Exception as e: # pylint: disable=broad-exception-caught msg = str(e) - if ( - "Unrecognized field" in msg - or "not marked as ignorable" in msg - ): + if "Unrecognized field" in msg or "not marked as ignorable" in msg: short = msg.splitlines()[0][:200] return ( "lance-rest server's lance-namespace-core is older than " From ec8bfa6238e8694dbc47620d9525fa4bcb2ab1c5 Mon Sep 17 00:00:00 2001 From: yuqi Date: Thu, 14 May 2026 10:40:56 +0800 Subject: [PATCH 09/21] fix(lance): address review comments --- clients/client-python/build.gradle.kts | 7 ++- .../scripts/run_lance_ray_matrix.py | 4 +- .../tests/integration/test_lance_ray.py | 44 ++++++++++++++----- .../rest/JsonNullableMapperProvider.java | 3 +- .../service/rest/LanceTableOperations.java | 4 +- 5 files changed, 46 insertions(+), 16 deletions(-) diff --git a/clients/client-python/build.gradle.kts b/clients/client-python/build.gradle.kts index da2007a7c3d..df362641388 100644 --- a/clients/client-python/build.gradle.kts +++ b/clients/client-python/build.gradle.kts @@ -243,6 +243,7 @@ tasks { // versions. 
Each version is exercised inside its own venv under // build/lance-ray-matrix/.venv-/ (cached across runs). // Override the matrix with `-PlanceRayVersions=0.4.2,0.4.1,0.4.0`. + // Override the bootstrap interpreter with `-PlanceRayPython=/path/to/python`. register("lanceRayMatrixTest") { group = "verification" description = @@ -252,6 +253,8 @@ tasks { val versions = project.findProperty("lanceRayVersions") as? String val keepGoing = project.hasProperty("lanceRayKeepGoing") + val pythonExecutable = + (project.findProperty("lanceRayPython") as? String)?.takeIf { it.isNotBlank() } ?: "python3" val script = projectDir.resolve("scripts/run_lance_ray_matrix.py") val gravitinoHome = file("${project.rootDir}/distribution/package") @@ -261,8 +264,10 @@ tasks { doLast { try { val args = mutableListOf( - "python3", + pythonExecutable, script.absolutePath, + "--python", + pythonExecutable, "--gravitino-home", gravitinoHome.absolutePath, ) diff --git a/clients/client-python/scripts/run_lance_ray_matrix.py b/clients/client-python/scripts/run_lance_ray_matrix.py index 3692bf2f6c7..785a552ad7c 100644 --- a/clients/client-python/scripts/run_lance_ray_matrix.py +++ b/clients/client-python/scripts/run_lance_ray_matrix.py @@ -40,7 +40,8 @@ distribution/package/bin/gravitino.sh stop Each test class will append its own metalake binding to ``gravitino.conf`` and -restart the server itself, so back-to-back runs across versions are safe. +restart the server itself. The matrix runner opts into keeping that binding +between versions, so back-to-back runs avoid unnecessary Gravitino restarts. 
""" from __future__ import annotations @@ -204,6 +205,7 @@ def run_unittest(venv_python: Path, gravitino_home: Path) -> int: env["PYTHONPATH"] = str(PYTHON_CLIENT_DIR) env["GRAVITINO_HOME"] = str(gravitino_home) env["START_EXTERNAL_GRAVITINO"] = "true" + env["LANCE_RAY_KEEP_GRAVITINO_CONF"] = "true" cmd = [ str(venv_python), "-m", diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py index 2efda305349..a43d75913be 100644 --- a/clients/client-python/tests/integration/test_lance_ray.py +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -44,6 +44,7 @@ # standalone lance-rest conf file. MAIN_CONF_FILE = "conf/gravitino.conf" LANCE_REST_METALAKE_KEY = "gravitino.lance-rest.gravitino-metalake" +KEEP_GRAVITINO_CONF_ENV = "LANCE_RAY_KEEP_GRAVITINO_CONF" def _missing_lance_ray_deps() -> Optional[str]: @@ -81,6 +82,7 @@ class TestLanceRayIntegration(IntegrationTestEnv): gravitino_client: Optional[GravitinoClient] = None temp_dir: Optional[str] = None main_conf_path: Optional[str] = None + appended_lance_rest_conf: bool = False @classmethod def setUpClass(cls): @@ -99,9 +101,8 @@ def setUpClass(cls): # avoids restarting Gravitino in the middle of the IT suite when the # test class is replayed, which would briefly disrupt other ITs. if not cls._lance_metalake_already_bound(): - cls._append_conf( - {LANCE_REST_METALAKE_KEY: cls.METALAKE_NAME}, cls.main_conf_path - ) + cls._append_conf(cls._lance_rest_config(), cls.main_conf_path) + cls.appended_lance_rest_conf = True cls.restart_server() if not cls._wait_for_lance_rest_ready(): raise RuntimeError( @@ -117,6 +118,7 @@ def setUpClass(cls): # so a skipped run leaves no fixtures behind. 
skip_reason = cls._check_lance_namespace_compat() if skip_reason is not None: + cls._reset_lance_rest_conf_if_needed() raise unittest.SkipTest(skip_reason) cls.gravitino_admin_client = GravitinoAdminClient("http://localhost:8090") @@ -176,12 +178,10 @@ def tearDownClass(cls): except Exception as e: # pylint: disable=broad-exception-caught failures.append(("drop metalake", e)) - # Intentionally do NOT reset the lance-rest metalake binding in - # gravitino.conf. Removing it would force the next setUpClass to - # restart the server, which is disruptive when this IT is replayed - # back-to-back or alongside other ITs in the same Gradle invocation. - # The conf line is regenerated by `compileDistribution`, so it does - # not survive a fresh distribution build. + try: + cls._reset_lance_rest_conf_if_needed() + except Exception as e: # pylint: disable=broad-exception-caught + failures.append(("reset lance-rest conf", e)) try: if cls.temp_dir and os.path.exists(cls.temp_dir): @@ -237,8 +237,8 @@ def _check_lance_namespace_compat(cls) -> Optional[str]: return ( "lance-rest server's lance-namespace-core is older than " "the client's lance-namespace (request schema mismatch). " - f"Server reported: {short}. To run this test, upgrade " - "the server (e.g. via PR #11060 -> lance-namespace 0.7.5) " + f"Server reported: {short}. To run this test, use " + "lance-namespace-core 0.7.5 or newer on the server, " "or roll the client back to a matching version." ) # Any other error (table-not-found, metalake-not-found, etc.) 
@@ -257,6 +257,28 @@ def _lance_metalake_already_bound(cls) -> bool: return True return False + @classmethod + def _lance_rest_config(cls): + return {LANCE_REST_METALAKE_KEY: cls.METALAKE_NAME} + + @classmethod + def _should_keep_lance_rest_conf(cls) -> bool: + return os.environ.get(KEEP_GRAVITINO_CONF_ENV, "").lower() == "true" + + @classmethod + def _reset_lance_rest_conf_if_needed(cls) -> None: + if not cls.appended_lance_rest_conf or cls.main_conf_path is None: + return + if cls._should_keep_lance_rest_conf(): + logger.info( + "Keeping lance-rest Gravitino conf because %s=true", + KEEP_GRAVITINO_CONF_ENV, + ) + return + cls._reset_conf(cls._lance_rest_config(), cls.main_conf_path) + cls.appended_lance_rest_conf = False + cls.restart_server() + @staticmethod def _wait_for_lance_rest_ready(timeout_s: float = 60.0) -> bool: # Probe any registered Jersey path on the lance servlet. A 4xx diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/JsonNullableMapperProvider.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/JsonNullableMapperProvider.java index 6ae029211f1..31470e1a7bf 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/JsonNullableMapperProvider.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/JsonNullableMapperProvider.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import javax.ws.rs.ext.ContextResolver; import javax.ws.rs.ext.Provider; +import org.apache.gravitino.server.web.ObjectMapperProvider; import org.openapitools.jackson.nullable.JsonNullableModule; /** @@ -34,7 +35,7 @@ public class JsonNullableMapperProvider implements ContextResolver { private static final ObjectMapper MAPPER = - new ObjectMapper().registerModule(new JsonNullableModule()); + ObjectMapperProvider.objectMapper().copy().registerModule(new JsonNullableModule()); @Override public ObjectMapper 
getContext(Class type) { diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java index 1e2711098c3..b0f7cd7b774 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java @@ -134,8 +134,8 @@ public Response createTable( } /** - * According to the spec of lance-namespace with version 0.0.20 to 0.31, createEmptyTable only - * stores the table metadata including its location, and will never touch lance storage. + * Legacy create-empty endpoint kept for clients that still call it. It only records table + * metadata, including its location, and does not touch Lance storage. */ @POST @Path("/create-empty") From 3c3a51b191644f14d22a50747de4c74ffd86bffa Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 15 May 2026 16:35:09 +0800 Subject: [PATCH 10/21] fix(lance): pin matrix dependencies and exclude apache-client from core Pin lance-namespace to 0.7.5 (matching server-side lance-namespace-core) in requirements-dev.txt and pin the matrix runner's --ray-spec / --lance-namespace-spec defaults so back-to-back matrix runs do not drift with PyPI (per review feedback). Also exclude lance-namespace-apache-client from the core module: lance-core 4.0.1 pulls it in transitively but core never calls into it, and leaving it on the main classpath shadows the lance-rest aux service's own 0.7.5 copy (parent-first delegation), which silently downgraded request deserialization and dropped fields like `check_declared`. 
Co-Authored-By: Claude Opus 4.7 --- clients/client-python/requirements-dev.txt | 2 +- .../client-python/scripts/run_lance_ray_matrix.py | 15 +++++++++------ core/build.gradle.kts | 7 +++++++ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/clients/client-python/requirements-dev.txt b/clients/client-python/requirements-dev.txt index 0899859abe4..83a37fad97e 100644 --- a/clients/client-python/requirements-dev.txt +++ b/clients/client-python/requirements-dev.txt @@ -40,4 +40,4 @@ banks==2.4.1 # its own per-version venvs and does not consume these pins. ray==2.55.1 lance-ray==0.4.2 -lance-namespace==0.7.6 +lance-namespace==0.7.5 diff --git a/clients/client-python/scripts/run_lance_ray_matrix.py b/clients/client-python/scripts/run_lance_ray_matrix.py index 785a552ad7c..3c6cbdc9495 100644 --- a/clients/client-python/scripts/run_lance_ray_matrix.py +++ b/clients/client-python/scripts/run_lance_ray_matrix.py @@ -103,15 +103,18 @@ def parse_args() -> argparse.Namespace: ) p.add_argument( "--ray-spec", - default="ray", - help="Pip spec for ray. Pin it (e.g. 'ray==2.55.1') if you need " - "reproducibility. Default: pip picks a compatible version per " - "lance-ray.", + default="ray==2.55.1", + help="Pip spec for ray. Pinned by default so the matrix is " + "reproducible across runs and does not drift with PyPI. Override " + "(e.g. 'ray') to let pip pick a compatible version per lance-ray. " + "Default: %(default)s", ) p.add_argument( "--lance-namespace-spec", - default="lance-namespace", - help="Pip spec for lance-namespace. Default: latest.", + default="lance-namespace==0.7.5", + help="Pip spec for lance-namespace. Pinned by default so the " + "matrix is reproducible and does not drift with PyPI. Override " + "(e.g. 'lance-namespace') for latest. 
Default: %(default)s", ) p.add_argument( "--keep-going", diff --git a/core/build.gradle.kts b/core/build.gradle.kts index bdf52f367a9..58afc132d8c 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -54,6 +54,13 @@ dependencies { exclude(group = "com.fasterxml.jackson.jaxrs", module = "jackson-jaxrs-json-provider") // using gravitino's version exclude(group = "org.apache.httpcomponents.client5", module = "*") // provided by gravitino exclude(group = "org.lance", module = "lance-namespace-core") // This is unnecessary in the core module + // Same rationale as lance-namespace-core: lance-core 4.0.1 declares + // lance-namespace-apache-client as a transitive, but core never calls into it. + // Leaving it on the main classpath shadows the lance-rest aux service's own + // lance-namespace-apache-client (loaded via lance-rest-server/libs/), and + // because the aux classloader is parent-first, the older transitive wins + // on request deserialization (e.g. dropping fields like `check_declared`). 
+ exclude(group = "org.lance", module = "lance-namespace-apache-client") } implementation(libs.mybatis) From 6333e17485514bedb61ec3cf5d5786b629b74cc3 Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 15 May 2026 16:48:00 +0800 Subject: [PATCH 11/21] test(lance): split matrix tooling from upgrade --- clients/client-python/build.gradle.kts | 51 ---- clients/client-python/requirements-dev.txt | 6 +- .../scripts/run_lance_ray_matrix.py | 283 ------------------ .../tests/integration/test_lance_ray.py | 29 +- docs/lance-rest-integration.md | 24 +- lance/lance-rest-server/build.gradle.kts | 121 +------- 6 files changed, 28 insertions(+), 486 deletions(-) delete mode 100644 clients/client-python/scripts/run_lance_ray_matrix.py diff --git a/clients/client-python/build.gradle.kts b/clients/client-python/build.gradle.kts index df362641388..8c80ae04d2e 100644 --- a/clients/client-python/build.gradle.kts +++ b/clients/client-python/build.gradle.kts @@ -239,57 +239,6 @@ tasks { finalizedBy(unitCoverageReport) } - // Run tests/integration/test_lance_ray.py against multiple lance-ray - // versions. Each version is exercised inside its own venv under - // build/lance-ray-matrix/.venv-/ (cached across runs). - // Override the matrix with `-PlanceRayVersions=0.4.2,0.4.1,0.4.0`. - // Override the bootstrap interpreter with `-PlanceRayPython=/path/to/python`. - register("lanceRayMatrixTest") { - group = "verification" - description = - "Run tests/integration/test_lance_ray.py against multiple lance-ray " + - "versions. Override with -PlanceRayVersions= (default: " + - "tracks docs/lance-rest-integration.md Compatibility Matrix)." - - val versions = project.findProperty("lanceRayVersions") as? String - val keepGoing = project.hasProperty("lanceRayKeepGoing") - val pythonExecutable = - (project.findProperty("lanceRayPython") as? 
String)?.takeIf { it.isNotBlank() } ?: "python3" - val script = projectDir.resolve("scripts/run_lance_ray_matrix.py") - val gravitinoHome = file("${project.rootDir}/distribution/package") - - doFirst { - gravitinoServer("start") - } - doLast { - try { - val args = mutableListOf( - pythonExecutable, - script.absolutePath, - "--python", - pythonExecutable, - "--gravitino-home", - gravitinoHome.absolutePath, - ) - if (!versions.isNullOrBlank()) { - args += listOf("--versions", versions) - } - if (keepGoing) { - args += "--keep-going" - } - val proc = ProcessBuilder(args) - .inheritIO() - .start() - val exit = proc.waitFor() - if (exit != 0) { - throw GradleException("lance-ray matrix failed with exit code $exit") - } - } finally { - gravitinoServer("stop") - } - } - } - register("test", VenvTask::class) { val skipUTs = project.hasProperty("skipTests") val skipITs = project.hasProperty("skipITs") diff --git a/clients/client-python/requirements-dev.txt b/clients/client-python/requirements-dev.txt index 83a37fad97e..867d2b08ea9 100644 --- a/clients/client-python/requirements-dev.txt +++ b/clients/client-python/requirements-dev.txt @@ -34,10 +34,8 @@ sphinx==7.1.2 furo==2024.8.6 banks==2.4.1 -# Lance integration deps. Pinned so the default integration test runs against -# a single, known-good (server-side `lance-namespace-core` 0.7.5+) combination. -# The multi-version matrix (`:clients:client-python:lanceRayMatrixTest`) keeps -# its own per-version venvs and does not consume these pins. +# Lance integration deps. Pinned so integration tests run against a single, +# known-good server-side `lance-namespace-core` 0.7.5+ combination. 
ray==2.55.1 lance-ray==0.4.2 lance-namespace==0.7.5 diff --git a/clients/client-python/scripts/run_lance_ray_matrix.py b/clients/client-python/scripts/run_lance_ray_matrix.py deleted file mode 100644 index 3c6cbdc9495..00000000000 --- a/clients/client-python/scripts/run_lance_ray_matrix.py +++ /dev/null @@ -1,283 +0,0 @@ -#!/usr/bin/env python3 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Run ``tests/integration/test_lance_ray.py`` against multiple lance-ray -versions to validate the supported range advertised in the Compatibility -Matrix (``docs/lance-rest-integration.md``). - -For each version we provision a dedicated venv under -``clients/client-python/build/lance-ray-matrix/.venv-/`` and install -``ray``, ``lance-ray==``, ``lance-namespace``, ``requests``, plus the -in-tree ``apache-gravitino`` distribution (editable). The unittest itself is -launched per-version with ``python -m unittest -v -tests.integration.test_lance_ray``; results are collected into a pass/fail -table at the end. - -The caller is responsible for starting the Gravitino server (with the -auxiliary lance-rest service enabled). The Gradle wrapper task -``:clients:client-python:lanceRayMatrixTest`` handles that. 
For ad-hoc local -use:: - - distribution/package/bin/gravitino.sh start - python3 clients/client-python/scripts/run_lance_ray_matrix.py \ - --versions 0.4.2,0.4.1 \ - --gravitino-home distribution/package - distribution/package/bin/gravitino.sh stop - -Each test class will append its own metalake binding to ``gravitino.conf`` and -restart the server itself. The matrix runner opts into keeping that binding -between versions, so back-to-back runs avoid unnecessary Gravitino restarts. -""" - -from __future__ import annotations - -import argparse -import os -import subprocess -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import List - -# Default version set tracks what the Compatibility Matrix in -# docs/lance-rest-integration.md claims to support. Keep these in sync. -DEFAULT_VERSIONS = ["0.4.2", "0.4.1", "0.4.0", "0.3.0"] - - -REPO_ROOT = Path(__file__).resolve().parents[3] -PYTHON_CLIENT_DIR = REPO_ROOT / "clients" / "client-python" -DEFAULT_MATRIX_DIR = PYTHON_CLIENT_DIR / "build" / "lance-ray-matrix" -DEFAULT_GRAVITINO_HOME = REPO_ROOT / "distribution" / "package" - - -@dataclass -class VersionResult: - version: str - status: str # "ok", "fail", "setup-error" - details: str - - -def parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - p.add_argument( - "--versions", - default=",".join(DEFAULT_VERSIONS), - help="Comma-separated list of lance-ray versions to test. " - f"Default: {','.join(DEFAULT_VERSIONS)}", - ) - p.add_argument( - "--python", - default=sys.executable, - help="Path to the host python interpreter used to bootstrap each " - "version's venv. Default: %(default)s", - ) - p.add_argument( - "--matrix-dir", - default=str(DEFAULT_MATRIX_DIR), - help="Directory under which per-version venvs are created and " - "cached across runs. 
Default: %(default)s", - ) - p.add_argument( - "--gravitino-home", - default=str(DEFAULT_GRAVITINO_HOME), - help="Path to the built Gravitino distribution package. The " - "lance-rest aux service must be enabled there. Default: %(default)s", - ) - p.add_argument( - "--ray-spec", - default="ray==2.55.1", - help="Pip spec for ray. Pinned by default so the matrix is " - "reproducible across runs and does not drift with PyPI. Override " - "(e.g. 'ray') to let pip pick a compatible version per lance-ray. " - "Default: %(default)s", - ) - p.add_argument( - "--lance-namespace-spec", - default="lance-namespace==0.7.5", - help="Pip spec for lance-namespace. Pinned by default so the " - "matrix is reproducible and does not drift with PyPI. Override " - "(e.g. 'lance-namespace') for latest. Default: %(default)s", - ) - p.add_argument( - "--keep-going", - action="store_true", - help="Continue to the next version after a failure instead of " - "stopping on the first failed run.", - ) - return p.parse_args() - - -def run(cmd: List[str], **kwargs) -> subprocess.CompletedProcess: - print(f"[matrix] $ {' '.join(cmd)}") - return subprocess.run(cmd, check=False, **kwargs) - - -def ensure_venv(python: str, venv_dir: Path) -> Path: - """Create the venv if it doesn't already exist. 
Returns the venv python path.""" - venv_python = venv_dir / "bin" / "python" - if not venv_python.exists(): - venv_dir.parent.mkdir(parents=True, exist_ok=True) - rc = run([python, "-m", "venv", str(venv_dir)]).returncode - if rc != 0: - raise RuntimeError(f"Failed to create venv at {venv_dir}") - return venv_python - - -def install_deps( - venv_python: Path, - version: str, - ray_spec: str, - lance_namespace_spec: str, -) -> None: - rc = run( - [ - str(venv_python), - "-m", - "pip", - "install", - "--upgrade", - "pip", - "wheel", - ] - ).returncode - if rc != 0: - raise RuntimeError("pip upgrade failed in venv") - - rc = run( - [ - str(venv_python), - "-m", - "pip", - "install", - ray_spec, - f"lance-ray=={version}", - lance_namespace_spec, - "requests", - ] - ).returncode - if rc != 0: - raise RuntimeError(f"Failed to install lance-ray=={version} deps") - - rc = run( - [ - str(venv_python), - "-m", - "pip", - "install", - "-e", - str(PYTHON_CLIENT_DIR), - ] - ).returncode - if rc != 0: - raise RuntimeError("Failed to install apache-gravitino in editable mode") - - -def generate_version_ini(venv_python: Path) -> None: - # The python client reads gravitino/version.ini at runtime. It is - # produced by scripts/generate_version.py and is gitignored, so we - # regenerate it here to make the matrix runnable on fresh checkouts. 
- script = PYTHON_CLIENT_DIR / "scripts" / "generate_version.py" - rc = run( - [str(venv_python), str(script)], - cwd=str(PYTHON_CLIENT_DIR), - ).returncode - if rc != 0: - raise RuntimeError("Failed to generate version.ini for python client") - - -def run_unittest(venv_python: Path, gravitino_home: Path) -> int: - env = os.environ.copy() - env["PYTHONPATH"] = str(PYTHON_CLIENT_DIR) - env["GRAVITINO_HOME"] = str(gravitino_home) - env["START_EXTERNAL_GRAVITINO"] = "true" - env["LANCE_RAY_KEEP_GRAVITINO_CONF"] = "true" - cmd = [ - str(venv_python), - "-m", - "unittest", - "-v", - "tests.integration.test_lance_ray", - ] - print(f"[matrix] $ PYTHONPATH=... GRAVITINO_HOME=... {' '.join(cmd)}") - return subprocess.run( - cmd, cwd=str(PYTHON_CLIENT_DIR), env=env, check=False - ).returncode - - -def main() -> int: - args = parse_args() - versions = [v.strip() for v in args.versions.split(",") if v.strip()] - if not versions: - print("--versions must contain at least one entry", file=sys.stderr) - return 2 - - matrix_dir = Path(args.matrix_dir).resolve() - gravitino_home = Path(args.gravitino_home).resolve() - if not (gravitino_home / "bin" / "gravitino.sh").exists(): - print( - f"GRAVITINO_HOME={gravitino_home} does not look like a " - "Gravitino distribution package (missing bin/gravitino.sh). 
" - "Run `./gradlew compileDistribution -PskipWeb=true -x test` first.", - file=sys.stderr, - ) - return 2 - - results: List[VersionResult] = [] - for version in versions: - print(f"\n========== lance-ray=={version} ==========") - venv_dir = matrix_dir / f".venv-{version}" - try: - venv_python = ensure_venv(args.python, venv_dir) - install_deps( - venv_python, - version, - args.ray_spec, - args.lance_namespace_spec, - ) - generate_version_ini(venv_python) - except Exception as e: # pylint: disable=broad-exception-caught - print(f"[matrix] {version}: setup failed: {e}", file=sys.stderr) - results.append(VersionResult(version, "setup-error", str(e))) - if not args.keep_going: - break - continue - - rc = run_unittest(venv_python, gravitino_home) - if rc == 0: - print(f"[matrix] {version}: PASS") - results.append(VersionResult(version, "ok", "tests passed")) - else: - print(f"[matrix] {version}: FAIL (exit={rc})") - results.append(VersionResult(version, "fail", f"unittest exit {rc}")) - if not args.keep_going: - break - - print("\n========== summary ==========") - width = max(len(r.version) for r in results) if results else 0 - for r in results: - print(f" lance-ray=={r.version.ljust(width)} {r.status:11s} {r.details}") - - any_fail = any(r.status != "ok" for r in results) - return 1 if any_fail else 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py index a43d75913be..aaf6c45ca3d 100644 --- a/clients/client-python/tests/integration/test_lance_ray.py +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -44,7 +44,6 @@ # standalone lance-rest conf file. 
MAIN_CONF_FILE = "conf/gravitino.conf" LANCE_REST_METALAKE_KEY = "gravitino.lance-rest.gravitino-metalake" -KEEP_GRAVITINO_CONF_ENV = "LANCE_RAY_KEEP_GRAVITINO_CONF" def _missing_lance_ray_deps() -> Optional[str]: @@ -68,11 +67,9 @@ class TestLanceRayIntegration(IntegrationTestEnv): ``read_lance`` flow from the upstream lance-ray docs. """ - # Metalake name is fixed (not randomized) so back-to-back runs in the - # same Gravitino process can detect that the lance-rest aux service is - # already bound and skip the costly server restart. The per-test table - # name still gets a random suffix to keep individual test methods - # isolated. + # Metalake name is fixed because the lance-rest aux service binds to a + # single metalake from gravitino.conf. The per-test table name still gets + # a random suffix to keep individual test methods isolated. METALAKE_NAME: str = "lance_ray_test_metalake" CATALOG_NAME: str = "lance_catalog" SCHEMA_NAME: str = "schema" @@ -98,8 +95,8 @@ def setUpClass(cls): # Bind the lance-rest aux service to our test metalake. If the same # binding is already present (e.g. an earlier run in the same Gradle # session left it there), skip the conf write and the restart. This - # avoids restarting Gravitino in the middle of the IT suite when the - # test class is replayed, which would briefly disrupt other ITs. + # avoids appending the same conf entry twice if a prior failed run + # already left the binding behind. if not cls._lance_metalake_already_bound(): cls._append_conf(cls._lance_rest_config(), cls.main_conf_path) cls.appended_lance_rest_conf = True @@ -118,7 +115,7 @@ def setUpClass(cls): # so a skipped run leaves no fixtures behind. 
skip_reason = cls._check_lance_namespace_compat() if skip_reason is not None: - cls._reset_lance_rest_conf_if_needed() + cls._reset_lance_rest_conf() raise unittest.SkipTest(skip_reason) cls.gravitino_admin_client = GravitinoAdminClient("http://localhost:8090") @@ -179,7 +176,7 @@ def tearDownClass(cls): failures.append(("drop metalake", e)) try: - cls._reset_lance_rest_conf_if_needed() + cls._reset_lance_rest_conf() except Exception as e: # pylint: disable=broad-exception-caught failures.append(("reset lance-rest conf", e)) @@ -262,19 +259,9 @@ def _lance_rest_config(cls): return {LANCE_REST_METALAKE_KEY: cls.METALAKE_NAME} @classmethod - def _should_keep_lance_rest_conf(cls) -> bool: - return os.environ.get(KEEP_GRAVITINO_CONF_ENV, "").lower() == "true" - - @classmethod - def _reset_lance_rest_conf_if_needed(cls) -> None: + def _reset_lance_rest_conf(cls) -> None: if not cls.appended_lance_rest_conf or cls.main_conf_path is None: return - if cls._should_keep_lance_rest_conf(): - logger.info( - "Keeping lance-rest Gravitino conf because %s=true", - KEEP_GRAVITINO_CONF_ENV, - ) - return cls._reset_conf(cls._lance_rest_config(), cls.main_conf_path) cls.appended_lance_rest_conf = False cls.restart_server() diff --git a/docs/lance-rest-integration.md b/docs/lance-rest-integration.md index b213c1a35c2..76ee184e0e8 100644 --- a/docs/lance-rest-integration.md +++ b/docs/lance-rest-integration.md @@ -37,27 +37,6 @@ The following table outlines the tested compatibility between Gravitino versions - The Lance ecosystem is changing quickly, so some versions may introduce breaking changes. 
::: -#### Reproducing the matrix locally - -Both connectors ship with a multi-version integration test driver so the -matrix can be re-verified (and extended) without ad-hoc scripting: - -```bash -# lance-spark — runs LanceSparkRESTServiceIT once per bundle version -./gradlew :lance:lance-rest-server:lanceSparkMatrixTest \ - -PlanceSparkBundleVersions=0.1.0,0.1.1,0.2.0,0.4.0 \ - -PskipDockerTests=true -# Per-version JUnit reports land under -# lance/lance-rest-server/build/reports/lance-spark-matrix//. - -# lance-ray — provisions a venv per version under -# clients/client-python/build/lance-ray-matrix/.venv-/ and runs -# tests/integration/test_lance_ray.py against each. The Gradle wrapper -# below starts / stops Gravitino automatically. -./gradlew :clients:client-python:lanceRayMatrixTest \ - -PlanceRayVersions=0.4.2,0.3.0 -``` - ### Why Maintain a Compatibility Matrix? The Lance ecosystem is under active development, with frequent updates to APIs and features. Gravitino's Lance REST service depends on specific connector behaviors to ensure reliable operation. Using incompatible versions may result in: @@ -201,7 +180,8 @@ pip install lance-ray :::info - Ray will be automatically installed if not already present -- The lance-namespace version must be less than or equal to 0.4.5. +- For Gravitino 1.3.0, use a `lance-namespace` client compatible with + server-side `lance-namespace-core` 0.7.5 or newer. - Ensure Ray version compatibility in your environment before deployment ::: diff --git a/lance/lance-rest-server/build.gradle.kts b/lance/lance-rest-server/build.gradle.kts index 8d159c7749d..8a91cc1322d 100644 --- a/lance/lance-rest-server/build.gradle.kts +++ b/lance/lance-rest-server/build.gradle.kts @@ -28,34 +28,13 @@ val scalaVersion: String = project.properties["scalaVersion"] as? 
String ?: extra["defaultScalaVersion"].toString() val sparkVersion: String = libs.versions.spark35.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() -// Comma-separated list of lance-spark-bundle versions to test against. -// The default is the latest supported version; the integration test matrix -// (`:lance:lance-rest-server:lanceSparkMatrixTest`) covers every version in -// this list. Override via `-PlanceSparkBundleVersions=0.2.0,0.3.0,0.4.0`. -val lanceSparkBundleVersions: List = - ((project.properties["lanceSparkBundleVersions"] as? String) ?: "0.4.0") - .split(",").map { it.trim() }.filter { it.isNotEmpty() } -if (lanceSparkBundleVersions.isEmpty()) { - throw GradleException("lanceSparkBundleVersions must contain at least one version") -} -val primaryLanceSparkBundleVersion: String = lanceSparkBundleVersions.first() +val lanceSparkBundleVersion = "0.4.0" val lanceSparkBundleJarPathProperty = "gravitino.lance.spark.bundle.jar" - -fun lanceSparkBundleConfigName(version: String): String = - "lanceSparkBundle_" + version.replace(".", "_").replace("-", "_") -fun lanceSparkBundleDirFor(version: String) = - layout.buildDirectory.dir("lance-spark-bundle/$version") -fun lanceSparkPrepareTaskName(version: String): String = - "prepareLanceSparkBundle_" + version.replace(".", "_").replace("-", "_") -fun lanceSparkTestTaskName(version: String): String = - "testLanceSparkBundle_" + version.replace(".", "_").replace("-", "_") - -lanceSparkBundleVersions.forEach { version -> - configurations.create(lanceSparkBundleConfigName(version)) { - isCanBeConsumed = false - isCanBeResolved = true - isTransitive = false - } +val lanceSparkBundleDir = layout.buildDirectory.dir("lance-spark-bundle") +val lanceSparkBundle by configurations.creating { + isCanBeConsumed = false + isCanBeResolved = true + isTransitive = false } dependencies { @@ -101,12 +80,10 @@ dependencies { testImplementation(project(":integration-test-common", 
"testArtifacts")) testImplementation(libs.lance) - lanceSparkBundleVersions.forEach { version -> - add( - lanceSparkBundleConfigName(version), - "org.lance:lance-spark-bundle-3.5_2.12:$version" - ) - } + add( + lanceSparkBundle.name, + "org.lance:lance-spark-bundle-3.5_2.12:$lanceSparkBundleVersion" + ) testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") { @@ -142,17 +119,10 @@ tasks { from(configurations.runtimeClasspath) into("build/libs") } - // One Sync task per lance-spark-bundle version. Each task lays down its - // bundle jar under build/lance-spark-bundle// so per-version Test - // tasks pick up the right jar without colliding. - lanceSparkBundleVersions.forEach { version -> - register(lanceSparkPrepareTaskName(version)) { - from(configurations.getByName(lanceSparkBundleConfigName(version))) - into(lanceSparkBundleDirFor(version)) - } + val prepareLanceSparkBundle by registering(Sync::class) { + from(lanceSparkBundle) + into(lanceSparkBundleDir) } - val primaryPrepareLanceSparkBundle = - named(lanceSparkPrepareTaskName(primaryLanceSparkBundleVersion)) jar { finalizedBy(copyDepends) @@ -183,14 +153,13 @@ tasks { } test { - dependsOn(primaryPrepareLanceSparkBundle) + dependsOn(prepareLanceSparkBundle) - val primaryBundleDir = lanceSparkBundleDirFor(primaryLanceSparkBundleVersion) doFirst { val bundleJar = - primaryBundleDir.get().asFile.listFiles()?.singleOrNull { it.extension == "jar" } + lanceSparkBundleDir.get().asFile.listFiles()?.singleOrNull { it.extension == "jar" } ?: throw GradleException( - "Expected exactly one Lance Spark bundle jar in ${primaryBundleDir.get().asFile}" + "Expected exactly one Lance Spark bundle jar in ${lanceSparkBundleDir.get().asFile}" ) systemProperty(lanceSparkBundleJarPathProperty, bundleJar.absolutePath) } @@ -200,64 +169,6 @@ tasks { dependsOn(":catalogs:catalog-lakehouse-generic:jar") } 
} - - // Per-version Test task that only runs LanceSparkRESTServiceIT against a - // specific lance-spark-bundle. Each task downloads its bundle through the - // matching Sync task and points the IT JVM at it via system property. - lanceSparkBundleVersions.forEach { version -> - register(lanceSparkTestTaskName(version)) { - group = "verification" - description = - "Run LanceSparkRESTServiceIT against lance-spark-bundle $version" - - dependsOn(named(lanceSparkPrepareTaskName(version))) - dependsOn(named("jar")) - val versionTestMode = project.properties["testMode"] as? String ?: "embedded" - if (versionTestMode == "embedded") { - dependsOn(":catalogs:catalog-lakehouse-generic:jar") - } - - testClassesDirs = sourceSets["test"].output.classesDirs - classpath = sourceSets["test"].runtimeClasspath - useJUnitPlatform() - filter { includeTestsMatching("*LanceSparkRESTServiceIT*") } - - val versionBundleDir = lanceSparkBundleDirFor(version) - doFirst { - val bundleJar = - versionBundleDir.get().asFile.listFiles()?.singleOrNull { it.extension == "jar" } - ?: throw GradleException( - "Expected exactly one Lance Spark bundle jar in " + - "${versionBundleDir.get().asFile} for version $version" - ) - systemProperty(lanceSparkBundleJarPathProperty, bundleJar.absolutePath) - println("[lance-spark-matrix] running IT against bundle $version -> ${bundleJar.name}") - } - - // Send per-version reports to a separate directory so a matrix run - // doesn't overwrite results across versions. 
- val versionSlug = version.replace(".", "_").replace("-", "_") - reports { - html.outputLocation.set( - layout.buildDirectory.dir("reports/lance-spark-matrix/$versionSlug") - ) - junitXml.outputLocation.set( - layout.buildDirectory.dir("test-results/lance-spark-matrix/$versionSlug") - ) - } - } - } - - register("lanceSparkMatrixTest") { - group = "verification" - description = - "Run LanceSparkRESTServiceIT against every version in -PlanceSparkBundleVersions " + - "(default: $primaryLanceSparkBundleVersion). Reports land under " + - "build/reports/lance-spark-matrix//." - dependsOn( - lanceSparkBundleVersions.map { named(lanceSparkTestTaskName(it)) } - ) - } } tasks.test { From a094b267867783abac673b503c55a2de8d688674 Mon Sep 17 00:00:00 2001 From: yuqi Date: Mon, 18 May 2026 10:43:31 +0800 Subject: [PATCH 12/21] [#10983] doc(lance): Mark createEmptyTable for removal in 1.4.0 Upstream lance-namespace-core 0.7.5 dropped createEmptyTable from the LanceNamespace interface, and lance-ray < 0.3.0 / lance-spark < 0.1.0 (the only clients that still call /create-empty) are already declared incompatible with Gravitino 1.3.0 in the compatibility matrix. The legacy SDK method and REST endpoint therefore have no reachable users. Document the removal target on both surfaces so any downstream caller relying on this notices in time. The endpoint and the method itself stay through 1.3.0 as a courtesy for direct HTTP callers. 
Co-Authored-By: Claude Opus 4.7 --- .../lance/common/ops/LanceTableOperations.java | 10 +++++++++- .../lance/service/rest/LanceTableOperations.java | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java index 1ba87a21365..11f6da9c7eb 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java @@ -74,12 +74,20 @@ DeclareTableResponse declareTable( /** * Create a new table without schema. * + *

Slated for removal in 1.4.0. Upstream {@code lance-namespace-core} 0.7.5 already removed + * this operation from the {@code LanceNamespace} interface, and the compatibility matrix declares Gravitino + * 1.3.0 incompatible with the older clients that still call it (lance-ray < 0.3.0 and + * lance-spark < 0.1.0), so the legacy {@code /create-empty} REST endpoint and this method keep + * no reachable users. They are kept for one release as a courtesy for any direct HTTP caller and + * will be deleted once 1.3.0 has shipped. + * * @param tableId table ids are in the format of "{namespace}{delimiter}{table_name}" * @param delimiter the delimiter used in the namespace * @param tableLocation the location where the table data will be stored * @param tableProperties the properties of the table * @return the response of the create table operation - * @deprecated Use {@link #declareTable} instead. + * @deprecated Use {@link #declareTable} instead. To be removed in 1.4.0. */ @Deprecated DeclareTableResponse createEmptyTable( diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java index b0f7cd7b774..7be8a1168c1 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java @@ -136,6 +136,12 @@ public Response createTable( /** * Legacy create-empty endpoint kept for clients that still call it. It only records table * metadata, including its location, and does not touch Lance storage. + * + *

Slated for removal in 1.4.0 — upstream {@code lance-namespace-core} 0.7.5 dropped this + * operation, and lance-ray < 0.3.0 / lance-spark < 0.1.0 (the only clients that could still + * hit it) are already declared incompatible with Gravitino 1.3.0 in the compatibility matrix. Kept for one + * release as a courtesy to any direct HTTP caller. */ @POST @Path("/create-empty") From 37ed3338d4231bf2f13a81538d13000ac5cd60c7 Mon Sep 17 00:00:00 2001 From: yuqi Date: Mon, 18 May 2026 11:52:45 +0800 Subject: [PATCH 13/21] [#10983] refactor(lance): Address review comments on the upgrade PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five fixes batched together: * Move ray / lance-ray / lance-namespace out of `requirements-dev.txt` into a new `requirements-lance.txt` and expose it as a `lance` extra in `setup.py`. The default `pip install -e .[dev]` (and the lint / unit-test gradle tasks that drive it) no longer pulls the heavy pylance + ray wheels. Lance integration tests now install via `pip install -e .[lance]`. * `_missing_lance_ray_deps()` is now evaluated once at module import and reused by the `@skipIf` predicate and message instead of being invoked twice. * The `lance-ray` skip message now points at the new `[lance]` extra and the lance-namespace-core >= 0.7.5 server requirement instead of saying "install ray lance-ray lance-namespace". * `test_write_read_filter_via_lance_ray` adds an inline `# pylint: disable=import-error` for the deferred `ray` / `lance_ray` imports, since those packages now live behind the `[lance]` extra and are no longer present in the default dev install. * `LanceRESTServiceIT.testCreateEmptyTable` was migrated to call `ns.declareTable(...)` in the upgrade. The method has been removed here — the existing `testDeclareTable` further down in the same class is a strict superset of its assertions, so keeping the old method as a renamed duplicate would just be dead code. 
Plus a drive-by black reformat of an unrelated DTO file caught by the client-python pylint task. Co-Authored-By: Claude Opus 4.7 --- clients/client-python/requirements-dev.txt | 6 -- clients/client-python/requirements-lance.txt | 25 +++++++++ clients/client-python/setup.py | 1 + .../tests/integration/test_lance_ray.py | 22 ++++++-- .../integration/test/LanceRESTServiceIT.java | 56 ------------------- 5 files changed, 42 insertions(+), 68 deletions(-) create mode 100644 clients/client-python/requirements-lance.txt diff --git a/clients/client-python/requirements-dev.txt b/clients/client-python/requirements-dev.txt index 867d2b08ea9..e4335e2a98a 100644 --- a/clients/client-python/requirements-dev.txt +++ b/clients/client-python/requirements-dev.txt @@ -33,9 +33,3 @@ jwcrypto==1.5.6 sphinx==7.1.2 furo==2024.8.6 banks==2.4.1 - -# Lance integration deps. Pinned so integration tests run against a single, -# known-good server-side `lance-namespace-core` 0.7.5+ combination. -ray==2.55.1 -lance-ray==0.4.2 -lance-namespace==0.7.5 diff --git a/clients/client-python/requirements-lance.txt b/clients/client-python/requirements-lance.txt new file mode 100644 index 00000000000..79a84a91a2a --- /dev/null +++ b/clients/client-python/requirements-lance.txt @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +# Lance integration deps. Pinned so the lance-ray integration test runs +# against a single, known-good server-side `lance-namespace-core` 0.7.5+ +# combination. Installed via the `lance` extra (e.g. `pip install -e .[lance]`) +# so the heavy ray/pylance native wheels don't slow down the default `dev` +# install used by lint and unit-test tasks. +ray==2.55.1 +lance-ray==0.4.2 +lance-namespace==0.7.5 diff --git a/clients/client-python/setup.py b/clients/client-python/setup.py index 18ea18e0e8d..4cdbd9302a1 100644 --- a/clients/client-python/setup.py +++ b/clients/client-python/setup.py @@ -56,6 +56,7 @@ install_requires=open("requirements.txt").read(), extras_require={ "dev": open("requirements-dev.txt").read(), + "lance": open("requirements-lance.txt").read(), }, include_package_data=True, ) diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py index aaf6c45ca3d..71f917b58ad 100644 --- a/clients/client-python/tests/integration/test_lance_ray.py +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -56,10 +56,17 @@ def _missing_lance_ray_deps() -> Optional[str]: return ", ".join(missing) if missing else None +# Compute once at module import time so the @skipIf condition and message +# don't trigger two rounds of import attempts. +_MISSING_LANCE_RAY_DEPS = _missing_lance_ray_deps() + + @unittest.skipIf( - _missing_lance_ray_deps() is not None, - f"lance-ray test deps not installed: {_missing_lance_ray_deps()}. " - "Install with: pip install ray lance-ray lance-namespace", + _MISSING_LANCE_RAY_DEPS is not None, + f"lance-ray test deps not installed: {_MISSING_LANCE_RAY_DEPS}. " + "Install with: pip install -e .[lance] (or pip install ray lance-ray " + "lance-namespace). 
Requires the Gravitino server to expose a lance-rest " + "auxiliary service backed by lance-namespace-core >= 0.7.5.", ) class TestLanceRayIntegration(IntegrationTestEnv): """End-to-end test for the lance-ray Python client against a Gravitino-backed @@ -285,12 +292,15 @@ def _wait_for_lance_rest_ready(timeout_s: float = 60.0) -> bool: def test_write_read_filter_via_lance_ray(self): # Imports are deferred so the skipIf decorator handles missing deps - # cleanly without import errors at module load time. - # pylint: disable=import-outside-toplevel + # cleanly without import errors at module load time. The lance/ray + # extras live in `requirements-lance.txt` (and `setup.py`'s `lance` + # extra), so they aren't present in the default `dev` install used by + # pylint — silence the resulting import-error. + # pylint: disable=import-outside-toplevel,import-error import ray from lance_ray import read_lance, write_lance - # pylint: enable=import-outside-toplevel + # pylint: enable=import-outside-toplevel,import-error ns_properties = {"uri": LANCE_REST_BASE_URL} table_id = [self.CATALOG_NAME, self.SCHEMA_NAME, self.TABLE_NAME] diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java index b71f444d654..4eb03078c04 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java @@ -426,62 +426,6 @@ public void testNamespaceExists() { Assertions.assertTrue(exception.getMessage().contains("\"code\":1")); } - @Test - void testCreateEmptyTable() throws ApiException { - catalog = createCatalog(CATALOG_NAME); - createSchema(); - - DeclareTableRequest request = new DeclareTableRequest(); - String location = tempDir + "/" + "empty_table/"; - 
request.setLocation(location); - request.setId(List.of(CATALOG_NAME, SCHEMA_NAME, "empty_table")); - - DeclareTableResponse response = ns.declareTable(request); - Assertions.assertNotNull(response); - Assertions.assertEquals(location, response.getLocation()); - - DescribeTableRequest describeTableRequest = new DescribeTableRequest(); - describeTableRequest.setId(List.of(CATALOG_NAME, SCHEMA_NAME, "empty_table")); - - DescribeTableResponse loadTable = ns.describeTable(describeTableRequest); - Assertions.assertNotNull(loadTable); - Assertions.assertEquals(location, loadTable.getLocation()); - Assertions.assertEquals( - "true", loadTable.getMetadata().get(LanceConstants.LANCE_TABLE_CREATE_EMPTY)); - Assertions.assertEquals("true", loadTable.getMetadata().get(Table.PROPERTY_EXTERNAL)); - - // Try to create the same table again should fail - RuntimeException exception = - Assertions.assertThrows( - RuntimeException.class, - () -> { - ns.declareTable(request); - }); - Assertions.assertTrue(exception.getMessage().contains("\"code\":5")); - - // Create an empty table with non-existent location should succeed - // since storage is not touched - DeclareTableRequest wrongLocationRequest = new DeclareTableRequest(); - wrongLocationRequest.setId(List.of(CATALOG_NAME, SCHEMA_NAME, "another_table")); - String another_location = tempDir + "/" + "another_location/"; - Assertions.assertFalse(new File(another_location).exists()); - wrongLocationRequest.setLocation(another_location); - response = ns.declareTable(wrongLocationRequest); - Assertions.assertNotNull(response); - Assertions.assertEquals(another_location, response.getLocation()); - // Will not touch storage, so the path should not be created. 
- Assertions.assertFalse(new File(another_location).exists()); - - // Create another empty table at a new location and verify it succeeds - String correctedLocation = tempDir + "/" + "wrong_location_table/"; - wrongLocationRequest.setLocation(correctedLocation); - wrongLocationRequest.setId(List.of(CATALOG_NAME, SCHEMA_NAME, "wrong_location_table")); - DeclareTableResponse wrongLocationResponse = - Assertions.assertDoesNotThrow(() -> ns.declareTable(wrongLocationRequest)); - Assertions.assertNotNull(wrongLocationResponse); - Assertions.assertEquals(correctedLocation, wrongLocationResponse.getLocation()); - } - @Test void testCreateTable() throws IOException { catalog = createCatalog(CATALOG_NAME); From 36b50dd9b0ab5fabefa64a4b8e47301172f88986 Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 19 May 2026 08:00:32 +0800 Subject: [PATCH 14/21] fix(lance): remove deprecated create empty table API --- .../lakehouse/lance/LanceTableDelegator.java | 6 +- .../lakehouse/lance/LanceTableOperations.java | 8 +- .../test/CatalogGenericCatalogLanceIT.java | 11 +- docs/lance-rest-service.md | 31 ++--- .../common/ops/LanceTableOperations.java | 25 +--- .../GravitinoLanceTableOperations.java | 24 +--- .../lance/common/utils/LanceConstants.java | 2 +- .../service/rest/LanceTableOperations.java | 65 ---------- .../integration/test/LanceRESTServiceIT.java | 26 ++-- .../rest/TestLanceNamespaceOperations.java | 116 ------------------ 10 files changed, 39 insertions(+), 275 deletions(-) diff --git a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableDelegator.java b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableDelegator.java index 0e9a9676dfd..a2c2c33c5db 100644 --- a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableDelegator.java +++ 
b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableDelegator.java @@ -20,7 +20,7 @@ import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_STORAGE_OPTIONS_PREFIX; -import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_CREATE_EMPTY; +import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_DECLARED; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_FORMAT; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_REGISTER; @@ -68,8 +68,8 @@ public List> tablePropertyEntries() { false /* hidden */, false /* reserved */), PropertyEntry.booleanPropertyEntry( - LANCE_TABLE_CREATE_EMPTY, - "Whether this is a lance create empty table (declare table) operation.", + LANCE_TABLE_DECLARED, + "Whether this is a Lance metadata-only declare table operation.", false, true /* immutable */, false /* defaultValue */, diff --git a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java index dccbfe8c04e..0813d4a4141 100644 --- a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java +++ b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java @@ -282,14 +282,14 @@ Table createTableInternal( ident, columns, comment, properties, partitions, distribution, sortOrders, indexes); } - // Check whether it's a create empty table operation. + // Check whether it's a metadata-only declare table operation. 
boolean createEmpty = - Optional.ofNullable(properties.get(LanceConstants.LANCE_TABLE_CREATE_EMPTY)) + Optional.ofNullable(properties.get(LanceConstants.LANCE_TABLE_DECLARED)) .map(Boolean::parseBoolean) .orElse(false); if (createEmpty) { - // For create empty table, we just create the table metadata in Gravitino without creating - // the underlying Lance dataset. + // For declare table, we just create the table metadata in Gravitino without creating the + // underlying Lance dataset. return super.createTable( ident, columns, comment, properties, partitions, distribution, sortOrders, indexes); } diff --git a/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java b/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java index f5a98885030..d7b2b5f2181 100644 --- a/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java +++ b/catalogs/catalog-lakehouse-generic/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/integration/test/CatalogGenericCatalogLanceIT.java @@ -19,7 +19,7 @@ package org.apache.gravitino.catalog.lakehouse.lance.integration.test; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE; -import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_CREATE_EMPTY; +import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_DECLARED; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_FORMAT; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_REGISTER; @@ -162,7 +162,7 @@ public void testCrateEmptyTable() { String tableLocation = tempDirectory + "/" + tableName; properties.put("format", "lance"); properties.put("location", tableLocation); - 
properties.put(LANCE_TABLE_CREATE_EMPTY, "true"); + properties.put(LANCE_TABLE_DECLARED, "true"); properties.put(Table.PROPERTY_EXTERNAL, "true"); Table createdTable = @@ -178,7 +178,7 @@ public void testCrateEmptyTable() { null); Assertions.assertEquals(createdTable.name(), emptyTableName); - // Now try to alter the property LANCE_TABLE_CREATE_EMPTY + // Now try to alter the property LANCE_TABLE_DECLARED IllegalArgumentException e = Assertions.assertThrows( IllegalArgumentException.class, @@ -186,11 +186,10 @@ public void testCrateEmptyTable() { catalog .asTableCatalog() .alterTable( - nameIdentifier, - TableChange.setProperty(LANCE_TABLE_CREATE_EMPTY, "false"))); + nameIdentifier, TableChange.setProperty(LANCE_TABLE_DECLARED, "false"))); Assertions.assertTrue( - e.getMessage().contains("Property lance.create-empty is immutable or reserved")); + e.getMessage().contains("Property lance.declared is immutable or reserved")); } @Test diff --git a/docs/lance-rest-service.md b/docs/lance-rest-service.md index 5a299ac9249..c2e8b8698f1 100644 --- a/docs/lance-rest-service.md +++ b/docs/lance-rest-service.md @@ -81,8 +81,7 @@ The Lance REST service provides comprehensive support for namespace management, | TableExists | Check whether a table exists | POST | `/lance/v1/table/{id}/exists` | 1.1.0 | | RegisterTable | Register an existing Lance table to a namespace | POST | `/lance/v1/table/{id}/register` | 1.1.0 | | DeregisterTable | Unregister a table from a namespace (metadata only, data remains) | POST | `/lance/v1/table/{id}/deregister` | 1.1.0 | -| CreateEmptyTable | **Deprecated**: Use `DeclareTable` instead. Declare a table and store the metadata without touching lance table data, for more, please refer to [doc](https://docs.lancedb.com/api-reference/rest/table/create-an-empty-table) | POST | `/lance/v1/table/{id}/create-empty` | 1.1.0 | -| DeclareTable | Declare a table and store the metadata without touching lance table data. 
This is the preferred replacement for `CreateEmptyTable`. | POST | `/lance/v1/table/{id}/declare` | 1.3.0 | +| DeclareTable | Declare a table and store the metadata without touching lance table data. | POST | `/lance/v1/table/{id}/declare` | 1.3.0 | More details, please refer to the [Lance REST API specification](https://lance.org/format/namespace/rest/catalog-spec/) @@ -280,17 +279,7 @@ curl -X POST http://localhost:9101/lance/v1/table/lance_catalog%24schema%24table "mode": "create" }' -# Create a new empty table -# x-lance-table-properties is optional; if omitted, it defaults to an empty map. -curl -X POST http://localhost:9101/lance/v1/table/lance_catalog%24schema%24table02/create-empty \ - -H 'Content-Type: application/json' \ - -H "x-lance-table-properties: {\"description\":\"This is table02\"}" \ - -d '{ - "id": ["lance_catalog", "schema", "table02"], - "location": "/tmp/lance_catalog/schema/table02" - }' - -# Declare a table (preferred replacement for create-empty) +# Declare a table curl -X POST http://localhost:9101/lance/v1/table/lance_catalog%24schema%24table04/declare \ -H 'Content-Type: application/json' \ -d '{ @@ -350,11 +339,11 @@ registerTableRequest.setId(Arrays.asList("lance_catalog", "schema", "table01")); registerTableRequest.setMode("create"); ns.registerTable(registerTableRequest); -// Create an empty table -CreateEmptyTableRequest createEmptyTableRequest = new CreateEmptyTableRequest(); -createEmptyTableRequest.setLocation("/tmp/lance_catalog/schema/table02"); -createEmptyTableRequest.setId(Arrays.asList("lance_catalog", "schema", "table02")); -ns.createEmptyTable(createEmptyTableRequest); +// Declare a table +DeclareTableRequest declareTableRequest = new DeclareTableRequest(); +declareTableRequest.setLocation("/tmp/lance_catalog/schema/table02"); +declareTableRequest.setId(Arrays.asList("lance_catalog", "schema", "table02")); +ns.declareTable(declareTableRequest); // Create a table with schema inferred from Arrow IPC file. 
// For REST create API, location/properties are passed via headers. @@ -400,12 +389,12 @@ register_table_request = ln.RegisterTableRequest( ) ns.register_table(register_table_request) -# Create an empty table -create_empty_table_request = ln.CreateEmptyTableRequest( +# Declare a table +declare_table_request = ln.DeclareTableRequest( id=['lance_catalog', 'schema', 'table02'], location='/tmp/lance_catalog/schema/table02' ) -ns.create_empty_table(create_empty_table_request) +ns.declare_table(declare_table_request) # Create a table with schema inferred from Arrow IPC file. # For REST create API, location/properties are passed via headers. diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java index 11f6da9c7eb..7570de38a94 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java @@ -59,8 +59,7 @@ CreateTableResponse createTable( byte[] arrowStreamBody); /** - * Declare a table without touching storage. This is the preferred API for creating metadata-only - * table entries, replacing the deprecated {@link #createEmptyTable} method. + * Declare a table without touching storage. * * @param tableId table ids are in the format of "{namespace}{delimiter}{table_name}" * @param delimiter the delimiter used in the namespace @@ -71,28 +70,6 @@ CreateTableResponse createTable( DeclareTableResponse declareTable( String tableId, String delimiter, String tableLocation, Map tableProperties); - /** - * Create a new table without schema. - * - *

Slated for removal in 1.4.0. Upstream {@code lance-namespace-core} 0.7.5 already removed - * this operation from the {@code LanceNamespace} interface, and the compatibility matrix declares Gravitino - * 1.3.0 incompatible with the older clients that still call it (lance-ray < 0.3.0 and - * lance-spark < 0.1.0), so the legacy {@code /create-empty} REST endpoint and this method keep - * no reachable users. They are kept for one release as a courtesy for any direct HTTP caller and - * will be deleted once 1.3.0 has shipped. - * - * @param tableId table ids are in the format of "{namespace}{delimiter}{table_name}" - * @param delimiter the delimiter used in the namespace - * @param tableLocation the location where the table data will be stored - * @param tableProperties the properties of the table - * @return the response of the create table operation - * @deprecated Use {@link #declareTable} instead. To be removed in 1.4.0. - */ - @Deprecated - DeclareTableResponse createEmptyTable( - String tableId, String delimiter, String tableLocation, Map tableProperties); - /** * Register an existing table. 
* diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java index d3eb52fc9fe..e7409c2ee16 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java @@ -22,7 +22,7 @@ import static org.apache.gravitino.lance.common.ops.gravitino.LanceDataTypeConverter.CONVERTER; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_LOCATION; -import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_CREATE_EMPTY; +import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_DECLARED; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_FORMAT; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_VERSION; import static org.apache.gravitino.rel.Column.DEFAULT_VALUE_NOT_SET; @@ -196,7 +196,7 @@ public DeclareTableResponse declareTable( ImmutableMap props = ImmutableMap.builder() .putAll(tableProperties) - .put(LANCE_TABLE_CREATE_EMPTY, "true") + .put(LANCE_TABLE_DECLARED, "true") .put(Table.PROPERTY_EXTERNAL, "true") .build(); @@ -208,26 +208,6 @@ public DeclareTableResponse declareTable( return declareTableResponse; } - @Override - @SuppressWarnings("deprecation") - public DeclareTableResponse createEmptyTable( - String tableId, String delimiter, String tableLocation, Map tableProperties) { - // Empty table creation only supports CREATE mode (not EXIST_OK or OVERWRITE). 
- ImmutableMap props = - ImmutableMap.builder() - .putAll(tableProperties) - .put(LANCE_TABLE_CREATE_EMPTY, "true") - .put(Table.PROPERTY_EXTERNAL, "true") - .build(); - - CreateTableResponse response = - createTable(tableId, "create", delimiter, tableLocation, props, null); - DeclareTableResponse emptyTableResponse = new DeclareTableResponse(); - emptyTableResponse.setLocation(response.getLocation()); - emptyTableResponse.setStorageOptions(response.getStorageOptions()); - return emptyTableResponse; - } - @Override public RegisterTableResponse registerTable( String tableId, String mode, String delimiter, Map tableProperties) { diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java index 991c6b8f261..3ad473c0b48 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java @@ -39,7 +39,7 @@ public class LanceConstants { public static final String LANCE_TABLE_VERSION = "lance.version"; // Mark whether it is to create an empty Lance table(no data files) - public static final String LANCE_TABLE_CREATE_EMPTY = "lance.create-empty"; + public static final String LANCE_TABLE_DECLARED = "lance.declared"; public static final String LANCE_TABLE_FORMAT = "lance"; } diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java index 7be8a1168c1..3df1a7ff5a4 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java @@ -133,50 +133,6 @@ public Response createTable( } } - 
/** - * Legacy create-empty endpoint kept for clients that still call it. It only records table - * metadata, including its location, and does not touch Lance storage. - * - *

Slated for removal in 1.4.0 — upstream {@code lance-namespace-core} 0.7.5 dropped this - * operation, and lance-ray < 0.3.0 / lance-spark < 0.1.0 (the only clients that could still - * hit it) are already declared incompatible with Gravitino 1.3.0 in the compatibility matrix. Kept for one - * release as a courtesy to any direct HTTP caller. - */ - @POST - @Path("/create-empty") - @Produces("application/json") - @Timed(name = "create-empty-table." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) - @ResponseMetered(name = "create-empty-table", absolute = true) - @SuppressWarnings("deprecation") - public Response createEmptyTable( - @PathParam("id") String tableId, - @QueryParam("delimiter") @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) String delimiter, - Map requestBody, - @Context HttpHeaders headers) { - try { - validateCreateEmptyTableRequest(requestBody); - String tableLocation = - Optional.ofNullable(requestBody) - .map(body -> body.get(LANCE_LOCATION)) - .map(String::valueOf) - .orElse(null); - Map props = extractPropertiesFromBody(requestBody); - MultivaluedMap headersMap = headers.getRequestHeaders(); - String tableProperties = headersMap.getFirst(LANCE_TABLE_PROPERTIES_PREFIX_HEADER); - Map headerProps = SerializationUtils.deserializeProperties(tableProperties); - // Keep backward compatibility: accept body properties and let header override on key - // conflict. 
- props.putAll(headerProps); - - DeclareTableResponse response = - lanceNamespace.asTableOps().createEmptyTable(tableId, delimiter, tableLocation, props); - return Response.ok(response).build(); - } catch (Exception e) { - return LanceExceptionMapper.toRESTResponse(tableId, e); - } - } - @POST @Path("/declare") @Produces("application/json") @@ -336,32 +292,11 @@ public Response alterColumns( } } - @SuppressWarnings({"unused", "deprecation"}) - private void validateCreateEmptyTableRequest(Map requestBody) { - // No specific fields to validate for now - } - private void validateDeclareTableRequest( @SuppressWarnings("unused") DeclareTableRequest request) { // No specific fields to validate for now } - private static Map extractPropertiesFromBody(Map requestBody) { - if (requestBody == null) { - return Maps.newHashMap(); - } - - Object propertiesObject = requestBody.get("properties"); - if (!(propertiesObject instanceof Map)) { - return Maps.newHashMap(); - } - - Map properties = Maps.newHashMap(); - ((Map) propertiesObject) - .forEach((key, value) -> properties.put(String.valueOf(key), String.valueOf(value))); - return properties; - } - private void validateRegisterTableRequest( @SuppressWarnings("unused") RegisterTableRequest request) { // No specific fields to validate for now diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java index 4eb03078c04..05ee768f207 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java @@ -797,13 +797,13 @@ void testDeregisterNonExistingTable() { Assertions.assertTrue(exception.getMessage().contains("\"code\":4")); Assertions.assertTrue(exception.getMessage().contains("Table not found")); // 
Try to create a table and then deregister table - DeclareTableRequest createEmptyTableRequest = new DeclareTableRequest(); + DeclareTableRequest declareTableRequest = new DeclareTableRequest(); String location = tempDir + "/" + "to_be_deregistered_table/"; ids = List.of(CATALOG_NAME, SCHEMA_NAME, "to_be_deregistered_table"); - createEmptyTableRequest.setLocation(location); - createEmptyTableRequest.setId(ids); + declareTableRequest.setLocation(location); + declareTableRequest.setId(ids); DeclareTableResponse response = - Assertions.assertDoesNotThrow(() -> ns.declareTable(createEmptyTableRequest)); + Assertions.assertDoesNotThrow(() -> ns.declareTable(declareTableRequest)); Assertions.assertNotNull(response); Assertions.assertEquals(location, response.getLocation()); @@ -841,12 +841,12 @@ void testTableExists() { createSchema(); List ids = List.of(CATALOG_NAME, SCHEMA_NAME, "table_exists"); - DeclareTableRequest createEmptyTableRequest = new DeclareTableRequest(); + DeclareTableRequest declareTableRequest = new DeclareTableRequest(); String location = tempDir + "/" + "table_exists/"; - createEmptyTableRequest.setLocation(location); - createEmptyTableRequest.setId(ids); + declareTableRequest.setLocation(location); + declareTableRequest.setId(ids); DeclareTableResponse response = - Assertions.assertDoesNotThrow(() -> ns.declareTable(createEmptyTableRequest)); + Assertions.assertDoesNotThrow(() -> ns.declareTable(declareTableRequest)); Assertions.assertNotNull(response); Assertions.assertEquals(location, response.getLocation()); @@ -870,12 +870,12 @@ void testDropTable() { createSchema(); List ids = List.of(CATALOG_NAME, SCHEMA_NAME, "table_to_drop"); - DeclareTableRequest createEmptyTableRequest = new DeclareTableRequest(); + DeclareTableRequest declareTableRequest = new DeclareTableRequest(); String location = tempDir + "/" + "table_to_drop/"; - createEmptyTableRequest.setLocation(location); - createEmptyTableRequest.setId(ids); + 
declareTableRequest.setLocation(location); + declareTableRequest.setId(ids); DeclareTableResponse response = - Assertions.assertDoesNotThrow(() -> ns.declareTable(createEmptyTableRequest)); + Assertions.assertDoesNotThrow(() -> ns.declareTable(declareTableRequest)); Assertions.assertNotNull(response); Assertions.assertEquals(location, response.getLocation()); @@ -920,7 +920,7 @@ void testDeclareTable() { Assertions.assertNotNull(loadTable); Assertions.assertEquals(location, loadTable.getLocation()); Assertions.assertEquals( - "true", loadTable.getMetadata().get(LanceConstants.LANCE_TABLE_CREATE_EMPTY)); + "true", loadTable.getMetadata().get(LanceConstants.LANCE_TABLE_DECLARED)); Assertions.assertEquals("true", loadTable.getMetadata().get(Table.PROPERTY_EXTERNAL)); // Try to declare the same table again should fail diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java index 9f6d355e9f8..644affb4091 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java @@ -19,7 +19,6 @@ package org.apache.gravitino.lance.service.rest; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; @@ -443,121 +442,6 @@ void testCreateTable() { Assertions.assertEquals("Runtime exception", errorResp.getError()); } - @Test - void testCreateEmptyTable() { - String tableIds = "catalog.scheme.create_empty_table"; - String delimiter = "."; - - // Test normal - DeclareTableResponse createTableResponse = new DeclareTableResponse(); - 
createTableResponse.setLocation("/path/to/table"); - createTableResponse.setStorageOptions(ImmutableMap.of("key", "value")); - when(tableOps.createEmptyTable(any(), any(), any(), any())).thenReturn(createTableResponse); - - DeclareTableRequest tableRequest = new DeclareTableRequest(); - tableRequest.setLocation("/path/to/table"); - - Response resp = - target(String.format("/v1/table/%s/create-empty", tableIds)) - .queryParam("delimiter", delimiter) - .request(MediaType.APPLICATION_JSON_TYPE) - .post(Entity.entity(tableRequest, MediaType.APPLICATION_JSON_TYPE)); - - Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); - Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp.getMediaType()); - DeclareTableResponse response = resp.readEntity(DeclareTableResponse.class); - Assertions.assertEquals(createTableResponse.getLocation(), response.getLocation()); - Assertions.assertEquals(createTableResponse.getStorageOptions(), response.getStorageOptions()); - - Mockito.verify(tableOps) - .createEmptyTable(eq(tableIds), eq(delimiter), eq("/path/to/table"), eq(Map.of())); - - // Backward compatibility: request-body properties should still be accepted. 
- Mockito.reset(tableOps); - when(tableOps.createEmptyTable(any(), any(), any(), any())).thenReturn(createTableResponse); - String bodyWithProperties = - "{" - + "\"id\":[\"catalog\",\"scheme\",\"create_empty_table\"]," - + "\"location\":\"/path/to/table\"," - + "\"properties\":{\"k1\":\"v1\",\"k2\":2}" - + "}"; - resp = - target(String.format("/v1/table/%s/create-empty", tableIds)) - .queryParam("delimiter", delimiter) - .request(MediaType.APPLICATION_JSON_TYPE) - .post(Entity.entity(bodyWithProperties, MediaType.APPLICATION_JSON_TYPE)); - Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); - Mockito.verify(tableOps) - .createEmptyTable( - eq(tableIds), - eq(delimiter), - eq("/path/to/table"), - argThat( - (Map props) -> - "v1".equals(props.get("k1")) - && "2".equals(props.get("k2")) - && props.size() == 2)); - - // Header properties should override body properties on key conflicts. - Mockito.reset(tableOps); - when(tableOps.createEmptyTable(any(), any(), any(), any())).thenReturn(createTableResponse); - String bodyWithOverlappedProperties = - "{" - + "\"id\":[\"catalog\",\"scheme\",\"create_empty_table\"]," - + "\"location\":\"/path/to/table\"," - + "\"properties\":{\"k1\":\"body\",\"k2\":\"body2\"}" - + "}"; - resp = - target(String.format("/v1/table/%s/create-empty", tableIds)) - .queryParam("delimiter", delimiter) - .request(MediaType.APPLICATION_JSON_TYPE) - .header( - LanceConstants.LANCE_TABLE_PROPERTIES_PREFIX_HEADER, - "{\"k1\":\"header\",\"k3\":\"v3\"}") - .post(Entity.entity(bodyWithOverlappedProperties, MediaType.APPLICATION_JSON_TYPE)); - Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); - Mockito.verify(tableOps) - .createEmptyTable( - eq(tableIds), - eq(delimiter), - eq("/path/to/table"), - argThat( - (Map props) -> - "header".equals(props.get("k1")) - && "body2".equals(props.get("k2")) - && "v3".equals(props.get("k3")) - && props.size() == 3)); - - Mockito.reset(tableOps); - // Test illegal 
argument - when(tableOps.createEmptyTable(any(), any(), any(), any())) - .thenThrow(new IllegalArgumentException("Illegal argument")); - - resp = - target(String.format("/v1/table/%s/create-empty", tableIds)) - .queryParam("delimiter", delimiter) - .request(MediaType.APPLICATION_JSON_TYPE) - .post(Entity.entity(tableRequest, MediaType.APPLICATION_JSON_TYPE)); - Assertions.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), resp.getStatus()); - Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp.getMediaType()); - - // Test runtime exception - Mockito.reset(tableOps); - when(tableOps.createEmptyTable(any(), any(), any(), any())) - .thenThrow(new RuntimeException("Runtime exception")); - resp = - target(String.format("/v1/table/%s/create-empty", tableIds)) - .queryParam("delimiter", delimiter) - .request(MediaType.APPLICATION_JSON_TYPE) - .post(Entity.entity(tableRequest, MediaType.APPLICATION_JSON_TYPE)); - - Assertions.assertEquals( - Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), resp.getStatus()); - Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp.getMediaType()); - ErrorResponse errorResp = resp.readEntity(ErrorResponse.class); - Assertions.assertEquals("Runtime exception", errorResp.getError()); - } - @Test void testRegisterTable() { String tableIds = "catalog.scheme.register_table"; From db91351e3e36f7895b961c770909af6efcaa25e7 Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 19 May 2026 08:06:04 +0800 Subject: [PATCH 15/21] fix(python): avoid pinning lance namespace --- clients/client-python/requirements-lance.txt | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/clients/client-python/requirements-lance.txt b/clients/client-python/requirements-lance.txt index 79a84a91a2a..482aeef75f6 100644 --- a/clients/client-python/requirements-lance.txt +++ b/clients/client-python/requirements-lance.txt @@ -15,11 +15,10 @@ # specific language governing permissions and limitations # under the License. 
-# Lance integration deps. Pinned so the lance-ray integration test runs -# against a single, known-good server-side `lance-namespace-core` 0.7.5+ -# combination. Installed via the `lance` extra (e.g. `pip install -e .[lance]`) -# so the heavy ray/pylance native wheels don't slow down the default `dev` -# install used by lint and unit-test tasks. +# Lance integration deps. `lance-ray` owns the compatible `lance-namespace` +# dependency, so do not pin `lance-namespace` here separately. Installed via +# the `lance` extra (e.g. `pip install -e .[lance]`) so the heavy ray/pylance +# native wheels don't slow down the default `dev` install used by lint and +# unit-test tasks. ray==2.55.1 lance-ray==0.4.2 -lance-namespace==0.7.5 From a0f91052ede22b10c56fdf6d2d4eb73b08f330b0 Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 19 May 2026 19:57:05 +0800 Subject: [PATCH 16/21] fix(lance): handle namespace 0.7 table parameters --- .../common/ops/LanceTableOperations.java | 4 +- .../GravitinoLanceTableOperations.java | 11 +++- .../common/utils/LancePropertiesUtils.java | 10 +++ .../service/rest/LanceTableOperations.java | 18 +++++- .../TestGravitinoLanceModeParsing.java | 61 +++++++++++++++++++ .../rest/TestLanceNamespaceOperations.java | 40 ++++++++++-- 6 files changed, 135 insertions(+), 9 deletions(-) diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java index 7570de38a94..6bdfcb50548 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java @@ -35,9 +35,11 @@ public interface LanceTableOperations { * @param tableId table ids are in the format of "{namespace}{delimiter}{table_name}" * @param delimiter the delimiter used in the namespace * @param version the version of the 
table to describe, if null, describe the latest version + * @param checkDeclared whether to populate the is_only_declared response field * @return the table description */ - DescribeTableResponse describeTable(String tableId, String delimiter, Optional version); + DescribeTableResponse describeTable( + String tableId, String delimiter, Optional version, boolean checkDeclared); /** * Create a new table. diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java index e7409c2ee16..21a53a86007 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceTableOperations.java @@ -99,7 +99,7 @@ public GravitinoLanceTableOperations(GravitinoLanceNamespaceWrapper namespaceWra @Override public DescribeTableResponse describeTable( - String tableId, String delimiter, Optional version) { + String tableId, String delimiter, Optional version, boolean checkDeclared) { if (!version.isEmpty()) { throw new UnsupportedOperationException( "Describing specific table version is not supported. 
It should be null to indicate the" @@ -124,6 +124,7 @@ public DescribeTableResponse describeTable( } DescribeTableResponse response = new DescribeTableResponse(); response.setMetadata(table.properties()); + response.setProperties(table.properties()); response.setLocation(table.properties().get(LANCE_LOCATION)); response.setSchema(toJsonArrowSchema(table.columns())); response.setVersion( @@ -132,6 +133,11 @@ public DescribeTableResponse describeTable( .orElse(null)); response.setStorageOptions( LancePropertiesUtils.resolveLanceStorageOptions(catalog.properties(), table.properties())); + response.setManagedVersioning(false); + if (checkDeclared) { + response.setIsOnlyDeclared( + Boolean.parseBoolean(table.properties().getOrDefault(LANCE_TABLE_DECLARED, "false"))); + } return response; } @@ -187,6 +193,7 @@ public CreateTableResponse createTable( response.setVersion( Optional.ofNullable(properties.get(LANCE_TABLE_VERSION)).map(Long::valueOf).orElse(null)); response.setLocation(properties.get(LANCE_LOCATION)); + response.setProperties(properties); return response; } @@ -205,6 +212,8 @@ public DeclareTableResponse declareTable( DeclareTableResponse declareTableResponse = new DeclareTableResponse(); declareTableResponse.setLocation(response.getLocation()); declareTableResponse.setStorageOptions(response.getStorageOptions()); + declareTableResponse.setProperties(response.getProperties()); + declareTableResponse.setManagedVersioning(false); return declareTableResponse; } diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LancePropertiesUtils.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LancePropertiesUtils.java index 3555fa3f99a..e4129b40b77 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LancePropertiesUtils.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LancePropertiesUtils.java @@ -68,4 +68,14 @@ public static Map 
resolveLanceStorageOptions( effectiveStorageOptions.putAll(getLanceStorageOptions(tableProperties)); return effectiveStorageOptions; } + + public static Map toTableProperties(Map storageOptions) { + if (storageOptions == null) { + return Map.of(); + } + + return storageOptions.entrySet().stream() + .collect( + Collectors.toMap(e -> LANCE_STORAGE_OPTIONS_PREFIX + e.getKey(), Map.Entry::getValue)); + } } diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java index 3df1a7ff5a4..5114b475c31 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java @@ -45,6 +45,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.lance.common.ops.NamespaceWrapper; import org.apache.gravitino.lance.common.utils.LanceConstants; +import org.apache.gravitino.lance.common.utils.LancePropertiesUtils; import org.apache.gravitino.lance.common.utils.SerializationUtils; import org.apache.gravitino.lance.service.LanceExceptionMapper; import org.apache.gravitino.metrics.MetricNames; @@ -86,13 +87,19 @@ public LanceTableOperations(NamespaceWrapper lanceNamespace) { public Response describeTable( @PathParam("id") String tableId, @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter, + @QueryParam("check_declared") Boolean checkDeclared, DescribeTableRequest request) { try { validateDescribeTableRequest(request); + Optional version = + request == null ? 
Optional.empty() : Optional.ofNullable(request.getVersion()); + boolean shouldCheckDeclared = + Optional.ofNullable(checkDeclared) + .orElse(request != null && Boolean.TRUE.equals(request.getCheckDeclared())); DescribeTableResponse response = lanceNamespace .asTableOps() - .describeTable(tableId, delimiter, Optional.ofNullable(request.getVersion())); + .describeTable(tableId, delimiter, version, shouldCheckDeclared); return Response.ok(response).build(); } catch (Exception e) { return LanceExceptionMapper.toRESTResponse(tableId, e); @@ -109,6 +116,8 @@ public Response createTable( @PathParam("id") String tableId, @QueryParam("mode") @DefaultValue("create") String mode, // create, exist_ok, overwrite @QueryParam("delimiter") @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) String delimiter, + @QueryParam("properties") String queryProperties, + @QueryParam("storage_options") String queryStorageOptions, @Context HttpHeaders headers, byte[] arrowStreamBody) { try { @@ -117,6 +126,10 @@ public Response createTable( String tableLocation = headersMap.getFirst(LANCE_TABLE_LOCATION_HEADER); String tableProperties = headersMap.getFirst(LANCE_TABLE_PROPERTIES_PREFIX_HEADER); Map props = SerializationUtils.deserializeProperties(tableProperties); + props.putAll(SerializationUtils.deserializeProperties(queryProperties)); + props.putAll( + LancePropertiesUtils.toTableProperties( + SerializationUtils.deserializeProperties(queryStorageOptions))); CreateTableResponse response = lanceNamespace .asTableOps() @@ -150,6 +163,9 @@ public Response declareTable( MultivaluedMap headersMap = headers.getRequestHeaders(); String tableProperties = headersMap.getFirst(LANCE_TABLE_PROPERTIES_PREFIX_HEADER); Map props = SerializationUtils.deserializeProperties(tableProperties); + if (declareTableRequest.getProperties() != null) { + props.putAll(declareTableRequest.getProperties()); + } DeclareTableResponse response = lanceNamespace.asTableOps().declareTable(tableId, delimiter, tableLocation, props); 
diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestGravitinoLanceModeParsing.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestGravitinoLanceModeParsing.java index 4ac063a4060..cadfccebdfa 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestGravitinoLanceModeParsing.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/common/ops/gravitino/TestGravitinoLanceModeParsing.java @@ -20,12 +20,16 @@ import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_CREATION_MODE; import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_LOCATION; +import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_STORAGE_OPTIONS_PREFIX; +import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_DECLARED; +import static org.apache.gravitino.lance.common.utils.LanceConstants.LANCE_TABLE_VERSION; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.isNull; import static org.mockito.Mockito.when; import java.util.Map; +import java.util.Optional; import java.util.regex.Pattern; import org.apache.gravitino.Catalog; import org.apache.gravitino.NameIdentifier; @@ -35,6 +39,8 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.model.CreateTableResponse; +import org.lance.namespace.model.DescribeTableResponse; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; @@ -162,6 +168,61 @@ void testRegisterModeRejectsMalformedValues() { exception.getMessage().contains("Unknown register table mode: #register$")); } + @Test + void testCreateTableReturnsPropertiesAndStorageOptions() { + TableCatalog tableCatalog = Mockito.mock(TableCatalog.class); + Table 
table = Mockito.mock(Table.class); + when(table.properties()) + .thenReturn( + Map.of( + "custom-key", + "custom-value", + LANCE_LOCATION, + "/tmp/table", + LANCE_TABLE_VERSION, + "5", + LANCE_STORAGE_OPTIONS_PREFIX + "region", + "us-west-2")); + when(tableCatalog.createTable( + any(NameIdentifier.class), any(Column[].class), isNull(), anyMap())) + .thenReturn(table); + GravitinoLanceTableOperations operations = newTableOperations(tableCatalog); + + CreateTableResponse response = + operations.createTable("catalog.schema.table", "create", ".", null, Map.of(), null); + + Assertions.assertEquals("/tmp/table", response.getLocation()); + Assertions.assertEquals(5L, response.getVersion()); + Assertions.assertEquals("custom-value", response.getProperties().get("custom-key")); + Assertions.assertEquals("us-west-2", response.getStorageOptions().get("region")); + } + + @Test + void testDescribeTableReturnsDeclaredStateWhenRequested() { + TableCatalog tableCatalog = Mockito.mock(TableCatalog.class); + Table table = Mockito.mock(Table.class); + when(table.properties()) + .thenReturn( + Map.of( + LANCE_LOCATION, + "/tmp/table", + LANCE_TABLE_DECLARED, + "true", + LANCE_STORAGE_OPTIONS_PREFIX + "region", + "us-west-2")); + when(table.columns()).thenReturn(new Column[0]); + when(tableCatalog.loadTable(any(NameIdentifier.class))).thenReturn(table); + GravitinoLanceTableOperations operations = newTableOperations(tableCatalog); + + DescribeTableResponse response = + operations.describeTable("catalog.schema.table", ".", Optional.empty(), true); + + Assertions.assertEquals(Boolean.TRUE, response.getIsOnlyDeclared()); + Assertions.assertEquals(Boolean.FALSE, response.getManagedVersioning()); + Assertions.assertEquals("true", response.getProperties().get(LANCE_TABLE_DECLARED)); + Assertions.assertEquals("us-west-2", response.getStorageOptions().get("region")); + } + private static GravitinoLanceTableOperations newTableOperations(TableCatalog tableCatalog) { 
GravitinoLanceNamespaceWrapper namespaceWrapper = Mockito.mock(GravitinoLanceNamespaceWrapper.class); diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java index 644affb4091..b5f7a845c4e 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java @@ -19,6 +19,7 @@ package org.apache.gravitino.lance.service.rest; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; @@ -32,8 +33,10 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import java.io.IOException; +import java.net.URI; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.regex.Pattern; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.client.Entity; @@ -395,6 +398,7 @@ public void testDropNamespace() { @Test void testCreateTable() { + Mockito.reset(tableOps); String tableIds = "catalog.scheme.create_table"; String delimiter = "."; @@ -405,13 +409,27 @@ void testCreateTable() { byte[] bytes = new byte[] {0x01, 0x02, 0x03}; Response resp = - target(String.format("/v1/table/%s/create", tableIds)) - .queryParam("delimiter", delimiter) + client() + .target( + URI.create( + getBaseUri() + + String.format("v1/table/%s/create", tableIds) + + "?delimiter=." 
+ + "&properties=%7B%22custom%22%3A%22value%22%7D" + + "&storage_options=%7B%22region%22%3A%22us-west-2%22%7D")) .request(MediaType.APPLICATION_JSON_TYPE) .post(Entity.entity(bytes, "application/vnd.apache.arrow.stream")); Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp.getMediaType()); + Mockito.verify(tableOps) + .createTable( + eq(tableIds), + eq("create"), + eq(delimiter), + eq(null), + eq(Map.of("custom", "value", "lance.storage.region", "us-west-2")), + eq(bytes)); // Test illegal argument when(tableOps.createTable(any(), any(), any(), any(), any(), any())) @@ -607,6 +625,7 @@ void testDeregisterTable() { @Test void testDescribeTable() { + Mockito.reset(tableOps); String tableIds = "catalog.scheme.describe_table"; String delimiter = "."; @@ -614,12 +633,13 @@ void testDescribeTable() { DescribeTableResponse createTableResponse = new DescribeTableResponse(); createTableResponse.setLocation("/path/to/describe_table"); createTableResponse.setMetadata(ImmutableMap.of("key", "value")); - when(tableOps.describeTable(any(), any(), any())).thenReturn(createTableResponse); + when(tableOps.describeTable(any(), any(), any(), anyBoolean())).thenReturn(createTableResponse); DescribeTableRequest tableRequest = new DescribeTableRequest(); Response resp = target(String.format("/v1/table/%s/describe", tableIds)) .queryParam("delimiter", delimiter) + .queryParam("check_declared", "true") .request(MediaType.APPLICATION_JSON_TYPE) .post(Entity.entity(tableRequest, MediaType.APPLICATION_JSON_TYPE)); @@ -628,10 +648,12 @@ void testDescribeTable() { DescribeTableResponse response = resp.readEntity(DescribeTableResponse.class); Assertions.assertEquals(createTableResponse.getLocation(), response.getLocation()); Assertions.assertEquals(createTableResponse.getMetadata(), response.getMetadata()); + Mockito.verify(tableOps) + .describeTable(eq(tableIds), eq(delimiter), eq(Optional.empty()), 
eq(true)); // Test not found exception Mockito.reset(tableOps); - when(tableOps.describeTable(any(), any(), any())) + when(tableOps.describeTable(any(), any(), any(), anyBoolean())) .thenThrow(new TableNotFoundException("Table not found", "", tableIds)); resp = target(String.format("/v1/table/%s/describe", tableIds)) @@ -642,7 +664,7 @@ void testDescribeTable() { // Test runtime exception Mockito.reset(tableOps); - when(tableOps.describeTable(any(), any(), any())) + when(tableOps.describeTable(any(), any(), any(), anyBoolean())) .thenThrow(new RuntimeException("Runtime exception")); resp = target(String.format("/v1/table/%s/describe", tableIds)) @@ -916,6 +938,7 @@ void testAlterColumns() { @Test void testDeclareTable() { + Mockito.reset(tableOps); String tableIds = "catalog.scheme.declare_table"; String delimiter = "."; @@ -927,6 +950,7 @@ void testDeclareTable() { DeclareTableRequest tableRequest = new DeclareTableRequest(); tableRequest.setLocation("/path/to/table"); + tableRequest.setProperties(Map.of("declared-key", "declared-value")); Response resp = target(String.format("/v1/table/%s/declare", tableIds)) @@ -941,7 +965,11 @@ void testDeclareTable() { Assertions.assertEquals(declareTableResponse.getStorageOptions(), response.getStorageOptions()); Mockito.verify(tableOps) - .declareTable(eq(tableIds), eq(delimiter), eq("/path/to/table"), eq(Map.of())); + .declareTable( + eq(tableIds), + eq(delimiter), + eq("/path/to/table"), + eq(Map.of("declared-key", "declared-value"))); // Test illegal argument Mockito.reset(tableOps); From c3e6a47171981e555d1554911fb269999f4f1392 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 20 May 2026 14:30:07 +0800 Subject: [PATCH 17/21] feat(lance): upgrade lance-core to 6.0.0 --- gradle/libs.versions.toml | 2 +- .../integration/test/LanceRESTServiceIT.java | 41 +++++++++++-------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 49f9ffcb1b1..8a7228a3cff 100644 
--- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -28,7 +28,7 @@ guava = "32.1.3-jre" lombok = "1.18.20" slf4j = "2.0.16" log4j = "2.25.4" -lance = "4.0.1" +lance = "6.0.0" lance-namespace = "0.7.5" delta-kernel = "3.3.0" jetty = "9.4.58.v20250814" diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java index 05ee768f207..40798407e41 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/integration/test/LanceRESTServiceIT.java @@ -219,7 +219,7 @@ public void testDescribeNamespace() { RuntimeException exception = Assertions.assertThrows( RuntimeException.class, () -> ns.describeNamespace(nonExistentCatalogReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":1")); + assertLanceErrorCode(exception, ErrorCode.NAMESPACE_NOT_FOUND); // test describe a non-existent schema namespace DescribeNamespaceRequest nonExistentSchemaReq = new DescribeNamespaceRequest(); @@ -228,7 +228,7 @@ public void testDescribeNamespace() { exception = Assertions.assertThrows( RuntimeException.class, () -> ns.describeNamespace(nonExistentSchemaReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":1")); + assertLanceErrorCode(exception, ErrorCode.NAMESPACE_NOT_FOUND); } @Test @@ -254,7 +254,7 @@ public void testCreateNamespace() { RuntimeException exception = Assertions.assertThrows( RuntimeException.class, () -> ns.createNamespace(createNamespaceReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":2")); + assertLanceErrorCode(exception, ErrorCode.NAMESPACE_ALREADY_EXISTS); // create catalog again with exist_ok mode should succeed createNamespaceReq.setMode("exist_ok"); @@ -296,7 +296,7 @@ public void 
testCreateNamespace() { // create schema again with default mode (create) should fail exception = Assertions.assertThrows(RuntimeException.class, () -> ns.createNamespace(createSchemaReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":2")); + assertLanceErrorCode(exception, ErrorCode.NAMESPACE_ALREADY_EXISTS); // create schema again with exist_ok mode should succeed createSchemaReq.setMode("exist_ok"); @@ -328,7 +328,7 @@ public void testDropNamespace() { dropNamespaceReq.addIdItem("non_existent_catalog"); RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> ns.dropNamespace(dropNamespaceReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":1")); + assertLanceErrorCode(exception, ErrorCode.NAMESPACE_NOT_FOUND); // test drop a non-existent namespace (catalog) with SKIP mode should succeed dropNamespaceReq.setMode("skip"); @@ -341,7 +341,7 @@ public void testDropNamespace() { dropSchemaReq.addIdItem("non_existent_schema"); exception = Assertions.assertThrows(RuntimeException.class, () -> ns.dropNamespace(dropSchemaReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":1")); + assertLanceErrorCode(exception, ErrorCode.NAMESPACE_NOT_FOUND); // test drop a non-existent namespace (schema) with SKIP mode should succeed dropSchemaReq.setMode("skip"); @@ -354,7 +354,7 @@ public void testDropNamespace() { exception = Assertions.assertThrows( RuntimeException.class, () -> ns.dropNamespace(dropNonEmptyCatalogReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":13")); + assertLanceErrorCode(exception, ErrorCode.INVALID_INPUT); // test drop a non-empty namespace (catalog) with CASCADE behavior should succeed dropNonEmptyCatalogReq.setBehavior("cascade"); @@ -382,7 +382,7 @@ public void testDropNamespace() { exception = Assertions.assertThrows( RuntimeException.class, () -> ns.dropNamespace(dropNonEmptySchemaReq)); - 
Assertions.assertTrue(exception.getMessage().contains("\"code\":13")); + assertLanceErrorCode(exception, ErrorCode.INVALID_INPUT); Assertions.assertTrue(catalog.asSchemas().schemaExists(schema.name())); // test drop a non-empty namespace (schema) with CASCADE behavior should succeed @@ -408,7 +408,7 @@ public void testNamespaceExists() { RuntimeException exception = Assertions.assertThrows( RuntimeException.class, () -> ns.namespaceExists(nonExistentCatalogReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":1")); + assertLanceErrorCode(exception, ErrorCode.NAMESPACE_NOT_FOUND); // test existing schema NamespaceExistsRequest schemaExistsReq = new NamespaceExistsRequest(); @@ -423,7 +423,7 @@ public void testNamespaceExists() { exception = Assertions.assertThrows( RuntimeException.class, () -> ns.namespaceExists(nonExistentSchemaReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":1")); + assertLanceErrorCode(exception, ErrorCode.NAMESPACE_NOT_FOUND); } @Test @@ -794,7 +794,7 @@ void testDeregisterNonExistingTable() { RuntimeException exception = Assertions.assertThrows( RuntimeException.class, () -> ns.deregisterTable(deregisterTableRequest)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":4")); + assertLanceErrorCode(exception, ErrorCode.TABLE_NOT_FOUND); Assertions.assertTrue(exception.getMessage().contains("Table not found")); // Try to create a table and then deregister table DeclareTableRequest declareTableRequest = new DeclareTableRequest(); @@ -823,7 +823,7 @@ void testDeregisterNonExistingTable() { RuntimeException describeException = Assertions.assertThrows( RuntimeException.class, () -> ns.describeTable(describeTableRequest)); - Assertions.assertTrue(describeException.getMessage().contains("\"code\":4")); + assertLanceErrorCode(describeException, ErrorCode.TABLE_NOT_FOUND); describeTableRequest.setVersion(1L); RuntimeException versionException = @@ -860,8 +860,8 @@ void testTableExists() { 
tableExistsReq.setId(nonExistingIds); RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> ns.tableExists(tableExistsReq)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":4")); - Assertions.assertTrue(exception.getMessage().contains("Not Found")); + assertLanceErrorCode(exception, ErrorCode.TABLE_NOT_FOUND); + Assertions.assertTrue(exception.getMessage().contains("Table not found")); } @Test @@ -890,13 +890,13 @@ void testDropTable() { RuntimeException exception = Assertions.assertThrows( RuntimeException.class, () -> ns.describeTable(describeTableRequest)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":4")); + assertLanceErrorCode(exception, ErrorCode.TABLE_NOT_FOUND); // Drop a non-existing table should fail dropTableRequest.setId(ids); exception = Assertions.assertThrows(RuntimeException.class, () -> ns.dropTable(dropTableRequest)); - Assertions.assertTrue(exception.getMessage().contains("\"code\":4")); + assertLanceErrorCode(exception, ErrorCode.TABLE_NOT_FOUND); } @Test @@ -930,7 +930,7 @@ void testDeclareTable() { () -> { ns.declareTable(request); }); - Assertions.assertTrue(declareException.getMessage().contains("\"code\":5")); + assertLanceErrorCode(declareException, ErrorCode.TABLE_ALREADY_EXISTS); // Declare a table with non-existent location should succeed // since storage is not touched @@ -1002,6 +1002,13 @@ private static LanceNamespaceException toLanceNamespaceException(ApiException e) return new LanceNamespaceException(ErrorCode.INTERNAL, e.getMessage(), e); } + private static void assertLanceErrorCode( + RuntimeException exception, ErrorCode expectedErrorCode) { + Assertions.assertInstanceOf(LanceNamespaceException.class, exception); + Assertions.assertEquals( + expectedErrorCode.getCode(), ((LanceNamespaceException) exception).getCode()); + } + private TableApi createTableApi() { ApiClient apiClient = new ApiClient().setBasePath(getLanceRestServiceUrl()); return new 
TableApi(apiClient); From 4b4442d489220ed68f09e53b3bcb791ef011f1af Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 20 May 2026 16:06:40 +0800 Subject: [PATCH 18/21] fix(lance): address review comments --- .../tests/integration/test_lance_ray.py | 7 ++---- core/build.gradle.kts | 2 +- .../LancePartitionStatisticStorage.java | 8 +++++-- .../TestLancePartitionStatisticStorage.java | 3 ++- .../common/utils/LancePropertiesUtils.java | 22 +++++++++++++------ 5 files changed, 26 insertions(+), 16 deletions(-) diff --git a/clients/client-python/tests/integration/test_lance_ray.py b/clients/client-python/tests/integration/test_lance_ray.py index 71f917b58ad..d94ee69d8f8 100644 --- a/clients/client-python/tests/integration/test_lance_ray.py +++ b/clients/client-python/tests/integration/test_lance_ray.py @@ -92,11 +92,8 @@ class TestLanceRayIntegration(IntegrationTestEnv): def setUpClass(cls): super().setUpClass() - gravitino_home = os.environ.get("GRAVITINO_HOME") - if not gravitino_home: - raise RuntimeError( - "GRAVITINO_HOME must be set to the distribution package directory" - ) + cls._get_gravitino_home() + gravitino_home = cls.gravitino_home cls.main_conf_path = os.path.join(gravitino_home, MAIN_CONF_FILE) # Bind the lance-rest aux service to our test metalake. 
If the same diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 58afc132d8c..e92a7f52ab2 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -54,7 +54,7 @@ dependencies { exclude(group = "com.fasterxml.jackson.jaxrs", module = "jackson-jaxrs-json-provider") // using gravitino's version exclude(group = "org.apache.httpcomponents.client5", module = "*") // provided by gravitino exclude(group = "org.lance", module = "lance-namespace-core") // This is unnecessary in the core module - // Same rationale as lance-namespace-core: lance-core 4.0.1 declares + // Same rationale as lance-namespace-core: lance-core 6.0.0 declares // lance-namespace-apache-client as a transitive, but core never calls into it. // Leaving it on the main classpath shadows the lance-rest aux service's own // lance-namespace-apache-client (loaded via lance-rest-server/libs/), and diff --git a/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java b/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java index d3f3d426f37..ec2579f7a7e 100644 --- a/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java +++ b/core/src/main/java/org/apache/gravitino/stats/storage/LancePartitionStatisticStorage.java @@ -204,7 +204,6 @@ public LancePartitionStatisticStorage(Map properties) { Caffeine.newBuilder() .maximumSize(datasetCacheSize) .scheduler(Scheduler.forScheduledExecutorService(this.scheduler)) - .executor(Runnable::run) .removalListener( (RemovalListener) (key, value, cause) -> { @@ -358,6 +357,7 @@ private void dropStatisticsImpl(Long tableId, List drop public void close() throws IOException { if (datasetCache.isPresent()) { Cache cache = datasetCache.get(); + cache.asMap().values().forEach(LancePartitionStatisticStorage::closeDatasetHolder); cache.invalidateAll(); cache.cleanUp(); } @@ -623,6 +623,8 @@ static class DatasetHolder implements Closeable { private final Dataset 
dataset; + private final AtomicBoolean closed = new AtomicBoolean(false); + DatasetHolder(Dataset dataset) { this.dataset = dataset; } @@ -637,7 +639,9 @@ void checkoutLatest() { @Override public void close() throws IOException { - dataset.close(); + if (closed.compareAndSet(false, true)) { + dataset.close(); + } } } } diff --git a/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java b/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java index 3a1ab82cf82..5a6ae16e980 100644 --- a/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java +++ b/core/src/test/java/org/apache/gravitino/stats/storage/TestLancePartitionStatisticStorage.java @@ -23,6 +23,7 @@ import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -673,7 +674,7 @@ public void testDatasetCacheClosesPreviousHolderOnReplacement() throws Exception datasetCache.put(1L, previousHolder); datasetCache.put(1L, newHolder); - verify(previousHolder).close(); + verify(previousHolder, timeout(5000)).close(); storage.close(); diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LancePropertiesUtils.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LancePropertiesUtils.java index e4129b40b77..39fbe93bf4d 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LancePropertiesUtils.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LancePropertiesUtils.java @@ -69,13 +69,21 @@ public static Map resolveLanceStorageOptions( return effectiveStorageOptions; } + /** + * Converts Lance storage options to table properties. + * + *

The input map should use unprefixed Lance storage option keys. The returned map prefixes + * each key with {@code lance.storage.}. A {@code null} input returns an empty map. + * + * @param storageOptions the unprefixed Lance storage options + * @return the table properties with Lance storage option prefixes + */ public static Map toTableProperties(Map storageOptions) { - if (storageOptions == null) { - return Map.of(); - } - - return storageOptions.entrySet().stream() - .collect( - Collectors.toMap(e -> LANCE_STORAGE_OPTIONS_PREFIX + e.getKey(), Map.Entry::getValue)); + return storageOptions == null + ? Map.of() + : storageOptions.entrySet().stream() + .collect( + Collectors.toMap( + entry -> LANCE_STORAGE_OPTIONS_PREFIX + entry.getKey(), Map.Entry::getValue)); } } From ee93f7164bf08fbae030577071a9f4fb521a1746 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 20 May 2026 16:41:43 +0800 Subject: [PATCH 19/21] fix(lance): clean up review comments --- .../catalog/lakehouse/lance/LanceTableOperations.java | 4 ++-- clients/client-python/MANIFEST.in | 1 + .../apache/gravitino/lance/common/utils/LanceConstants.java | 2 +- .../lance/service/rest/TestLanceNamespaceOperations.java | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java index 0813d4a4141..9edec20a23c 100644 --- a/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java +++ b/catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java @@ -283,11 +283,11 @@ Table createTableInternal( } // Check whether it's a metadata-only declare table operation. 
- boolean createEmpty = + boolean declaredOnly = Optional.ofNullable(properties.get(LanceConstants.LANCE_TABLE_DECLARED)) .map(Boolean::parseBoolean) .orElse(false); - if (createEmpty) { + if (declaredOnly) { // For declare table, we just create the table metadata in Gravitino without creating the // underlying Lance dataset. return super.createTable( diff --git a/clients/client-python/MANIFEST.in b/clients/client-python/MANIFEST.in index 5656cf186f1..f75d13e1844 100644 --- a/clients/client-python/MANIFEST.in +++ b/clients/client-python/MANIFEST.in @@ -18,6 +18,7 @@ include requirements.txt include requirements-dev.txt +include requirements-lance.txt include README.md include LICENSE include NOTICE diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java index 3ad473c0b48..f5faaad1c2e 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/utils/LanceConstants.java @@ -38,7 +38,7 @@ public class LanceConstants { public static final String LANCE_TABLE_REGISTER = "lance.register"; public static final String LANCE_TABLE_VERSION = "lance.version"; - // Mark whether it is to create an empty Lance table(no data files) + // Mark whether the table is declared only in metadata without creating a Lance dataset. 
public static final String LANCE_TABLE_DECLARED = "lance.declared"; public static final String LANCE_TABLE_FORMAT = "lance"; diff --git a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java index b5f7a845c4e..a7ab97120af 100644 --- a/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/test/java/org/apache/gravitino/lance/service/rest/TestLanceNamespaceOperations.java @@ -112,7 +112,7 @@ protected Application configure() { // auto-register a Jackson provider that calls ObjectMapper.findAndRegisterModules(). // When jackson-module-scala is on the classpath (pulled in by spark-sql), the // auto-discovered provider deserializes JSON objects as Scala Maps instead of - // java.util.Map, breaking extractPropertiesFromBody(). + // java.util.Map, breaking Lance REST request handling that expects Java Maps. 
resourceConfig.property(CommonProperties.FEATURE_AUTO_DISCOVERY_DISABLE, true); resourceConfig.property(CommonProperties.MOXY_JSON_FEATURE_DISABLE, true); ObjectMapper mapper = new ObjectMapper(); From 9b1dc8bc32053f6b50d562cea3ea79b41bcb5c12 Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 26 May 2026 10:17:35 +0800 Subject: [PATCH 20/21] fix --- docs/lance-rest-integration.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/lance-rest-integration.md b/docs/lance-rest-integration.md index 76ee184e0e8..99190cbaeb8 100644 --- a/docs/lance-rest-integration.md +++ b/docs/lance-rest-integration.md @@ -20,19 +20,19 @@ This documentation assumes familiarity with the Lance REST service setup as desc The following table outlines the tested compatibility between Gravitino versions and Lance connector versions: -| Gravitino Version (Lance REST) | Supported lance-spark Versions | Supported lance-ray Versions | -|--------------------------------|--------------------------------|------------------------------| -| 1.1.1 - 1.2.1 | 0.0.10 - 0.0.15 | 0.0.6 - 0.0.8 | -| 1.3.0 | 0.1.0 - 0.4.0 | 0.3.0 - 0.4.2 | +| Gravitino Version (Lance REST) | Supported lance-spark Versions | Supported lance-ray Versions | +|--------------------------------|--------------------------------|-----------------------------------------------| +| 1.1.1 - 1.2.1 | 0.0.10 - 0.0.15 | 0.0.6 - 0.0.8 | +| 1.3.0 | 0.1.0 - 0.4.0 | 0.3.0 - 0.4.2, 0.2.0 supported with conditions | :::note - These version ranges show which versions are expected to work together. - For Gravitino 1.3.0, the explicitly verified release versions are `lance-spark` {0.1.0, 0.1.1, 0.2.0, 0.4.0} and `lance-ray` - {0.3.0, 0.4.2}. lance-ray 0.2.0 and earlier are *not* supported on 1.3.0 + {0.3.0, 0.4.2}. 
By default, lance-ray 0.2.0 and earlier are *not* supported on 1.3.0 because pip resolves them with an older `lance-namespace` whose request schema is incompatible with the upgraded server-side `lance-namespace-core` - (0.7.5+). + (0.7.5+). But if can still use lance-ray 0.2.0 with Gravitino 1.3.0 by pining pylance to 3.x or 4.x; - Before using in production, please test the exact connector versions in your own environment. - The Lance ecosystem is changing quickly, so some versions may introduce breaking changes. ::: From 6c5cc6d88c299b42e70f41a6f46697cc985e3d54 Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 26 May 2026 10:23:33 +0800 Subject: [PATCH 21/21] fix --- docs/lance-rest-integration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/lance-rest-integration.md b/docs/lance-rest-integration.md index 99190cbaeb8..8b87183de7f 100644 --- a/docs/lance-rest-integration.md +++ b/docs/lance-rest-integration.md @@ -32,7 +32,7 @@ The following table outlines the tested compatibility between Gravitino versions {0.3.0, 0.4.2}. By default, lance-ray 0.2.0 and earlier are *not* supported on 1.3.0 because pip resolves them with an older `lance-namespace` whose request schema is incompatible with the upgraded server-side `lance-namespace-core` - (0.7.5+). But if can still use lance-ray 0.2.0 with Gravitino 1.3.0 by pining pylance to 3.x or 4.x; + (0.7.5+). However, you can still use lance-ray 0.2.0 with Gravitino 1.3.0 by pinning pylance to 3.x or 4.x. - Before using in production, please test the exact connector versions in your own environment. - The Lance ecosystem is changing quickly, so some versions may introduce breaking changes. :::