lance-format
diff --git a/‎docs/src/operations/ddl/create-index.md‎
Lines changed: 4 additions & 4 deletions b/‎docs/src/operations/ddl/create-index.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎lance-spark-base_2.12/src/main/java/org/lance/spark/internal/LanceFragmentScanner.java‎
Lines changed: 1 addition & 0 deletions b/‎lance-spark-base_2.12/src/main/java/org/lance/spark/internal/LanceFragmentScanner.java‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceCountStarPartitionReader.java‎
Lines changed: 1 addition & 0 deletions b/‎lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceCountStarPartitionReader.java‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceInputPartition.java‎
Lines changed: 7 additions & 0 deletions b/‎lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceInputPartition.java‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceScan.java‎
Lines changed: 4 additions & 0 deletions b/‎lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceScan.java‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceScanBuilder.java‎
Lines changed: 67 additions & 2 deletions b/‎lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceScanBuilder.java‎
Lines changed: 67 additions & 2 deletions
@@ -7,7 +7,7 @@ Creates a scalar index on a Lance table to accelerate queries.
 
 ## Overview
 
-The `CREATE INDEX` command builds an index on one or more columns of a Lance table. Indexing can improve the performance of queries that filter on the indexed columns. Depending on the index method, Lance Spark either uses a fragment-parallel build path or delegates to Lance's built-in single-phase index creation path.
+The `CREATE INDEX` command builds an index on one or more columns of a Lance table. Indexing can improve the performance of queries that filter on the indexed columns. Depending on the index method, Lance Spark uses either a fragment-parallel build path or a driver-coordinated commit flow after parallel executor builds.
 
 ## Basic Usage
 
@@ -151,12 +151,12 @@ Consider creating an index when:
 
 The `CREATE INDEX` command operates as follows:
 
-1.  **Index Build Execution**: Lance Spark chooses an execution path based on the index method. Methods such as `btree` can use fragment-parallel execution, while `zonemap` is built through Lance's single-phase create-index API.
-2.  **Metadata Finalization**: Lance records the new index metadata as part of the index creation flow.
+1.  **Index Build Execution**: Lance Spark chooses an execution path based on the index method. Methods such as `btree`, `fts`, and `zonemap` can build physical index segments in parallel across fragments. Range-mode `btree` uses Spark repartitioning and sorted preprocessed data.
+2.  **Metadata Finalization**: Lance Spark merges or commits the resulting index metadata on the driver so the new logical index becomes visible atomically.
 3.  **Transactional Commit**: A new table version is committed with the new index information. The operation is atomic and ensures that concurrent reads are not affected.
 
 ## Notes and Limitations
 
 - **Index Methods**: The `zonemap`, `btree`, and `fts` methods are supported for scalar index creation.
 - **Zonemap Column Count**: Zonemap indexes currently support a single column only. The generic `CREATE INDEX` grammar accepts a column list, but Lance rejects multi-column zonemap creation.
-- **Index Replacement**: If you create an index with the same name as an existing one, the old index will be replaced by the new one. This is because the underlying implementation uses `replace(true)`.
+- **Index Replacement**: If you create an index with the same name as an existing one, the old index will be replaced by the new one.
@@ -87,6 +87,7 @@ public static LanceFragmentScanner create(int fragmentId, LanceInputPartition in
       if (inputPartition.getWhereCondition().isPresent()) {
         scanOptions.filter(inputPartition.getWhereCondition().get());
       }
+      scanOptions.useScalarIndex(inputPartition.isUseScalarIndex());
       scanOptions.batchSize(readOptions.getBatchSize());
       if (readOptions.getNearest() != null) {
         scanOptions.nearest(readOptions.getNearest());
 
@@ -77,6 +77,7 @@ private long computeCount() {
       if (inputPartition.getWhereCondition().isPresent()) {
         scanOptionsBuilder.filter(inputPartition.getWhereCondition().get());
       }
+      scanOptionsBuilder.useScalarIndex(inputPartition.isUseScalarIndex());
       scanOptionsBuilder.withRowId(true);
       scanOptionsBuilder.columns(Lists.newArrayList());
       scanOptionsBuilder.fragmentIds(fragmentIds);
 
@@ -38,6 +38,7 @@ public class LanceInputPartition implements HasPartitionKey {
   private final Optional<List<ColumnOrdering>> topNSortOrders;
   private final Optional<Aggregation> pushedAggregation;
   private final String scanId;
+  private final boolean useScalarIndex;
 
   /**
    * Initial storage options fetched from namespace.describeTable() on the driver. These are passed
@@ -69,6 +70,7 @@ public LanceInputPartition(
       Optional<List<ColumnOrdering>> topNSortOrders,
       Optional<Aggregation> pushedAggregation,
       String scanId,
+      boolean useScalarIndex,
       Map<String, String> initialStorageOptions,
       String namespaceImpl,
       Map<String, String> namespaceProperties,
@@ -83,6 +85,7 @@ public LanceInputPartition(
     this.topNSortOrders = topNSortOrders;
     this.pushedAggregation = pushedAggregation;
     this.scanId = scanId;
+    this.useScalarIndex = useScalarIndex;
     this.initialStorageOptions = initialStorageOptions;
     this.namespaceImpl = namespaceImpl;
     this.namespaceProperties = namespaceProperties;
@@ -129,6 +132,10 @@ public String getScanId() {
     return scanId;
   }
 
+  /**
+   * Returns the scalar-index flag this partition was constructed with.
+   *
+   * @return the value supplied as {@code useScalarIndex} to the constructor
+   */
+  public boolean isUseScalarIndex() {
+    return this.useScalarIndex;
+  }
+
   public Map<String, String> getInitialStorageOptions() {
     return initialStorageOptions;
   }
 
@@ -107,6 +107,7 @@ public class LanceScan
   private final String namespaceImpl;
 
   private final java.util.Map<String, String> namespaceProperties;
+  private final boolean useScalarIndex;
 
   public LanceScan(
       StructType schema,
@@ -121,6 +122,7 @@ public LanceScan(
       java.util.Map<String, List<ZoneStats>> zonemapStats,
       Set<Integer> survivingFragmentIds,
       ZonemapFragmentPruner.PartitionInfo partitionInfo,
+      boolean useScalarIndex,
       java.util.Map<String, String> initialStorageOptions,
       String namespaceImpl,
       java.util.Map<String, String> namespaceProperties) {
@@ -137,6 +139,7 @@ public LanceScan(
     this.zonemapStats = zonemapStats != null ? zonemapStats : Collections.emptyMap();
     this.cachedSurvivingFragmentIds = survivingFragmentIds;
     this.partitionInfo = partitionInfo;
+    this.useScalarIndex = useScalarIndex;
     this.initialStorageOptions = initialStorageOptions;
     this.namespaceImpl = namespaceImpl;
     this.namespaceProperties = namespaceProperties;
@@ -191,6 +194,7 @@ public InputPartition[] planInputPartitions() {
                       topNSortOrders,
                       pushedAggregation,
                       scanId,
+                      useScalarIndex,
                       initialStorageOptions,
                       namespaceImpl,
                       namespaceProperties,
 
@@ -48,6 +48,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -70,6 +71,7 @@ public class LanceScanBuilder
   private StructType schema;
 
   private Filter[] pushedFilters = new Filter[0];
+  private boolean forcePostScanFiltering = false;
   private Optional<Integer> limit = Optional.empty();
   private Optional<Integer> offset = Optional.empty();
   private Optional<List<ColumnOrdering>> topNSortOrders = Optional.empty();
@@ -214,7 +216,11 @@ public Scan build() {
     // Close the lazily opened dataset - it's no longer needed after build
     closeLazyDataset();
 
-    Optional<String> whereCondition = FilterPushDown.compileFiltersToSqlWhereClause(pushedFilters);
+    Optional<String> whereCondition =
+        forcePostScanFiltering
+            ? Optional.empty()
+            : FilterPushDown.compileFiltersToSqlWhereClause(pushedFilters);
+    boolean useScalarIndex = zonemapStats.isEmpty();
     return new LanceScan(
         schema,
         readOptions,
@@ -228,6 +234,7 @@ public Scan build() {
         zonemapStats,
         survivingFragmentIds,
         partitionInfo,
+        useScalarIndex,
         initialStorageOptions,
         namespaceImpl,
         namespaceProperties);
@@ -245,7 +252,14 @@ public Filter[] pushFilters(Filter[] filters) {
     }
     Filter[][] processFilters = FilterPushDown.processFilters(filters);
     pushedFilters = processFilters[0];
-    return processFilters[1];
+    forcePostScanFiltering = shouldForcePostScanFiltering(pushedFilters);
+    if (!forcePostScanFiltering) {
+      return processFilters[1];
+    }
+    LOG.info(
+        "Using Spark post-scan filtering for segmented zonemap query on dataset {}",
+        readOptions.getDatasetUri());
+    return concatFilters(processFilters[0], processFilters[1]);
   }
 
   @Override
@@ -305,6 +319,9 @@ public boolean pushTopN(SortOrder[] orders, int limit) {
 
   @Override
   public boolean pushAggregation(Aggregation aggregation) {
+    if (forcePostScanFiltering && pushedFilters.length > 0) {
+      return false;
+    }
     AggregateFunc[] funcs = aggregation.aggregateExpressions();
     if (aggregation.groupByExpressions().length > 0) {
       return false;
@@ -408,4 +425,52 @@ private static Set<String> extractReferencedColumns(Filter[] filters) {
     }
     return columns;
   }
+
+  /**
+   * Reports whether accepted filters must be re-evaluated by Spark instead of pushed to the scan.
+   * Segmented zonemap indexes are treated as safe for fragment pruning only; scan-time pushdown
+   * falls back to Spark until Lance-core query execution fully handles that layout.
+   */
+  private boolean shouldForcePostScanFiltering(Filter[] acceptedFilters) {
+    if (acceptedFilters.length == 0) {
+      return false;
+    }
+    Set<String> filteredColumns = extractReferencedColumns(acceptedFilters);
+    if (filteredColumns.isEmpty()) {
+      return false;
+    }
+
+    // Index metadata refers to columns by field id, so build an id -> name lookup first.
+    Dataset lanceDataset = getOrOpenDataset();
+    Map<Integer, String> namesByFieldId = new HashMap<>();
+    for (LanceField lanceField : lanceDataset.getLanceSchema().fields()) {
+      namesByFieldId.put(lanceField.getId(), lanceField.getName());
+    }
+
+    // A repeated "indexName:column" key marks an index split into single-fragment entries.
+    Set<String> seenSegmentKeys = new HashSet<>();
+    for (Index candidate : lanceDataset.getIndexes()) {
+      boolean singleColumnZonemap =
+          candidate.indexType() == IndexType.ZONEMAP && candidate.fields().size() == 1;
+      if (!singleColumnZonemap
+          || !candidate.fragments().isPresent()
+          || candidate.fragments().get().size() != 1) {
+        continue;
+      }
+      String column = namesByFieldId.get(candidate.fields().get(0));
+      if (column == null || !filteredColumns.contains(column)) {
+        continue;
+      }
+      if (!seenSegmentKeys.add(candidate.name() + ":" + column)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Builds a new array holding every element of {@code first} followed by every element of
+   * {@code second}; neither input array is modified.
+   *
+   * @param first filters placed at the front of the result
+   * @param second filters appended after {@code first}
+   * @return a freshly allocated array of length {@code first.length + second.length}
+   */
+  private static Filter[] concatFilters(Filter[] first, Filter[] second) {
+    int headLength = first.length;
+    int tailLength = second.length;
+    // copyOf pads the tail with nulls; arraycopy then fills that tail from second.
+    Filter[] merged = Arrays.copyOf(first, headLength + tailLength);
+    System.arraycopy(second, 0, merged, headLength, tailLength);
+    return merged;
+  }
 }
Original file line number	Diff line number	Diff line change
`@@ -87,6 +87,7 @@ public static LanceFragmentScanner create(int fragmentId, LanceInputPartition in`
`87`	`87`	`if (inputPartition.getWhereCondition().isPresent()) {`
`88`	`88`	`scanOptions.filter(inputPartition.getWhereCondition().get());`
`89`	`89`	`}`
	`90`	`+ scanOptions.useScalarIndex(inputPartition.isUseScalarIndex());`
`90`	`91`	`scanOptions.batchSize(readOptions.getBatchSize());`
`91`	`92`	`if (readOptions.getNearest() != null) {`
`92`	`93`	`scanOptions.nearest(readOptions.getNearest());`
Original file line number	Diff line number	Diff line change
`@@ -77,6 +77,7 @@ private long computeCount() {`
`77`	`77`	`if (inputPartition.getWhereCondition().isPresent()) {`
`78`	`78`	`scanOptionsBuilder.filter(inputPartition.getWhereCondition().get());`
`79`	`79`	`}`
	`80`	`+ scanOptionsBuilder.useScalarIndex(inputPartition.isUseScalarIndex());`
`80`	`81`	`scanOptionsBuilder.withRowId(true);`
`81`	`82`	`scanOptionsBuilder.columns(Lists.newArrayList());`
`82`	`83`	`scanOptionsBuilder.fragmentIds(fragmentIds);`