Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/content/append-table/global-index.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,13 @@ Generation) applications.
CALL sys.create_global_index(
table => 'db.my_table',
index_column => 'embedding',
index_type => 'lumina-vector-ann',
index_type => 'lumina',
options => 'lumina.index.dimension=128'
);
```

The legacy index type `lumina-vector-ann` is still accepted, so existing tables and SQL statements that reference it keep working.

**Vector Search**

{{< tabs "vector-search" >}}
Expand Down
4 changes: 3 additions & 1 deletion docs/content/learn-paimon/scenario-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -451,14 +451,16 @@ Schema schema = Schema.newBuilder()
CALL sys.create_global_index(
table => 'db.doc_embeddings',
index_column => 'embedding',
index_type => 'lumina-vector-ann',
index_type => 'lumina',
options => 'lumina.index.dimension=768'
);

-- Search for top-5 nearest neighbors
SELECT * FROM vector_search('doc_embeddings', 'embedding', array(0.1f, 0.2f, ...), 5);
```

The legacy index type `lumina-vector-ann` is still accepted, so existing tables and SQL statements that reference it keep working.

**Why:** The [Global Index]({{< ref "append-table/global-index" >}}) with DiskANN provides high-performance ANN search.
Vector data is stored in dedicated `.vector.lance` files optimized for dense vectors, while scalar columns stay in
Parquet. You can also build a **BTree Index** on scalar columns for efficient filtering:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ public void testGlobalIndexFiles() {
+ "/index/bitmap-global-index-a1b2c3d4-e5f6.index")))
.isEqualTo(FileType.GLOBAL_INDEX);
// lumina vector global index
assertThat(
FileType.classify(
new Path(TABLE_ROOT + "/index/lumina-global-index-a1b2c3d4.index")))
.isEqualTo(FileType.GLOBAL_INDEX);
// legacy lumina vector global index
assertThat(
FileType.classify(
new Path(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,15 @@ void setUp() {
}

@ParameterizedTest
@ValueSource(strings = {"HASH", "DELETION_VECTORS", "btree", "bitmap", "lumina-vector-ann"})
@ValueSource(
strings = {
"HASH",
"DELETION_VECTORS",
"btree",
"bitmap",
"lumina",
"lumina-vector-ann"
})
void testExistsAndDeleteIndexFile(String indexType) throws IOException {
String fileName = "index-" + UUID.randomUUID();
IndexFileMeta meta =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
/** Test case for Lumina vector global index via Flink procedure. */
public class LuminaVectorGlobalIndexITCase extends CatalogITCaseBase {

private static final String INDEX_TYPE = "lumina-vector-ann";
private static final String INDEX_TYPE = "lumina";
private static final String LEGACY_INDEX_TYPE = "lumina-vector-ann";

@BeforeAll
static void checkLuminaAvailable() {
Expand Down Expand Up @@ -88,6 +89,47 @@ public void testLuminaVectorIndex() throws Catalog.TableNotExistException {
assertThat(totalRowCount).isEqualTo(100L);
}

@Test
public void testLuminaVectorIndexLegacyIdentifier() throws Catalog.TableNotExistException {
    // Builds a global index through the legacy `lumina-vector-ann` identifier and inspects
    // the index manifest: the created files must be recorded under the legacy index type,
    // cover every inserted row and carry global index metadata. This test exercises the
    // create path and the manifest only; it does not issue a vector search.
    String createTableDdl =
            "CREATE TABLE T_LEGACY (id INT, v ARRAY<FLOAT>) WITH ("
                    + "'bucket' = '-1', "
                    + "'row-tracking.enabled' = 'true', "
                    + "'data-evolution.enabled' = 'true', "
                    + "'lumina.index.dimension' = '3', "
                    + "'lumina.distance.metric' = 'l2'"
                    + ")";
    sql(createTableDdl);

    // Ten rows of 3-dimensional vectors.
    sql("INSERT INTO T_LEGACY VALUES " + vectorValues(0, 10, 3));

    String createIndexCall =
            "CALL sys.create_global_index("
                    + "`table` => 'default.T_LEGACY', "
                    + "index_column => 'v', "
                    + "index_type => '"
                    + LEGACY_INDEX_TYPE
                    + "')";
    sql(createIndexCall);

    // Keep only the manifest entries that were written under the legacy index type.
    FileStoreTable legacyTable = paimonTable("T_LEGACY");
    List<IndexFileMeta> legacyIndexFiles =
            legacyTable.store().newIndexFileHandler().scanEntries().stream()
                    .map(IndexManifestEntry::indexFile)
                    .filter(file -> LEGACY_INDEX_TYPE.equals(file.indexType()))
                    .collect(Collectors.toList());

    assertThat(legacyIndexFiles).isNotEmpty();

    // The index files together must account for all ten inserted rows.
    long indexedRows = 0L;
    for (IndexFileMeta file : legacyIndexFiles) {
        indexedRows += file.rowCount();
    }
    assertThat(indexedRows).isEqualTo(10L);

    legacyIndexFiles.forEach(
            file -> {
                assertThat(file.indexType()).isEqualTo(LEGACY_INDEX_TYPE);
                assertThat(file.globalIndexMeta()).isNotNull();
            });
}

@Test
public void testMultiBatchInsert() throws Catalog.TableNotExistException {
sql(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.lumina.index;

/**
 * Factory registered under the legacy Lumina vector index identifier {@code lumina-vector-ann}.
 *
 * <p>The class overrides nothing but {@link #identifier()}; everything else is inherited from
 * {@link LuminaVectorGlobalIndexerFactory}. It is kept so that tables created before the rename
 * to {@code lumina} continue to load. New tables should use
 * {@link LuminaVectorGlobalIndexerFactory} via the {@code lumina} identifier.
 *
 * @deprecated Use {@link LuminaVectorGlobalIndexerFactory} ({@code lumina}) for new tables. This
 *     factory only exists to keep the legacy identifier resolvable through SPI.
 */
@Deprecated
public class LegacyLuminaVectorGlobalIndexerFactory extends LuminaVectorGlobalIndexerFactory {

// Legacy identifier string. This constant hides the parent's IDENTIFIER field, so qualify the
// reference with the class name when both are in scope.
public static final String IDENTIFIER = "lumina-vector-ann";

/** Returns the legacy identifier {@code lumina-vector-ann} instead of the parent's value. */
@Override
public String identifier() {
return IDENTIFIER;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
/** Factory for creating Lumina vector index. */
public class LuminaVectorGlobalIndexerFactory implements GlobalIndexerFactory {

public static final String IDENTIFIER = "lumina-vector-ann";
public static final String IDENTIFIER = "lumina";

@Override
public String identifier() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
# limitations under the License.

org.apache.paimon.lumina.index.LuminaVectorGlobalIndexerFactory
org.apache.paimon.lumina.index.LegacyLuminaVectorGlobalIndexerFactory
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.lumina.index;

import org.apache.paimon.globalindex.GlobalIndexerFactoryUtils;

import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;

/** Checks the identifiers reported by the Lumina indexer factories and their lookup by name. */
public class LuminaVectorGlobalIndexerFactoryTest {

    /** Each factory must report its own identifier string. */
    @Test
    public void testIdentifiers() {
        String currentIdentifier = new LuminaVectorGlobalIndexerFactory().identifier();
        String legacyIdentifier = new LegacyLuminaVectorGlobalIndexerFactory().identifier();

        // Literals are used on purpose: the test pins the exact identifier strings.
        assertThat(currentIdentifier).isEqualTo("lumina");
        assertThat(legacyIdentifier).isEqualTo("lumina-vector-ann");
    }

    /** Loading by identifier must return exactly the factory class registered for that name. */
    @Test
    public void testLoadNewAndLegacyIdentifiers() {
        // Current identifier resolves to the base factory, not to the legacy subclass.
        assertThat(GlobalIndexerFactoryUtils.load("lumina"))
                .isExactlyInstanceOf(LuminaVectorGlobalIndexerFactory.class);

        // Legacy identifier resolves to the dedicated legacy factory.
        assertThat(GlobalIndexerFactoryUtils.load("lumina-vector-ann"))
                .isExactlyInstanceOf(LegacyLuminaVectorGlobalIndexerFactory.class);
    }
}
8 changes: 6 additions & 2 deletions paimon-python/pypaimon/globalindex/lumina/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@
################################################################################

from pypaimon.globalindex.lumina.lumina_vector_global_index_reader import (
LuminaVectorGlobalIndexReader,
LUMINA_IDENTIFIER,
LUMINA_IDENTIFIERS,
LUMINA_VECTOR_ANN_IDENTIFIER,
LuminaVectorGlobalIndexReader,
)

__all__ = [
'LuminaVectorGlobalIndexReader',
'LUMINA_IDENTIFIER',
'LUMINA_IDENTIFIERS',
'LUMINA_VECTOR_ANN_IDENTIFIER',
'LuminaVectorGlobalIndexReader',
]
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
from pypaimon.globalindex.global_index_reader import GlobalIndexReader
from pypaimon.globalindex.vector_search_result import DictBasedScoredIndexResult

# Current identifier of the Lumina vector global index.
LUMINA_IDENTIFIER = "lumina"
# Legacy identifier of the same index type, kept alongside the current one.
LUMINA_VECTOR_ANN_IDENTIFIER = "lumina-vector-ann"
# All identifiers that denote a Lumina index; intended for membership tests
# (`index_type in LUMINA_IDENTIFIERS`) rather than comparison with a single name.
LUMINA_IDENTIFIERS = (LUMINA_IDENTIFIER, LUMINA_VECTOR_ANN_IDENTIFIER)

MIN_SEARCH_LIST_SIZE = 16

Expand Down
4 changes: 2 additions & 2 deletions paimon-python/pypaimon/table/source/vector_search_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,10 @@ def _eval(self, row_range_start, row_range_end, vector_index_files,
def _create_vector_reader(index_type, file_io, index_path, index_io_meta_list, options=None):
"""Create a global index reader for vector search."""
from pypaimon.globalindex.lumina.lumina_vector_global_index_reader import (
LUMINA_VECTOR_ANN_IDENTIFIER,
LUMINA_IDENTIFIERS,
LuminaVectorGlobalIndexReader,
)
if index_type == LUMINA_VECTOR_ANN_IDENTIFIER:
if index_type in LUMINA_IDENTIFIERS:
return LuminaVectorGlobalIndexReader(
file_io, index_path, index_io_meta_list, options
)
Expand Down
26 changes: 23 additions & 3 deletions paimon-python/pypaimon/tests/vector_search_filter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from pypaimon.common.predicate import Predicate
from pypaimon.common.predicate_builder import PredicateBuilder
from pypaimon.globalindex.global_index_meta import GlobalIndexMeta
from pypaimon.globalindex.global_index_meta import GlobalIndexIOMeta, GlobalIndexMeta
from pypaimon.globalindex.global_index_result import GlobalIndexResult
from pypaimon.globalindex.vector_search_result import ScoredGlobalIndexResult
from pypaimon.index.index_file_meta import IndexFileMeta
Expand Down Expand Up @@ -121,6 +121,26 @@ def _scan(snapshot, entry_filter=None):
# ----------------------------- tests ---------------------------------------


class VectorReaderFactoryTest(unittest.TestCase):
    """Compatibility checks for the vector reader factory."""

    def test_lumina_reader_accepts_new_and_legacy_identifiers(self):
        """Every Lumina identifier (current and legacy) must yield a Lumina reader."""
        from pypaimon.globalindex.lumina.lumina_vector_global_index_reader import (
            LUMINA_IDENTIFIERS,
            LuminaVectorGlobalIndexReader,
        )
        from pypaimon.table.source.vector_search_read import _create_vector_reader

        io_meta = GlobalIndexIOMeta(file_name="vec.index", file_size=1)
        for index_type in LUMINA_IDENTIFIERS:
            # subTest tags a failure with the identifier that caused it and lets the
            # remaining identifiers still be checked instead of stopping at the first one.
            with self.subTest(index_type=index_type):
                # The file IO object and index path are placeholders: this test only
                # constructs the reader, checks its type and closes it.
                reader = _create_vector_reader(
                    index_type, object(), "/tmp/unused", [io_meta], {})
                try:
                    self.assertIsInstance(reader, LuminaVectorGlobalIndexReader)
                finally:
                    reader.close()


class VectorSearchFilterTest(unittest.TestCase):
"""Non-partitioned wiring: scan + read + external_path plumbing."""

Expand Down Expand Up @@ -472,11 +492,11 @@ def setUp(self):
partition_pt2 = GenericRow([2], [self.pt_field])
self.entries = [
_entry(partition_pt1, field_id=2,
index_type="lumina-vector-ann",
index_type="lumina",
file_name="vec-pt1.index",
row_range_start=0, row_range_end=4),
_entry(partition_pt2, field_id=2,
index_type="lumina-vector-ann",
index_type="lumina",
file_name="vec-pt2.index",
row_range_start=5, row_range_end=9),
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ import scala.collection.JavaConverters._
/** Tests for Lumina vector index read/write operations. */
class LuminaVectorIndexTest extends PaimonSparkTestBase {

private val indexType = "lumina-vector-ann"
private val indexType = "lumina"
private val legacyIndexType = "lumina-vector-ann"
private val defaultOptions = "lumina.index.dimension=3"

// ========== Index Creation Tests ==========
Expand Down Expand Up @@ -68,6 +69,53 @@ class LuminaVectorIndexTest extends PaimonSparkTestBase {
}
}

test("create lumina vector index - legacy index type") {
  withTable("T") {
    // Append table with row tracking and data evolution enabled, as used for the global index.
    spark.sql("""
                |CREATE TABLE T (id INT, v ARRAY<FLOAT>)
                |TBLPROPERTIES (
                | 'bucket' = '-1',
                | 'global-index.row-count-per-shard' = '10000',
                | 'row-tracking.enabled' = 'true',
                | 'data-evolution.enabled' = 'true')
                |""".stripMargin)

    // Ten rows; row i holds the vector (i, i + 1, i + 2).
    val rowLiterals =
      for (i <- 0 until 10)
        yield s"($i, array(cast($i as float), cast(${i + 1} as float), cast(${i + 2} as float)))"
    spark.sql(s"INSERT INTO T VALUES ${rowLiterals.mkString(",")}")

    // Create the index through the legacy identifier; the procedure reports success as a boolean.
    val createIndexSql =
      s"CALL sys.create_global_index(table => 'test.T', index_column => 'v', index_type => '$legacyIndexType', options => '$defaultOptions')"
    val procedureRow = spark.sql(createIndexSql).collect().head
    assert(procedureRow.getBoolean(0))

    // Index files recorded in the manifest under the legacy index type.
    val legacyIndexFiles = loadTable("T")
      .store()
      .newIndexFileHandler()
      .scanEntries()
      .asScala
      .map(_.indexFile())
      .filter(_.indexType() == legacyIndexType)

    assert(legacyIndexFiles.nonEmpty)
    val indexedRows = legacyIndexFiles.map(_.rowCount()).sum
    assert(indexedRows == 10L)

    // Read path: a top-5 vector_search over the table indexed under the legacy type must
    // return five rows.
    val neighbours = spark
      .sql("""
             |SELECT * FROM vector_search('T', 'v', array(50.0f, 51.0f, 52.0f), 5)
             |""".stripMargin)
      .collect()
    assert(neighbours.length == 5)
  }
}

test("table_indexes system table - global index metadata") {
withTable("T") {
spark.sql("""
Expand Down Expand Up @@ -96,13 +144,13 @@ class LuminaVectorIndexTest extends PaimonSparkTestBase {
|SELECT index_type, row_count, row_range_start, row_range_end,
| index_field_id, index_field_name
|FROM `T$table_indexes`
|WHERE index_type = 'lumina-vector-ann'
|WHERE index_type = 'lumina'
|""".stripMargin)
.collect()

assert(indexRows.nonEmpty)
val row = indexRows.head
assert(row.getAs[String]("index_type") == "lumina-vector-ann")
assert(row.getAs[String]("index_type") == "lumina")
assert(row.getAs[Long]("row_count") == 100L)
assert(row.getAs[Long]("row_range_start") == 0L)
assert(row.getAs[Long]("row_range_end") == 99L)
Expand Down
Loading