Skip to content

Commit e3ccf56

Browse files
committed
[vector] Minimize vector index metadata
1 parent 50e7473 commit e3ccf56

4 files changed

Lines changed: 28 additions & 156 deletions

File tree

paimon-vector/paimon-vector-index/src/main/java/org/apache/paimon/vector/index/VectorGlobalIndexReader.java

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
import org.apache.paimon.globalindex.GlobalIndexResult;
2525
import org.apache.paimon.globalindex.ScoredGlobalIndexResult;
2626
import org.apache.paimon.globalindex.io.GlobalIndexFileReader;
27+
import org.apache.paimon.index.ivfpq.Metric;
2728
import org.apache.paimon.index.ivfpq.VectorIndexInput;
29+
import org.apache.paimon.index.ivfpq.VectorIndexMetadata;
2830
import org.apache.paimon.index.ivfpq.VectorIndexReader;
2931
import org.apache.paimon.index.ivfpq.VectorSearchResult;
3032
import org.apache.paimon.predicate.FieldRef;
@@ -59,6 +61,7 @@ public class VectorGlobalIndexReader implements GlobalIndexReader {
5961
private final ExecutorService executor;
6062

6163
private volatile VectorIndexMeta indexMeta;
64+
private volatile VectorIndexMetadata nativeMeta;
6265
private volatile VectorIndexReader vectorReader;
6366
private SeekableInputStream openStream;
6467

@@ -98,7 +101,7 @@ private ScoredGlobalIndexResult search(VectorSearch vectorSearch) throws IOExcep
98101
float[] queryVector = vectorSearch.vector().clone();
99102
int limit = vectorSearch.limit();
100103
int nprobe = indexMeta.nprobe();
101-
String metric = indexMeta.metric();
104+
Metric metric = nativeMeta.metric();
102105

103106
RoaringNavigableMap64 includeRowIds = vectorSearch.includeRowIds();
104107
VectorSearchResult result;
@@ -155,15 +158,17 @@ private ScoredGlobalIndexResult search(VectorSearch vectorSearch) throws IOExcep
155158
});
156159
}
157160

158-
private static float convertDistanceToScore(float distance, String metric) {
159-
if ("l2".equals(metric)) {
160-
return 1.0f / (1.0f + distance);
161-
} else if ("cosine".equals(metric)) {
162-
return 1.0f - distance;
163-
} else if ("inner_product".equals(metric)) {
164-
return distance;
161+
private static float convertDistanceToScore(float distance, Metric metric) {
162+
switch (metric) {
163+
case L2:
164+
return 1.0f / (1.0f + distance);
165+
case COSINE:
166+
return 1.0f - distance;
167+
case INNER_PRODUCT:
168+
return distance;
169+
default:
170+
throw new IllegalArgumentException("Unknown metric: " + metric);
165171
}
166-
throw new IllegalArgumentException("Unknown metric: " + metric);
167172
}
168173

169174
private void validateSearchVector(Object vector) {
@@ -183,11 +188,11 @@ private void validateSearchVector(Object vector) {
183188
+ fieldType);
184189
}
185190
int queryDim = ((float[]) vector).length;
186-
if (queryDim != indexMeta.dimension()) {
191+
if (queryDim != nativeMeta.dimension()) {
187192
throw new IllegalArgumentException(
188193
String.format(
189194
"Query vector dimension mismatch: index expects %d, but got %d",
190-
indexMeta.dimension(), queryDim));
195+
nativeMeta.dimension(), queryDim));
191196
}
192197
}
193198

@@ -200,6 +205,7 @@ private void ensureLoaded() throws IOException {
200205
try {
201206
vectorReader =
202207
new VectorIndexReader(new SeekableStreamVectorIndexInput(in));
208+
nativeMeta = vectorReader.metadata();
203209
openStream = in;
204210
} catch (Exception e) {
205211
IOUtils.closeQuietly(in);

paimon-vector/paimon-vector-index/src/main/java/org/apache/paimon/vector/index/VectorGlobalIndexWriter.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -439,15 +439,6 @@ private Map<String, String> nativeOptions(int effectiveNlist) {
439439

440440
private Map<String, String> metadata() {
441441
Map<String, String> metadata = new LinkedHashMap<>();
442-
metadata.put(VectorIndexMeta.KEY_INDEX_TYPE, identifier);
443-
metadata.put(VectorIndexMeta.KEY_DIMENSION, String.valueOf(dim));
444-
metadata.put(VectorIndexMeta.KEY_METRIC, metric);
445-
metadata.put(VectorIndexMeta.KEY_NLIST, String.valueOf(nlist));
446-
metadata.put(VectorIndexMeta.KEY_M, String.valueOf(pqM));
447-
metadata.put(VectorIndexMeta.KEY_USE_OPQ, String.valueOf(useOpq));
448-
metadata.put(VectorIndexMeta.KEY_HNSW_M, String.valueOf(hnswM));
449-
metadata.put(VectorIndexMeta.KEY_HNSW_EF_CONSTRUCTION, String.valueOf(hnswEfConstruction));
450-
metadata.put(VectorIndexMeta.KEY_HNSW_MAX_LEVEL, String.valueOf(hnswMaxLevel));
451442
metadata.put(VectorIndexMeta.KEY_NPROBE, String.valueOf(nprobe));
452443
metadata.put(VectorIndexMeta.KEY_EF_SEARCH, String.valueOf(efSearch));
453444
return metadata;

paimon-vector/paimon-vector-index/src/main/java/org/apache/paimon/vector/index/VectorIndexMeta.java

Lines changed: 2 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818

1919
package org.apache.paimon.vector.index;
2020

21-
import org.apache.paimon.index.ivfpq.IndexType;
22-
2321
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.type.TypeReference;
2422
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
2523

@@ -31,22 +29,13 @@
3129
/**
3230
* Metadata for a vector index file.
3331
*
34-
* <p>Serialized as a flat JSON {@code Map<String, String>} storing the index build parameters
35-
* required for correct search-time behavior.
32+
* <p>Serialized as a flat JSON {@code Map<String, String>} storing Paimon search parameters that
33+
* are not part of the native vector index file metadata.
3634
*/
3735
public class VectorIndexMeta implements Serializable {
3836

3937
private static final long serialVersionUID = 1L;
4038

41-
static final String KEY_INDEX_TYPE = "index_type";
42-
static final String KEY_DIMENSION = "dimension";
43-
static final String KEY_METRIC = "metric";
44-
static final String KEY_NLIST = "nlist";
45-
static final String KEY_M = "m";
46-
static final String KEY_USE_OPQ = "use_opq";
47-
static final String KEY_HNSW_M = "hnsw_m";
48-
static final String KEY_HNSW_EF_CONSTRUCTION = "hnsw_ef_construction";
49-
static final String KEY_HNSW_MAX_LEVEL = "hnsw_max_level";
5039
static final String KEY_NPROBE = "nprobe";
5140
static final String KEY_EF_SEARCH = "ef_search";
5241

@@ -61,47 +50,6 @@ public class VectorIndexMeta implements Serializable {
6150
this.params = new LinkedHashMap<>(params);
6251
}
6352

64-
public IndexType indexType() {
65-
String value = params.get(KEY_INDEX_TYPE);
66-
if (value == null) {
67-
throw new IllegalArgumentException(
68-
"Missing required key in vector index metadata: " + KEY_INDEX_TYPE);
69-
}
70-
return parseIndexType(value);
71-
}
72-
73-
public int dimension() {
74-
return Integer.parseInt(params.get(KEY_DIMENSION));
75-
}
76-
77-
public String metric() {
78-
return params.get(KEY_METRIC);
79-
}
80-
81-
public int nlist() {
82-
return Integer.parseInt(params.get(KEY_NLIST));
83-
}
84-
85-
public int m() {
86-
return intValue(KEY_M, 0);
87-
}
88-
89-
public boolean useOpq() {
90-
return Boolean.parseBoolean(params.get(KEY_USE_OPQ));
91-
}
92-
93-
public int hnswM() {
94-
return intValue(KEY_HNSW_M, 20);
95-
}
96-
97-
public int hnswEfConstruction() {
98-
return intValue(KEY_HNSW_EF_CONSTRUCTION, 150);
99-
}
100-
101-
public int hnswMaxLevel() {
102-
return intValue(KEY_HNSW_MAX_LEVEL, 7);
103-
}
104-
10553
public int nprobe() {
10654
return intValue(KEY_NPROBE, 16);
10755
}
@@ -116,35 +64,11 @@ public byte[] serialize() throws IOException {
11664

11765
public static VectorIndexMeta deserialize(byte[] data) throws IOException {
11866
Map<String, String> map = OBJECT_MAPPER.readValue(data, MAP_TYPE_REF);
119-
if (!map.containsKey(KEY_DIMENSION)) {
120-
throw new IOException(
121-
"Missing required key in vector index metadata: " + KEY_DIMENSION);
122-
}
123-
if (!map.containsKey(KEY_INDEX_TYPE)) {
124-
throw new IOException(
125-
"Missing required key in vector index metadata: " + KEY_INDEX_TYPE);
126-
}
127-
if (!map.containsKey(KEY_METRIC)) {
128-
throw new IOException("Missing required key in vector index metadata: " + KEY_METRIC);
129-
}
13067
return new VectorIndexMeta(map);
13168
}
13269

13370
private int intValue(String key, int defaultValue) {
13471
String val = params.get(key);
13572
return val == null ? defaultValue : Integer.parseInt(val);
13673
}
137-
138-
private static IndexType parseIndexType(String value) {
139-
if (IvfPqAlgorithmVectorGlobalIndexerFactory.IDENTIFIER.equals(value)) {
140-
return IndexType.IVF_PQ;
141-
} else if (IvfFlatVectorGlobalIndexerFactory.IDENTIFIER.equals(value)) {
142-
return IndexType.IVF_FLAT;
143-
} else if (IvfHnswFlatVectorGlobalIndexerFactory.IDENTIFIER.equals(value)) {
144-
return IndexType.IVF_HNSW_FLAT;
145-
} else if (IvfHnswSqVectorGlobalIndexerFactory.IDENTIFIER.equals(value)) {
146-
return IndexType.IVF_HNSW_SQ;
147-
}
148-
throw new IllegalArgumentException("Unknown vector index type: " + value);
149-
}
15074
}

paimon-vector/paimon-vector-index/src/test/java/org/apache/paimon/vector/index/VectorGlobalIndexTest.java

Lines changed: 9 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,6 @@ public class VectorGlobalIndexTest {
6464

6565
private static final String IVF_PQ_IDENTIFIER =
6666
IvfPqAlgorithmVectorGlobalIndexerFactory.IDENTIFIER;
67-
private static final String IVF_HNSW_FLAT_IDENTIFIER =
68-
IvfHnswFlatVectorGlobalIndexerFactory.IDENTIFIER;
69-
7067
private FileIO fileIO;
7168
private Path indexPath;
7269
private DataType vectorType;
@@ -173,48 +170,24 @@ public void testAllNullReturnsEmpty() {
173170
@Test
174171
public void testMetaSerializationRoundTrip() throws IOException {
175172
Options options = new Options();
176-
options.setInteger("vector.index.dimension", 32);
177-
options.setString("vector.distance.metric", "cosine");
178-
options.setInteger("vector.nlist", 64);
179-
options.setInteger("vector.pq.m", 8);
180-
options.setString("vector.pq.use-opq", "true");
181173
options.setInteger("vector.nprobe", 24);
174+
options.setInteger("vector.hnsw.ef-search", 80);
182175

183-
VectorIndexMeta meta = new VectorIndexMeta(metaOptions(IVF_PQ_IDENTIFIER, options));
176+
VectorIndexMeta meta = new VectorIndexMeta(metaOptions(options));
184177
byte[] serialized = meta.serialize();
185178
VectorIndexMeta deserialized = VectorIndexMeta.deserialize(serialized);
186179

187-
assertThat(deserialized.dimension()).isEqualTo(32);
188-
assertThat(deserialized.indexType()).isEqualTo(IndexType.IVF_PQ);
189-
assertThat(deserialized.metric()).isEqualTo("cosine");
190-
assertThat(deserialized.nlist()).isEqualTo(64);
191-
assertThat(deserialized.m()).isEqualTo(8);
192-
assertThat(deserialized.useOpq()).isTrue();
193180
assertThat(deserialized.nprobe()).isEqualTo(24);
181+
assertThat(deserialized.efSearch()).isEqualTo(80);
194182
}
195183

196184
@Test
197-
public void testMetaSerializationRoundTripForHnsw() throws IOException {
198-
Options options = new Options();
199-
options.setInteger("vector.index.dimension", 16);
200-
options.setString("vector.distance.metric", "l2");
201-
options.setInteger("vector.nlist", 8);
202-
options.setInteger("vector.hnsw.m", 12);
203-
options.setInteger("vector.hnsw.ef-construction", 64);
204-
options.setInteger("vector.hnsw.max-level", 5);
205-
options.setInteger("vector.hnsw.ef-search", 80);
206-
185+
public void testMetaSerializationDefaults() throws IOException {
207186
VectorIndexMeta deserialized =
208-
VectorIndexMeta.deserialize(
209-
new VectorIndexMeta(metaOptions(IVF_HNSW_FLAT_IDENTIFIER, options))
210-
.serialize());
211-
212-
assertThat(deserialized.indexType()).isEqualTo(IndexType.IVF_HNSW_FLAT);
213-
assertThat(deserialized.dimension()).isEqualTo(16);
214-
assertThat(deserialized.hnswM()).isEqualTo(12);
215-
assertThat(deserialized.hnswEfConstruction()).isEqualTo(64);
216-
assertThat(deserialized.hnswMaxLevel()).isEqualTo(5);
217-
assertThat(deserialized.efSearch()).isEqualTo(80);
187+
VectorIndexMeta.deserialize(new VectorIndexMeta(new LinkedHashMap<>()).serialize());
188+
189+
assertThat(deserialized.nprobe()).isEqualTo(16);
190+
assertThat(deserialized.efSearch()).isEqualTo(0);
218191
}
219192

220193
// =================== Tests that NEED native library =====================
@@ -394,30 +367,8 @@ private Options createDefaultOptions(int dimension) {
394367
return options;
395368
}
396369

397-
private Map<String, String> metaOptions(String indexType, Options options) {
370+
private Map<String, String> metaOptions(Options options) {
398371
Map<String, String> meta = new LinkedHashMap<>();
399-
meta.put(VectorIndexMeta.KEY_INDEX_TYPE, indexType);
400-
meta.put(
401-
VectorIndexMeta.KEY_DIMENSION,
402-
String.valueOf(options.getInteger("vector.index.dimension", 128)));
403-
meta.put(
404-
VectorIndexMeta.KEY_METRIC,
405-
options.getString("vector.distance.metric", "inner_product"));
406-
meta.put(
407-
VectorIndexMeta.KEY_NLIST, String.valueOf(options.getInteger("vector.nlist", 256)));
408-
meta.put(VectorIndexMeta.KEY_M, String.valueOf(options.getInteger("vector.pq.m", 16)));
409-
meta.put(
410-
VectorIndexMeta.KEY_USE_OPQ,
411-
String.valueOf(options.getBoolean("vector.pq.use-opq", false)));
412-
meta.put(
413-
VectorIndexMeta.KEY_HNSW_M,
414-
String.valueOf(options.getInteger("vector.hnsw.m", 20)));
415-
meta.put(
416-
VectorIndexMeta.KEY_HNSW_EF_CONSTRUCTION,
417-
String.valueOf(options.getInteger("vector.hnsw.ef-construction", 150)));
418-
meta.put(
419-
VectorIndexMeta.KEY_HNSW_MAX_LEVEL,
420-
String.valueOf(options.getInteger("vector.hnsw.max-level", 7)));
421372
meta.put(
422373
VectorIndexMeta.KEY_NPROBE,
423374
String.valueOf(options.getInteger("vector.nprobe", 16)));

0 commit comments

Comments
 (0)