Skip to content

Commit fb80efd

Browse files
committed
Fix PQ file path and migrate legacy PQ files
1 parent 5d3a985 commit fb80efd

2 files changed

Lines changed: 42 additions & 2 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# ArcadeDB MSMARCO MSMARCO-100K (1000 queries, Recall@50)
2+
3+
| quantization | store_vectors_in_graph | add_hierarchy | max_connections | beam_width | overquery_factor | batch_size | load_corpus_s | load_corpus_rss_mb | ingest_s | ingest_rss_mb | create_index_s | create_index_rss_mb | build_graph_now_s | build_graph_now_rss_mb | warmup_s | warmup_rss_mb | search_s | search_rss_mb | recall@50_before_close | close_db_s | close_db_rss_mb | open_db_s | open_db_rss_mb | warmup_after_reopen_s | warmup_after_reopen_rss_mb | search_after_reopen_s | search_after_reopen_rss_mb | recall@50_after_reopen | peak_rss_mb | db_size_mb | total_duration |
4+
|:---------------|:-------------------------|:----------------|------------------:|-------------:|-------------------:|-------------:|----------------:|---------------------:|-----------:|----------------:|-----------------:|----------------------:|--------------------:|-------------------------:|-----------:|----------------:|-----------:|----------------:|-------------------------:|-------------:|------------------:|------------:|-----------------:|------------------------:|-----------------------------:|------------------------:|-----------------------------:|-------------------------:|--------------:|-------------:|:-----------------|
5+
| PRODUCT | False | True | 12 | 64 | 1 | 10000 | 0 | 0 | 6.96 | 1369.83 | 1.187 | 281.465 | 44.925 | 1425.59 | 0.048 | 1.812 | 1.394 | 36.148 | 0.8956 | 0.018 | -0.113 | 0.205 | 16.23 | 0.533 | 0.074 | 1.273 | 309.82 | 0.8956 | 3659.88 | 602.014 | 57.216s |

engine/src/main/java/com/arcadedb/index/vector/LSMVectorIndex.java

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
import java.io.File;
5151
import java.io.IOException;
5252
import java.nio.ByteBuffer;
53+
import java.nio.file.Files;
54+
import java.nio.file.StandardCopyOption;
5355
import java.util.*;
5456
import java.util.concurrent.atomic.AtomicBoolean;
5557
import java.util.concurrent.atomic.AtomicInteger;
@@ -244,7 +246,7 @@ public LSMVectorIndex(final DatabaseInternal database, final String name, final
244246

245247
// Create PQ file handler for Product Quantization (zero-disk-I/O search)
246248
// Note: PQ file uses direct I/O (not ArcadeDB pages) since it's loaded entirely into memory
247-
this.pqFile = new LSMVectorIndexPQFile(filePath);
249+
this.pqFile = createPQFileWithFallback(mutable.getFilePath());
248250

249251
LogManager.instance()
250252
.log(this, Level.FINE, "Created LSMVectorIndex: indexName=%s, vectorFileId=%d, graphFileId=%d", indexName,
@@ -286,7 +288,7 @@ protected LSMVectorIndex(final DatabaseInternal database, final String name, fin
286288

287289
// Create PQ file handler (for zero-disk-I/O search)
288290
// PQ data will be loaded after schema loads metadata (see loadVectorsAfterSchemaLoad)
289-
this.pqFile = new LSMVectorIndexPQFile(filePath);
291+
this.pqFile = createPQFileWithFallback(mutable.getFilePath());
290292

291293
// Initialize compaction fields
292294
this.currentMutablePages = new AtomicInteger(mutable.getTotalPages());
@@ -309,6 +311,39 @@ protected LSMVectorIndex(final DatabaseInternal database, final String name, fin
309311
// See loadVectorsAfterSchemaLoad() method which is called by LSMVectorIndexMutable.onAfterSchemaLoad()
310312
}
311313

314+
private LSMVectorIndexPQFile createPQFileWithFallback(final String primaryBasePath) {
315+
// Use the component file path as canonical. If legacy PQ exists at a shorter base name, migrate it once.
316+
final LSMVectorIndexPQFile pq = new LSMVectorIndexPQFile(primaryBasePath);
317+
318+
// Derive a legacy base path by stripping the first extension (e.g., drop .4.262144.v0.lsmvecidx)
319+
String legacyBasePath = null;
320+
final int dot = primaryBasePath.indexOf('.');
321+
if (dot > 0) {
322+
legacyBasePath = primaryBasePath.substring(0, dot);
323+
}
324+
325+
if (!pq.exists() && legacyBasePath != null) {
326+
final LSMVectorIndexPQFile legacyPQ = new LSMVectorIndexPQFile(legacyBasePath);
327+
if (legacyPQ.exists()) {
328+
try {
329+
final var targetParent = pq.getFilePath().getParent();
330+
if (targetParent != null && !Files.exists(targetParent)) {
331+
Files.createDirectories(targetParent);
332+
}
333+
Files.move(legacyPQ.getFilePath(), pq.getFilePath(), StandardCopyOption.REPLACE_EXISTING);
334+
LogManager.instance().log(this, Level.INFO,
335+
"Migrated PQ file from legacy path %s to canonical %s", legacyPQ.getFilePath(), pq.getFilePath());
336+
} catch (final Exception e) {
337+
LogManager.instance().log(this, Level.WARNING,
338+
"Failed to migrate PQ file from legacy path %s to canonical %s: %s", legacyPQ.getFilePath(), pq.getFilePath(),
339+
e.getMessage());
340+
}
341+
}
342+
}
343+
344+
return pq;
345+
}
346+
312347
/**
313348
* Load vectors from pages after schema has loaded metadata.
314349
* Called by LSMVectorIndexMutable.onAfterSchemaLoad() after dimensions are set from schema.json.

0 commit comments

Comments
 (0)