Skip to content

Commit 34df0ce

Browse files
authored
[core][vector] add log for lumina index writer (#7512)
1 parent f261a66 commit 34df0ce

1 file changed

Lines changed: 41 additions & 3 deletions

File tree

paimon-lumina/src/main/java/org/apache/paimon/lumina/index/LuminaVectorGlobalIndexWriter.java

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,23 +196,48 @@ public List<ResultEntry> finish() {
196196

197197
private ResultEntry buildIndex() throws IOException {
198198
configureExecutorThreadCount();
199+
LOG.info(
200+
"Lumina index build started: {} vectors, dim={}, type={}, metric={}",
201+
count,
202+
dim,
203+
options.indexType(),
204+
options.metric());
205+
long buildStart = System.currentTimeMillis();
206+
199207
try (LuminaIndex index =
200208
LuminaIndex.createForBuild(
201209
options.indexType(), dim, options.metric(), options.toLuminaOptions())) {
202210

203211
// Pretrain and insert via streaming file-backed Dataset API
204-
try (FileBackedDataset ds = new FileBackedDataset(tempVectorFile, dim, count)) {
212+
long phaseStart = System.currentTimeMillis();
213+
LOG.info("Lumina pretrain phase started");
214+
try (FileBackedDataset ds =
215+
new FileBackedDataset(tempVectorFile, dim, count, "pretrain")) {
205216
index.pretrainFrom(ds);
206217
}
207-
try (FileBackedDataset ds = new FileBackedDataset(tempVectorFile, dim, count)) {
218+
LOG.info(
219+
"Lumina pretrain phase done in {} ms", System.currentTimeMillis() - phaseStart);
220+
221+
phaseStart = System.currentTimeMillis();
222+
LOG.info("Lumina insert phase started");
223+
try (FileBackedDataset ds =
224+
new FileBackedDataset(tempVectorFile, dim, count, "insert")) {
208225
index.insertFrom(ds);
209226
}
227+
LOG.info("Lumina insert phase done in {} ms", System.currentTimeMillis() - phaseStart);
210228

229+
phaseStart = System.currentTimeMillis();
230+
LOG.info("Lumina dump phase started");
211231
String fileName = fileWriter.newFileName(FILE_NAME_PREFIX);
212232
try (PositionOutputStream out = fileWriter.newOutputStream(fileName)) {
213233
index.dump(new OutputStreamFileOutput(out));
214234
out.flush();
215235
}
236+
LOG.info("Lumina dump phase done in {} ms", System.currentTimeMillis() - phaseStart);
237+
238+
LOG.info(
239+
"Lumina index build completed in {} ms",
240+
System.currentTimeMillis() - buildStart);
216241

217242
LuminaIndexMeta meta = new LuminaIndexMeta(options.toLuminaOptions());
218243
return new ResultEntry(fileName, count, meta.serialize());
@@ -308,8 +333,10 @@ static class FileBackedDataset implements LuminaDataset, Closeable {
308333
private final int totalCount;
309334
private int cursor;
310335
private final ByteBuffer readBuf;
336+
private final String phase;
337+
private int lastLoggedPercent;
311338

312-
FileBackedDataset(File file, int dim, int totalCount) throws IOException {
339+
FileBackedDataset(File file, int dim, int totalCount, String phase) throws IOException {
313340
this.raf = new RandomAccessFile(file, "r");
314341
this.channel = raf.getChannel();
315342
this.dim = dim;
@@ -318,6 +345,8 @@ static class FileBackedDataset implements LuminaDataset, Closeable {
318345
this.readBuf = ByteBuffer.allocateDirect(IO_BUFFER_SIZE);
319346
this.readBuf.order(ByteOrder.nativeOrder());
320347
this.readBuf.limit(0); // empty initially
348+
this.phase = phase;
349+
this.lastLoggedPercent = -1;
321350
}
322351

323352
@Override
@@ -370,6 +399,15 @@ public long getNextBatch(float[] vectorBuf, long[] idBuf) {
370399
idBuf[i] = cursor + i;
371400
}
372401
cursor += batchSize;
402+
403+
int percent = (int) ((long) cursor * 100 / totalCount);
404+
if (percent / 10 > lastLoggedPercent / 10) {
405+
LOG.info(
406+
"Lumina {} progress: {}/{} vectors ({}%)",
407+
phase, cursor, totalCount, percent);
408+
lastLoggedPercent = percent;
409+
}
410+
373411
return batchSize;
374412
}
375413

0 commit comments

Comments
 (0)