@@ -196,23 +196,48 @@ public List<ResultEntry> finish() {
196196
197197 private ResultEntry buildIndex () throws IOException {
198198 configureExecutorThreadCount ();
199+ LOG .info (
200+ "Lumina index build started: {} vectors, dim={}, type={}, metric={}" ,
201+ count ,
202+ dim ,
203+ options .indexType (),
204+ options .metric ());
205+ long buildStart = System .currentTimeMillis ();
206+
199207 try (LuminaIndex index =
200208 LuminaIndex .createForBuild (
201209 options .indexType (), dim , options .metric (), options .toLuminaOptions ())) {
202210
203211 // Pretrain and insert via streaming file-backed Dataset API
204- try (FileBackedDataset ds = new FileBackedDataset (tempVectorFile , dim , count )) {
212+ long phaseStart = System .currentTimeMillis ();
213+ LOG .info ("Lumina pretrain phase started" );
214+ try (FileBackedDataset ds =
215+ new FileBackedDataset (tempVectorFile , dim , count , "pretrain" )) {
205216 index .pretrainFrom (ds );
206217 }
207- try (FileBackedDataset ds = new FileBackedDataset (tempVectorFile , dim , count )) {
218+ LOG .info (
219+ "Lumina pretrain phase done in {} ms" , System .currentTimeMillis () - phaseStart );
220+
221+ phaseStart = System .currentTimeMillis ();
222+ LOG .info ("Lumina insert phase started" );
223+ try (FileBackedDataset ds =
224+ new FileBackedDataset (tempVectorFile , dim , count , "insert" )) {
208225 index .insertFrom (ds );
209226 }
227+ LOG .info ("Lumina insert phase done in {} ms" , System .currentTimeMillis () - phaseStart );
210228
229+ phaseStart = System .currentTimeMillis ();
230+ LOG .info ("Lumina dump phase started" );
211231 String fileName = fileWriter .newFileName (FILE_NAME_PREFIX );
212232 try (PositionOutputStream out = fileWriter .newOutputStream (fileName )) {
213233 index .dump (new OutputStreamFileOutput (out ));
214234 out .flush ();
215235 }
236+ LOG .info ("Lumina dump phase done in {} ms" , System .currentTimeMillis () - phaseStart );
237+
238+ LOG .info (
239+ "Lumina index build completed in {} ms" ,
240+ System .currentTimeMillis () - buildStart );
216241
217242 LuminaIndexMeta meta = new LuminaIndexMeta (options .toLuminaOptions ());
218243 return new ResultEntry (fileName , count , meta .serialize ());
@@ -308,8 +333,10 @@ static class FileBackedDataset implements LuminaDataset, Closeable {
308333 private final int totalCount ;
309334 private int cursor ;
310335 private final ByteBuffer readBuf ;
336+ private final String phase ;
337+ private int lastLoggedPercent ;
311338
312- FileBackedDataset (File file , int dim , int totalCount ) throws IOException {
339+ FileBackedDataset (File file , int dim , int totalCount , String phase ) throws IOException {
313340 this .raf = new RandomAccessFile (file , "r" );
314341 this .channel = raf .getChannel ();
315342 this .dim = dim ;
@@ -318,6 +345,8 @@ static class FileBackedDataset implements LuminaDataset, Closeable {
318345 this .readBuf = ByteBuffer .allocateDirect (IO_BUFFER_SIZE );
319346 this .readBuf .order (ByteOrder .nativeOrder ());
320347 this .readBuf .limit (0 ); // empty initially
348+ this .phase = phase ;
349+ this .lastLoggedPercent = -1 ;
321350 }
322351
323352 @ Override
@@ -370,6 +399,15 @@ public long getNextBatch(float[] vectorBuf, long[] idBuf) {
370399 idBuf [i ] = cursor + i ;
371400 }
372401 cursor += batchSize ;
402+
403+ int percent = (int ) ((long ) cursor * 100 / totalCount );
404+ if (percent / 10 > lastLoggedPercent / 10 ) {
405+ LOG .info (
406+ "Lumina {} progress: {}/{} vectors ({}%)" ,
407+ phase , cursor , totalCount , percent );
408+ lastLoggedPercent = percent ;
409+ }
410+
373411 return batchSize ;
374412 }
375413
0 commit comments