@@ -287,25 +287,24 @@ IndexNode::GetEmbListByIds(const DataSetPtr dataset, const std::string& metric_t
287287 " GetEmbListByIds requires emb_list_offset, but it is not available" );
288288 }
289289 auto sub_metric = get_sub_metric_type (metric_type);
290- if (!sub_metric.has_value () || !HasRawData (sub_metric.value ())) {
290+ if (!sub_metric.has_value ()) {
291+ return expected<DataSetPtr>::Err (Status::not_implemented,
292+ " GetEmbListByIds: invalid metric type " + metric_type);
293+ }
294+
295+ // Raw data can come from emb_list_raw_index_ (MUVERA/LEMUR) or base index (TokenANN)
296+ bool use_raw_index = (emb_list_raw_index_ != nullptr );
297+ if (!use_raw_index && !HasRawData (sub_metric.value ())) {
291298 return expected<DataSetPtr>::Err (
292299 Status::not_implemented,
293300 " GetEmbListByIds requires raw data support, but the index does not store raw vectors" );
294301 }
295302
296303 auto num_el_ids = dataset->GetRows ();
297304 auto el_ids = dataset->GetIds ();
298- auto dim = Dim ();
299-
300- // Build the output offset array and collect all vector-level IDs in a single pass.
301- //
302- // TODO(perf): Vectors within each embedding list are contiguous in the index. However, the current
303- // implementation collects all these contiguous IDs into a flat array and passes them to GetVectorByIds,
304- // which internally calls reconstruct(id, ...) one vector at a time. This could be optimized by using
305- // reconstruct_n(start, len, ...) or direct memcpy from raw data storage, avoiding both the redundant
306- // ID array allocation and per-vector overhead. We don't do this yet because it would require
307- // index-type-specific implementations (HNSW, IVF, FLAT, etc. each store raw data differently),
308- // whereas the current approach works generically across all index types via the GetVectorByIds interface.
305+ auto dim = use_raw_index ? emb_list_raw_index_->d : Dim ();
306+
307+ // Build the output offset array
309308 std::vector<size_t > out_offsets (num_el_ids + 1 );
310309 out_offsets[0 ] = 0 ;
311310 for (int64_t i = 0 ; i < num_el_ids; i++) {
@@ -318,17 +317,9 @@ IndexNode::GetEmbListByIds(const DataSetPtr dataset, const std::string& metric_t
318317 out_offsets[i + 1 ] = out_offsets[i] + emb_list_offset_->get_el_len (el_id);
319318 }
320319
321- std::vector<int64_t > vec_ids;
322- vec_ids.reserve (out_offsets[num_el_ids]);
323- for (int64_t i = 0 ; i < num_el_ids; i++) {
324- size_t start = emb_list_offset_->offset [el_ids[i]];
325- size_t len = out_offsets[i + 1 ] - out_offsets[i];
326- for (size_t j = 0 ; j < len; j++) {
327- vec_ids.push_back (static_cast <int64_t >(start + j));
328- }
329- }
320+ auto total_vecs = out_offsets[num_el_ids];
330321
331- if (vec_ids. empty () ) {
322+ if (total_vecs == 0 ) {
332323 // every requested embedding list is empty
333324 auto result = GenResultDataSet (num_el_ids, dim, (const void *)nullptr );
334325 auto * offsets_ptr = new size_t [out_offsets.size ()];
@@ -337,16 +328,52 @@ IndexNode::GetEmbListByIds(const DataSetPtr dataset, const std::string& metric_t
337328 return result;
338329 }
339330
340- auto vec_dataset = GenIdsDataSet (vec_ids.size (), vec_ids.data ());
341- auto res = GetVectorByIds (vec_dataset, op_context);
342- if (!res.has_value ()) {
343- return res;
344- }
331+ const void * tensor = nullptr ;
332+
333+ if (use_raw_index) {
334+ // MUVERA/LEMUR: vectors are contiguous per el in emb_list_raw_index_, use reconstruct_n
335+ auto data = std::make_unique<float []>(total_vecs * dim);
336+ float * ptr = data.get ();
337+ for (int64_t i = 0 ; i < num_el_ids; i++) {
338+ auto start = static_cast <int64_t >(emb_list_offset_->offset [el_ids[i]]);
339+ auto len = static_cast <int64_t >(out_offsets[i + 1 ] - out_offsets[i]);
340+ if (len > 0 ) {
341+ emb_list_raw_index_->reconstruct_n (start, len, ptr);
342+ ptr += len * dim;
343+ }
344+ }
345+ tensor = data.release ();
346+ } else {
347+ // TokenANN: collect vec_ids and use base index GetVectorByIds
348+ //
349+ // TODO(perf): Vectors within each embedding list are contiguous in the index. However, the current
350+ // implementation collects all these contiguous IDs into a flat array and passes them to GetVectorByIds,
351+ // which internally calls reconstruct(id, ...) one vector at a time. This could be optimized by using
352+ // reconstruct_n(start, len, ...) or direct memcpy from raw data storage, avoiding both the redundant
353+ // ID array allocation and per-vector overhead. We don't do this yet because it would require
354+ // index-type-specific implementations (HNSW, IVF, FLAT, etc. each store raw data differently),
355+ // whereas the current approach works generically across all index types via the GetVectorByIds interface.
356+ std::vector<int64_t > vec_ids;
357+ vec_ids.reserve (total_vecs);
358+ for (int64_t i = 0 ; i < num_el_ids; i++) {
359+ size_t start = emb_list_offset_->offset [el_ids[i]];
360+ size_t len = out_offsets[i + 1 ] - out_offsets[i];
361+ for (size_t j = 0 ; j < len; j++) {
362+ vec_ids.push_back (static_cast <int64_t >(start + j));
363+ }
364+ }
365+
366+ // Fetch the collected vectors via GetVectorByIds; tensor ownership is then transferred from its result to the new dataset
367+ auto vec_dataset = GenIdsDataSet (vec_ids.size (), vec_ids.data ());
368+ auto res = GetVectorByIds (vec_dataset, op_context);
369+ if (!res.has_value ()) {
370+ return res;
371+ }
345372
346- // Build result: transfer tensor ownership from GetVectorByIds result to new dataset
347- auto vec_result = res. value ();
348- auto tensor = vec_result->GetTensor ( );
349- vec_result-> SetIsOwner ( false );
373+ auto vec_result = res. value ();
374+ tensor = vec_result-> GetTensor ();
375+ vec_result->SetIsOwner ( false );
376+ }
350377
351378 auto result = GenResultDataSet (num_el_ids, dim, tensor);
352379 auto * offsets_ptr = new size_t [out_offsets.size ()];
0 commit comments