@@ -1072,6 +1072,30 @@ TEST_F(KnnTest, AutoResize) {
10721072 EXPECT_EQ (indices.GetAllDocs ().size (), 100 );
10731073}
10741074
1075+ // Seeds the given HNSW index with `n` deterministic random vectors of dim `dim` using
1076+ // the given RNG seed. Returns the owning MockedDocuments so the caller can pass them
1077+ // back to UpdateVectorData after a restore. Used by the serialization/restore tests.
1078+ inline vector<MockedDocument> SeedHnswIndex (HnswVectorIndex& index, size_t n, size_t dim,
1079+ uint32_t rng_seed) {
1080+ vector<MockedDocument> docs (n);
1081+ std::mt19937 rng (rng_seed);
1082+ std::uniform_real_distribution<float > dist (0 .0f , 1 .0f );
1083+ for (size_t i = 0 ; i < n; i++) {
1084+ vector<float > coords (dim);
1085+ for (size_t d = 0 ; d < dim; d++)
1086+ coords[d] = dist (rng);
1087+ docs[i] = MockedDocument::Map{{" vec" , ToBytes (absl::MakeConstSpan (coords))}};
1088+ index.Add (i, docs[i], " vec" );
1089+ }
1090+ return docs;
1091+ }
1092+
1093+ // Snapshots all nodes from the index under its read lock.
1094+ inline vector<HnswNodeData> SnapshotHnswNodes (const HnswVectorIndex& index) {
1095+ auto lock = index.GetReadLock ();
1096+ return index.GetNodesRange (0 , index.GetNodeCount ());
1097+ }
1098+
10751099// Parameterized HNSW serialization round-trip test.
10761100// Parameters: {num_elements, dim, similarity}
10771101struct HnswSerParam {
@@ -1108,27 +1132,12 @@ TEST_P(HnswSerializationTest, RoundTrip) {
11081132 params.hnsw_ef_construction = 200 ;
11091133
11101134 HnswVectorIndex original (params, /* copy_vector=*/ true );
1135+ vector<MockedDocument> docs = SeedHnswIndex (original, num_elements, dim, /* rng_seed=*/ 42 );
11111136
1112- std::mt19937 rng (42 );
1113- std::uniform_real_distribution<float > dist (0 .0f , 1 .0f );
1114- vector<MockedDocument> docs (num_elements);
1115- for (size_t i = 0 ; i < num_elements; i++) {
1116- vector<float > coords (dim);
1117- for (size_t d = 0 ; d < dim; d++)
1118- coords[d] = dist (rng);
1119- docs[i] = MockedDocument::Map{{" vec" , ToBytes (absl::MakeConstSpan (coords))}};
1120- original.Add (i, docs[i], " vec" );
1121- }
1122-
1123- // Serialize
11241137 auto metadata = original.GetMetadata ();
1125- ASSERT_EQ (metadata. cur_element_count , num_elements);
1138+ ASSERT_EQ (original. GetNodeCount () , num_elements);
11261139
1127- std::vector<HnswNodeData> nodes;
1128- {
1129- auto lock = original.GetReadLock ();
1130- nodes = original.GetNodesRange (0 , metadata.cur_element_count );
1131- }
1140+ std::vector<HnswNodeData> nodes = SnapshotHnswNodes (original);
11321141 ASSERT_EQ (nodes.size (), num_elements);
11331142
11341143 // Verify node data integrity
@@ -1139,8 +1148,7 @@ TEST_P(HnswSerializationTest, RoundTrip) {
11391148
11401149 // Deserialize into a fresh index
11411150 HnswVectorIndex restored (params, /* copy_vector=*/ true );
1142- restored.SetMetadata (metadata);
1143- restored.RestoreFromNodes (nodes, metadata);
1151+ ASSERT_TRUE (restored.RestoreFromNodes (nodes, metadata));
11441152
11451153 // Before UpdateVectorData, all nodes must be marked deleted.
11461154 // KNN should safely return empty results (no crash from nullptr dereference).
@@ -1153,17 +1161,16 @@ TEST_P(HnswSerializationTest, RoundTrip) {
11531161 for (size_t i = 0 ; i < num_elements; i++)
11541162 restored.UpdateVectorData (i, docs[i], " vec" );
11551163
1156- // Metadata must match
11571164 auto rm = restored.GetMetadata ();
1158- EXPECT_EQ (rm.cur_element_count , metadata.cur_element_count );
1159- EXPECT_EQ (rm.maxlevel , metadata.maxlevel );
1165+ EXPECT_EQ (restored.GetNodeCount (), num_elements);
11601166 EXPECT_EQ (rm.enterpoint_node , metadata.enterpoint_node );
1167+ EXPECT_EQ (restored.GetMaxLevel (), original.GetMaxLevel ());
11611168
11621169 // Graph links must be identical
11631170 std::vector<HnswNodeData> restored_nodes;
11641171 {
11651172 auto lock = restored.GetReadLock ();
1166- restored_nodes = restored.GetNodesRange (0 , rm. cur_element_count );
1173+ restored_nodes = restored.GetNodesRange (0 , restored. GetNodeCount () );
11671174 }
11681175 ASSERT_EQ (restored_nodes.size (), nodes.size ());
11691176 for (size_t i = 0 ; i < nodes.size (); i++) {
@@ -1209,6 +1216,76 @@ TEST_P(HnswSerializationTest, RoundTrip) {
12091216 }
12101217}
12111218
1219+ // Regression for the save-side race where an Add raises maxlevel between metadata
1220+ // capture and node serialization (see RestoreFromNodes for the rationale). Simulated
1221+ // by forging metadata with a low-level entry point against a multi-level node set;
1222+ // expects maxlevel_ to clamp to the entry point's level rather than max(node.level).
1223+ TEST (HnswRestoreInvariant, MaxLevelClampedToEntryPointLevel) {
1224+ constexpr size_t kDim = 8 ;
1225+ constexpr size_t kN = 100 ;
1226+
1227+ InitTLSearchMR (PMR_NS::get_default_resource ());
1228+ absl::Cleanup cleanup = [] { InitTLSearchMR (nullptr ); };
1229+
1230+ SchemaField::VectorParams params;
1231+ params.use_hnsw = true ;
1232+ params.dim = kDim ;
1233+ params.sim = VectorSimilarity::L2;
1234+ params.capacity = kN ;
1235+ params.hnsw_m = 16 ;
1236+ params.hnsw_ef_construction = 200 ;
1237+
1238+ HnswVectorIndex original (params, /* copy_vector=*/ true );
1239+ SeedHnswIndex (original, kN , kDim , /* rng_seed=*/ 42 );
1240+ std::vector<HnswNodeData> nodes = SnapshotHnswNodes (original);
1241+
1242+ int global_max_level = -1 ;
1243+ std::optional<uint32_t > low_level_internal_id;
1244+ for (const auto & n : nodes) {
1245+ global_max_level = std::max (global_max_level, n.level );
1246+ if (!low_level_internal_id && n.level == 0 )
1247+ low_level_internal_id = n.internal_id ;
1248+ }
1249+ ASSERT_GT (global_max_level, 0 ) << " test setup: need a multi-level graph" ;
1250+ ASSERT_TRUE (low_level_internal_id.has_value ()) << " test setup: need a level-0 node" ;
1251+
1252+ HnswIndexMetadata forged_metadata{.enterpoint_node = *low_level_internal_id};
1253+
1254+ HnswVectorIndex restored (params, /* copy_vector=*/ true );
1255+ ASSERT_TRUE (restored.RestoreFromNodes (nodes, forged_metadata));
1256+
1257+ EXPECT_EQ (restored.GetMaxLevel (), 0 )
1258+ << " maxlevel_ must equal entry-point level; got " << restored.GetMaxLevel ()
1259+ << " while node set max level=" << global_max_level;
1260+ }
1261+
1262+ // Malformed/mismatched metadata (entry point not in serialized node set) must
1263+ // fail restoration gracefully — returning false — instead of SIGABRT'ing via
1264+ // CHECK. Callers then rebuild the index from the keyspace.
1265+ TEST (HnswRestoreInvariant, MissingEntrypointFailsGracefully) {
1266+ constexpr size_t kDim = 4 ;
1267+ constexpr size_t kN = 10 ;
1268+
1269+ InitTLSearchMR (PMR_NS::get_default_resource ());
1270+ absl::Cleanup cleanup = [] { InitTLSearchMR (nullptr ); };
1271+
1272+ SchemaField::VectorParams params;
1273+ params.use_hnsw = true ;
1274+ params.dim = kDim ;
1275+ params.sim = VectorSimilarity::L2;
1276+ params.capacity = kN ;
1277+ params.hnsw_m = 16 ;
1278+ params.hnsw_ef_construction = 200 ;
1279+
1280+ HnswVectorIndex original (params, /* copy_vector=*/ true );
1281+ SeedHnswIndex (original, kN , kDim , /* rng_seed=*/ 7 );
1282+ std::vector<HnswNodeData> nodes = SnapshotHnswNodes (original);
1283+
1284+ HnswIndexMetadata bad_metadata{.enterpoint_node = 999999 }; // well past any real id
1285+ HnswVectorIndex restored (params, /* copy_vector=*/ true );
1286+ EXPECT_FALSE (restored.RestoreFromNodes (nodes, bad_metadata));
1287+ }
1288+
12121289// Regression: in borrowed mode (copy_vector=false), Remove marks the node deleted
12131290// but hnswlib still traverses it and dereferences its data pointer. If the external
12141291// data is freed (as happens after DEL), the pointer dangles. The fix in DoRemove
0 commit comments