From f17b96a1974d2ff355e77a473f04f3a000477c51 Mon Sep 17 00:00:00 2001 From: LHT129 Date: Wed, 27 May 2026 16:25:12 +0800 Subject: [PATCH] feat(hgraph): add support_force_remove switch and conditional locking - add a dedicated support_force_remove HGraph parameter and map it through the public and internal parameter layers - gate force-remove locking and RemoveMode::FORCE_REMOVE on support_force_remove while keeping support_remove for existing delete-metadata semantics - update regression tests, examples, and EN/ZH docs to describe the split behavior Closes: #2085 Signed-off-by: LHT129 --- docs/docs/en/src/advanced/index_lifecycle.md | 21 ++++++------ docs/docs/en/src/indexes/hgraph.md | 14 ++------ docs/docs/zh/src/advanced/index_lifecycle.md | 17 +++++----- docs/docs/zh/src/indexes/hgraph.md | 14 ++------ docs/hgraph.md | 15 +++++--- include/vsag/constants.h | 1 + src/algorithm/hgraph/hgraph.cpp | 1 + src/algorithm/hgraph/hgraph.h | 6 ++++ src/algorithm/hgraph/hgraph_build.cpp | 18 ++++++++-- src/algorithm/hgraph/hgraph_modify.cpp | 2 ++ src/algorithm/hgraph/hgraph_param_mapping.cpp | 7 ++++ src/algorithm/hgraph/hgraph_parameter.cpp | 8 +++++ src/algorithm/hgraph/hgraph_parameter.h | 1 + .../hgraph/hgraph_parameter_test.cpp | 34 +++++++++++++++++-- src/algorithm/hgraph/hgraph_search.cpp | 15 ++++++-- src/constants.cpp | 1 + src/inner_string_params.h | 4 ++- tests/test_hgraph_remove.cpp | 25 +++++++++++++- 18 files changed, 148 insertions(+), 56 deletions(-) diff --git a/docs/docs/en/src/advanced/index_lifecycle.md b/docs/docs/en/src/advanced/index_lifecycle.md index 01737ea30..e3aa81560 100644 --- a/docs/docs/en/src/advanced/index_lifecycle.md +++ b/docs/docs/en/src/advanced/index_lifecycle.md @@ -31,12 +31,12 @@ supports it and which mode it supports. `Remove` deletes vectors by id. HGraph supports two deletion modes with different requirements: - `RemoveMode::MARK_REMOVE` (the default) only writes a tombstone via the label table and works - regardless of `support_remove`. The id is filtered out of subsequent searches, but the underlying - graph node and vector storage are kept. + regardless of `support_force_remove`. The id is filtered out of subsequent searches, but the + underlying graph node and vector storage are kept. - `RemoveMode::FORCE_REMOVE` physically rewrites the graph and reclaims the slot. This mode is - only available when the index was built with `support_remove: true` in `index_param` (which - causes the graph data cell to allocate the delete-tracking metadata). Calling `FORCE_REMOVE` on - an index built without `support_remove: true` will fail. + only available when the index was built with `support_force_remove: true` in `index_param`. That + flag enables the force-remove path and its extra synchronization; calling `FORCE_REMOVE` on an + index built without `support_force_remove: true` will fail. ```json { @@ -47,13 +47,13 @@ supports it and which mode it supports. "base_quantization_type": "sq8", "max_degree": 16, "ef_construction": 100, - "support_remove": true + "support_force_remove": true } } ``` The JSON snippet above is only required if you intend to use `FORCE_REMOVE`. For `MARK_REMOVE` -alone you can omit the `support_remove` flag. +alone you can omit the `support_force_remove` flag. ```json { @@ -63,8 +63,7 @@ alone you can omit the `support_remove` flag. "index_param": { "base_quantization_type": "sq8", "max_degree": 16, - "ef_construction": 100, - "support_remove": true + "ef_construction": 100 } } ``` @@ -81,8 +80,8 @@ The optional `RemoveMode` argument selects the deletion strategy: | Mode | Behavior | |-------------------------------|-------------------------------------------------------------------| -| `RemoveMode::MARK_REMOVE` (default) | Tombstones the id; fast, no shrink or graph repair. Subsequent searches skip the id. Does not require `support_remove: true`. | -| `RemoveMode::FORCE_REMOVE` | Physically removes the vector and repairs the graph. Heavier. Requires the index to be built with `support_remove: true`. | +| `RemoveMode::MARK_REMOVE` (default) | Tombstones the id; fast, no shrink or graph repair. Subsequent searches skip the id. Does not require `support_force_remove: true`. | +| `RemoveMode::FORCE_REMOVE` | Physically removes the vector and repairs the graph. Heavier. Requires the index to be built with `support_force_remove: true`. | `Remove` returns the number of ids that were successfully removed. Ids that did not exist are silently skipped and not counted. diff --git a/docs/docs/en/src/indexes/hgraph.md b/docs/docs/en/src/indexes/hgraph.md index febf3cee6..88d94728f 100644 --- a/docs/docs/en/src/indexes/hgraph.md +++ b/docs/docs/en/src/indexes/hgraph.md @@ -1,7 +1,5 @@ # HGraph -![HGraph: hierarchical proximity graph with top-down greedy search and optional reorder](../figures/indexes/hgraph-overview.svg) - HGraph is VSAG's flagship **graph-based** index. It builds a hierarchical proximity graph similar in spirit to HNSW, but with a richer set of quantization options, a unified build-parameter schema (`index_param`), and first-class support for reordering, @@ -77,7 +75,8 @@ and `docs/hgraph.md` in the repository. | `build_thread_count` | int | `100` | Threads used to parallelise build | | `support_duplicate` | bool | `false` | Enable duplicate-ID detection on insert | | `duplicate_distance_threshold` | float | `0.0` | Duplicate-detection distance threshold. When greater than `0`, deduplicate by the nearest candidate distance; when `0`, fall back to the current code `memcmp` check | -| `support_remove` | bool | `false` | Enable `Remove()` on the built index | +| `support_remove` | bool | `false` | Enable graph delete-tracking metadata used by mark-remove recovery paths | +| `support_force_remove` | bool | `false` | Enable `RemoveMode::FORCE_REMOVE` and its extra synchronization on the built index | | `store_raw_vector` | bool | `false` | Keep the raw vector in addition to the quantized copy (useful for `cosine`) | | `use_elp_optimizer` | bool | `false` | Auto-tune search parameters after build | | `base_io_type` / `precise_io_type` | string | `"block_memory_io"` | Storage backend (`memory_io`, `block_memory_io`, `buffer_io`, `async_io`, `mmap_io`) | @@ -204,24 +203,17 @@ Search-time parameters live under the `hgraph` sub-object: | Parameter | Type | Description | |-----------|------|-------------| | `ef_search` | int | Size of the search frontier. Larger = higher recall, slower query. | -| `enable_reorder` | bool | `true` by default. Set to `false` to skip the final reorder stage for this request even when the index was built with reorder enabled. This also disables the RaBitQ one-bit reorder path. | ```cpp auto result = index->KnnSearch( query, topk, R"({"hgraph": {"ef_search": 200}})").value(); ``` -```cpp -auto fast_result = index->KnnSearch( - query, topk, - R"({"hgraph": {"ef_search": 200, "enable_reorder": false}})").value(); -``` - ## When to use HGraph - Dense float vectors with dimensions roughly between 64 and 4096. - Latency-sensitive queries where high recall matters. -- Mixed workloads with incremental insertion (optionally deletion via `support_remove`). +- Mixed workloads with incremental insertion (optionally force removal via `support_force_remove`). - Memory-constrained deployments that benefit from `sq8` / `sq4_uniform` / `pq` — often in combination with `use_reorder` to recover recall. diff --git a/docs/docs/zh/src/advanced/index_lifecycle.md b/docs/docs/zh/src/advanced/index_lifecycle.md index 8799e5bd8..428e7d983 100644 --- a/docs/docs/zh/src/advanced/index_lifecycle.md +++ b/docs/docs/zh/src/advanced/index_lifecycle.md @@ -28,11 +28,11 @@ `Remove` 按 id 删除向量。HGraph 支持两种删除模式,要求不同: -- `RemoveMode::MARK_REMOVE`(默认):仅通过 label table 写入墓碑标记,**不依赖** `support_remove` +- `RemoveMode::MARK_REMOVE`(默认):仅通过 label table 写入墓碑标记,**不依赖** `support_force_remove` 即可调用。该 id 会在后续搜索中被过滤掉,但底层图节点与向量存储仍然保留。 - `RemoveMode::FORCE_REMOVE`:物理重写图并回收存储槽。该模式仅在索引以 - `index_param` 中 `support_remove: true` 构建时可用(这会让图 datacell 分配删除追踪元数据)。 - 若索引未带 `support_remove: true` 构建,调用 `FORCE_REMOVE` 会失败。 + `index_param` 中 `support_force_remove: true` 构建时可用。该开关会启用 force remove 路径及其额外同步; + 若索引未带 `support_force_remove: true` 构建,调用 `FORCE_REMOVE` 会失败。 ```json { @@ -43,13 +43,13 @@ "base_quantization_type": "sq8", "max_degree": 16, "ef_construction": 100, - "support_remove": true + "support_force_remove": true } } ``` 上述 JSON 仅在打算使用 `FORCE_REMOVE` 时是必需的。若只用 `MARK_REMOVE`,可以省略 -`support_remove` 字段。 +`support_force_remove` 字段。 ```json { @@ -59,8 +59,7 @@ "index_param": { "base_quantization_type": "sq8", "max_degree": 16, - "ef_construction": 100, - "support_remove": true + "ef_construction": 100 } } ``` @@ -77,8 +76,8 @@ index->Remove(std::vector{id1, id2, id3}); | 模式 | 行为 | |-------------------------------------|---------------------------------------------------------------------| -| `RemoveMode::MARK_REMOVE`(默认) | 对 id 打墓碑标记;速度快,不收缩、不修图。后续搜索会跳过该 id。不要求 `support_remove: true`。 | -| `RemoveMode::FORCE_REMOVE` | 物理删除向量并修复图结构。开销较大。要求索引以 `support_remove: true` 构建。 | +| `RemoveMode::MARK_REMOVE`(默认) | 对 id 打墓碑标记;速度快,不收缩、不修图。后续搜索会跳过该 id。不要求 `support_force_remove: true`。 | +| `RemoveMode::FORCE_REMOVE` | 物理删除向量并修复图结构。开销较大。要求索引以 `support_force_remove: true` 构建。 | `Remove` 返回成功删除的 id 数量。原本不存在的 id 会被静默跳过,不计入返回值。 diff --git a/docs/docs/zh/src/indexes/hgraph.md b/docs/docs/zh/src/indexes/hgraph.md index f377d93cf..2dc88df45 100644 --- a/docs/docs/zh/src/indexes/hgraph.md +++ b/docs/docs/zh/src/indexes/hgraph.md @@ -1,7 +1,5 @@ # HGraph -![HGraph:自顶向下贪心搜索的层级近邻图,支持可选精排](../figures/indexes/hgraph-overview.svg) - HGraph 是 VSAG 的旗舰 **图索引**。它构建的是与 HNSW 思路类似的多层近邻图,但在此基础上 提供了更丰富的量化方案、统一的构建参数 schema(`index_param`),并原生支持精排(reorder)、 增量更新、删除、以及基于 ELP 的运行时自动调优。 @@ -70,7 +68,8 @@ auto result = index->KnnSearch( | `build_thread_count` | int | `100` | 构建阶段并发线程数 | | `support_duplicate` | bool | `false` | 是否在插入时做重复 ID 检测 | | `duplicate_distance_threshold` | float | `0.0` | 重复判定距离阈值。大于 `0` 时按最近候选的距离判重;等于 `0` 时退化为当前编码 `memcmp` 判重 | -| `support_remove` | bool | `false` | 是否支持 `Remove()` | +| `support_remove` | bool | `false` | 是否启用 mark-remove 恢复路径所需的图删除追踪元数据 | +| `support_force_remove` | bool | `false` | 是否启用 `RemoveMode::FORCE_REMOVE` 及其额外同步 | | `store_raw_vector` | bool | `false` | 除量化副本外再保留原始向量(`cosine` 场景有用) | | `use_elp_optimizer` | bool | `false` | 构建完成后自动调优检索参数 | | `base_io_type` / `precise_io_type` | string | `"block_memory_io"` | 存储后端(`memory_io`、`block_memory_io`、`buffer_io`、`async_io`、`mmap_io`) | @@ -190,24 +189,17 @@ base->NumElements(num_vectors)->Dim(dim)->Ids(ids) | 参数 | 类型 | 说明 | |------|------|------| | `ef_search` | int | 搜索前沿候选集的大小,越大召回越高、查询越慢 | -| `enable_reorder` | bool | 默认值为 `true`。当索引构建时启用了 reorder,也可以在单次请求里设为 `false` 跳过最终精排;这也会一并关闭 RaBitQ 的 one-bit reorder 路径。 | ```cpp auto result = index->KnnSearch( query, topk, R"({"hgraph": {"ef_search": 200}})").value(); ``` -```cpp -auto fast_result = index->KnnSearch( - query, topk, - R"({"hgraph": {"ef_search": 200, "enable_reorder": false}})").value(); -``` - ## 何时选择 HGraph - 维度大约在 64–4096 的稠密 float 向量。 - 对延迟敏感且要求高召回的场景。 -- 需要增量插入(可选通过 `support_remove` 打开删除)的混合负载。 +- 需要增量插入(可选通过 `support_force_remove` 打开物理删除)的混合负载。 - 内存受限环境,可用 `sq8` / `sq4_uniform` / `pq` 压缩,再配合 `use_reorder` 弥补召回。 如果你的业务偏向粗粒度分桶(每次查询只扫部分桶)或严重受 SSD I/O 制约,建议先对比 diff --git a/docs/hgraph.md b/docs/hgraph.md index ab8f7a05b..e528b823c 100644 --- a/docs/hgraph.md +++ b/docs/hgraph.md @@ -53,7 +53,8 @@ For RabitQ split 1bit + 7bit storage/search, see [rabitq_split_1bit_7bit.md](rab | **Advanced** | build_by_base | bool | false | No | Build index using base quantization | | **Features** | support_duplicate | bool | false | No | Enable duplicate data detection | | **Features** | duplicate_distance_threshold | float | 0.0 | No | Deduplicate by nearest-candidate distance when greater than 0; otherwise fall back to code memcmp | -| **Features** | support_remove | bool | false | No | Enable deletion support | +| **Features** | support_remove | bool | false | No | Enable graph delete-tracking metadata | +| **Features** | support_force_remove | bool | false | No | Enable force-remove support and its extra synchronization | | **Features** | store_raw_vector | bool | false | No | Store raw vectors (cosine metric) | | **Features** | use_elp_optimizer | bool | false | No | Auto parameter optimization | @@ -217,7 +218,13 @@ For RabitQ split 1bit + 7bit storage/search, see [rabitq_split_1bit_7bit.md](rab ### support_remove - **Parameter Type**: bool -- **Parameter Description**: Whether to support deletion operations +- **Parameter Description**: Whether to enable graph delete-tracking metadata +- **Optional Values**: true, false +- **Default Value**: false + +### support_force_remove +- **Parameter Type**: bool +- **Parameter Description**: Whether to enable the force-remove path and its extra synchronization - **Optional Values**: true, false - **Default Value**: false @@ -242,10 +249,10 @@ means that the index is built using SQ8 quantization, with a maximum degree of 3 "build_thread_count": 50, "support_duplicate": true, "duplicate_distance_threshold": 0.02, - "support_remove": true + "support_force_remove": true } ``` -means that the index uses PQ quantization with 64 subspaces, enables reordering with FP16 precision, deduplicates inserts within distance threshold 0.02, supports deletion, with maximum degree 64 and ef_construction 400. +means that the index uses PQ quantization with 64 subspaces, enables reordering with FP16 precision, deduplicates inserts within distance threshold 0.02, and enables force-remove support with maximum degree 64 and ef_construction 400. ## Detailed Explanation of Search Parameters diff --git a/include/vsag/constants.h b/include/vsag/constants.h index 4d2a6dfe4..7556dd018 100644 --- a/include/vsag/constants.h +++ b/include/vsag/constants.h @@ -167,6 +167,7 @@ extern const char* const RABITQ_USE_FHT; extern const char* const INDEX_TQ_CHAIN; extern const char* const HGRAPH_SUPPORT_REMOVE; +extern const char* const HGRAPH_SUPPORT_FORCE_REMOVE; extern const char* const HGRAPH_REMOVE_FLAG_BIT; // hgraph params diff --git a/src/algorithm/hgraph/hgraph.cpp b/src/algorithm/hgraph/hgraph.cpp index 5848e2431..483dad513 100644 --- a/src/algorithm/hgraph/hgraph.cpp +++ b/src/algorithm/hgraph/hgraph.cpp @@ -59,6 +59,7 @@ HGraph::HGraph(const HGraphParameterPtr& hgraph_param, const vsag::IndexCommonPa ef_construct_(hgraph_param->ef_construction), alpha_(hgraph_param->alpha), duplicate_distance_threshold_(hgraph_param->duplicate_distance_threshold), + support_force_remove_(hgraph_param->support_force_remove), odescent_param_(hgraph_param->odescent_param), graph_type_(hgraph_param->graph_type), hierarchical_datacell_param_(hgraph_param->hierarchical_graph_param), diff --git a/src/algorithm/hgraph/hgraph.h b/src/algorithm/hgraph/hgraph.h index 7b77733e9..fc2c1aefe 100644 --- a/src/algorithm/hgraph/hgraph.h +++ b/src/algorithm/hgraph/hgraph.h @@ -393,6 +393,11 @@ class HGraph : public InnerIndexInterface { return use_reorder_ and not reorder_by_base_; } + [[nodiscard]] bool + support_force_remove() const { + return support_force_remove_; + } + [[nodiscard]] FlattenInterfacePtr get_reorder_codes() const { return reorder_by_base_ ? basic_flatten_codes_ : high_precise_codes_; @@ -457,6 +462,7 @@ class HGraph : public InnerIndexInterface { bool use_old_serial_format_{false}; bool support_duplicate_{false}; + bool support_force_remove_{false}; float duplicate_distance_threshold_{0.0F}; std::unique_ptr cache_{nullptr}; diff --git a/src/algorithm/hgraph/hgraph_build.cpp b/src/algorithm/hgraph/hgraph_build.cpp index 4a777baf6..b31b1c9d6 100644 --- a/src/algorithm/hgraph/hgraph_build.cpp +++ b/src/algorithm/hgraph/hgraph_build.cpp @@ -199,7 +199,10 @@ HGraph::build_by_odescent(const DatasetPtr& data) { std::vector HGraph::Add(const DatasetPtr& data, AddMode mode) { - std::shared_lock force_remove_rlock(this->force_remove_mutex_); + std::shared_lock force_remove_rlock; + if (this->support_force_remove()) { + force_remove_rlock = std::shared_lock(this->force_remove_mutex_); + } std::vector failed_ids; auto base_dim = data->GetDim(); if (data_type_ != DataTypes::DATA_TYPE_SPARSE) { @@ -362,7 +365,10 @@ HGraph::add_one_point(const void* data, int level, InnerIdType inner_id) { void HGraph::insert_persistent_codes(const void* data, InnerIdType inner_id) { - std::shared_lock add_lock(add_mutex_); + std::shared_lock add_lock; + if (not this->support_force_remove()) { + add_lock = std::shared_lock(this->add_mutex_); + } this->basic_flatten_codes_->InsertVector(data, inner_id); if (has_precise_reorder()) { this->high_precise_codes_->InsertVector(data, inner_id); @@ -374,10 +380,16 @@ HGraph::insert_persistent_codes(const void* data, InnerIdType inner_id) { void HGraph::add_one_point(const void* data, int level, InnerIdType inner_id, bool insert_codes) { + std::unique_lock add_lock(this->add_mutex_, std::defer_lock); + if (this->support_force_remove()) { + add_lock.lock(); + } if (insert_codes) { this->insert_persistent_codes(data, inner_id); } - std::unique_lock add_lock(add_mutex_); + if (not this->support_force_remove()) { + add_lock.lock(); + } if (level >= static_cast(this->route_graphs_.size()) || bottom_graph_->TotalCount() == 0) { std::scoped_lock wlock(this->global_mutex_); // level maybe a negative number(-1) diff --git a/src/algorithm/hgraph/hgraph_modify.cpp b/src/algorithm/hgraph/hgraph_modify.cpp index 653bfd40e..c5b6e8b3b 100644 --- a/src/algorithm/hgraph/hgraph_modify.cpp +++ b/src/algorithm/hgraph/hgraph_modify.cpp @@ -29,6 +29,8 @@ HGraph::Remove(const std::vector& ids, RemoveMode mode) { } if (mode == RemoveMode::FORCE_REMOVE) { + CHECK_ARGUMENT(this->support_force_remove(), + "force remove requires index_param.support_force_remove to be true"); std::unique_lock wlock(this->force_remove_mutex_); for (const auto& id : ids) { delete_count += this->force_remove_one(id); diff --git a/src/algorithm/hgraph/hgraph_param_mapping.cpp b/src/algorithm/hgraph/hgraph_param_mapping.cpp index 3664b92ac..65a5a42bb 100644 --- a/src/algorithm/hgraph/hgraph_param_mapping.cpp +++ b/src/algorithm/hgraph/hgraph_param_mapping.cpp @@ -332,6 +332,12 @@ HGraph::map_hgraph_param(const JsonType& hgraph_json) { HGRAPH_SUPPORT_REMOVE, {GRAPH_KEY, GRAPH_SUPPORT_REMOVE}, }, + { + HGRAPH_SUPPORT_FORCE_REMOVE, + { + SUPPORT_FORCE_REMOVE, + }, + }, { HGRAPH_REMOVE_FLAG_BIT, {GRAPH_KEY, REMOVE_FLAG_BIT}, @@ -454,6 +460,7 @@ HGraph::map_hgraph_param(const JsonType& hgraph_json) { }, "{HGRAPH_SUPPORT_DUPLICATE}": false, "{HGRAPH_SUPPORT_TOMBSTONE}": false, + "{SUPPORT_FORCE_REMOVE}": false, "{EF_CONSTRUCTION_KEY}": 400 })"; diff --git a/src/algorithm/hgraph/hgraph_parameter.cpp b/src/algorithm/hgraph/hgraph_parameter.cpp index 16c118dae..ba53b0620 100644 --- a/src/algorithm/hgraph/hgraph_parameter.cpp +++ b/src/algorithm/hgraph/hgraph_parameter.cpp @@ -131,6 +131,9 @@ HGraphParameter::FromJson(const JsonType& json) { if (json.Contains(SUPPORT_TOMBSTONE)) { this->support_tombstone = json[SUPPORT_TOMBSTONE].GetBool(); } + if (json.Contains(SUPPORT_FORCE_REMOVE)) { + this->support_force_remove = json[SUPPORT_FORCE_REMOVE].GetBool(); + } } JsonType @@ -147,6 +150,7 @@ HGraphParameter::ToJson() const { json[ALPHA_KEY].SetFloat(this->alpha); json[SUPPORT_DUPLICATE].SetBool(this->support_duplicate); json[DUPLICATE_DISTANCE_THRESHOLD].SetFloat(this->duplicate_distance_threshold); + json[SUPPORT_FORCE_REMOVE].SetBool(this->support_force_remove); json[TRAIN_SAMPLE_COUNT_KEY].SetInt(this->train_sample_count); return json; } @@ -198,6 +202,10 @@ HGraphParameter::CheckCompatibility(const ParamPtr& other) const { "HGraphParameter::CheckCompatibility: duplicate_distance_threshold must be the same"); return false; } + if (support_force_remove != hgraph_param->support_force_remove) { + logger::error("HGraphParameter::CheckCompatibility: support_force_remove must be the same"); + return false; + } return true; } diff --git a/src/algorithm/hgraph/hgraph_parameter.h b/src/algorithm/hgraph/hgraph_parameter.h index 74846357f..e8e601125 100644 --- a/src/algorithm/hgraph/hgraph_parameter.h +++ b/src/algorithm/hgraph/hgraph_parameter.h @@ -65,6 +65,7 @@ class HGraphParameter : public InnerIndexParameter { bool support_duplicate{false}; float duplicate_distance_threshold{0.0F}; bool support_tombstone{false}; + bool support_force_remove{false}; DataTypes data_type{DataTypes::DATA_TYPE_FLOAT}; diff --git a/src/algorithm/hgraph/hgraph_parameter_test.cpp b/src/algorithm/hgraph/hgraph_parameter_test.cpp index 14451d0b1..86361d9de 100644 --- a/src/algorithm/hgraph/hgraph_parameter_test.cpp +++ b/src/algorithm/hgraph/hgraph_parameter_test.cpp @@ -55,6 +55,7 @@ struct HGraphDefaultParam { bool use_attribute_filter = false; bool support_duplicate = false; float duplicate_distance_threshold = 0.0F; + bool support_force_remove = false; bool use_reorder = true; }; @@ -110,7 +111,8 @@ generate_hgraph_param(const HGraphDefaultParam& param) { "use_attribute_filter": {}, "use_reorder": {}, "support_duplicate": {}, - "duplicate_distance_threshold": {} + "duplicate_distance_threshold": {}, + "support_force_remove": {} }})"; return fmt::format(param_str, @@ -126,7 +128,8 @@ generate_hgraph_param(const HGraphDefaultParam& param) { param.use_attribute_filter, param.use_reorder, param.support_duplicate, - param.duplicate_distance_threshold); + param.duplicate_distance_threshold, + param.support_force_remove); } // clang-format off @@ -171,6 +174,8 @@ TEST_CASE("HGraph Parameters CheckCompatibility", "[ut][HGraphParameter][CheckCo 0.0F, 0.1F, false) + TEST_COMPATIBILITY_CASE( + "different support force remove", support_force_remove, true, false, false) } // clang-format on @@ -256,3 +261,28 @@ TEST_CASE("HGraphSearchParameters parses brute_force_threshold", R"({"hgraph": {"ef_search": 32, "brute_force_threshold": 1.5}})")); } } + +TEST_CASE("HGraph maps support_force_remove to inner parameter", "[ut][HGraphParameter]") { + auto param = vsag::JsonType::Parse(R"({ + "base_quantization_type": "fp32", + "base_io_type": "block_memory_io", + "precise_quantization_type": "fp32", + "precise_io_type": "block_memory_io", + "graph_io_type": "block_memory_io", + "graph_storage_type": "flat", + "graph_type": "nsw", + "max_degree": 32, + "ef_construction": 100, + "support_force_remove": true, + "use_reorder": true + })"); + + vsag::IndexCommonParam common_param; + common_param.dim_ = 128; + common_param.data_type_ = vsag::DataTypes::DATA_TYPE_FLOAT; + auto hgraph_param = vsag::HGraph::CheckAndMappingExternalParam(param, common_param); + auto typed_param = std::dynamic_pointer_cast(hgraph_param); + + REQUIRE(typed_param != nullptr); + REQUIRE(typed_param->support_force_remove); +} diff --git a/src/algorithm/hgraph/hgraph_search.cpp b/src/algorithm/hgraph/hgraph_search.cpp index fb4842c2a..af58b42bd 100644 --- a/src/algorithm/hgraph/hgraph_search.cpp +++ b/src/algorithm/hgraph/hgraph_search.cpp @@ -86,7 +86,10 @@ HGraph::KnnSearch(const DatasetPtr& query, (1 <= params.ef_search) and (params.ef_search <= ef_search_threshold), fmt::format("ef_search({}) must in range[1, {}]", params.ef_search, ef_search_threshold)); - std::shared_lock force_remove_rlock(this->force_remove_mutex_); + std::shared_lock force_remove_rlock; + if (this->support_force_remove()) { + force_remove_rlock = std::shared_lock(this->force_remove_mutex_); + } std::shared_lock shared_lock(this->global_mutex_); // check k CHECK_ARGUMENT(k > 0, fmt::format("k({}) must be greater than 0", k)); @@ -400,7 +403,10 @@ HGraph::RangeSearch(const DatasetPtr& query, CHECK_ARGUMENT(limited_size != 0, fmt::format("limited_size({}) must not be equal to 0", limited_size)); - std::shared_lock force_remove_rlock(this->force_remove_mutex_); + std::shared_lock force_remove_rlock; + if (this->support_force_remove()) { + force_remove_rlock = std::shared_lock(this->force_remove_mutex_); + } std::shared_lock shared_lock(this->global_mutex_); InnerSearchParam search_param; @@ -517,7 +523,10 @@ HGraph::SearchWithRequest(const SearchRequest& request) const { (1 <= params.ef_search) and (params.ef_search <= ef_search_threshold), fmt::format("ef_search({}) must in range[1, {}]", params.ef_search, ef_search_threshold)); - std::shared_lock force_remove_rlock(this->force_remove_mutex_); + std::shared_lock force_remove_rlock; + if (this->support_force_remove()) { + force_remove_rlock = std::shared_lock(this->force_remove_mutex_); + } std::shared_lock shared_lock(this->global_mutex_); // check k diff --git a/src/constants.cpp b/src/constants.cpp index ca67a0ece..cf39ad53d 100644 --- a/src/constants.cpp +++ b/src/constants.cpp @@ -149,6 +149,7 @@ const char* const RABITQ_USE_FHT = "rabitq_use_fht"; const char* const INDEX_TQ_CHAIN = "tq_chain"; const char* const HGRAPH_SUPPORT_REMOVE = "support_remove"; +const char* const HGRAPH_SUPPORT_FORCE_REMOVE = "support_force_remove"; const char* const HGRAPH_REMOVE_FLAG_BIT = "remove_flag_bit"; const char* const HGRAPH_USE_REORDER = USE_REORDER_KEY; const char* const HGRAPH_REORDER_SOURCE = "reorder_source"; diff --git a/src/inner_string_params.h b/src/inner_string_params.h index 282b0e1ce..f04342055 100644 --- a/src/inner_string_params.h +++ b/src/inner_string_params.h @@ -172,6 +172,7 @@ const char* const HOLD_MOLDS = "hold_molds"; const char* const SUPPORT_DUPLICATE = "support_duplicate"; const char* const DUPLICATE_DISTANCE_THRESHOLD = "duplicate_distance_threshold"; const char* const SUPPORT_TOMBSTONE = "support_tombstone"; +const char* const SUPPORT_FORCE_REMOVE = "support_force_remove"; const char* const SUPPORT_AUTOTUNE = "support_autotune"; const char* const DATACELL_OFFSETS = "datacell_offsets"; @@ -281,6 +282,7 @@ const std::unordered_map DEFAULT_MAP = { {"RABITQ_QUANTIZATION_ERROR_RATE_KEY", RABITQ_QUANTIZATION_ERROR_RATE_KEY}, {"TQ_CHAIN_KEY", TQ_CHAIN_KEY}, {"NO_BUILD_LEVELS", NO_BUILD_LEVELS}, - {"GRAPH_TYPE_KEY", GRAPH_TYPE_KEY}}; + {"GRAPH_TYPE_KEY", GRAPH_TYPE_KEY}, + {"SUPPORT_FORCE_REMOVE", SUPPORT_FORCE_REMOVE}}; } // namespace vsag diff --git a/tests/test_hgraph_remove.cpp b/tests/test_hgraph_remove.cpp index 354d0a66c..190e31005 100644 --- a/tests/test_hgraph_remove.cpp +++ b/tests/test_hgraph_remove.cpp @@ -35,7 +35,7 @@ constexpr int64_t EF_SEARCH = 20; constexpr int64_t THREAD_COUNT = 4; vsag::IndexPtr -CreateHGraphIndex() { +CreateHGraphIndex(bool support_force_remove = true) { auto origin_size = vsag::Options::Instance().block_size_limit(); vsag::Options::Instance().set_block_size_limit(1024 * 1024 * 2); @@ -45,6 +45,7 @@ CreateHGraphIndex() { index_param["ef_construction"] = EF_CONSTRUCTION; index_param["build_thread_count"] = 0; index_param["use_reverse_edges"] = true; + index_param["support_force_remove"] = support_force_remove; nlohmann::json param; param["dtype"] = "float32"; @@ -415,3 +416,25 @@ TEST_CASE("HGraph Batch ForceRemove", "[ft][hgraph]") { REQUIRE(remove_result.value() == remove_ids.size()); REQUIRE(index->GetNumElements() == NUM_ELEMENTS - remove_ids.size()); } + +TEST_CASE("HGraph ForceRemove Requires support_force_remove", "[ft][hgraph]") { + fixtures::logger::LoggerReplacer _; + + auto index = CreateHGraphIndex(false); + + int64_t id = 1; + std::vector vector(DIM, 0.5F); + auto dataset = vsag::Dataset::Make(); + dataset->Dim(DIM)->NumElements(1)->Ids(&id)->Float32Vectors(vector.data())->Owner(false); + + auto add_result = index->Add(dataset); + REQUIRE(add_result.has_value()); + + auto force_remove_result = index->Remove(id, vsag::RemoveMode::FORCE_REMOVE); + REQUIRE_FALSE(force_remove_result.has_value()); + REQUIRE(force_remove_result.error().type == vsag::ErrorType::INVALID_ARGUMENT); + + auto mark_remove_result = index->Remove(id, vsag::RemoveMode::MARK_REMOVE); + REQUIRE(mark_remove_result.has_value()); + REQUIRE(mark_remove_result.value() == 1); +}