|
| 1 | +// Copyright (C) 2019-2026 Zilliz. All rights reserved. |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this |
| 4 | +// file except in compliance with the License. You may obtain a copy of the License at |
| 5 | +// |
| 6 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 7 | +// |
| 8 | +// Unless required by applicable law or agreed to in writing, software distributed under |
| 9 | +// the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF |
| 10 | +// ANY KIND, either express or implied. See the License for the specific language |
| 11 | +// governing permissions and limitations under the License. |
| 12 | + |
| 13 | +#include "index/faiss/faiss_dispatch.h" |
| 14 | + |
| 15 | +#include <faiss/AutoTune.h> |
| 16 | +#include <faiss/Index.h> |
| 17 | +#include <faiss/IndexBinary.h> |
| 18 | +#include <faiss/cppcontrib/knowhere/SearchParamsDispatch.h> |
| 19 | +#include <faiss/impl/FaissException.h> |
| 20 | +#include <faiss/impl/IDSelector.h> |
| 21 | + |
| 22 | +namespace knowhere::faiss_vanilla { |
| 23 | + |
| 24 | +namespace { |
| 25 | + |
| 26 | +// Coerce a json value into a double for faiss consumption. Accepts: |
| 27 | +// - numbers: e.g. 16, 16.0 -> 16.0 |
| 28 | +// - booleans: true / false -> 1.0 / 0.0 |
| 29 | +// - stringified numbers: "16" -> 16.0 |
| 30 | +// - stringified booleans: "true" -> 1.0 |
| 31 | +// Rejects arrays, objects, null, and unparseable strings. Matches the spirit of |
| 32 | +// knowhere::Config::FormatAndCheck's string-to-typed coercion for declared fields, |
| 33 | +// so forwarded keys behave consistently with native Knowhere keys. |
| 34 | +Status |
| 35 | +coerce_to_double(const Json& v, const std::string& key, double* out, std::string* err_msg) { |
| 36 | + if (v.is_number()) { |
| 37 | + *out = v.get<double>(); |
| 38 | + return Status::success; |
| 39 | + } |
| 40 | + if (v.is_boolean()) { |
| 41 | + *out = v.get<bool>() ? 1.0 : 0.0; |
| 42 | + return Status::success; |
| 43 | + } |
| 44 | + if (v.is_string()) { |
| 45 | + const std::string s = v.get<std::string>(); |
| 46 | + if (s == "true") { |
| 47 | + *out = 1.0; |
| 48 | + return Status::success; |
| 49 | + } |
| 50 | + if (s == "false") { |
| 51 | + *out = 0.0; |
| 52 | + return Status::success; |
| 53 | + } |
| 54 | + try { |
| 55 | + size_t pos = 0; |
| 56 | + double parsed = std::stod(s, &pos); |
| 57 | + if (pos == s.size()) { |
| 58 | + *out = parsed; |
| 59 | + return Status::success; |
| 60 | + } |
| 61 | + } catch (const std::invalid_argument&) { |
| 62 | + } catch (const std::out_of_range&) { |
| 63 | + } |
| 64 | + } |
| 65 | + if (err_msg) { |
| 66 | + *err_msg = "faiss vanilla: param '" + key + "' expects a number or boolean; got " + v.dump(); |
| 67 | + } |
| 68 | + return Status::invalid_args; |
| 69 | +} |
| 70 | + |
| 71 | +// Apply every key in raw_params to the faiss index. raw_params has already been |
| 72 | +// filtered by FaissConfig::CaptureRawJson to exclude keys owned by Knowhere's own |
| 73 | +// config layer (fields declared via KNOWHERE_CONFIG_DECLARE_FIELD). We pre-validate |
| 74 | +// the remaining keys against the faiss-owned whitelist (supported_build_param_names |
| 75 | +// + "quantizer_*" prefix handling) before calling ParameterSpace. A key that fails |
| 76 | +// the whitelist (typo, non-faiss param) is rejected with a clear error; a key that |
| 77 | +// passes the whitelist but is incompatible with the concrete index type (e.g. |
| 78 | +// nprobe on an HNSW) is still caught by ParameterSpace's exception and surfaced |
| 79 | +// as invalid_args. |
| 80 | +template <typename IndexT> |
| 81 | +Status |
| 82 | +apply_impl(IndexT* index, const Json& raw_params, std::string* err_msg) { |
| 83 | + ::faiss::ParameterSpace ps; |
| 84 | + for (auto it = raw_params.begin(); it != raw_params.end(); ++it) { |
| 85 | + const std::string& key = it.key(); |
| 86 | + if (!::faiss::cppcontrib::knowhere::is_supported_build_param(key)) { |
| 87 | + if (err_msg) { |
| 88 | + *err_msg = "faiss vanilla: build param '" + key + "' is not recognized"; |
| 89 | + } |
| 90 | + return Status::invalid_args; |
| 91 | + } |
| 92 | + double val = 0.0; |
| 93 | + auto cst = coerce_to_double(it.value(), key, &val, err_msg); |
| 94 | + if (cst != Status::success) { |
| 95 | + return cst; |
| 96 | + } |
| 97 | + try { |
| 98 | + ps.set_index_parameter(index, key, val); |
| 99 | + } catch (const ::faiss::FaissException& e) { |
| 100 | + if (err_msg) { |
| 101 | + *err_msg = std::string("faiss rejected param '") + key + "': " + e.what(); |
| 102 | + } |
| 103 | + return Status::invalid_args; |
| 104 | + } |
| 105 | + } |
| 106 | + return Status::success; |
| 107 | +} |
| 108 | + |
| 109 | +// Shared logic for search-param builders. `index` can be faiss::Index* or IndexBinary*. |
| 110 | +// raw_params has already been filtered by FaissConfig::CaptureRawJson to contain only |
| 111 | +// keys NOT declared by Knowhere's typed config. Uses the faiss-owned whitelist |
| 112 | +// (supported_search_params) to validate remaining keys, and delegates both the |
| 113 | +// SearchParameters-family selection and the per-name field set to the upstream |
| 114 | +// helper. Knowhere layer only adds: (1) sel attach, (2) JSON->double conversion, |
| 115 | +// (3) clear error wording. |
| 116 | +template <typename IndexT> |
| 117 | +Status |
| 118 | +build_search_params_impl(const IndexT* index, const Json& raw_params, ::faiss::IDSelector* sel, |
| 119 | + std::unique_ptr<::faiss::SearchParameters>* out, std::string* err_msg) { |
| 120 | + auto params = ::faiss::cppcontrib::knowhere::make_search_params(index); |
| 121 | + params->sel = sel; |
| 122 | + |
| 123 | + const auto supported = ::faiss::cppcontrib::knowhere::supported_search_params(index); |
| 124 | + for (auto it = raw_params.begin(); it != raw_params.end(); ++it) { |
| 125 | + const std::string& key = it.key(); |
| 126 | + if (!supported.count(key)) { |
| 127 | + if (err_msg) { |
| 128 | + *err_msg = "faiss vanilla: search param '" + key + "' not supported for this index family"; |
| 129 | + } |
| 130 | + return Status::invalid_args; |
| 131 | + } |
| 132 | + double val = 0.0; |
| 133 | + auto cst = coerce_to_double(it.value(), key, &val, err_msg); |
| 134 | + if (cst != Status::success) { |
| 135 | + return cst; |
| 136 | + } |
| 137 | + // Whitelist already guarantees try_set_search_param returns true; treat a |
| 138 | + // false here as an invariant breach rather than user error. |
| 139 | + (void)::faiss::cppcontrib::knowhere::try_set_search_param(params.get(), key, val); |
| 140 | + } |
| 141 | + *out = std::move(params); |
| 142 | + return Status::success; |
| 143 | +} |
| 144 | + |
| 145 | +} // namespace |
| 146 | + |
| 147 | +Status |
| 148 | +apply_build_params(::faiss::Index* index, const Json& raw_params, std::string* err_msg) { |
| 149 | + return apply_impl(index, raw_params, err_msg); |
| 150 | +} |
| 151 | + |
| 152 | +Status |
| 153 | +apply_build_params(::faiss::IndexBinary* index, const Json& raw_params, std::string* err_msg) { |
| 154 | + return apply_impl(index, raw_params, err_msg); |
| 155 | +} |
| 156 | + |
| 157 | +Status |
| 158 | +build_search_params(const ::faiss::Index* index, const Json& raw_params, ::faiss::IDSelector* sel, |
| 159 | + std::unique_ptr<::faiss::SearchParameters>* out, std::string* err_msg) { |
| 160 | + return build_search_params_impl(index, raw_params, sel, out, err_msg); |
| 161 | +} |
| 162 | + |
| 163 | +Status |
| 164 | +build_search_params(const ::faiss::IndexBinary* index, const Json& raw_params, ::faiss::IDSelector* sel, |
| 165 | + std::unique_ptr<::faiss::SearchParameters>* out, std::string* err_msg) { |
| 166 | + // IndexBinaryIVF requires SearchParametersIVF; binary side also does not honor |
| 167 | + // IDSelector, so attaching sel here is typically a no-op at search time. |
| 168 | + return build_search_params_impl(index, raw_params, sel, out, err_msg); |
| 169 | +} |
| 170 | + |
| 171 | +} // namespace knowhere::faiss_vanilla |
0 commit comments