Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,7 @@ graph_info.json

# Claude Code plans (local only)
docs/plans/
docs/superpowers/

# Test artifacts (Catch2 tests write serialized indexes into CWD)
*.index
1 change: 1 addition & 0 deletions include/knowhere/comp/index_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ namespace IndexEnum {
constexpr const char* INVALID = "";

constexpr const char* INDEX_FAISS_BIN_IDMAP = "BIN_FLAT";
constexpr const char* INDEX_FAISS = "FAISS";
constexpr const char* INDEX_FAISS_BIN_IVFFLAT = "BIN_IVF_FLAT";

constexpr const char* INDEX_FAISS_IDMAP = "FLAT";
Expand Down
9 changes: 9 additions & 0 deletions include/knowhere/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,15 @@ class BaseConfig : public Config {
CFG_INT lemur_seed; // random seed for LEMUR
CFG_INT lemur_num_layers; // number of layers in feature_extractor
CFG_BOOL emb_list_rerank; // whether to perform MaxSim reranking after ANN search

/// Extension point that receives the configuration JSON once FormatAndCheck has
/// succeeded and before Config::Load fills in the typed fields. FaissConfig overrides
/// it to keep the keys it forwards to faiss. The base implementation deliberately does
/// nothing; configs that only need typed fields should keep using
/// KNOWHERE_CONFIG_DECLARE_FIELD and leave this hook alone.
///
/// @param json the configuration JSON (ignored by the default implementation).
virtual void
CaptureRawJson(const Json& json) {
    (void)json;  // intentionally unused: the default hook is a no-op
}

KNOWHERE_DECLARE_CONFIG(BaseConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(dim).allow_empty_without_default().description("vector dim").for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(metric_type)
Expand Down
3 changes: 3 additions & 0 deletions include/knowhere/index/index_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ static std::set<std::pair<std::string, VecType>> legal_knowhere_index = {
{IndexEnum::INDEX_FAISS_IDMAP, VecType::VECTOR_BFLOAT16},
// {IndexEnum::INDEX_FAISS_IDMAP, VecType::VECTOR_INT8},

{IndexEnum::INDEX_FAISS, VecType::VECTOR_FLOAT},
{IndexEnum::INDEX_FAISS, VecType::VECTOR_BINARY},

{IndexEnum::INDEX_FAISS_IVFFLAT, VecType::VECTOR_FLOAT},
{IndexEnum::INDEX_FAISS_IVFFLAT, VecType::VECTOR_FLOAT16},
{IndexEnum::INDEX_FAISS_IVFFLAT, VecType::VECTOR_BFLOAT16},
Expand Down
431 changes: 431 additions & 0 deletions src/index/faiss/faiss.cc

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions src/index/faiss/faiss_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (C) 2019-2026 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
// file except in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under
// the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
// ANY KIND, either express or implied. See the License for the specific language
// governing permissions and limitations under the License.

#pragma once

#include "knowhere/config.h"

namespace knowhere {

class FaissConfig : public BaseConfig {
public:
// Required. faiss DSL understood by faiss::index_factory (fp32) or
// faiss::index_binary_factory (bin1). Examples: "Flat", "IVF1024,PQ16x8",
// "HNSW32,Flat", "BIVF256,Hamming".
CFG_STRING faiss_index_name;

// Captured subset of the incoming JSON: only keys that this config's __DICT__
// does NOT declare (i.e. not owned by Knowhere's native config layer). Those are
// the keys the vanilla faiss adapter forwards to faiss::ParameterSpace
// (build) and per-family SearchParametersXxx (search). Declared keys (k,
// metric_type, trace_id, faiss_index_name, ...) are consumed by Config::Load
// into typed fields and therefore filtered out of raw_params at capture time.
Json raw_params;

KNOWHERE_DECLARE_CONFIG(FaissConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(faiss_index_name)
.description("faiss factory string, e.g. \"IVF1024,PQ16x8\"")
.allow_empty_without_default()
.for_train()
.for_deserialize()
.for_deserialize_from_file();
}

void
CaptureRawJson(const Json& json) override {
raw_params = Json::object();
for (auto it = json.begin(); it != json.end(); ++it) {
// Skip any key already declared as a typed field on BaseConfig or
// FaissConfig — those are Knowhere's own and will be consumed by
// Config::Load. Everything else is a faiss-bound knob we forward.
if (__DICT__.count(it.key()) == 0) {
raw_params[it.key()] = it.value();
}
}
}
};

} // namespace knowhere
171 changes: 171 additions & 0 deletions src/index/faiss/faiss_dispatch.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// Copyright (C) 2019-2026 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
// file except in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under
// the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
// ANY KIND, either express or implied. See the License for the specific language
// governing permissions and limitations under the License.

#include "index/faiss/faiss_dispatch.h"

#include <cmath>
#include <stdexcept>
#include <string>
#include <utility>

#include <faiss/AutoTune.h>
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>
#include <faiss/cppcontrib/knowhere/SearchParamsDispatch.h>
#include <faiss/impl/FaissException.h>
#include <faiss/impl/IDSelector.h>

namespace knowhere::faiss_vanilla {

namespace {

// Coerce a JSON value into a finite double for faiss consumption. Accepts:
//   - numbers:               e.g. 16, 16.0     -> 16.0
//   - booleans:              true / false      -> 1.0 / 0.0
//   - stringified numbers:   "16"              -> 16.0
//   - stringified booleans:  "true" / "false"  -> 1.0 / 0.0
// Rejects arrays, objects, null, strings that do not parse completely as a number,
// and any value that is not finite. std::stod happily parses "nan", "inf" and
// "infinity"; such values are meaningless as index/search knobs, so they are treated
// as invalid input instead of being forwarded to faiss.
//
// @param v        JSON value to convert.
// @param key      parameter name; only used to build the error message.
// @param out      receives the converted value; written only on success.
// @param err_msg  optional; receives a description of the failure when non-null.
// @return Status::success on success, Status::invalid_args otherwise.
Status
coerce_to_double(const Json& v, const std::string& key, double* out, std::string* err_msg) {
    double parsed = 0.0;
    bool recognized = false;

    if (v.is_number()) {
        parsed = v.get<double>();
        recognized = true;
    } else if (v.is_boolean()) {
        parsed = v.get<bool>() ? 1.0 : 0.0;
        recognized = true;
    } else if (v.is_string()) {
        const std::string s = v.get<std::string>();
        if (s == "true") {
            parsed = 1.0;
            recognized = true;
        } else if (s == "false") {
            parsed = 0.0;
            recognized = true;
        } else {
            try {
                size_t pos = 0;
                const double candidate = std::stod(s, &pos);
                // Require the whole string to be consumed so "16abc" is rejected.
                if (pos == s.size()) {
                    parsed = candidate;
                    recognized = true;
                }
            } catch (const std::invalid_argument&) {
                // Not a number at all: fall through to the error path.
            } catch (const std::out_of_range&) {
                // Magnitude not representable as double: fall through to the error path.
            }
        }
    }

    // NaN / +-inf would otherwise slip through (via "nan"/"inf" strings or a
    // programmatically built Json number).
    if (recognized && std::isfinite(parsed)) {
        *out = parsed;
        return Status::success;
    }

    if (err_msg) {
        *err_msg = "faiss vanilla: param '" + key + "' expects a number or boolean; got " + v.dump();
    }
    return Status::invalid_args;
}

// Push every entry of raw_params onto the faiss index through faiss::ParameterSpace.
// raw_params is expected to be the output of FaissConfig::CaptureRawJson, i.e. it no
// longer contains keys declared via KNOWHERE_CONFIG_DECLARE_FIELD. For each entry:
//   1. the key must pass the faiss-owned build-param whitelist
//      (is_supported_build_param); otherwise the call fails with a "not recognized"
//      message (typo, non-faiss param);
//   2. the value is coerced to a double (see coerce_to_double);
//   3. the pair is handed to ParameterSpace::set_index_parameter; a FaissException
//      thrown there (whitelisted key that faiss rejects for this index) is
//      reported as invalid_args together with the faiss message.
// Processing stops at the first failing entry. err_msg may be null.
template <typename IndexT>
Status
apply_impl(IndexT* index, const Json& raw_params, std::string* err_msg) {
    // Stores the diagnostic when the caller asked for one and yields invalid_args.
    const auto reject = [err_msg](const std::string& text) {
        if (err_msg) {
            *err_msg = text;
        }
        return Status::invalid_args;
    };

    ::faiss::ParameterSpace space;
    for (auto entry = raw_params.begin(); entry != raw_params.end(); ++entry) {
        const std::string& name = entry.key();
        if (!::faiss::cppcontrib::knowhere::is_supported_build_param(name)) {
            return reject("faiss vanilla: build param '" + name + "' is not recognized");
        }

        double numeric = 0.0;
        const auto coerced = coerce_to_double(entry.value(), name, &numeric, err_msg);
        if (coerced != Status::success) {
            return coerced;
        }

        try {
            space.set_index_parameter(index, name, numeric);
        } catch (const ::faiss::FaissException& e) {
            return reject(std::string("faiss rejected param '") + name + "': " + e.what());
        }
    }
    return Status::success;
}

// Shared logic for search-param builders. `index` can be faiss::Index* or IndexBinary*.
// raw_params has already been filtered by FaissConfig::CaptureRawJson to contain only
// keys NOT declared by Knowhere's typed config. Uses the faiss-owned whitelist
// (supported_search_params) to validate remaining keys, and delegates both the
// SearchParameters-family selection and the per-name field set to the upstream
// helper. Knowhere layer only adds: (1) sel attach, (2) JSON->double conversion,
// (3) clear error wording.
//
// @param index       index whose family decides which SearchParameters type is built.
// @param raw_params  forwarded search knobs (key -> number/boolean).
// @param sel         selector attached to the built parameters; may be null.
// @param out         receives the built parameters; written only on success.
// @param err_msg     optional; receives a description of the failure when non-null.
// @return Status::success, or Status::invalid_args on the first key that is not
//         supported, not coercible to a double, or not applied by the upstream helper.
template <typename IndexT>
Status
build_search_params_impl(const IndexT* index, const Json& raw_params, ::faiss::IDSelector* sel,
                         std::unique_ptr<::faiss::SearchParameters>* out, std::string* err_msg) {
    auto params = ::faiss::cppcontrib::knowhere::make_search_params(index);
    // NOTE(review): make_search_params is defined upstream and not visible here; guard
    // against a null result rather than dereferencing it below.
    if (!params) {
        if (err_msg) {
            *err_msg = "faiss vanilla: could not create search params for this index family";
        }
        return Status::invalid_args;
    }
    params->sel = sel;

    const auto supported = ::faiss::cppcontrib::knowhere::supported_search_params(index);
    for (auto it = raw_params.begin(); it != raw_params.end(); ++it) {
        const std::string& key = it.key();
        if (!supported.count(key)) {
            if (err_msg) {
                *err_msg = "faiss vanilla: search param '" + key + "' not supported for this index family";
            }
            return Status::invalid_args;
        }
        double val = 0.0;
        auto cst = coerce_to_double(it.value(), key, &val, err_msg);
        if (cst != Status::success) {
            return cst;
        }
        // The whitelist is expected to guarantee that the setter accepts the key. If
        // it still reports failure, surface that instead of silently dropping the
        // param: the caller would otherwise search with settings it did not ask for.
        if (!::faiss::cppcontrib::knowhere::try_set_search_param(params.get(), key, val)) {
            if (err_msg) {
                *err_msg = "faiss vanilla: search param '" + key + "' was accepted by the whitelist but not applied";
            }
            return Status::invalid_args;
        }
    }
    *out = std::move(params);
    return Status::success;
}

} // namespace

// Float-index overload: validates raw_params and applies them to `index` through the
// shared template implementation. Returns whatever apply_impl reports.
Status
apply_build_params(::faiss::Index* index, const Json& raw_params, std::string* err_msg) {
    return apply_impl<::faiss::Index>(index, raw_params, err_msg);
}

// Binary-index overload: validates raw_params and applies them to `index` through the
// shared template implementation. Returns whatever apply_impl reports.
Status
apply_build_params(::faiss::IndexBinary* index, const Json& raw_params, std::string* err_msg) {
    return apply_impl<::faiss::IndexBinary>(index, raw_params, err_msg);
}

// Float-index overload: builds the per-request SearchParameters for `index` from
// raw_params, attaches `sel`, and stores the result in *out on success. All work is
// done by the shared template implementation.
Status
build_search_params(const ::faiss::Index* index, const Json& raw_params, ::faiss::IDSelector* sel,
                    std::unique_ptr<::faiss::SearchParameters>* out, std::string* err_msg) {
    return build_search_params_impl<::faiss::Index>(index, raw_params, sel, out, err_msg);
}

// Binary-index overload: builds the per-request SearchParameters for `index` from
// raw_params through the shared template implementation and stores the result in
// *out on success.
Status
build_search_params(const ::faiss::IndexBinary* index, const Json& raw_params, ::faiss::IDSelector* sel,
                    std::unique_ptr<::faiss::SearchParameters>* out, std::string* err_msg) {
    // IndexBinaryIVF requires SearchParametersIVF; binary side also does not honor
    // IDSelector, so attaching sel here is typically a no-op at search time.
    return build_search_params_impl<::faiss::IndexBinary>(index, raw_params, sel, out, err_msg);
}

} // namespace knowhere::faiss_vanilla
50 changes: 50 additions & 0 deletions src/index/faiss/faiss_dispatch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright (C) 2019-2026 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
// file except in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under
// the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
// ANY KIND, either express or implied. See the License for the specific language
// governing permissions and limitations under the License.

#pragma once

#include <memory>
#include <string>

#include "knowhere/config.h"

namespace faiss {
struct Index;
struct IndexBinary;
struct IDSelector;
struct SearchParameters;
} // namespace faiss

namespace knowhere::faiss_vanilla {

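// Validates every key in raw_params against the faiss build-param whitelist, coerces
// its value to a double, and forwards it to faiss::ParameterSpace::set_index_parameter
// on the given index. Unrecognized keys, values that are not numbers/booleans, and
// faiss exceptions are all reported as Status::invalid_args, with a description
// (including the faiss message, where there is one) in *err_msg when it is non-null.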
Status
apply_build_params(::faiss::Index* index, const Json& raw_params, std::string* err_msg);

Status
apply_build_params(::faiss::IndexBinary* index, const Json& raw_params, std::string* err_msg);

// Build a per-request SearchParameters* appropriate for the concrete faiss index
// family. The family dispatch itself lives in faiss::cppcontrib::knowhere (upstream-
// bound helper); this wrapper adds: (1) sel assignment, (2) JSON value -> double
// coercion, (3) unknown-key error surfacing. Framework-owned keys are NOT filtered
// here: raw_params must already exclude them (FaissConfig::CaptureRawJson does that).
Status
build_search_params(const ::faiss::Index* index, const Json& raw_params, ::faiss::IDSelector* sel,
std::unique_ptr<::faiss::SearchParameters>* out, std::string* err_msg);

Status
build_search_params(const ::faiss::IndexBinary* index, const Json& raw_params, ::faiss::IDSelector* sel,
std::unique_ptr<::faiss::SearchParameters>* out, std::string* err_msg);

} // namespace knowhere::faiss_vanilla
1 change: 1 addition & 0 deletions src/index/index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ LoadConfig(BaseConfig* cfg, const Json& json, knowhere::PARAM_TYPE param_type, c
auto res = Config::FormatAndCheck(*cfg, json_, msg);
LOG_KNOWHERE_DEBUG_ << method << " config dump: " << json_.dump();
RETURN_IF_ERROR(res);
cfg->CaptureRawJson(json_);
return Config::Load(*cfg, json_, param_type, msg);
}

Expand Down
Loading
Loading