Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions ci/pod/e2e-arm-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ spec:
args: ["cat"]
resources:
requests:
memory: "12Gi"
cpu: "3"
memory: "16Gi"
cpu: "4"
limits:
memory: "12Gi"
cpu: "3"
memory: "16Gi"
cpu: "4"
volumeMounts:
- mountPath: /home/data
name: db-data
Expand Down
41 changes: 35 additions & 6 deletions cmake/libs/libfaiss.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,31 @@ knowhere_file_glob(
FAISS_AVX512_SRCS
thirdparty/faiss/faiss/cppcontrib/knowhere/impl/*avx512.cpp
)
# AVX512 vanilla Faiss dynamic dispatch related files
# AVX512 vanilla Faiss dynamic dispatch related files. Baseline
# sq-avx512.cpp is replaced by a knowhere-local prelude file that declares
# a fast DCTemplate specialization for QT_4bit_uniform + L2 and then
# textually #includes the baseline sq-avx512.cpp — see
# cppcontrib/knowhere/impl/sq-avx512-fastpath.cpp for the full design note.
knowhere_file_glob(
GLOB
FAISS_DD_AVX512_SRCS
thirdparty/faiss/faiss/impl/fast_scan/impl-avx512.cpp
thirdparty/faiss/faiss/impl/hnsw/avx512.cpp
thirdparty/faiss/faiss/impl/pq_code_distance/pq_code_distance-avx512.cpp
thirdparty/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp
thirdparty/faiss/faiss/cppcontrib/knowhere/impl/sq-avx512-fastpath.cpp
thirdparty/faiss/faiss/utils/distances_fused/avx512.cpp
thirdparty/faiss/faiss/utils/simd_impl/distances_avx512.cpp
thirdparty/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp
)
# Baseline sq-avx512.cpp is pulled in textually by the prelude file, not
# compiled directly. Remove it from the generic list so it is not picked
# up as a stand-alone TU (which would duplicate symbols).
knowhere_file_glob(
GLOB
FAISS_SQ_AVX512_EXCLUDE
thirdparty/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp
)
list(REMOVE_ITEM FAISS_SRCS ${FAISS_SQ_AVX512_EXCLUDE})
# combine files
list(APPEND FAISS_AVX512_SRCS ${FAISS_DD_AVX512_SRCS})
# remove platform files from general files
Expand All @@ -61,19 +74,27 @@ knowhere_file_glob(
FAISS_AVX2_SRCS
thirdparty/faiss/faiss/cppcontrib/knowhere/impl/*avx.cpp
)
# AVX2 vanilla Faiss dynamic dispatch related files
# AVX2 vanilla Faiss dynamic dispatch related files. sq-avx2.cpp is
# textually wrapped by sq-avx2-fastpath.cpp (see design note there).
knowhere_file_glob(
GLOB
FAISS_DD_AVX2_SRCS
thirdparty/faiss/faiss/impl/approx_topk/avx2.cpp
thirdparty/faiss/faiss/impl/fast_scan/impl-avx2.cpp
thirdparty/faiss/faiss/impl/hnsw/avx2.cpp
thirdparty/faiss/faiss/impl/pq_code_distance/pq_code_distance-avx2.cpp
thirdparty/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp
thirdparty/faiss/faiss/cppcontrib/knowhere/impl/sq-avx2-fastpath.cpp
thirdparty/faiss/faiss/utils/distances_fused/simdlib_based.cpp
thirdparty/faiss/faiss/utils/simd_impl/distances_avx2.cpp
thirdparty/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp
thirdparty/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp
)
knowhere_file_glob(
GLOB
FAISS_SQ_AVX2_EXCLUDE
thirdparty/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp
)
list(REMOVE_ITEM FAISS_SRCS ${FAISS_SQ_AVX2_EXCLUDE})
# combine files
list(APPEND FAISS_AVX2_SRCS ${FAISS_DD_AVX2_SRCS})
# remove platform files from general files
Expand Down Expand Up @@ -104,17 +125,25 @@ knowhere_file_glob(
FAISS_NEON_SRCS
thirdparty/faiss/faiss/cppcontrib/knowhere/impl/*neon.cpp
)
# NEON vanilla Faiss dynamic dispatch related files
# NEON vanilla Faiss dynamic dispatch related files. sq-neon.cpp is
# textually wrapped by sq-neon-fastpath.cpp (see design note there).
knowhere_file_glob(
GLOB
FAISS_DD_NEON_SRCS
thirdparty/faiss/faiss/impl/approx_topk/neon.cpp
thirdparty/faiss/faiss/impl/fast_scan/impl-neon.cpp
thirdparty/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp
thirdparty/faiss/faiss/cppcontrib/knowhere/impl/sq-neon-fastpath.cpp
thirdparty/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp
thirdparty/faiss/faiss/utils/simd_impl/distances_aarch64.cpp
thirdparty/faiss/faiss/utils/simd_impl/partitioning_neon.cpp
thirdparty/faiss/faiss/utils/simd_impl/rabitq_neon.cpp
)
knowhere_file_glob(
GLOB
FAISS_SQ_NEON_EXCLUDE
thirdparty/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp
)
list(REMOVE_ITEM FAISS_SRCS ${FAISS_SQ_NEON_EXCLUDE})
# combine files
list(APPEND FAISS_NEON_SRCS ${FAISS_DD_NEON_SRCS})
# remove platform files from general files
Expand Down
19 changes: 8 additions & 11 deletions src/index/data_view_dense_index/refine_computer.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
// knowhere-specific indices
#pragma once

#include "faiss/cppcontrib/knowhere/impl/ScalarQuantizer.h"
#include "faiss/cppcontrib/knowhere/invlists/InvertedLists.h"
#include "faiss/impl/DistanceComputer.h"
#include "faiss/impl/ScalarQuantizer.h"
#include "knowhere/comp/index_param.h"
#include "knowhere/object.h"
#include "knowhere/operands.h"
Expand Down Expand Up @@ -63,16 +63,13 @@ struct QuantRefine {
}
switch (refine_type) {
case RefineType::UINT8_QUANT:
quantizer = new faiss::cppcontrib::knowhere::ScalarQuantizer(
d, faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_8bit);
quantizer = new faiss::ScalarQuantizer(d, faiss::ScalarQuantizer::QuantizerType::QT_8bit);
break;
case RefineType::BFLOAT16_QUANT:
quantizer = new faiss::cppcontrib::knowhere::ScalarQuantizer(
d, faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_bf16);
quantizer = new faiss::ScalarQuantizer(d, faiss::ScalarQuantizer::QuantizerType::QT_bf16);
break;
case RefineType::FLOAT16_QUANT:
quantizer = new faiss::cppcontrib::knowhere::ScalarQuantizer(
d, faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_fp16);
quantizer = new faiss::ScalarQuantizer(d, faiss::ScalarQuantizer::QuantizerType::QT_fp16);
break;
default:
throw std::runtime_error("Fail to generate quant for refiner if refine_type == RefineType::DATA_VIEW");
Expand Down Expand Up @@ -118,9 +115,9 @@ struct QuantRefine {
GetMetric() {
return metric_type;
}
std::unique_ptr<faiss::cppcontrib::knowhere::ScalarQuantizer::SQDistanceComputer>
std::unique_ptr<faiss::ScalarQuantizer::SQDistanceComputer>
GetQuantComputer() {
return std::unique_ptr<faiss::cppcontrib::knowhere::ScalarQuantizer::SQDistanceComputer>(
return std::unique_ptr<faiss::ScalarQuantizer::SQDistanceComputer>(
quantizer->get_distance_computer(metric_type));
}
DataFormatEnum
Expand All @@ -141,7 +138,7 @@ struct QuantRefine {
static constexpr size_t key = 0;
static constexpr size_t list_num = 1;
static constexpr size_t segment_size = 48;
faiss::cppcontrib::knowhere::ScalarQuantizer* quantizer = nullptr;
faiss::ScalarQuantizer* quantizer = nullptr;
faiss::cppcontrib::knowhere::InvertedLists* storage = nullptr;
faiss::MetricType metric_type;
DataFormatEnum origin_data_type;
Expand All @@ -153,7 +150,7 @@ template <bool NeedNormalize = false>
struct QuantDataDistanceComputer : faiss::DistanceComputer {
std::vector<float> query_buf;
std::shared_ptr<QuantRefine> quant_data;
std::unique_ptr<faiss::cppcontrib::knowhere::ScalarQuantizer::SQDistanceComputer> qc;
std::unique_ptr<faiss::ScalarQuantizer::SQDistanceComputer> qc;
float q_norm;
size_t dim;

Expand Down
77 changes: 45 additions & 32 deletions src/index/hnsw/faiss_hnsw.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#include <faiss/cppcontrib/knowhere/IndexBinaryScalarQuantizer.h>
#include <faiss/cppcontrib/knowhere/IndexCosine.h>
#include <faiss/cppcontrib/knowhere/IndexFlat.h>
#include <faiss/cppcontrib/knowhere/IndexHNSWBinary.h>
#include <faiss/cppcontrib/knowhere/IndexSQ4Uniform.h>
#include <faiss/cppcontrib/knowhere/MetricType.h>
#include <faiss/cppcontrib/knowhere/impl/CountSizeIOWriter.h>
Expand All @@ -32,7 +34,6 @@
#include "common/metric.h"
#include "faiss/cppcontrib/knowhere/IndexHNSW.h"
#include "faiss/cppcontrib/knowhere/IndexRefine.h"
#include "faiss/cppcontrib/knowhere/impl/ScalarQuantizer.h"
#include "faiss/cppcontrib/knowhere/index_io.h"
#include "faiss/impl/mapped_io.h"
#include "index/clustering_config.h"
Expand Down Expand Up @@ -546,10 +547,10 @@ convert_rows_to_fp32(const void* const __restrict src_in, float* const __restric
// where each query_row has ((dim + 7) / 8) * 8 bits, and the total is nrows * ((dim + 7) / 8) * 8 bits.
// But the final format required is nrows * dim * 32 bits (float).
// There are actually two conversions happening here:
// 1. Each uint8_t value must be converted to float (in `BinarySQDistanceComputerWrapper::set_query`
// and `ScalarQuantizer::compute_codes`, it will be converted back to uint8_t). [same as int8]
// 1. Each uint8_t value must be converted to float (`BinaryFlatCodesDC::set_query` inside
// IndexBinaryScalarQuantizer will convert it back to uint8_t). [same as int8]
// 2. Each row must occupy dim * 32 bits of space, even if not all bits are filled;
// this is required by the convention set in `ScalarQuantizer::compute_codes`.
// this is required by the convention set by IndexBinaryScalarQuantizer::sa_encode.
const knowhere::bin1* const src = reinterpret_cast<const knowhere::bin1*>(src_in);
auto uint8_dim = (dim + 7) / 8;
for (size_t i = 0; i < nrows; i++) {
Expand Down Expand Up @@ -711,20 +712,26 @@ get_index_data_format(const faiss::Index* index) {
return DataFormatEnum::fp32;
}

// is it sq?
// note: IndexScalarQuantizerCosine preserves the original data, no cosine norm is applied
auto index_sq = dynamic_cast<const faiss::cppcontrib::knowhere::IndexScalarQuantizer*>(index);
if (index_sq != nullptr) {
if (index_sq->sq.qtype == faiss::cppcontrib::knowhere::ScalarQuantizer::QT_bf16) {
return DataFormatEnum::bf16;
} else if (index_sq->sq.qtype == faiss::cppcontrib::knowhere::ScalarQuantizer::QT_fp16) {
return DataFormatEnum::fp16;
} else if (index_sq->sq.qtype == faiss::cppcontrib::knowhere::ScalarQuantizer::QT_8bit_direct_signed) {
return DataFormatEnum::int8;
} else if (index_sq->sq.qtype == faiss::cppcontrib::knowhere::ScalarQuantizer::QT_1bit_direct) {
return DataFormatEnum::bin1;
} else {
return std::nullopt;
// is it binary (1-bit-direct)? Routed through
// IndexBinaryScalarQuantizer, which replaces the legacy
// IndexScalarQuantizer(QT_1bit_direct) path.
if (dynamic_cast<const faiss::cppcontrib::knowhere::IndexBinaryScalarQuantizer*>(index) != nullptr) {
return DataFormatEnum::bin1;
}

// is it sq? All SQ storage produced by knowhere now inherits from
// baseline faiss::IndexScalarQuantizer (Cosine/SQ4U wrappers,
// plain IndexHNSWSQ, and refine).
if (auto* index_sq = dynamic_cast<const faiss::IndexScalarQuantizer*>(index)) {
switch (index_sq->sq.qtype) {
case faiss::ScalarQuantizer::QT_bf16:
return DataFormatEnum::bf16;
case faiss::ScalarQuantizer::QT_fp16:
return DataFormatEnum::fp16;
case faiss::ScalarQuantizer::QT_8bit_direct_signed:
return DataFormatEnum::int8;
default:
return std::nullopt;
}
}

Expand Down Expand Up @@ -2068,9 +2075,8 @@ class BaseFaissRegularIndexHNSWFlatNode : public BaseFaissRegularIndexHNSWNode {
if (is_binary) {
if (metric.value() == faiss::MetricType::METRIC_Hamming ||
metric.value() == faiss::MetricType::METRIC_Jaccard) {
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWSQ>(
dim, faiss::cppcontrib::knowhere::ScalarQuantizer::QT_1bit_direct, hnsw_cfg.M.value(),
metric.value());
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWBinary>(dim, hnsw_cfg.M.value(),
metric.value());
} else {
LOG_KNOWHERE_ERROR_ << "Unsupported metric for binary data: " << hnsw_cfg.metric_type.value();
return Status::invalid_metric_type;
Expand All @@ -2082,14 +2088,13 @@ class BaseFaissRegularIndexHNSWFlatNode : public BaseFaissRegularIndexHNSWNode {
std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWFlatCosine>(dim, hnsw_cfg.M.value());
} else if (data_format == DataFormatEnum::fp16) {
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWSQCosine>(
dim, faiss::cppcontrib::knowhere::ScalarQuantizer::QT_fp16, hnsw_cfg.M.value());
dim, faiss::ScalarQuantizer::QT_fp16, hnsw_cfg.M.value());
} else if (data_format == DataFormatEnum::bf16) {
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWSQCosine>(
dim, faiss::cppcontrib::knowhere::ScalarQuantizer::QT_bf16, hnsw_cfg.M.value());
dim, faiss::ScalarQuantizer::QT_bf16, hnsw_cfg.M.value());
} else if (data_format == DataFormatEnum::int8) {
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWSQCosine>(
dim, faiss::cppcontrib::knowhere::ScalarQuantizer::QT_8bit_direct_signed,
hnsw_cfg.M.value());
dim, faiss::ScalarQuantizer::QT_8bit_direct_signed, hnsw_cfg.M.value());
} else {
LOG_KNOWHERE_ERROR_ << "Unsupported metric type: " << hnsw_cfg.metric_type.value();
return Status::invalid_metric_type;
Expand All @@ -2100,16 +2105,13 @@ class BaseFaissRegularIndexHNSWFlatNode : public BaseFaissRegularIndexHNSWNode {
dim, hnsw_cfg.M.value(), metric.value());
} else if (data_format == DataFormatEnum::fp16) {
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWSQ>(
dim, faiss::cppcontrib::knowhere::ScalarQuantizer::QT_fp16, hnsw_cfg.M.value(),
metric.value());
dim, faiss::ScalarQuantizer::QT_fp16, hnsw_cfg.M.value(), metric.value());
} else if (data_format == DataFormatEnum::bf16) {
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWSQ>(
dim, faiss::cppcontrib::knowhere::ScalarQuantizer::QT_bf16, hnsw_cfg.M.value(),
metric.value());
dim, faiss::ScalarQuantizer::QT_bf16, hnsw_cfg.M.value(), metric.value());
} else if (data_format == DataFormatEnum::int8) {
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWSQ>(
dim, faiss::cppcontrib::knowhere::ScalarQuantizer::QT_8bit_direct_signed,
hnsw_cfg.M.value(), metric.value());
dim, faiss::ScalarQuantizer::QT_8bit_direct_signed, hnsw_cfg.M.value(), metric.value());
} else {
LOG_KNOWHERE_ERROR_ << "Unsupported metric type: " << hnsw_cfg.metric_type.value();
return Status::invalid_metric_type;
Expand Down Expand Up @@ -2548,7 +2550,7 @@ class BaseFaissRegularIndexHNSWSQNode : public BaseFaissRegularIndexHNSWNode {

// create an index
const bool is_cosine = IsMetricType(hnsw_cfg.metric_type.value(), metric::COSINE);
const bool is_sq4u = sq_type.value() == faiss::cppcontrib::knowhere::ScalarQuantizer::QT_4bit_uniform;
const bool is_sq4u = sq_type.value() == faiss::ScalarQuantizer::QT_4bit_uniform;

// should refine be used?
std::unique_ptr<faiss::Index> final_index;
Expand All @@ -2570,6 +2572,17 @@ class BaseFaissRegularIndexHNSWSQNode : public BaseFaissRegularIndexHNSWNode {
} else {
hnsw_index = std::make_unique<faiss::cppcontrib::knowhere::IndexHNSWSQ>(
dim, sq_type.value(), hnsw_cfg.M.value(), metric.value());
// QT_4bit_uniform + L2 benefits from quantile-based range
// estimation. This used to be hard-coded inside the fork
// IndexScalarQuantizer ctor; moved here so that ctor is
// behaviorally equivalent to baseline.
if (is_sq4u) {
auto* idx_sq = dynamic_cast<faiss::IndexScalarQuantizer*>(hnsw_index->storage);
if (idx_sq != nullptr) {
idx_sq->sq.rangestat = faiss::ScalarQuantizer::RS_quantiles;
idx_sq->sq.rangestat_arg = 0.01;
}
}
}

hnsw_index->hnsw.efConstruction = hnsw_cfg.efConstruction.value();
Expand Down
12 changes: 6 additions & 6 deletions src/index/ivf/ivf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -507,19 +507,19 @@ to_index_flat(std::unique_ptr<faiss::cppcontrib::knowhere::IndexFlat>&& index) {
return std::make_unique<faiss::cppcontrib::knowhere::IndexFlat>(std::move(*index));
}

expected<faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType>
expected<faiss::ScalarQuantizer::QuantizerType>
get_ivf_sq_quantizer_type(int code_size) {
switch (code_size) {
case 4:
return faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_4bit;
return faiss::ScalarQuantizer::QuantizerType::QT_4bit;
case 6:
return faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_6bit;
return faiss::ScalarQuantizer::QuantizerType::QT_6bit;
case 8:
return faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_8bit;
return faiss::ScalarQuantizer::QuantizerType::QT_8bit;
case 16:
return faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_fp16;
return faiss::ScalarQuantizer::QuantizerType::QT_fp16;
default:
return expected<faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType>::Err(
return expected<faiss::ScalarQuantizer::QuantizerType>::Err(
Status::invalid_args, fmt::format("current code size {} not in (4, 6, 8, 16)", code_size));
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/index/ivf/ivf_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -196,15 +196,15 @@ IndexIvfFactory::create_for_sq(faiss::cppcontrib::knowhere::IndexFlat* qzr_raw_p

// create IndexIVFSQ
// Index does not own qzr
faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType quantizer_type;
faiss::ScalarQuantizer::QuantizerType quantizer_type;
// ivf_sq_cfg.sq_type.value() has already been guaranteed to be legal in CheckAndAdjust
std::string quantizer_type_tolower = str_to_lower(ivf_sq_cfg.sq_type.value());
if (quantizer_type_tolower == "sq4") {
quantizer_type = faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_4bit;
quantizer_type = faiss::ScalarQuantizer::QuantizerType::QT_4bit;
} else if (quantizer_type_tolower == "sq6") {
quantizer_type = faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_6bit;
quantizer_type = faiss::ScalarQuantizer::QuantizerType::QT_6bit;
} else {
quantizer_type = faiss::cppcontrib::knowhere::ScalarQuantizer::QuantizerType::QT_8bit;
quantizer_type = faiss::ScalarQuantizer::QuantizerType::QT_8bit;
}
auto index = std::make_unique<faiss::cppcontrib::knowhere::IndexIVFScalarQuantizer>(qzr_raw_ptr, d, nlist,
quantizer_type, metric);
Expand Down
Loading