Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmake/libs/libfaiss.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ knowhere_file_glob(
thirdparty/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp
thirdparty/faiss/faiss/utils/distances_fused/simdlib_based.cpp
thirdparty/faiss/faiss/utils/simd_impl/distances_avx2.cpp
thirdparty/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp
thirdparty/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp
)
# combine files
Expand Down Expand Up @@ -113,6 +114,7 @@ knowhere_file_glob(
thirdparty/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp
thirdparty/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp
thirdparty/faiss/faiss/utils/simd_impl/distances_aarch64.cpp
thirdparty/faiss/faiss/utils/simd_impl/partitioning_neon.cpp
thirdparty/faiss/faiss/utils/simd_impl/rabitq_neon.cpp
)
# combine files
Expand Down
48 changes: 34 additions & 14 deletions thirdparty/faiss/.github/actions/build_cmake/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,27 +100,43 @@ runs:
sudo apt-get -qq update >/dev/null
sudo apt-get -qq install -y kmod wget gpg >/dev/null

# Download, prepare, and install the package signing key
mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

- name: Add rocm repository
if: inputs.rocm == 'ON'
shell: bash
run: |
# Get UBUNTU version name
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`

# Set ROCm version
ROCM_VERSION="6.2"

# Download, prepare, and install the package signing key
mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
rocm_baseurl="https://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
sudo mkdir -p /etc/apt/keyrings
wget -qO /tmp/rocm.gpg.key https://repo.radeon.com/rocm/rocm.gpg.key
echo "2de99e2354646a90d9903e2a669fc4e36b02c1bbff7075c481e12d7edab2c88b /tmp/rocm.gpg.key" | sha256sum --check

# Add rocm repository
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
echo "deb [arch=amd64] ${rocm_baseurl} ${UBUNTU_VERSION_NAME} main" | sudo tee /etc/apt/sources.list.d/rocm.list
sudo apt-get -qq update --allow-insecure-repositories >/dev/null
sudo apt-get -qq install -y --allow-unauthenticated \
"rocm-dev${ROCM_VERSION}" "rocm-utils${ROCM_VERSION}" \
"rocm-libs${ROCM_VERSION}" >/dev/null
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] ${rocm_baseurl} ${UBUNTU_VERSION_NAME} main" | sudo tee /etc/apt/sources.list.d/rocm.list

sudo apt-get -qq update >/dev/null
sudo apt-get -qq install -y \
"rocm-dev${ROCM_VERSION}" "rocm-utils${ROCM_VERSION}" "rocm-libs${ROCM_VERSION}" >/dev/null


- name: Pin BLAS/LAPACK versions
if: inputs.rocm == 'ON'
shell: bash
run: |
conda install -y \
"libblas=3.9.0=35_*" \
"libcblas=3.9.0=35_*" \
"liblapack=3.9.0=35_*"

# Fake presence of MI300-class accelerators (gfx942)
echo "gfx90a" | sudo tee /opt/rocm/bin/target.lst
echo "gfx942" | sudo tee /opt/rocm/bin/target.lst

# Cleanup
sudo apt-get -qq autoclean >/dev/null
Expand All @@ -135,10 +151,14 @@ runs:
sudo ln -s /lib/x86_64-linux-gnu/libc_nonshared.a /usr/lib64/libc_nonshared.a
sudo ln -s /usr/lib/x86_64-linux-gnu/libpthread.so.0 /lib64/libpthread.so.0
sudo ln -s $HOME/miniconda3/x86_64-conda-linux-gnu/sysroot/usr/lib64/libpthread_nonshared.a /usr/lib64/libpthread_nonshared.a
- name: Print GPU info
if: inputs.gpu == 'ON'
- name: Print NVIDIA GPU info
if: inputs.gpu == 'ON' && inputs.rocm != 'ON'
shell: bash
run: nvidia-smi
- name: Print AMD GPU info
if: inputs.gpu == 'ON' && inputs.rocm == 'ON'
shell: bash
run: rocm-smi
- name: Build all targets
shell: bash
run: |
Expand Down
23 changes: 23 additions & 0 deletions thirdparty/faiss/.github/workflows/build-pull-request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,29 @@ jobs:
uses: ./.github/actions/build_cmake
with:
gpu: ON
linux-x86_64-GPU-w-ROCm-cmake:
name: Linux x86_64 GPU w/ ROCm (cmake)
needs: linux-x86_64-cmake
runs-on: linux-amd-rocm-mi325-ubuntu-24
container:
image: ubuntu:24.04
options: --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --cap-add=SYS_ADMIN
steps:
- name: Container setup
run: |
if [ -f /.dockerenv ]; then
apt-get update && apt-get install -y sudo && apt-get install -y git
git config --global --add safe.directory '*'
else
echo 'Skipping. Current job is not running inside a container.'
fi
- name: Checkout
uses: actions/checkout@v4
- name: Build and Test (cmake)
uses: ./.github/actions/build_cmake
with:
gpu: ON
rocm: ON
linux-x86_64-GPU-w-CUVS-cmake:
name: Linux x86_64 GPU w/ cuVS (cmake)
needs: linux-x86_64-cmake
Expand Down
10 changes: 10 additions & 0 deletions thirdparty/faiss/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,13 @@ faiss/python/swigfaiss_avx2.swig
faiss/python/swigfaiss_avx512.swig
faiss/python/swigfaiss_avx512_spr.swig
faiss/python/swigfaiss_sve.swig

# Local intermediate build artifacts (CUDA 13.2 scripts)
/_build/
/_build_python_*/
/_libfaiss_stage/
/build_output/

# Python package build outputs
/dist/
/*.egg-info/
2 changes: 1 addition & 1 deletion thirdparty/faiss/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ outlined on that page and do not file a public issue.

* 4 spaces for indentation in C++ (no tabs)
* 80 character line length (both for C++ and Python)
* C++ language level: C++17
* C++ language level: C++20

## License

Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ $ make -C build demo_ivfpq_indexing_gpu
$ ./build/demos/demo_ivfpq_indexing_gpu
```

This produce the GPU code equivalent to the CPU `demo_ivfpq_indexing`. It also
This produces the GPU code equivalent to the CPU `demo_ivfpq_indexing`. It also
shows how to translate indexes from/to a GPU.

### A real-life benchmark
Expand Down
4 changes: 2 additions & 2 deletions thirdparty/faiss/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ The optional GPU implementation provides what is likely (as of March 2017) the f

The following are entry points for documentation:

- the full documentation can be found on the [wiki page](http://github.com/facebookresearch/faiss/wiki), including a [tutorial](https://github.com/facebookresearch/faiss/wiki/Getting-started), a [FAQ](https://github.com/facebookresearch/faiss/wiki/FAQ) and a [troubleshooting section](https://github.com/facebookresearch/faiss/wiki/Troubleshooting)
- the full documentation can be found on the [wiki page](https://github.com/facebookresearch/faiss/wiki), including a [tutorial](https://github.com/facebookresearch/faiss/wiki/Getting-started), a [FAQ](https://github.com/facebookresearch/faiss/wiki/FAQ) and a [troubleshooting section](https://github.com/facebookresearch/faiss/wiki/Troubleshooting)
- the [doxygen documentation](https://faiss.ai/) gives per-class information extracted from code comments
- to reproduce results from our research papers, [Polysemous codes](https://arxiv.org/abs/1609.01882) and [Billion-scale similarity search with GPUs](https://arxiv.org/abs/1702.08734), refer to the [benchmarks README](benchs/README.md). For [
Link and code: Fast indexing with graphs and compact regression codes](https://arxiv.org/abs/1804.09996), see the [link_and_code README](benchs/link_and_code)
Expand Down Expand Up @@ -82,7 +82,7 @@ For the GPU version of Faiss, please cite:

For public discussion of Faiss or for questions, visit https://github.com/facebookresearch/faiss/discussions.

We monitor the [issues page](http://github.com/facebookresearch/faiss/issues) of the repository.
We monitor the [issues page](https://github.com/facebookresearch/faiss/issues) of the repository.
You can report bugs, ask questions, etc.

## Legal
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/benchs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ The run produces two warnings:

- the add() function complains that there is an inefficient memory allocation, but this is a concern only when it happens often, and we are not benchmarking the add time anyways.

To index small datasets, it is more efficient to use a `GpuIVFFlat`, which just stores the full vectors in the inverted lists. We did not mention this in the the paper because it is not as scalable. To experiment with this setting, change the `index_factory` string from "IVF4096,PQ64" to "IVF16384,Flat". This gives:
To index small datasets, it is more efficient to use a `GpuIVFFlat`, which just stores the full vectors in the inverted lists. We did not mention this in the paper because it is not as scalable. To experiment with this setting, change the `index_factory` string from "IVF4096,PQ64" to "IVF16384,Flat". This gives:

```
nprobe= 1 0.025 s recalls= 0.4084 0.4105 0.4105
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/benchs/bench_gpu_1bn.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/bin/env python2
#! /usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/benchs/bench_vector_ops.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/bin/env python2
#! /usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/benchs/kmeans_mnist.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/bin/env python2
#! /usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/c_api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ endif()
if(NOT WIN32)
# Architecture mode to support AVX512 extensions available since Intel(R) Sapphire Rapids.
# Ref: https://networkbuilders.intel.com/solutionslibrary/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide
target_compile_options(faiss_c_avx512_spr PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-march=sapphirerapids -mtune=sapphirerapids>)
target_compile_options(faiss_c_avx512_spr PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vpopcntdq -mpopcnt -mavx512fp16 -mavx512bf16>)
else()
target_compile_options(faiss_c_avx512_spr PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
endif()
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/cmake/link_to_faiss_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ function(link_to_faiss_lib target)
if(NOT WIN32)
# Architecture mode to support AVX512 extensions available since Intel (R) Sapphire Rapids.
# Ref: https://networkbuilders.intel.com/solutionslibrary/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-march=sapphirerapids -mtune=sapphirerapids>)
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vpopcntdq -mpopcnt -mavx512fp16 -mavx512bf16>)
else()
target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
endif()
Expand Down
2 changes: 0 additions & 2 deletions thirdparty/faiss/contrib/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ The contrib directory contains helper modules for Faiss for various tasks.
The contrib directory gets compiled in the module faiss.contrib.
Note that although some of the modules may depend on additional modules (eg. GPU Faiss, pytorch, hdf5), they are not necessarily compiled in to avoid adding dependencies. It is the user's responsibility to provide them.

In contrib, we are progressively dropping python2 support.

## List of contrib modules

### rpc.py
Expand Down
72 changes: 72 additions & 0 deletions thirdparty/faiss/contrib/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,78 @@ def train_ivf_index_with_2level(index, xt, **args):
index.train(xt)


def balanced_assignment_with_penalties(
        x, centroids, alpha=0.03, num_iter=20, maxk=100):
    """
    Assign vectors x to centroids with a balance constraint.

    Iteratively adjusts per-cluster penalties so that oversized clusters
    become less attractive. At each iteration the penalized distance for
    cluster c is ``d(x, c)^2 + penalty_c^2`` and the penalty is updated as
    ``penalty_c *= (binsize_c / n_opt) ** alpha`` where ``n_opt = n / nc``.

    A single kNN call (with *maxk* neighbors) is done upfront; subsequent
    iterations only re-weight among those candidates, making the routine
    fast even for large datasets.

    Reference: "Balancing clusters to reduce response time variability in
    large scale image search", Tavenard et al., CBMI 2011.
    https://inria.hal.science/inria-00576886/document
    See also notebook N10159950.

    Args:
        x:         (n, d) float32 array of vectors to assign.
        centroids: (nc, d) float32 array of cluster centroids.
        alpha:     exponent that controls how aggressively penalties grow.
                   Higher values yield more balanced clusters at the cost
                   of higher MSE. Typical range: 0.01 - 0.1.
        num_iter:  number of penalty-update iterations. Must be >= 1.
        maxk:      number of nearest centroids to consider per vector.
                   Values larger than nc are clamped to nc.

    Returns:
        assign: (n,) int64 array of centroid indices.
        stats:  dict with keys

            - *imf*: imbalance factor (1.0 = perfectly balanced)
            - *mse*: mean squared error of the assignment
            - *binsize_min*, *binsize_max*: smallest / largest cluster
            - *penalty_min*, *penalty_max*: penalty value range
            - *alpha*: the alpha value used

    Raises:
        ValueError: if centroids is empty or num_iter < 1.
    """
    nc = len(centroids)
    n = len(x)

    # Fail early with a clear message instead of a ZeroDivisionError (nc == 0)
    # or an unbound `assign` / `binsizes` after an empty loop (num_iter == 0).
    if nc == 0:
        raise ValueError("centroids must contain at least one centroid")
    if num_iter < 1:
        raise ValueError("num_iter must be >= 1, got %r" % (num_iter,))

    nopt = n / nc  # target bin size

    # There are only nc candidates to choose from, so never ask for more.
    maxk = min(maxk, nc)

    # We assign to the top-maxk clusters. The final assignment will pick
    # among these clusters.
    full_d2, full_assign = faiss.knn(x, centroids, maxk)

    # scalar penalty for each cluster
    penalties = np.ones(nc, dtype=np.float32)

    for _ in range(num_iter):
        # compute penalized assignment
        penalties2 = penalties ** 2
        full_d2_penalized = full_d2 + penalties2[full_assign]
        a0 = full_d2_penalized.argmin(axis=1)
        assign = np.take_along_axis(
            full_assign, a0[:, None], axis=1).ravel()
        binsizes = np.bincount(assign, minlength=nc)
        # NOTE(review): a cluster that ends up empty gets a factor of 0 here
        # (for alpha > 0), so its penalty becomes 0 and can no longer be
        # raised by later multiplicative updates -- confirm this is intended.
        penalties *= (binsizes / nopt) ** alpha

    # The returned assignment comes from the last iteration; the reported
    # penalty range is the one obtained after that iteration's update.
    stats = dict(
        alpha=alpha,
        imf=imbalance_factor(nc, assign),
        # recompute MSE on the true (unpenalized) distances
        mse=((x - centroids[assign]) ** 2).sum(1).mean(),
        binsize_min=int(binsizes.min()),
        binsize_max=int(binsizes.max()),
        penalty_min=penalties.min(),
        penalty_max=penalties.max(),
    )

    return assign, stats


###############################################################################
# K-means implementation in Python
#
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/faiss/faiss/AutoTune.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ bool OperatingPoints::add(
break;
}
}
assert(i < a.size());
FAISS_THROW_IF_NOT(i < a.size());
if (t < a[i].t) {
if (a[i].perf == perf) {
a[i] = op;
Expand Down
11 changes: 6 additions & 5 deletions thirdparty/faiss/faiss/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ set(FAISS_SIMD_AVX2_SRC
impl/scalar_quantizer/sq-avx2.cpp
impl/approx_topk/avx2.cpp
utils/simd_impl/distances_avx2.cpp
utils/simd_impl/partitioning_avx2.cpp
utils/distances_fused/simdlib_based.cpp
utils/simd_impl/rabitq_avx2.cpp
)
Expand All @@ -32,6 +33,7 @@ set(FAISS_SIMD_NEON_SRC
impl/scalar_quantizer/sq-neon.cpp
impl/approx_topk/neon.cpp
utils/simd_impl/distances_aarch64.cpp
utils/simd_impl/partitioning_neon.cpp
utils/distances_fused/simdlib_based_neon.cpp
utils/simd_impl/rabitq_neon.cpp
)
Expand Down Expand Up @@ -118,7 +120,6 @@ set(FAISS_SRC
impl/AdditiveQuantizer.cpp
impl/RaBitQuantizer.cpp
impl/RaBitQuantizerMultiBit.cpp
impl/RaBitQStats.cpp
impl/RaBitQUtils.cpp
impl/ResidualQuantizer.cpp
impl/LocalSearchQuantizer.cpp
Expand Down Expand Up @@ -156,6 +157,7 @@ set(FAISS_SRC
utils/simd_levels.cpp
utils/distances_fused/distances_fused.cpp
factory_tools.cpp
# build.cpp excluded due to build errors on Windows
)

if(FAISS_ENABLE_SVS)
Expand Down Expand Up @@ -251,7 +253,6 @@ set(FAISS_HEADERS
impl/Quantizer.h
impl/RaBitQuantizer.h
impl/RaBitQuantizerMultiBit.h
impl/RaBitQStats.h
impl/RaBitQUtils.h
impl/ResidualQuantizer.h
impl/ResultHandler.h
Expand Down Expand Up @@ -298,7 +299,6 @@ set(FAISS_HEADERS
utils/NeuralNet.h
utils/WorkerThread.h
utils/distances.h
utils/distances_dispatch.h
utils/extra_distances.h
utils/fp16-fp16c.h
utils/fp16-inl.h
Expand Down Expand Up @@ -405,7 +405,7 @@ endif()
if(NOT WIN32)
# Architecture mode to support AVX512 extensions available since Intel(R) Sapphire Rapids.
# Ref: https://networkbuilders.intel.com/solutionslibrary/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide
target_compile_options(faiss_avx512_spr PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-march=sapphirerapids -mtune=sapphirerapids>)
target_compile_options(faiss_avx512_spr PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vpopcntdq -mpopcnt -mavx512fp16 -mavx512bf16>)
else()
target_compile_options(faiss_avx512_spr PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
# we need bigobj for the swig wrapper
Expand Down Expand Up @@ -454,7 +454,8 @@ if(FAISS_OPT_LEVEL STREQUAL "dd")
target_compile_definitions(faiss PRIVATE FAISS_ENABLE_DD)
# Architecture-specific SIMD definitions for Dynamic Dispatch
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64|amd64|AMD64)")
target_compile_definitions(faiss PRIVATE COMPILE_SIMD_AVX2 COMPILE_SIMD_AVX512)
target_compile_definitions(faiss PRIVATE
COMPILE_SIMD_AVX2 COMPILE_SIMD_AVX512 COMPILE_SIMD_AVX512_SPR)
# Baseline flags for common files (prevents auto-vectorization)
target_compile_options(faiss PRIVATE
$<$<COMPILE_LANGUAGE:CXX>:-mpopcnt -msse4 -mno-avx -mno-avx2>)
Expand Down
3 changes: 2 additions & 1 deletion thirdparty/faiss/faiss/Clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ void compute_centroids(

for (size_t i = 0; i < n; i++) {
int64_t ci = assign[i];
assert(ci >= 0 && ci < k + k_frozen);
FAISS_THROW_IF_NOT_MSG(
ci >= 0 && ci < k + k_frozen, "invalid cluster assignment");
ci -= k_frozen;
if (ci >= static_cast<int64_t>(c0) &&
ci < static_cast<int64_t>(c1)) {
Expand Down
Loading
Loading