Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 104 additions & 44 deletions benchmark/hdf5/benchmark_float_qps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,30 @@
#include "knowhere/comp/knowhere_config.h"
#include "knowhere/dataset.h"

/// Reads environment variable `name` and parses its complete value as a `T`.
///
/// Parsing goes through `operator>>` on a `std::istringstream`, so leading
/// whitespace is skipped. The value is accepted only when nothing but
/// whitespace follows the parsed token; inputs such as "64k" or "8.5" (for an
/// int) are rejected instead of being silently truncated.
///
/// @param name  name of the environment variable to look up.
/// @param out   receives the parsed value; left untouched when false is returned.
/// @return true if the variable is set, non-empty and parses completely,
///         false otherwise.
template <typename T>
bool
get_env(const char* name, T& out) {
    const char* val = std::getenv(name);
    if (val == nullptr || val[0] == '\0') {
        return false;
    }
    std::istringstream iss(val);
    // Parse into a temporary so a failed extraction cannot clobber `out`.
    T parsed{};
    if (!(iss >> parsed)) {
        return false;
    }
    // Extracting a char skips whitespace first; if it succeeds, something
    // other than whitespace follows the value and the input is malformed.
    char trailing = '\0';
    if (iss >> trailing) {
        return false;
    }
    out = parsed;
    return true;
}

/// Specialization for double (handles "5" / "5.0" / "1e-3").
///
/// Uses `std::strtod`, which never throws, so a malformed value makes this
/// function return false exactly like the primary template does. Trailing
/// whitespace is allowed; any other trailing character rejects the value.
/// Out-of-range magnitudes are not treated as errors here: `std::strtod`
/// reports them through its return value (e.g. +/-HUGE_VAL on overflow).
///
/// @param name  name of the environment variable to look up.
/// @param out   receives the parsed value; left untouched when false is returned.
/// @return true if the variable is set, non-empty and is a complete
///         floating-point literal, false otherwise.
template <>
bool
get_env<double>(const char* name, double& out) {
    const char* val = std::getenv(name);
    if (val == nullptr || val[0] == '\0') {
        return false;
    }
    char* end = nullptr;
    const double parsed = std::strtod(val, &end);
    if (end == val) {
        // No characters were consumed: the value is not a number at all.
        return false;
    }
    while (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r' || *end == '\f' || *end == '\v') {
        ++end;
    }
    if (*end != '\0') {
        return false;
    }
    out = parsed;
    return true;
}

const int32_t GPU_DEVICE_ID = 0;

class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
Expand Down Expand Up @@ -297,8 +321,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
recall = CalcRecall(result.value()->GetIds(), nq_, topk_);
recall_map[search_list_size] = recall;
printf(
"[%0.3f s] iterate DISKANN param for expected recall %.4f: search_list_size=%4d, k=%d, R@=%.4f\n",
get_time_diff(), expected_recall, search_list_size, topk_, recall);
"[%0.3f s] iterate DISKANN param for expected recall %.4f: search_list_size=%4d, k=%d, R@=%.4f ram "
"is %ld bytes\n",
get_time_diff(), expected_recall, search_list_size, topk_, recall, index_.value().Size());
std::fflush(stdout);
if (std::abs(recall - expected_recall) <= 0.0001) {
return {search_list_size, recall_map[search_list_size]};
Expand Down Expand Up @@ -363,14 +388,23 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
void
SetUp() override {
T0_ = elapsed();
set_ann_test_name("sift-128-euclidean");
const char* dataset = std::getenv("DATASET");
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd use BENCH_DATASET rather than DATASET, so that the environment variables are namespaced and their names are clearly tied to the benchmark code. DATASET is too generic a name.

Same for others

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

if (dataset == nullptr || dataset[0] == '\0') {
set_ann_test_name("sift-128-euclidean");
} else {
set_ann_test_name(dataset);
}
parse_ann_test_name();
load_hdf5_data<knowhere::fp32>();
int num_search_threads = 48;
int i;
if (get_env("NUM_SEARCH_THREADS", i))
num_search_threads = i;

cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2);
knowhere::KnowhereConfig::SetBuildThreadPoolSize(default_build_thread_num);
knowhere::KnowhereConfig::SetSearchThreadPoolSize(default_search_thread_num);
knowhere::KnowhereConfig::SetBuildThreadPoolSize(48);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep these numbers as they are, and add environment overrides if needed. This applies only to these two thread counts. We also run these benchmarks internally.
Alternatively, please introduce a new benchmark .cpp file that is better suited to your needs.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

knowhere::KnowhereConfig::SetSearchThreadPoolSize(num_search_threads);
printf("faiss::distance_compute_blas_threshold: %ld\n", knowhere::KnowhereConfig::GetBlasThreshold());
#ifdef KNOWHERE_WITH_GPU
knowhere::KnowhereConfig::InitGPUResource(GPU_DEVICE_ID, 2);
Expand All @@ -390,9 +424,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
}

protected:
const int32_t topk_ = 100;
const std::vector<float> EXPECTED_RECALLs_ = {0.8, 0.95};
const std::vector<int32_t> THREAD_NUMs_ = {1, 2, 4, 8};
int32_t topk_ = 100;
const std::vector<float> EXPECTED_RECALLs_ = {0.9};
const std::vector<int32_t> THREAD_NUMs_ = {48};

// IVF index params
const std::vector<int32_t> NLISTs_ = {1024};
Expand Down Expand Up @@ -523,18 +557,43 @@ TEST_F(Benchmark_float_qps, TEST_SCANN) {
}

#ifdef KNOWHERE_WITH_DISKANN

TEST_F(Benchmark_float_qps, TEST_DISKANN) {
index_type_ = knowhere::IndexEnum::INDEX_DISKANN;

knowhere::Json conf = cfg_;
conf[knowhere::meta::DIM] = dim_;
conf[knowhere::meta::INDEX_PREFIX] = (metric_type_ == knowhere::metric::L2 ? kL2IndexPrefix : kIPIndexPrefix);
conf[knowhere::meta::DATA_PATH] = kRawDataPath;
conf[knowhere::indexparam::MAX_DEGREE] = 56;
conf[knowhere::indexparam::MAX_DEGREE] = 64;
conf[knowhere::indexparam::PQ_CODE_BUDGET_GB] = sizeof(float) * dim_ * nb_ * 0.125 / (1024 * 1024 * 1024);
conf[knowhere::indexparam::BUILD_DRAM_BUDGET_GB] = 32.0;
conf[knowhere::indexparam::SEARCH_CACHE_BUDGET_GB] = 0;
conf[knowhere::indexparam::BEAMWIDTH] = 8;
conf[knowhere::indexparam::DISK_PQ_DIMS] = 0;
fs::remove_all(kDir);
fs::remove(kDir);
// Overrides
int i;
double d;

if (get_env("MAX_DEGREE", i))
conf[knowhere::indexparam::MAX_DEGREE] = i;

if (get_env("PQ_CODE_BUDGET_GB", d))
conf[knowhere::indexparam::PQ_CODE_BUDGET_GB] = d;

if (get_env("BUILD_DRAM_BUDGET_GB", d))
conf[knowhere::indexparam::BUILD_DRAM_BUDGET_GB] = d;

if (get_env("BEAMWIDTH", i))
conf[knowhere::indexparam::BEAMWIDTH] = i;

if (get_env("DISK_PQ_DIMS", i))
conf[knowhere::indexparam::DISK_PQ_DIMS] = i;

if (get_env("TOPK", i))
topk_ = i;

fs::create_directory(kDir);
fs::create_directory(kL2IndexDir);
Expand All @@ -554,60 +613,60 @@ TEST_F(Benchmark_float_qps, TEST_DISKANN) {
knowhere::BinarySet binset;
index_.value().Serialize(binset);
index_.value().Deserialize(binset, conf);

printf("index size in ram is %ld bytes. \n", index_.value().Size());
test_diskann<knowhere::fp32>(conf);
}

TEST_F(Benchmark_float_qps, TEST_AISAQ_P) {
TEST_F(Benchmark_float_qps, TEST_AISAQ) {
index_type_ = knowhere::IndexEnum::INDEX_AISAQ;

knowhere::Json conf = cfg_;
conf[knowhere::meta::DIM] = dim_;
conf[knowhere::meta::INDEX_PREFIX] = (metric_type_ == knowhere::metric::L2 ? kL2IndexPrefix : kIPIndexPrefix);
conf[knowhere::meta::DATA_PATH] = kRawDataPath;
conf[knowhere::indexparam::MAX_DEGREE] = 56;
conf[knowhere::indexparam::INLINE_PQ] = 56;
conf[knowhere::indexparam::MAX_DEGREE] = 64;
conf[knowhere::indexparam::INLINE_PQ] = -1;
conf[knowhere::indexparam::PQ_CODE_BUDGET_GB] = sizeof(float) * dim_ * nb_ * 0.125 / (1024 * 1024 * 1024);
conf[knowhere::indexparam::BUILD_DRAM_BUDGET_GB] = 32.0;
conf[knowhere::indexparam::SEARCH_CACHE_BUDGET_GB] = 0;
conf[knowhere::indexparam::BUILD_DRAM_BUDGET_GB] = 5.0;
conf[knowhere::indexparam::BEAMWIDTH] = 8;
fs::remove_all(kDir);
fs::remove(kDir);
conf[knowhere::indexparam::NUM_ENTRY_POINTS] = 1000;
conf[knowhere::indexparam::DISK_PQ_DIMS] = 0;
// Overrides
int i;
double d;

fs::create_directory(kDir);
fs::create_directory(kL2IndexDir);
fs::create_directory(kIPIndexDir);
if (get_env("MAX_DEGREE", i))
conf[knowhere::indexparam::MAX_DEGREE] = i;

WriteRawDataToDisk(kRawDataPath, (const float*)xb_, (const uint32_t)nb_, (const uint32_t)dim_);
if (get_env("INLINE_PQ", i))
conf[knowhere::indexparam::INLINE_PQ] = i;

std::shared_ptr<milvus::FileManager> file_manager = std::make_shared<milvus::LocalFileManager>();
auto diskann_index_pack = knowhere::Pack(file_manager);
if (get_env("PQ_CODE_BUDGET_GB", d))
conf[knowhere::indexparam::PQ_CODE_BUDGET_GB] = d;

index_ = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(
index_type_, knowhere::Version::GetCurrentVersion().VersionNumber(), diskann_index_pack);
printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_);
knowhere::DataSetPtr ds_ptr = nullptr;
index_.value().Build(ds_ptr, conf);
if (get_env("BUILD_DRAM_BUDGET_GB", d))
conf[knowhere::indexparam::BUILD_DRAM_BUDGET_GB] = d;

knowhere::BinarySet binset;
index_.value().Serialize(binset);
index_.value().Deserialize(binset, conf);
if (get_env("BEAMWIDTH", i))
conf[knowhere::indexparam::BEAMWIDTH] = i;

test_diskann<knowhere::fp32>(conf);
}
if (get_env("NUM_ENTRY_POINTS", i))
conf[knowhere::indexparam::NUM_ENTRY_POINTS] = i;

TEST_F(Benchmark_float_qps, TEST_AISAQ_S) {
index_type_ = knowhere::IndexEnum::INDEX_AISAQ;
if (get_env("PQ_CACHE_SIZE", i))
conf[knowhere::indexparam::PQ_CACHE_SIZE] = i;

knowhere::Json conf = cfg_;
conf[knowhere::meta::DIM] = dim_;
conf[knowhere::meta::INDEX_PREFIX] = (metric_type_ == knowhere::metric::L2 ? kL2IndexPrefix : kIPIndexPrefix);
conf[knowhere::meta::DATA_PATH] = kRawDataPath;
conf[knowhere::indexparam::MAX_DEGREE] = 56;
conf[knowhere::indexparam::INLINE_PQ] = -1;
conf[knowhere::indexparam::PQ_CODE_BUDGET_GB] = sizeof(float) * dim_ * nb_ * 0.125 / (1024 * 1024 * 1024);
conf[knowhere::indexparam::BUILD_DRAM_BUDGET_GB] = 32.0;
conf[knowhere::indexparam::BEAMWIDTH] = 8;
if (get_env("PQ_READ_PAGE_CACHE_SIZE", i))
conf[knowhere::indexparam::PQ_READ_PAGE_CACHE_SIZE] = i;

if (get_env("REARRANGE", i))
conf[knowhere::indexparam::REARRANGE] = i == 0 ? false : true;

if (get_env("DISK_PQ_DIMS", i))
conf[knowhere::indexparam::DISK_PQ_DIMS] = i;

if (get_env("TOPK", i))
topk_ = i;

fs::remove_all(kDir);
fs::remove(kDir);
Expand All @@ -629,6 +688,7 @@ TEST_F(Benchmark_float_qps, TEST_AISAQ_S) {
knowhere::BinarySet binset;
index_.value().Serialize(binset);
index_.value().Deserialize(binset, conf);
printf("index size in ram is %ld bytes. \n", index_.value().Size());

test_diskann<knowhere::fp32>(conf);
}
Expand Down
20 changes: 15 additions & 5 deletions src/common/comp/knowhere_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ KnowhereConfig::ShowVersion() {
#ifdef KNOWHERE_WITH_CUVS
msg = msg + "-gpu";
#endif
#ifdef KNOWHERE_WITH_SVS
msg = msg + "-svs";
#endif
#else
msg = msg + " unknown";
#endif
Expand Down Expand Up @@ -145,7 +142,10 @@ KnowhereConfig::GetClusteringType() {
bool
KnowhereConfig::SetAioContextPool(size_t num_ctx) {
#ifdef KNOWHERE_WITH_DISKANN
return AioContextPool::InitGlobalAioPool(num_ctx, default_max_events);
size_t max_events = (num_ctx > 0) ? std::min(default_max_nr / num_ctx, default_max_events) : default_max_events;
LOG_KNOWHERE_INFO_ << "InitGlobalAioPool with " << num_ctx << " contexts and " << max_events
<< " events per context";
return AioContextPool::InitGlobalAioPool(num_ctx, max_events);
#endif
return true;
}
Expand Down Expand Up @@ -221,11 +221,21 @@ KnowhereConfig::SetRaftMemPool(size_t init_size, size_t max_size) {
cuvs_knowhere::initialize_raft(config);
#endif
}

void
KnowhereConfig::SetRaftMemPool() {
// Overload for default values
#ifdef KNOWHERE_WITH_CUVS
int count = 0;
auto status = cudaGetDeviceCount(&count);
if (status != cudaSuccess) {
LOG_KNOWHERE_INFO_ << cudaGetErrorString(status);
return;
}
if (count < 1) {
LOG_KNOWHERE_INFO_ << "GPU not available";
return;
}

auto config = cuvs_knowhere::raft_configuration{};
cuvs_knowhere::initialize_raft(config);
#endif
Expand Down
7 changes: 7 additions & 0 deletions src/index/diskann/aisaq_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ class AisaqConfig : public DiskANNConfig {
KNOWHERE_DECLARE_CONFIG(AisaqConfig) {
// Block AiSAQ parameters

KNOWHERE_CONFIG_DECLARE_FIELD(beamwidth)
.description("the maximum number of IO requests each query will issue per iteration of search code.")
.set_default(diskann::defaults::DEFAULT_AISAQ_BEAMWIDTH)
.set_range(1, diskann::defaults::MAX_AISAQ_BEAMWIDTH)
.for_search()
.for_range_search()
.for_iterator();
KNOWHERE_CONFIG_DECLARE_FIELD(vectors_beamwidth)
.set_default(1)
.set_range(1, diskann::defaults::MAX_AISAQ_VECTORS_BEAMWIDTH)
Expand Down
42 changes: 35 additions & 7 deletions src/index/diskann/diskann.cc
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,8 @@ class DiskANNIndexNode : public IndexNode {
}

uint64_t
GetCachedNodeNum(const float cache_dram_budget, const uint64_t data_dim, const uint64_t max_degree);
GetCachedNodeNum(const float cache_dram_budget, const uint64_t data_dim, size_t chunk_size,
const uint64_t max_degree);

std::string index_prefix_;
mutable std::mutex preparation_lock_;
Expand Down Expand Up @@ -380,8 +381,14 @@ std::vector<std::string>
GetOptionalFilenames(const std::string& prefix) {
std::vector<std::string> filenames;
auto disk_index_filename = diskann::get_disk_index_filename(prefix);
auto disk_pq_pivots_file_name = diskann::get_disk_index_pq_pivots_filename(disk_index_filename);
filenames.push_back(diskann::get_disk_index_centroids_filename(disk_index_filename));
filenames.push_back(diskann::get_disk_index_medoids_filename(disk_index_filename));
filenames.push_back(disk_pq_pivots_file_name);
filenames.push_back(diskann::get_pq_rearrangement_perm_filename(disk_pq_pivots_file_name));
filenames.push_back(diskann::get_pq_chunk_offsets_filename(disk_pq_pivots_file_name));
filenames.push_back(diskann::get_pq_centroid_filename(disk_pq_pivots_file_name));

filenames.push_back(diskann::get_cached_nodes_file(prefix));
filenames.push_back(diskann::get_emb_list_offset_file(prefix));
return filenames;
Expand Down Expand Up @@ -459,8 +466,18 @@ DiskANNIndexNode<DataType>::Build(const DataSetPtr dataset, std::shared_ptr<Conf
return diskann::Metric::INNER_PRODUCT;
}
}();
auto num_nodes_to_cache =
GetCachedNodeNum(build_conf.search_cache_budget_gb.value(), dim, build_conf.max_degree.value());
uint64_t num_nodes_to_cache;
if (build_conf.disk_pq_dims.value() > 0) {
uint64_t disk_pq_nchunks = dim;
if (std::cmp_less(build_conf.disk_pq_dims.value(), dim)) {
disk_pq_nchunks = build_conf.disk_pq_dims.value();
}
num_nodes_to_cache = GetCachedNodeNum(build_conf.search_cache_budget_gb.value(), disk_pq_nchunks, sizeof(_u8),
build_conf.max_degree.value());
} else {
num_nodes_to_cache = GetCachedNodeNum(build_conf.search_cache_budget_gb.value(), dim, sizeof(DataType),
build_conf.max_degree.value());
}
diskann::BuildConfig diskann_internal_build_config{data_path,
index_prefix_,
diskann_metric,
Expand Down Expand Up @@ -648,8 +665,19 @@ DiskANNIndexNode<DataType>::Deserialize(const BinarySet& binset, std::shared_ptr
diskann::load_bin<uint32_t>(cached_nodes_file, cached_nodes_ids, num_nodes, nodes_id_dim);
node_list.assign(cached_nodes_ids.get(), cached_nodes_ids.get() + num_nodes);
} else {
auto num_nodes_to_cache = GetCachedNodeNum(prep_conf.search_cache_budget_gb.value(),
pq_flash_index_->get_data_dim(), pq_flash_index_->get_max_degree());
uint64_t num_nodes_to_cache = 0;
if (prep_conf.disk_pq_dims.value() > 0) {
uint64_t disk_pq_nchunks = pq_flash_index_->get_data_dim();
if (prep_conf.disk_pq_dims.value() < static_cast<int>(pq_flash_index_->get_data_dim())) {
disk_pq_nchunks = prep_conf.disk_pq_dims.value();
}
num_nodes_to_cache = GetCachedNodeNum(prep_conf.search_cache_budget_gb.value(), disk_pq_nchunks,
sizeof(_u8), prep_conf.max_degree.value());
} else {
num_nodes_to_cache =
GetCachedNodeNum(prep_conf.search_cache_budget_gb.value(), pq_flash_index_->get_data_dim(),
sizeof(DataType), prep_conf.max_degree.value());
}
if (num_nodes_to_cache > pq_flash_index_->get_num_points() / 3) {
LOG_KNOWHERE_ERROR_ << "Failed to generate cache, num_nodes_to_cache(" << num_nodes_to_cache
<< ") is larger than 1/3 of the total data number.";
Expand Down Expand Up @@ -1030,9 +1058,9 @@ DiskANNIndexNode<DataType>::GetIndexMeta(std::unique_ptr<Config> cfg) const {

template <typename DataType>
uint64_t
DiskANNIndexNode<DataType>::GetCachedNodeNum(const float cache_dram_budget, const uint64_t data_dim,
DiskANNIndexNode<DataType>::GetCachedNodeNum(const float cache_dram_budget, const uint64_t data_dim, size_t chunk_size,
const uint64_t max_degree) {
uint32_t one_cached_node_budget = (max_degree + 1) * sizeof(unsigned) + sizeof(DataType) * data_dim;
uint32_t one_cached_node_budget = (max_degree + 1) * sizeof(unsigned) + chunk_size * data_dim;
auto num_nodes_to_cache =
static_cast<uint64_t>(1024 * 1024 * 1024 * cache_dram_budget) / (one_cached_node_budget * kCacheExpansionRate);
return num_nodes_to_cache;
Expand Down
Loading
Loading