|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +#![allow(clippy::unwrap_used)] |
| 5 | + |
| 6 | +use std::fmt; |
| 7 | +use std::sync::LazyLock; |
| 8 | + |
| 9 | +use divan::Bencher; |
| 10 | +use vortex_array::Canonical; |
| 11 | +use vortex_array::IntoArray; |
| 12 | +use vortex_array::VortexSessionExecute; |
| 13 | +use vortex_array::arrays::ConstantArray; |
| 14 | +use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; |
| 15 | +use vortex_array::scalar_fn::fns::like::Like; |
| 16 | +use vortex_array::scalar_fn::fns::like::LikeOptions; |
| 17 | +use vortex_array::session::ArraySession; |
| 18 | +use vortex_fsst::FSSTArray; |
| 19 | +use vortex_fsst::test_utils::NUM_STRINGS; |
| 20 | +use vortex_fsst::test_utils::make_fsst_clickbench_urls; |
| 21 | +use vortex_fsst::test_utils::make_fsst_emails; |
| 22 | +use vortex_fsst::test_utils::make_fsst_file_paths; |
| 23 | +use vortex_fsst::test_utils::make_fsst_json_strings; |
| 24 | +use vortex_fsst::test_utils::make_fsst_log_lines; |
| 25 | +use vortex_fsst::test_utils::make_fsst_rare_match; |
| 26 | +use vortex_fsst::test_utils::make_fsst_short_urls; |
| 27 | +use vortex_session::VortexSession; |
| 28 | + |
| 29 | +fn main() { |
| 30 | + divan::main(); |
| 31 | +} |
| 32 | + |
| 33 | +static SESSION: LazyLock<VortexSession> = |
| 34 | + LazyLock::new(|| VortexSession::empty().with::<ArraySession>()); |
| 35 | + |
| 36 | +const N: usize = NUM_STRINGS; |
| 37 | + |
| 38 | +static FSST_URLS: LazyLock<FSSTArray> = LazyLock::new(|| make_fsst_short_urls(N)); |
| 39 | +static FSST_CB_URLS: LazyLock<FSSTArray> = LazyLock::new(|| make_fsst_clickbench_urls(N)); |
| 40 | +static FSST_LOG_LINES: LazyLock<FSSTArray> = LazyLock::new(|| make_fsst_log_lines(N)); |
| 41 | +static FSST_JSON_STRINGS: LazyLock<FSSTArray> = LazyLock::new(|| make_fsst_json_strings(N)); |
| 42 | +static FSST_FILE_PATHS: LazyLock<FSSTArray> = LazyLock::new(|| make_fsst_file_paths(N)); |
| 43 | +static FSST_EMAILS: LazyLock<FSSTArray> = LazyLock::new(|| make_fsst_emails(N)); |
| 44 | +static FSST_RARE_MATCH: LazyLock<FSSTArray> = LazyLock::new(|| make_fsst_rare_match(N)); |
| 45 | + |
/// Selects which pre-built FSST dataset a benchmark run operates on.
enum Dataset {
    /// Backed by `FSST_URLS`.
    Urls,
    /// Backed by `FSST_CB_URLS`.
    Cb,
    /// Backed by `FSST_LOG_LINES`.
    Log,
    /// Backed by `FSST_JSON_STRINGS`.
    Json,
    /// Backed by `FSST_FILE_PATHS`.
    Path,
    /// Backed by `FSST_EMAILS`.
    Email,
    /// Backed by `FSST_RARE_MATCH`.
    Rare,
}

/// Renders each variant as a short lowercase label.
///
/// The label is written with `write_str`, so width/alignment flags in the
/// format spec are not applied.
// NOTE(review): presumably this is the name divan prints for each argument — confirm.
impl fmt::Display for Dataset {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Dataset::Urls => "urls",
            Dataset::Cb => "cb",
            Dataset::Log => "log",
            Dataset::Json => "json",
            Dataset::Path => "path",
            Dataset::Email => "email",
            Dataset::Rare => "rare",
        };
        f.write_str(label)
    }
}
| 69 | + |
| 70 | +impl Dataset { |
| 71 | + fn fsst_array(&self) -> &'static FSSTArray { |
| 72 | + match self { |
| 73 | + Self::Urls => &FSST_URLS, |
| 74 | + Self::Cb => &FSST_CB_URLS, |
| 75 | + Self::Log => &FSST_LOG_LINES, |
| 76 | + Self::Json => &FSST_JSON_STRINGS, |
| 77 | + Self::Path => &FSST_FILE_PATHS, |
| 78 | + Self::Email => &FSST_EMAILS, |
| 79 | + Self::Rare => &FSST_RARE_MATCH, |
| 80 | + } |
| 81 | + } |
| 82 | + |
| 83 | + fn pattern(&self) -> &'static str { |
| 84 | + match self { |
| 85 | + Self::Urls => "%google%", |
| 86 | + Self::Cb => "%yandex%", |
| 87 | + Self::Log => "%Googlebot%", |
| 88 | + Self::Json => "%enterprise%", |
| 89 | + Self::Path => "%target/release%", |
| 90 | + Self::Email => "%gmail%", |
| 91 | + Self::Rare => "%xyzzy%", |
| 92 | + } |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +#[divan::bench(args = [ |
| 97 | + Dataset::Urls, Dataset::Cb, Dataset::Log, Dataset::Json, |
| 98 | + Dataset::Path, Dataset::Email, Dataset::Rare, |
| 99 | +])] |
| 100 | +fn fsst_like(bencher: Bencher, dataset: &Dataset) { |
| 101 | + let fsst = dataset.fsst_array(); |
| 102 | + let len = fsst.len(); |
| 103 | + let arr = fsst.clone().into_array(); |
| 104 | + let pattern = ConstantArray::new(dataset.pattern(), len).into_array(); |
| 105 | + bencher.bench_local(|| { |
| 106 | + Like.try_new_array(len, LikeOptions::default(), [arr.clone(), pattern.clone()]) |
| 107 | + .unwrap() |
| 108 | + .into_array() |
| 109 | + .execute::<Canonical>(&mut SESSION.create_execution_ctx()) |
| 110 | + .unwrap() |
| 111 | + }); |
| 112 | +} |
0 commit comments