|
%% Lightweight BEAM-native benchmark harness for `str` functions.
%%
%% Usage (from repo root):
%%   erl -noshell \
%%     -pa build/dev/erlang/gleam_stdlib/ebin \
%%     -pa build/dev/erlang/str/ebin \
%%     -eval "bench_beam:run(), halt()."
%%
%% The script writes CSV output to `scripts/bench_results/bench_beam_<ts>.csv`.
| 10 | + |
| 11 | +-module(bench_beam). |
| 12 | +-export([run/0]). |
| 13 | + |
| 14 | +%% Simple helpers |
%% Make sure the directory Path exists, creating it and any missing parent
%% directories along the way.
%%
%% Returns ok when the directory already exists or was created, and
%% {error, Reason} when it could not be created.
ensure_dir(Path) ->
    %% filelib:ensure_dir/1 creates every parent directory of the name it is
    %% given but not the final component, so pass a placeholder entry that
    %% would live inside Path. Nothing named "placeholder" is created.
    filelib:ensure_dir(filename:join(Path, "placeholder")).
| 20 | + |
%% Current UTC date and time rendered as a flat string of the form
%% YYYYMMDD_HHMMSS (zero-padded), suitable for use in a file name.
timestamp() ->
    {Date, Time} = calendar:universal_time(),
    %% Year, month, day, hour, minute, second - in the order the format expects.
    Fields = tuple_to_list(Date) ++ tuple_to_list(Time),
    Formatted = io_lib:format("~4..0B~2..0B~2..0B_~2..0B~2..0B~2..0B", Fields),
    lists:flatten(Formatted).
| 24 | + |
%% Call apply(M, F, A) N times and discard every result.
%% N must be a non-negative integer; returns ok.
do_warmup(M, F, A, N) when is_integer(N), N >= 0 ->
    lists:foreach(fun(_Round) -> apply(M, F, A) end, lists:seq(1, N)).
| 29 | + |
%% Average elapsed time of one apply(M, F, A) call, in whole microseconds.
%%
%% Performs five untimed warm-up calls, then times Iter back-to-back calls
%% with timer:tc/1 and returns the total divided by Iter. The division is an
%% integer division, so the result is truncated (a call that takes less than
%% one microsecond on average is reported as 0).
%%
%% Iter must be a positive integer; anything else fails with function_clause
%% before any work is done.
time_fun(M, F, A, Iter) when is_integer(Iter), Iter > 0 ->
    do_warmup(M, F, A, 5),
    %% Build the iteration list up front so that allocating it is not counted
    %% as part of the measured time.
    Rounds = lists:seq(1, Iter),
    {MicroSecs, _} = timer:tc(fun() ->
        lists:foreach(fun(_Round) -> apply(M, F, A) end, Rounds)
    end),
    MicroSecs div Iter.
| 35 | + |
%% Binary made of N back-to-back copies of Bin (empty binary when N is 0).
gen_repetitive(Bin, N) when is_binary(Bin) ->
    binary:copy(Bin, N).
| 38 | + |
%% Binary of N elements, each drawn uniformly at random (via rand:uniform/1)
%% from Alphabet, which is a string, i.e. a list of character codes.
gen_random(Alphabet, N) when is_list(Alphabet) ->
    %% A tuple gives constant-time indexed access to the candidate characters.
    Choices = list_to_tuple(Alphabet),
    Count = tuple_size(Choices),
    list_to_binary([element(rand:uniform(Count), Choices) || _ <- lists:seq(1, N)]).
| 45 | + |
%% Write the CSV column-header line to the io device File.
%% The column order matches the rows written by measure_case/6.
write_csv_header(File) ->
    Header = "case,scenario_type,text_len,pat_len,max_border,matches,index_of_us,index_of_auto_us,kmp_us,sliding_us,count_us,count_auto_us,iter~n",
    io:format(File, Header, []).
| 48 | + |
%% Benchmark one scenario and append its result row to the CSV.
%%
%% File      - io device the row is written to.
%% Name/Type - case and scenario labels, printed with ~s.
%% Text/Pat  - binaries passed to the 'str@core' functions under test.
%% Iter      - number of timed iterations per function (also written to the row).
%%
%% The row holds, in order: the labels, the byte sizes of Text and Pat, the
%% largest prefix-table entry for Pat, the match count, six per-call timings
%% in microseconds, and Iter. Returns the result of io:format/3.
measure_case(File, Name, Type, Text, Pat, Iter) ->
    Matches = count_matches(Text, Pat),
    MaxBorder = max_border(Pat),
    %% Timed in exactly this order; each entry fills one *_us column.
    Calls = [{index_of, [Text, Pat]},
             {index_of_auto, [Text, Pat]},
             {kmp_search_all, [Text, Pat]},
             {sliding_search_all, [Text, Pat]},
             {count, [Text, Pat, true]},
             {count_auto, [Text, Pat, true]}],
    Timings = [time_fun('str@core', Fun, Args, Iter) || {Fun, Args} <- Calls],
    io:format(File, "~s,~s,~p,~p,~p,~p,~p,~p,~p,~p,~p,~p,~p~n",
              [Name, Type, byte_size(Text), byte_size(Pat), MaxBorder, Matches]
              ++ Timings ++ [Iter]).

%% Length of the list returned by 'str@core':sliding_search_all/2 (presumably
%% one entry per match - confirm against str@core), or -1 when the call fails.
count_matches(Text, Pat) ->
    try 'str@core':sliding_search_all(Text, Pat) of
        Found -> length(Found)
    catch
        %% Deliberate best effort: a failing reference search must not abort
        %% the benchmark, so it is recorded as -1 in the row instead.
        _:_ -> -1
    end.

%% Largest entry of the prefix table built for Pat, or 0 when the table is
%% empty or 'str@core':build_prefix_table/1 fails.
max_border(Pat) ->
    try 'str@core':build_prefix_table(Pat) of
        [] -> 0;
        Table -> lists:max(Table)
    catch
        %% Same best-effort policy as count_matches/2: report 0 and carry on.
        _:_ -> 0
    end.
| 73 | + |
%% Entry point: run every benchmark scenario and write one CSV row for each.
%%
%% Results are written to scripts/bench_results/bench_beam_<timestamp>.csv,
%% relative to the current working directory, and progress is printed to
%% standard output. Returns ok. Crashes (badmatch) when the output directory
%% or file cannot be created; the file is closed even if a scenario crashes.
run() ->
    rand:seed(exsplus, {erlang:monotonic_time(), erlang:unique_integer([positive]), erlang:phash2(node())}),
    OutDir = "scripts/bench_results",
    %% Fail right here, with the real reason, if the directory is unavailable.
    ok = ensure_dir(OutDir),
    Path = filename:join(OutDir, "bench_beam_" ++ timestamp() ++ ".csv"),
    {ok, File} = file:open(Path, [write, {encoding, utf8}]),
    try
        write_csv_header(File),
        io:format("Starting BEAM benchmarks...~n"),
        lists:foreach(fun(Scenario) -> run_scenario(File, Scenario) end, scenarios(200))
    after
        %% Always release the file handle, including when a scenario crashes.
        file:close(File)
    end,
    io:format("Wrote results to ~s~n", [Path]),
    ok.

%% Announce one scenario, build its input, and measure it.
run_scenario(File, {Name, Type, MakeInput, Iter}) ->
    {Text, Pat} = MakeInput(),
    io:format("Running ~s (~p bytes text, ~p bytes pat)...~n", [Name, byte_size(Text), byte_size(Pat)]),
    measure_case(File, Name, Type, Text, Pat, Iter).

%% Benchmark scenarios as {Name, Type, MakeInput, Iter} tuples, in run order.
%% MakeInput is a zero-argument fun returning {Text, Pat}, so each input is
%% only generated right before its scenario runs.
scenarios(Iter) ->
    [
     %% 1) repetitive no match
     {"repetitive_nomatch", "repetitive_nomatch",
      fun() ->
          Text = gen_repetitive(<<$a>>, 20000),
          Prefix = gen_repetitive(<<$a>>, 1000),
          {Text, <<Prefix/binary, $b>>}
      end,
      Iter},
     %% 2) repetitive many matches
     {"repetitive_many", "repetitive_many",
      fun() ->
          Text = gen_repetitive(<<$a>>, 20000),
          Pat = gen_repetitive(<<$a>>, 50),
          {Text, Pat}
      end,
      Iter},
     %% 3) random small pat
     {"random_small_pat", "random",
      fun() ->
          Text = gen_random("abcd", 20000),
          Pat = gen_random("abcd", 20),
          {Text, Pat}
      end,
      Iter},
     %% 4) large text small pat (fewer iterations because the text is 10x larger)
     {"large_text_small_pat", "random",
      fun() ->
          Text = gen_random("abcd", 200000),
          {Text, <<"abcdab">>}
      end,
      Iter div 4}
     %% (emoji case omitted in this BEAM harness to avoid encoding edge-cases)
    ].
0 commit comments