diff --git a/Cargo.toml b/Cargo.toml index 725bea8..db29538 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,3 +35,7 @@ harness = false [[bench]] name = "timeline" harness = false + +[[bench]] +name = "feed" +harness = false diff --git a/README.md b/README.md index b7479d1..15a206c 100644 --- a/README.md +++ b/README.md @@ -78,3 +78,63 @@ Two `UserMap` instances (follows, blocks). `is_followed_by(p, t)` = `follows.con - **`examples/footprint.rs`** — memory footprint estimates from Bluesky-current to Twitter-scale - **`examples/timeline.rs`** — concurrent timeline demo with follows/blocks filtering + +## Benchmarks + +The crate includes both microbenchmarks (isolated operations on individual data structures) and a real-world benchmark (the full "render the feed" query path). + +### Microbenchmarks (Rust, via Criterion) + +```bash +cargo bench --bench usermap # add, contains_hit, contains_miss, remove on UserMap +cargo bench --bench timeline # iterate timelines with/without blob resolve, skip-half +cargo bench --bench arena # Hot→Cold conversion, Parquet import +``` + +These measure each operation in isolation across data sizes from 10 to 100,000 entries. Useful for understanding the building blocks but not the system as a whole. + +### ETS comparison (Elixir, via Benchee) + +A parallel set of benchmarks ports the `UserMap` operations to an Elixir/ETS implementation using a `:duplicate_bag` table — the natural choice for "many followers per user" in BEAM systems. + +```bash +elixir benches/usermap_ets.exs +``` + +Requires Elixir 1.18+. Dependencies are fetched automatically via `Mix.install`. + +### Real-world feed benchmark + +The microbenchmarks expose primitives, not workloads. The `feed` benchmark measures the actual user-facing operation: **"render the feed for viewer V"** — fetch all posts by authors V follows within a 2-day window. 
+ +Setup mirrors a busy small-to-medium instance: + +- 1,000 active authors, each with 5 posts in the window (5,000 posts total) +- One viewer following F authors, where F ∈ {100, 500, 2,000} (both benchmarks cap F at the 1,000 authors, so the 2,000 case follows every author) + +The Rust version iterates the timeline and filters per-post via `UserMap::contains` — leaning on its fast scan-and-filter: + +```rust +let mut slice = timeline.iter(0, Order::Desc); +while let Some(entry) = slice.next() { + if follows.contains(VIEWER, entry.uid) { + feed.push(entry.timestamp()); + } +} +``` + +The ETS version uses two `:duplicate_bag` tables (a viewer → followed-author index and posts-by-author) and goes the other way — enumerate follows first, then fetch only relevant posts: + +```elixir +:ets.lookup(rev, viewer) +|> Enum.flat_map(fn {_v, author} -> :ets.lookup(posts, author) end) +``` + +Run both: + +```bash +cargo bench --bench feed # Rust +elixir benches/feed_ets.exs # ETS +``` + +The interesting result: the 1,000x+ gap visible on isolated `contains` calls largely disappears in the realistic workload. At low follow counts the ETS approach is actually faster because it touches only the data it needs; at high follow counts Rust pulls ahead. See `benches/feed.rs` and `benches/feed_ets.exs` for the full setup. 
diff --git a/benches/feed.rs b/benches/feed.rs new file mode 100644 index 0000000..3893a09 --- /dev/null +++ b/benches/feed.rs @@ -0,0 +1,73 @@ +use std::hint::black_box; +use std::sync::Arc; + +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; + +use crimeline::arena::{Cold, Hot}; +use crimeline::{Order, Sharding, Timeline, UserMap, Window}; + +const AUTHORS: u32 = 1_000; +const POSTS_PER_AUTHOR: usize = 5; +const BLOB_SIZE: usize = 256; +const FOLLOW_COUNTS: &[u32] = &[100, 500, 2_000]; + +const VIEWER: u32 = 10_000; + +fn make_blob(i: usize) -> Vec { + vec![(i & 0xff) as u8; BLOB_SIZE] +} + +fn build_timeline() -> Timeline { + let total_posts = AUTHORS as usize * POSTS_PER_AUTHOR; + let duration = (total_posts as u32 + 1) * 10; + let mut hot = Hot::new(Window::new(0, duration)).unwrap(); + + let mut cid: u64 = 0; + for author in 0..AUTHORS { + for p in 0..POSTS_PER_AUTHOR { + let ts = (author as u64 * POSTS_PER_AUTHOR as u64 + p as u64) * 10; + hot.add(author, cid, ts, &make_blob(cid as usize)) + .unwrap(); + cid += 1; + } + } + + let cold: Arc = hot.try_into().unwrap(); + Timeline::new(vec![cold]) +} + +fn build_follows(n_follows: u32) -> UserMap { + let follows = UserMap::new(Sharding::S128); + for author in 0..n_follows.min(AUTHORS) { + follows.add(VIEWER, author); + } + follows +} + +fn bench_feed(c: &mut Criterion) { + let mut group = c.benchmark_group("feed"); + let timeline = build_timeline(); + + for &f in FOLLOW_COUNTS { + let follows = build_follows(f); + + group.bench_with_input(BenchmarkId::from_parameter(f), &f, |b, _| { + b.iter(|| { + let mut slice = timeline.iter(0, Order::Desc); + let mut feed = Vec::new(); + while let Some(entry) = slice.next() { + if follows.contains(VIEWER, entry.uid) { + feed.push(entry.timestamp()); + } + } + black_box(feed) + }); + }); + } + + group.finish(); +} + +criterion_group!(benches, bench_feed); + +criterion_main!(benches); diff --git a/benches/feed_ets.exs b/benches/feed_ets.exs new file 
mode 100644 index 0000000..1c48580 --- /dev/null +++ b/benches/feed_ets.exs @@ -0,0 +1,106 @@ +Mix.install([{:benchee, "~> 1.0"}, {:roaring, "~> 0.13"} ]) + +# Mirrors benches/feed.rs — "get timeline for viewer V" using three ETS tables. +# +# Run: elixir benches/feed_ets.exs + +defmodule FeedEts do + @authors 1_000 + @posts_per_author 5 + @viewer 10_000 + + def authors, do: @authors + def viewer, do: @viewer + + def setup(n_follows) do + rev = :ets.new(:follows_rev, [:duplicate_bag, :public, {:read_concurrency, true}]) + posts = :ets.new(:posts, [:duplicate_bag, :public, {:read_concurrency, true}]) + + rev_tuples = for author <- 0..(min(n_follows, @authors) - 1), do: {@viewer, author} + :ets.insert(rev, rev_tuples) + + post_tuples = + for author <- 0..(@authors - 1), + p <- 0..(@posts_per_author - 1), + do: {author, author * @posts_per_author + p} + + :ets.insert(posts, post_tuples) + + {rev, posts} + end + + def get_feed(rev, posts, viewer) do + :ets.lookup(rev, viewer) + |> Enum.flat_map(fn {_v, author} -> :ets.lookup(posts, author) end) + end +end + +defmodule FeedRoaring do + @authors 1_000 + @posts_per_author 5 + @viewer 10_000 + + def authors, do: @authors + def viewer, do: @viewer + + def setup(n_follows) do + posts = :ets.new(:posts, [:duplicate_bag, :public, {:read_concurrency, true}]) + + # A single viewer as in this case simplifies the setup for roaring because we need a single bitmap. + # Generalizing to multiple viewers would require a bitmap per viewer and an additional lookup of + # the correct bitmap first. 
+ follows = for author <- 0..(min(n_follows, @authors) - 1), do: author + {:ok, rev} = RoaringBitmap64.from_list(follows) + + post_tuples = + for author <- 0..(@authors - 1), + p <- 0..(@posts_per_author - 1), + do: {author, author * @posts_per_author + p} + + :ets.insert(posts, post_tuples) + + {rev, posts} + end + + def get_feed(rev, posts, _viewer) do + {:ok, authors} = RoaringBitmap64.to_list(rev) + + Enum.flat_map(authors, fn author -> :ets.lookup(posts, author) end) + end +end + +follow_counts = [100, 500, 2_000] + +inputs = + for f <- follow_counts, into: %{} do + {"#{f}", f} + end + +Benchee.run( + %{ + "FeedEts.feed" => { + fn {rev, posts, viewer} -> FeedEts.get_feed(rev, posts, viewer) end, + before_scenario: fn n_follows -> + {rev, posts} = FeedEts.setup(n_follows) + {rev, posts, FeedEts.viewer()} + end, + after_scenario: fn {rev, posts, _viewer} -> + :ets.delete(rev) + :ets.delete(posts) + end + }, + "FeedRoaring.feed" => { + fn {rev, posts, viewer} -> FeedRoaring.get_feed(rev, posts, viewer) end, + before_scenario: fn n_follows -> + {rev, posts} = FeedRoaring.setup(n_follows) + {rev, posts, FeedRoaring.viewer()} + end, + after_scenario: fn {_rev, posts, _viewer} -> + :ets.delete(posts) + end + } + }, + inputs: inputs, + title: "feed", + print: [configuration: false] +) diff --git a/benches/usermap_ets.exs b/benches/usermap_ets.exs new file mode 100644 index 0000000..2ea8a74 --- /dev/null +++ b/benches/usermap_ets.exs @@ -0,0 +1,273 @@ +Mix.install([{:benchee, "~> 1.0"}, {:roaring, "~> 0.13"} ]) + +# Mirrors benches/usermap.rs — same sizes, data patterns, and operations. +# +# Data model: one :duplicate_bag ETS table with {user_id, follower_id} tuples, +# keyed on user_id. Equivalent to the Rust UserMap forward direction. 
#
# Run: elixir benches/usermap_ets.exs

# ETS-backed relation set for the single fixed user @user.
# Every row is {@user, target} in a :duplicate_bag table.
defmodule UserMapEts do
  @user 0

  # Creates an empty table.
  def new do
    :ets.new(:forward, [:duplicate_bag, :public, {:read_concurrency, true}])
  end

  # Creates a table holding the n targets 0, 3, 6, ..., (n - 1) * 3.
  def populated(n) do
    table = new()
    tuples = for t <- 0..(n - 1), do: {@user, t * 3}
    :ets.insert(table, tuples)
    table
  end

  # Inserts one row. The table is a :duplicate_bag, so repeating the same
  # target adds another identical row.
  def add(table, target) do
    :ets.insert(table, {@user, target})
  end

  # Inserts all targets with a single :ets.insert/2 call.
  def add_bulk_list(table, targets) do
    tuples = for t <- targets, do: {@user, t}
    :ets.insert(table, tuples)
  end

  # Inserts the targets one :ets.insert/2 call at a time.
  def add_bulk_each(table, targets) do
    Enum.each(targets, fn t -> :ets.insert(table, {@user, t}) end)
  end

  # True when at least one {@user, target} row exists.
  def contains(table, target) do
    :ets.match_object(table, {@user, target}) != []
  end

  # Deletes every {@user, target} row; a no-op when none exists.
  def remove(table, target) do
    :ets.delete_object(table, {@user, target})
  end

  # Frees the table.
  def destroy(table) do
    :ets.delete(table)
  end
end

# Same interface as UserMapEts, backed by a RoaringBitmap64 of targets.
defmodule UserMapRoaring do
  # Creates an empty bitmap.
  def new do
    {:ok, roaring} = RoaringBitmap64.new()
    roaring
  end

  # Creates a bitmap holding the n targets 0, 3, 6, ..., (n - 1) * 3.
  #
  # This must be the same pattern as UserMapEts.populated/1: the contains_hit
  # and remove scenarios look up div(size, 2) * 3, which is only present when
  # the stored values are multiples of 3. Filling 0..n-1 instead would make
  # every "hit" a miss and every remove a no-op.
  def populated(n) do
    list = for t <- 0..(n - 1), do: t * 3
    {:ok, roaring} = RoaringBitmap64.from_list(list)
    roaring
  end

  # Inserts one target.
  def add(roaring, target) do
    RoaringBitmap64.insert(roaring, target)
  end

  # Inserts the targets one by one, collecting the results in a list
  # (counterpart of UserMapEts.add_bulk_list/2).
  def add_bulk_list(roaring, targets) do
    for t <- targets, do: RoaringBitmap64.insert(roaring, t)
  end

  # Inserts the targets one by one, discarding the results.
  def add_bulk_each(roaring, targets) do
    Enum.each(targets, fn t -> RoaringBitmap64.insert(roaring, t) end)
  end

  # Membership test.
  def contains(roaring, target) do
    RoaringBitmap64.contains?(roaring, target)
  end

  # Removes one target.
  def remove(roaring, target) do
    RoaringBitmap64.remove(roaring, target)
  end

  # Nothing to free explicitly; kept so both modules expose the same interface.
  def destroy(_roaring) do
    :ok
  end
end

# Set sizes exercised by every benchmark group below.
sizes = [10, 100, 1_000, 10_000, 100_000]

# --- add ---
# Table is populated once per size. Each iteration inserts one duplicate target;
# the extra rows are negligible relative to the initial n entries.

# Benchee inputs: label => set size.
add_inputs =
  for size <- sizes, into: %{} do
    {"#{size}", size}
  end

# The target size * 3 + 1 lies above every value written by either populated/1,
# so the first insert adds a value that is not yet present.
Benchee.run(
  %{
    "UserMapEts.add" => {
      fn {table, target} -> UserMapEts.add(table, target) end,
      before_scenario: fn size ->
        table = UserMapEts.populated(size)
        {table, size * 3 + 1}
      end,
      after_scenario: fn {table, _target} -> UserMapEts.destroy(table) end
    },
    "UserMapRoaring.add" => {
      fn {table, target} -> UserMapRoaring.add(table, target) end,
      before_scenario: fn size ->
        table = UserMapRoaring.populated(size)
        {table, size * 3 + 1}
      end,
      # Tear down like every other scenario, so all of them share one shape.
      after_scenario: fn {table, _target} -> UserMapRoaring.destroy(table) end
    }
  },
  inputs: add_inputs,
  title: "add",
  print: [configuration: false]
)

# --- add_bulk ---
# Table is populated once per size. Each iteration bulk-inserts n targets;
# duplicates accumulate but don't change the O(n) insert cost.

# Each input carries the set size and the list of n targets to insert
# (1, 4, 7, ...), built once outside the timed function.
add_bulk_inputs =
  for size <- sizes, into: %{} do
    incoming = for t <- 0..(size - 1), do: t * 3 + 1
    {"#{size}", {size, incoming}}
  end

Benchee.run(
  %{
    "UserMapEts.add_bulk_list" => {
      fn {table, incoming} -> UserMapEts.add_bulk_list(table, incoming) end,
      before_scenario: fn {size, incoming} ->
        table = UserMapEts.populated(size)
        {table, incoming}
      end,
      after_scenario: fn {table, _incoming} -> UserMapEts.destroy(table) end
    },
    "UserMapEts.add_bulk_each" => {
      fn {table, incoming} -> UserMapEts.add_bulk_each(table, incoming) end,
      before_scenario: fn {size, incoming} ->
        table = UserMapEts.populated(size)
        {table, incoming}
      end,
      after_scenario: fn {table, _incoming} -> UserMapEts.destroy(table) end
    },
    "UserMapRoaring.add_bulk_list" => {
      fn {table, incoming} -> UserMapRoaring.add_bulk_list(table, incoming) end,
      before_scenario: fn {size, incoming} ->
        table = UserMapRoaring.populated(size)
        {table, incoming}
      end,
      after_scenario: fn {table, _incoming} -> UserMapRoaring.destroy(table) end
    },
    "UserMapRoaring.add_bulk_each" => {
      fn {table, incoming} -> UserMapRoaring.add_bulk_each(table, incoming) end,
      before_scenario: fn {size, incoming} ->
        table = UserMapRoaring.populated(size)
        {table, incoming}
      end,
      after_scenario: fn {table, _incoming} -> UserMapRoaring.destroy(table) end
    }
  },
  inputs: add_bulk_inputs,
  title: "add_bulk",
  print: [configuration: false]
)

# --- contains_hit ---
# Rust: reuses one map across all iterations (no setup per iteration).

contains_hit_inputs =
  for size <- sizes, into: %{} do
    {"#{size}", size}
  end

# div(size, 2) * 3 is the middle element of the t * 3 pattern written by
# UserMapEts.populated/1.
# NOTE(review): this is a hit for the Roaring scenario only if
# UserMapRoaring.populated/1 stores the same t * 3 pattern — confirm.
Benchee.run(
  %{
    "UserMapEts.contains_hit" => {
      fn {table, target} -> UserMapEts.contains(table, target) end,
      before_scenario: fn size ->
        table = UserMapEts.populated(size)
        target = div(size, 2) * 3
        {table, target}
      end,
      after_scenario: fn {table, _target} -> UserMapEts.destroy(table) end
    },
    "UserMapRoaring.contains_hit" => {
      fn {table, target} -> UserMapRoaring.contains(table, target) end,
      before_scenario: fn size ->
        table = UserMapRoaring.populated(size)
        target = div(size, 2) * 3
        {table, target}
      end,
      after_scenario: fn {table, _target} -> UserMapRoaring.destroy(table) end
    }
  },
  inputs: contains_hit_inputs,
  title: "contains_hit",
  print: [configuration: false]
)

# --- contains_miss ---
# Rust: reuses one map, looks up u32::MAX (guaranteed miss).

# Benchee inputs: label => set size.
contains_miss_inputs =
  for size <- sizes, into: %{} do
    {"#{size}", size}
  end

# 2^32 - 1 (u32::MAX): far above any value stored for the largest size, so the
# lookup can never succeed.
miss_target = 4_294_967_295

# The set is built once per (scenario, size) in before_scenario; the timed
# function is a single failed lookup. miss_target is captured by the closures.
Benchee.run(
  %{
    "UserMapEts.contains_miss" => {
      fn {table, target} -> UserMapEts.contains(table, target) end,
      before_scenario: fn size ->
        table = UserMapEts.populated(size)
        {table, miss_target}
      end,
      after_scenario: fn {table, _target} -> UserMapEts.destroy(table) end
    },
    "UserMapRoaring.contains_miss" => {
      fn {table, target} -> UserMapRoaring.contains(table, target) end,
      before_scenario: fn size ->
        table = UserMapRoaring.populated(size)
        {table, miss_target}
      end,
      after_scenario: fn {table, _target} -> UserMapRoaring.destroy(table) end
    }
  },
  inputs: contains_miss_inputs,
  title: "contains_miss",
  print: [configuration: false]
)

# --- remove ---
# Table is populated once per size. The first iteration removes the target;
# subsequent iterations are no-op deletes (target already gone). This measures
# the scan cost through the bucket, which is the dominant cost either way.
+ +remove_inputs = + for size <- sizes, into: %{} do + {"#{size}", size} + end + +Benchee.run( + %{ + "UserMapEts.remove" => { + fn {table, target} -> UserMapEts.remove(table, target) end, + before_scenario: fn size -> + table = UserMapEts.populated(size) + target = div(size, 2) * 3 + {table, target} + end, + after_scenario: fn {table, _target} -> UserMapEts.destroy(table) end + }, + "UserMapRoaring.remove" => { + fn {table, target} -> UserMapRoaring.remove(table, target) end, + before_scenario: fn size -> + table = UserMapRoaring.populated(size) + target = div(size, 2) * 3 + {table, target} + end, + after_scenario: fn {table, _target} -> UserMapRoaring.destroy(table) end + } + }, + inputs: remove_inputs, + title: "remove", + print: [configuration: false] +) diff --git a/examples/footprint.rs b/examples/footprint.rs index d7d6e48..aa04c3a 100644 --- a/examples/footprint.rs +++ b/examples/footprint.rs @@ -104,6 +104,13 @@ fn print_user_scenarios() { avg_follows: 200, avg_blocks: 15, }, + UserScenario { + name: "Twitter-scale-high-follow (500M users)", + users: 500_000_000, + max_uid: 500_000_000, + avg_follows: 700, + avg_blocks: 15, + }, ]; println!("relationship graph\n");