bitcrowd · xhr15 · May 21, 2026 · May 21, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -35,3 +35,7 @@ harness = false
 [[bench]]
 name = "timeline"
 harness = false
+
+[[bench]]
+name = "feed"
+harness = false
diff --git a/README.md b/README.md
@@ -78,3 +78,63 @@ Two `UserMap` instances (follows, blocks). `is_followed_by(p, t)` = `follows.con
 
 - **`examples/footprint.rs`** — memory footprint estimates from Bluesky-current to Twitter-scale
 - **`examples/timeline.rs`** — concurrent timeline demo with follows/blocks filtering
+
+## Benchmarks
+
+The crate includes both microbenchmarks (isolated operations on individual data structures) and a real-world benchmark (the full "render the feed" query path).
+
+### Microbenchmarks (Rust, via Criterion)
+
+```bash
+cargo bench --bench usermap   # add, contains_hit, contains_miss, remove on UserMap
+cargo bench --bench timeline  # iterate timelines with/without blob resolve, skip-half
+cargo bench --bench arena     # Hot→Cold conversion, Parquet import
+```
+
+These measure each operation in isolation across data sizes from 10 to 100,000 entries. They are useful for understanding the building blocks, but not the system as a whole.
+
+### ETS comparison (Elixir, via Benchee)
+
+A parallel set of benchmarks ports the `UserMap` operations to an Elixir/ETS implementation using a `:duplicate_bag` table — the natural choice for "many followers per user" in BEAM systems.
+
+```bash
+elixir benches/usermap_ets.exs
+```
+
+Requires Elixir 1.18+. Dependencies are fetched automatically via `Mix.install`.
+
+### Real-world feed benchmark
+
+The microbenchmarks expose primitives, not workloads. The `feed` benchmark measures the actual user-facing operation: **"render the feed for viewer V"** — fetch all posts by authors V follows within a 2-day window.
+
+Setup mirrors a busy small-to-medium instance:
+
+- 1,000 active authors, each with 5 posts in the window (5,000 posts total)
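+- One viewer following F authors, where F ∈ {100, 500, 2,000}; both implementations cap F at the 1,000 authors, so the 2,000 case effectively follows every author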
+
+The Rust version iterates the timeline and filters per-post via `UserMap::contains` — leaning on its fast scan-and-filter:
+
+```rust
+let mut slice = timeline.iter(0, Order::Desc);
+while let Some(entry) = slice.next() {
+    if follows.contains(VIEWER, entry.uid) {
+        feed.push(entry.timestamp());
+    }
+}
+```
+
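+The ETS version uses two `:duplicate_bag` tables (a follow index keyed by viewer, and posts-by-author) and goes the other way — enumerate follows first, then fetch only relevant posts (the script also benchmarks a variant that keeps the follow set in a Roaring bitmap instead of an ETS table):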
+
+```elixir
+:ets.lookup(rev, viewer)
+|> Enum.flat_map(fn {_v, author} -> :ets.lookup(posts, author) end)
+```
+
+Run both:
+
+```bash
+cargo bench --bench feed       # Rust
+elixir benches/feed_ets.exs    # ETS
+```
+
+The interesting result: the 1,000x+ gap visible on isolated `contains` calls largely disappears in the realistic workload. At low follow counts the ETS approach is actually faster because it touches only the data it needs; at high follow counts Rust pulls ahead. See `benches/feed.rs` and `benches/feed_ets.exs` for the full setup.
diff --git a/benches/feed.rs b/benches/feed.rs
@@ -0,0 +1,73 @@
+use std::hint::black_box;
+use std::sync::Arc;
+
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+
+use crimeline::arena::{Cold, Hot};
+use crimeline::{Order, Sharding, Timeline, UserMap, Window};
+
+/// Number of distinct authors (ids `0..AUTHORS`) that get posts in the benchmark timeline.
+const AUTHORS: u32 = 1_000;
+/// Number of posts added for each author.
+const POSTS_PER_AUTHOR: usize = 5;
+/// Length in bytes of the blob attached to every post.
+const BLOB_SIZE: usize = 256;
+/// Requested follow counts, one benchmark case each. `build_follows` caps each value at
+/// `AUTHORS`, so the 2,000 case ends up following all 1,000 authors.
+const FOLLOW_COUNTS: &[u32] = &[100, 500, 2_000];
+
+/// User id of the single viewer whose feed is rendered; it lies outside `0..AUTHORS`.
+const VIEWER: u32 = 10_000;
+
+/// Builds a `BLOB_SIZE`-byte payload in which every byte equals the low 8 bits of `i`.
+fn make_blob(i: usize) -> Vec<u8> {
+    // Masking first makes the narrowing cast lossless.
+    let fill = (i & 0xff) as u8;
+    vec![fill; BLOB_SIZE]
+}
+
+/// Builds the benchmark timeline: `AUTHORS * POSTS_PER_AUTHOR` posts held in a single `Cold`.
+///
+/// Posts are added author by author. Content ids count up from 0 in insertion order, and each
+/// post's timestamp is ten times its content id. `duration` is `(total_posts + 1) * 10`, which
+/// is larger than the last timestamp.
+///
+/// # Panics
+///
+/// Panics if creating the `Hot`, adding a post, or converting to `Cold` fails.
+fn build_timeline() -> Timeline {
+    let total_posts = AUTHORS as usize * POSTS_PER_AUTHOR;
+    let duration = (total_posts as u32 + 1) * 10;
+    let mut hot = Hot::new(Window::new(0, duration)).unwrap();
+
+    // One author id per post, grouped by author; zipping with a counter yields the content id.
+    let post_authors =
+        (0..AUTHORS).flat_map(|author| (0..POSTS_PER_AUTHOR).map(move |_| author));
+    for (cid, author) in (0u64..).zip(post_authors) {
+        // Equals `(author * POSTS_PER_AUTHOR + p) * 10` because ids are assigned sequentially.
+        let ts = cid * 10;
+        hot.add(author, cid, ts, &make_blob(cid as usize)).unwrap();
+    }
+
+    let cold: Arc<Cold> = hot.try_into().unwrap();
+    Timeline::new(vec![cold])
+}
+
+/// Builds the follow map for the benchmark by adding `(VIEWER, author)` for authors
+/// `0..n_follows`.
+///
+/// The count is capped at `AUTHORS`, so requesting more follows than there are authors
+/// (as the 2,000 case does) results in an entry for every author.
+fn build_follows(n_follows: u32) -> UserMap {
+    let follows = UserMap::new(Sharding::S128);
+    let followed = n_follows.min(AUTHORS);
+    (0..followed).for_each(|author| {
+        follows.add(VIEWER, author);
+    });
+    follows
+}
+
+/// Benchmarks "render the feed for `VIEWER`" once per entry in `FOLLOW_COUNTS`.
+///
+/// The timeline is built once and shared by every case; the follow map is rebuilt per case,
+/// outside the timed closure. Each timed iteration walks `timeline.iter(0, Order::Desc)` and
+/// collects the timestamp of every entry whose `uid` passes `follows.contains(VIEWER, uid)`.
+fn bench_feed(c: &mut Criterion) {
+    let timeline = build_timeline();
+    let mut group = c.benchmark_group("feed");
+
+    for &follow_count in FOLLOW_COUNTS {
+        let follows = build_follows(follow_count);
+        let id = BenchmarkId::from_parameter(follow_count);
+
+        group.bench_with_input(id, &follow_count, |bencher, _| {
+            bencher.iter(|| {
+                let mut timestamps = Vec::new();
+                let mut cursor = timeline.iter(0, Order::Desc);
+                while let Some(post) = cursor.next() {
+                    if !follows.contains(VIEWER, post.uid) {
+                        continue;
+                    }
+                    timestamps.push(post.timestamp());
+                }
+                // Keep the result observable so the optimizer cannot discard the scan.
+                black_box(timestamps)
+            });
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_feed);
+
+criterion_main!(benches);
diff --git a/benches/feed_ets.exs b/benches/feed_ets.exs
@@ -0,0 +1,106 @@
+Mix.install([{:benchee, "~> 1.0"},  {:roaring, "~> 0.13"} ])
+
+# Mirrors benches/feed.rs — "get timeline for viewer V" using two ETS tables, plus a variant that keeps the follow set in a Roaring bitmap.
+#
+# Run: elixir benches/feed_ets.exs
+
+defmodule FeedEts do
+  @moduledoc """
+  Feed lookup backed by two ETS `:duplicate_bag` tables: a follow index keyed by
+  viewer and a posts table keyed by author.
+  """
+
+  @authors 1_000
+  @posts_per_author 5
+  @viewer 10_000
+
+  @doc "Number of authors that have posts."
+  def authors, do: @authors
+
+  @doc "Id of the single viewer used by the benchmark."
+  def viewer, do: @viewer
+
+  @doc """
+  Creates and fills the tables for a viewer that follows `n_follows` authors.
+
+  The follow count is capped at the number of authors. Returns `{rev, posts}`, where
+  `rev` holds `{viewer, author}` rows and `posts` holds `{author, post_id}` rows.
+  """
+  def setup(n_follows) do
+    rev = :ets.new(:follows_rev, [:duplicate_bag, :public, {:read_concurrency, true}])
+    posts = :ets.new(:posts, [:duplicate_bag, :public, {:read_concurrency, true}])
+
+    # The explicit `//1` step keeps the range empty when the viewer follows nobody;
+    # a bare `0..-1` counts down and would insert bogus follows for authors 0 and -1.
+    rev_tuples = for author <- 0..(min(n_follows, @authors) - 1)//1, do: {@viewer, author}
+    :ets.insert(rev, rev_tuples)
+
+    post_tuples =
+      for author <- 0..(@authors - 1),
+          p <- 0..(@posts_per_author - 1),
+          do: {author, author * @posts_per_author + p}
+
+    :ets.insert(posts, post_tuples)
+
+    {rev, posts}
+  end
+
+  @doc """
+  Returns the `{author, post_id}` rows of every author that `viewer` follows.
+
+  Looks up the viewer's follows in `rev`, then fetches each followed author's rows
+  from `posts`.
+  """
+  def get_feed(rev, posts, viewer) do
+    :ets.lookup(rev, viewer)
+    |> Enum.flat_map(fn {_v, author} -> :ets.lookup(posts, author) end)
+  end
+end
+
+defmodule FeedRoaring do
+  @moduledoc """
+  Feed lookup that keeps the viewer's follow set in a `RoaringBitmap64` and the posts
+  in an ETS `:duplicate_bag` table keyed by author.
+  """
+
+  @authors 1_000
+  @posts_per_author 5
+  @viewer 10_000
+
+  @doc "Number of authors that have posts."
+  def authors, do: @authors
+
+  @doc "Id of the single viewer used by the benchmark."
+  def viewer, do: @viewer
+
+  @doc """
+  Creates the follow bitmap and the posts table for a viewer that follows `n_follows` authors.
+
+  The follow count is capped at the number of authors. Returns `{rev, posts}`, where
+  `rev` is the bitmap of followed author ids and `posts` holds `{author, post_id}` rows.
+  """
+  def setup(n_follows) do
+    posts = :ets.new(:posts, [:duplicate_bag, :public, {:read_concurrency, true}])
+
+    # A single viewer, as in this case, simplifies the Roaring setup because only one
+    # bitmap is needed. Generalizing to multiple viewers would require a bitmap per
+    # viewer and an additional lookup of the correct bitmap first.
+    #
+    # The explicit `//1` step keeps the range empty when the viewer follows nobody;
+    # a bare `0..-1` counts down and would yield the bogus author ids 0 and -1.
+    follows = for author <- 0..(min(n_follows, @authors) - 1)//1, do: author
+    {:ok, rev} = RoaringBitmap64.from_list(follows)
+
+    post_tuples =
+      for author <- 0..(@authors - 1),
+          p <- 0..(@posts_per_author - 1),
+          do: {author, author * @posts_per_author + p}
+
+    :ets.insert(posts, post_tuples)
+
+    {rev, posts}
+  end
+
+  @doc """
+  Returns the `{author, post_id}` rows of every author id stored in the bitmap `rev`.
+
+  The viewer argument is ignored because the bitmap already belongs to a single viewer.
+  """
+  def get_feed(rev, posts, _viewer) do
+    {:ok, authors} = RoaringBitmap64.to_list(rev)
+
+    Enum.flat_map(authors, fn author -> :ets.lookup(posts, author) end)
+  end
+end
+
+# Requested follow counts; the setup functions cap each one at the number of authors.
+follow_counts = [100, 500, 2_000]
+
+# Benchee input map: label (the count rendered as a string) => count.
+inputs = Map.new(follow_counts, fn count -> {Integer.to_string(count), count} end)
+
+# Each scenario builds its state in `before_scenario`, runs `get_feed` as the benchmarked
+# function, and deletes the ETS tables it created in `after_scenario`.
+ets_scenario = {
+  fn {rev, posts, viewer} -> FeedEts.get_feed(rev, posts, viewer) end,
+  before_scenario: fn n_follows ->
+    {follow_table, post_table} = FeedEts.setup(n_follows)
+    {follow_table, post_table, FeedEts.viewer()}
+  end,
+  after_scenario: fn {follow_table, post_table, _viewer} ->
+    :ets.delete(follow_table)
+    :ets.delete(post_table)
+  end
+}
+
+# The Roaring variant owns only one ETS table; its follow set is a bitmap, not a table.
+roaring_scenario = {
+  fn {rev, posts, viewer} -> FeedRoaring.get_feed(rev, posts, viewer) end,
+  before_scenario: fn n_follows ->
+    {bitmap, post_table} = FeedRoaring.setup(n_follows)
+    {bitmap, post_table, FeedRoaring.viewer()}
+  end,
+  after_scenario: fn {_bitmap, post_table, _viewer} ->
+    :ets.delete(post_table)
+  end
+}
+
+Benchee.run(
+  %{
+    "FeedEts.feed" => ets_scenario,
+    "FeedRoaring.feed" => roaring_scenario
+  },
+  inputs: inputs,
+  title: "feed",
+  print: [configuration: false]
+)