Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,7 @@ harness = false
[[bench]]
name = "timeline"
harness = false

[[bench]]
name = "feed"
harness = false
60 changes: 60 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,63 @@ Two `UserMap` instances (follows, blocks). `is_followed_by(p, t)` = `follows.con

- **`examples/footprint.rs`** — memory footprint estimates from Bluesky-current to Twitter-scale
- **`examples/timeline.rs`** — concurrent timeline demo with follows/blocks filtering

## Benchmarks

The crate includes both microbenchmarks (isolated operations on individual data structures) and a real-world benchmark (the full "render the feed" query path).

### Microbenchmarks (Rust, via Criterion)

```bash
cargo bench --bench usermap # add, contains_hit, contains_miss, remove on UserMap
cargo bench --bench timeline # iterate timelines with/without blob resolve, skip-half
cargo bench --bench arena # Hot→Cold conversion, Parquet import
```

These measure each operation in isolation across data sizes from 10 to 100,000 entries. They are useful for understanding the building blocks, but not the system as a whole.

### ETS comparison (Elixir, via Benchee)

A parallel set of benchmarks ports the `UserMap` operations to an Elixir/ETS implementation using a `:duplicate_bag` table — the natural choice for "many followers per user" in BEAM systems.

```bash
elixir benches/usermap_ets.exs
```

Requires Elixir 1.18+. Dependencies are fetched automatically via `Mix.install`.

### Real-world feed benchmark

The microbenchmarks expose primitives, not workloads. The `feed` benchmark measures the actual user-facing operation: **"render the feed for viewer V"** — fetch all posts by authors V follows within a 2-day window.

Setup mirrors a busy small-to-medium instance:

- 1,000 active authors, each with 5 posts in the window (5,000 posts total)
- One viewer following F authors, where F ∈ {100, 500, 2,000} (capped at the 1,000 available authors, so F = 2,000 means the viewer follows every author)

The Rust version iterates the timeline and filters per-post via `UserMap::contains` — leaning on its fast scan-and-filter:

```rust
let mut slice = timeline.iter(0, Order::Desc);
while let Some(entry) = slice.next() {
if follows.contains(VIEWER, entry.uid) {
feed.push(entry.timestamp());
}
}
```

The ETS version uses two `:duplicate_bag` tables (a follow index keyed by viewer, and posts-by-author) and goes the other way — enumerate follows first, then fetch only relevant posts:

```elixir
:ets.lookup(rev, viewer)
|> Enum.flat_map(fn {_v, author} -> :ets.lookup(posts, author) end)
```

Run both:

```bash
cargo bench --bench feed # Rust
elixir benches/feed_ets.exs # ETS
```

The interesting result: the 1,000x+ gap visible on isolated `contains` calls largely disappears in the realistic workload. At low follow counts the ETS approach is actually faster because it touches only the data it needs; at high follow counts Rust pulls ahead. See `benches/feed.rs` and `benches/feed_ets.exs` for the full setup.
73 changes: 73 additions & 0 deletions benches/feed.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use std::hint::black_box;
use std::sync::Arc;

use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};

use crimeline::arena::{Cold, Hot};
use crimeline::{Order, Sharding, Timeline, UserMap, Window};

/// Number of authors with posts in the timeline; author ids are `0..AUTHORS`.
const AUTHORS: u32 = 1_000;
/// Number of posts added for each author.
const POSTS_PER_AUTHOR: usize = 5;
/// Length in bytes of the blob attached to every post.
const BLOB_SIZE: usize = 256;
/// Follow counts to benchmark. `build_follows` clamps each one to `AUTHORS`,
/// so a value above `AUTHORS` means "follows every author".
const FOLLOW_COUNTS: &[u32] = &[100, 500, 2_000];

/// Id of the viewer whose feed is rendered; lies outside the `0..AUTHORS` id range.
const VIEWER: u32 = 10_000;

fn make_blob(i: usize) -> Vec<u8> {
vec![(i & 0xff) as u8; BLOB_SIZE]
}

/// Builds the timeline used by the benchmark.
///
/// Every author in `0..AUTHORS` gets `POSTS_PER_AUTHOR` posts. Posts are
/// numbered sequentially from 0 in author-major order (the `cid` argument),
/// the post numbered `cid` is added with timestamp `cid * 10`, and its blob
/// comes from `make_blob(cid)`. All posts go into one `Hot` arena, which is
/// then converted into an `Arc<Cold>` and wrapped in a `Timeline`.
///
/// # Panics
///
/// Panics if creating the arena, adding a post, or the conversion fails.
fn build_timeline() -> Timeline {
    let per_author = POSTS_PER_AUTHOR as u64;
    let post_count = AUTHORS as u64 * per_author;

    // Window length: one step past the last post, with timestamps spaced 10 apart.
    let span = (post_count as u32 + 1) * 10;
    let mut arena = Hot::new(Window::new(0, span)).unwrap();

    for cid in 0..post_count {
        // Posts are laid out author-major, so the author follows from the post number.
        let author = (cid / per_author) as u32;
        let blob = make_blob(cid as usize);
        arena.add(author, cid, cid * 10, &blob).unwrap();
    }

    let frozen: Arc<Cold> = arena.try_into().unwrap();
    Timeline::new(vec![frozen])
}

/// Builds a follow map in which `VIEWER` follows the first `n_follows` authors.
///
/// The count is clamped to `AUTHORS`, so asking for more follows than there
/// are authors results in the viewer following every author.
fn build_follows(n_follows: u32) -> UserMap {
    let graph = UserMap::new(Sharding::S128);
    let followed = std::cmp::min(n_follows, AUTHORS);
    (0..followed).for_each(|author| {
        graph.add(VIEWER, author);
    });
    graph
}

/// Benchmarks the "render the feed" path once per entry of `FOLLOW_COUNTS`.
///
/// The timeline is built once and shared by all runs; a fresh follow map is
/// built for each follow count. Every measured iteration walks the timeline
/// via `iter(0, Order::Desc)` and collects the `timestamp()` of each entry
/// whose `uid` is followed by `VIEWER`.
fn bench_feed(c: &mut Criterion) {
    let mut group = c.benchmark_group("feed");
    let timeline = build_timeline();

    for &follow_count in FOLLOW_COUNTS {
        let follows = build_follows(follow_count);
        let id = BenchmarkId::from_parameter(follow_count);

        group.bench_with_input(id, &follow_count, |bencher, _| {
            bencher.iter(|| {
                let mut cursor = timeline.iter(0, Order::Desc);
                // Deliberately not pre-sized: the allocation is part of the measured work.
                let mut feed = Vec::new();
                loop {
                    let Some(entry) = cursor.next() else { break };
                    if follows.contains(VIEWER, entry.uid) {
                        feed.push(entry.timestamp());
                    }
                }
                // Keep the optimizer from discarding the collected feed.
                black_box(feed)
            });
        });
    }

    group.finish();
}

// Collect `bench_feed` into a Criterion benchmark group named `benches`.
criterion_group!(benches, bench_feed);

// Expand to the `main` function that runs the `benches` group.
criterion_main!(benches);
106 changes: 106 additions & 0 deletions benches/feed_ets.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
Mix.install([{:benchee, "~> 1.0"}, {:roaring, "~> 0.13"} ])

# Mirrors benches/feed.rs — "get timeline for viewer V" using two ETS tables (follows by viewer, posts by author).
#
# Run: elixir benches/feed_ets.exs

defmodule FeedEts do
  @moduledoc """
  ETS-backed version of the feed query.

  Two `:duplicate_bag` tables are used: a follow table holding
  `{viewer, author}` tuples and a posts table holding `{author, post}` tuples.
  """

  @authors 1_000
  @posts_per_author 5
  @viewer 10_000

  @doc "Number of authors that have posts."
  def authors, do: @authors

  @doc "Id of the viewer whose feed is rendered."
  def viewer, do: @viewer

  @doc """
  Creates and fills the follow and posts tables.

  The viewer follows authors `0..n_follows - 1`, clamped to the number of
  authors. Every author gets `@posts_per_author` posts numbered
  `author * @posts_per_author + p`.

  Returns `{rev, posts}`, the two ETS table identifiers. The caller owns the
  tables and is responsible for deleting them.
  """
  def setup(n_follows) do
    rev = :ets.new(:follows_rev, [:duplicate_bag, :public, {:read_concurrency, true}])
    posts = :ets.new(:posts, [:duplicate_bag, :public, {:read_concurrency, true}])

    # The explicit `//1` step keeps the range ascending, so `n_follows == 0`
    # produces no follows instead of iterating the descending range `0..-1`.
    follow_count = min(n_follows, @authors)
    rev_tuples = for author <- 0..(follow_count - 1)//1, do: {@viewer, author}
    :ets.insert(rev, rev_tuples)

    post_tuples =
      for author <- 0..(@authors - 1)//1,
          p <- 0..(@posts_per_author - 1)//1,
          do: {author, author * @posts_per_author + p}

    :ets.insert(posts, post_tuples)

    {rev, posts}
  end

  @doc """
  Returns the `{author, post}` tuples of every author that `viewer` follows.

  Looks up the viewer's follows in `rev`, then fetches each followed author's
  posts from `posts`.
  """
  def get_feed(rev, posts, viewer) do
    :ets.lookup(rev, viewer)
    |> Enum.flat_map(fn {_v, author} -> :ets.lookup(posts, author) end)
  end
end

defmodule FeedRoaring do
  @moduledoc """
  Variant of the feed query that keeps the viewer's follows in a
  `RoaringBitmap64` instead of an ETS table. Posts still live in a
  `:duplicate_bag` ETS table of `{author, post}` tuples.
  """

  @authors 1_000
  @posts_per_author 5
  @viewer 10_000

  @doc "Number of authors that have posts."
  def authors, do: @authors

  @doc "Id of the viewer whose feed is rendered."
  def viewer, do: @viewer

  @doc """
  Builds the follow bitmap and fills the posts table.

  The bitmap contains authors `0..n_follows - 1`, clamped to the number of
  authors. Every author gets `@posts_per_author` posts numbered
  `author * @posts_per_author + p`.

  Returns `{rev, posts}`: the bitmap and the ETS table identifier. The caller
  is responsible for deleting the table.
  """
  def setup(n_follows) do
    posts = :ets.new(:posts, [:duplicate_bag, :public, {:read_concurrency, true}])

    # A single viewer as in this case simplifies the setup for roaring because we need a single bitmap.
    # Generalizing to multiple viewers would require a bitmap per viewer and an additional lookup of
    # the correct bitmap first.
    #
    # The explicit `//1` step keeps the range ascending, so `n_follows == 0`
    # produces an empty list instead of `[0, -1]` from the descending `0..-1`.
    follow_count = min(n_follows, @authors)
    follows = for author <- 0..(follow_count - 1)//1, do: author
    {:ok, rev} = RoaringBitmap64.from_list(follows)

    post_tuples =
      for author <- 0..(@authors - 1)//1,
          p <- 0..(@posts_per_author - 1)//1,
          do: {author, author * @posts_per_author + p}

    :ets.insert(posts, post_tuples)

    {rev, posts}
  end

  @doc """
  Returns the `{author, post}` tuples of every author stored in the bitmap `rev`.

  The viewer argument is ignored because the bitmap already belongs to a
  single viewer.
  """
  def get_feed(rev, posts, _viewer) do
    {:ok, authors} = RoaringBitmap64.to_list(rev)

    Enum.flat_map(authors, fn author -> :ets.lookup(posts, author) end)
  end
end

# Follow counts to benchmark. Both setups clamp the count to the number of
# authors, so 2_000 means "follows every author".
follow_counts = [100, 500, 2_000]

# Benchee inputs: label (the count rendered as a string) => count.
inputs = Map.new(follow_counts, fn count -> {Integer.to_string(count), count} end)

# ETS job: both tables are created per scenario and deleted afterwards.
ets_job = {
  fn {rev, posts, viewer} -> FeedEts.get_feed(rev, posts, viewer) end,
  before_scenario: fn n_follows ->
    {rev, posts} = FeedEts.setup(n_follows)
    {rev, posts, FeedEts.viewer()}
  end,
  after_scenario: fn {rev, posts, _viewer} ->
    :ets.delete(rev)
    :ets.delete(posts)
  end
}

# Roaring job: only the posts table is an ETS table that needs deleting.
roaring_job = {
  fn {rev, posts, viewer} -> FeedRoaring.get_feed(rev, posts, viewer) end,
  before_scenario: fn n_follows ->
    {rev, posts} = FeedRoaring.setup(n_follows)
    {rev, posts, FeedRoaring.viewer()}
  end,
  after_scenario: fn {_rev, posts, _viewer} ->
    :ets.delete(posts)
  end
}

Benchee.run(
  %{
    "FeedEts.feed" => ets_job,
    "FeedRoaring.feed" => roaring_job
  },
  inputs: inputs,
  title: "feed",
  print: [configuration: false]
)
Loading