From 6267da8640f67e45ba56d1886f4e7b57a3a72541 Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Wed, 13 May 2026 09:24:44 -0700 Subject: [PATCH 1/5] Add Stefanie to the README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 44e0723..11a7d43 100644 --- a/README.md +++ b/README.md @@ -91,3 +91,4 @@ scripts/ ``` Each round's `data/` directory is generated locally and gitignored. +This is Stefanie's PR From 1ea779ca37edb9d095267947fac97ad3e1aad51f Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Wed, 13 May 2026 09:52:19 -0700 Subject: [PATCH 2/5] Use deque and Counter to speed up --- rounds/1_histogram/baseline.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/rounds/1_histogram/baseline.py b/rounds/1_histogram/baseline.py index 81982fa..fd416e8 100644 --- a/rounds/1_histogram/baseline.py +++ b/rounds/1_histogram/baseline.py @@ -4,21 +4,10 @@ tokens) in a binary payload. """ +from collections import Counter, deque +from pathlib import Path + def compute_histogram(path: str) -> dict[bytes, int]: """Frequency of every 2-byte bigram in the file at ``path``.""" - # Step 1: read the whole file into memory as a single bytes object. - with open(path, "rb") as f: - data = f.read() - - # Step 2: slide a 2-byte window across the buffer. For ``b"ABCD"`` the - # iterations produce ``b"AB"``, ``b"BC"``, then ``b"CD"``. For each window, - # bump the matching bucket in a ``dict`` keyed by the bigram itself. - counts: dict[bytes, int] = {} - for i in range(len(data) - 1): - bigram = data[i : i + 2] - if bigram in counts: - counts[bigram] += 1 - else: - counts[bigram] = 1 - return counts + return Counter(deque(Path(path).read_bytes(), maxlen=2)) From 02d44c8d9bf7f208f5236ecf2698fe90bcee1f6b Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Wed, 13 May 2026 09:58:59 -0700 Subject: [PATCH 3/5] Use open() --- rounds/1_histogram/baseline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rounds/1_histogram/baseline.py b/rounds/1_histogram/baseline.py index fd416e8..1e55b89 100644 --- a/rounds/1_histogram/baseline.py +++ b/rounds/1_histogram/baseline.py @@ -5,9 +5,10 @@ """ from collections import Counter, deque -from pathlib import Path def compute_histogram(path: str) -> dict[bytes, int]: """Frequency of every 2-byte bigram in the file at ``path``.""" - return Counter(deque(Path(path).read_bytes(), maxlen=2)) + with open(path, "rb") as f: + data = f.read() + return Counter(deque(data, maxlen=2)) From 9d017180c421ef2a9366e609b0e094ff9728b7ac Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Wed, 13 May 2026 10:08:22 -0700 Subject: [PATCH 4/5] Counter --- rounds/1_histogram/baseline.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rounds/1_histogram/baseline.py b/rounds/1_histogram/baseline.py index 1e55b89..3601f0c 100644 --- a/rounds/1_histogram/baseline.py +++ b/rounds/1_histogram/baseline.py @@ -4,11 +4,13 @@ tokens) in a binary payload. """ -from collections import Counter, deque +from collections import Counter +from itertools import pairwise def compute_histogram(path: str) -> dict[bytes, int]: """Frequency of every 2-byte bigram in the file at ``path``.""" with open(path, "rb") as f: data = f.read() - return Counter(deque(data, maxlen=2)) + + return Counter(bytes(bigram) for bigram in pairwise(data)) From 1a21fe6a548f19c0cd09c9175441cf2738bf4b5d Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Wed, 13 May 2026 10:20:21 -0700 Subject: [PATCH 5/5] Fix file --- rounds/1_histogram/baseline.py | 16 ++++++++++++---- rounds/1_histogram/solution.py | 9 +++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/rounds/1_histogram/baseline.py b/rounds/1_histogram/baseline.py index 3601f0c..81982fa 100644 --- a/rounds/1_histogram/baseline.py +++ b/rounds/1_histogram/baseline.py @@ -4,13 +4,21 @@ tokens) in a binary payload. """ -from collections import Counter -from itertools import pairwise - def compute_histogram(path: str) -> dict[bytes, int]: """Frequency of every 2-byte bigram in the file at ``path``.""" + # Step 1: read the whole file into memory as a single bytes object. with open(path, "rb") as f: data = f.read() - return Counter(bytes(bigram) for bigram in pairwise(data)) + # Step 2: slide a 2-byte window across the buffer. For ``b"ABCD"`` the + # iterations produce ``b"AB"``, ``b"BC"``, then ``b"CD"``. For each window, + # bump the matching bucket in a ``dict`` keyed by the bigram itself. + counts: dict[bytes, int] = {} + for i in range(len(data) - 1): + bigram = data[i : i + 2] + if bigram in counts: + counts[bigram] += 1 + else: + counts[bigram] = 1 + return counts diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py index dffbee5..987c315 100644 --- a/rounds/1_histogram/solution.py +++ b/rounds/1_histogram/solution.py @@ -5,10 +5,11 @@ own faster implementation. """ +from collections import Counter +from itertools import pairwise +from pathlib import Path + def compute_histogram(path: str) -> dict[bytes, int]: """Frequency of every 2-byte bigram in the file at ``path``.""" - # TODO: remove this delegation and write your own implementation here. - from .baseline import compute_histogram as _baseline - - return _baseline(path) + return Counter(bytes(bigram) for bigram in pairwise(Path(path).read_bytes()))