Skip to content

Commit a02fce2

Browse files
committed
Round 1 fixes
1 parent c0ae909 commit a02fce2

1 file changed

Lines changed: 9 additions & 16 deletions

File tree

rounds/1_histogram/solution.py

Lines changed: 9 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,15 @@
1-
"""Your Round 1 solution — byte-pair histogram.
1+
"""Your Round 1 solution — byte-pair histogram."""
2 2

3-
**Edit this file.** It currently delegates to ``baseline.py`` so everything
4-
passes out of the box. Replace the body of ``compute_histogram`` with your
5-
own faster implementation.
6-
"""
3+
import numpy as np
4+
import mmap
7 5

8 6

9 7
def compute_histogram(path: str) -> dict[bytes, int]:
10 8
"""Frequency of every 2-byte bigram in the file at ``path``."""
11 9
with open(path, "rb") as f:
12-
data = f.read()
13-
counts: dict[bytes, int] = {}
14-
bigrams_seen = set()
15-
for i in range(len(data) - 1):
16-
bigram = data[i : i + 2]
17-
if bigram in bigrams_seen:
18-
counts[bigram] += 1
19-
else:
20-
bigrams_seen.add(bigram)
21-
counts[bigram] = 1
22-
return counts
10+
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
11+
# Copy while mmap is still open — no exported pointer issue
12+
data = np.frombuffer(mm, dtype=np.uint8).copy()
13+
keys = data[:-1].astype(np.uint16) << 8 | data[1:].astype(np.uint16)
14+
counts = np.bincount(keys, minlength=65536)
15+
return {bytes([k >> 8, k & 0xFF]): int(counts[k]) for k in np.nonzero(counts)[0]}

0 commit comments

Comments
 (0)