Skip to content

Commit 5dc4655

Browse files
committed
iteration one
Signed-off-by: Drew Wock <dwock@esri.com>
1 parent 1842678 commit 5dc4655

1 file changed

Lines changed: 22 additions & 2 deletions

File tree

rounds/1_histogram/solution.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,30 @@
55
own faster implementation.
66
"""
77

8+
from concurrent.futures import ProcessPoolExecutor
9+
import os
10+
11+
CHUNK_SIZE = 8 * 1024 * 1024  # default number of bytes per read call (8 MiB)


def compute_histogram(
    path: str, *, chunk_size: int = CHUNK_SIZE
) -> dict[bytes, int]:
    """Count every overlapping 2-byte bigram in the file at ``path``.

    The file is opened in binary mode and consumed ``chunk_size`` bytes at a
    time, so only one chunk plus a fixed 65536-entry count table is held in
    memory regardless of file size. Bigrams that straddle two chunks are
    counted because the sliding window is carried from one chunk to the next.

    Args:
        path: Location of the file to scan.
        chunk_size: Number of bytes requested per read; must be positive.

    Returns:
        A mapping from each bigram that occurs at least once (as a 2-byte
        ``bytes`` object) to its number of occurrences. Keys are inserted in
        ascending big-endian order. Files shorter than two bytes yield ``{}``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    # One slot per possible bigram, indexed by (first_byte << 8) | second_byte.
    counts = [0] * 65536

    # ``window`` holds the index of the most recent bigram; its low byte is
    # the last byte seen. It stays ``None`` until the first byte is read.
    window = None
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            stream = iter(chunk)
            if window is None:
                # The very first byte only seeds the window; it does not
                # complete a bigram on its own.
                window = next(stream)
            for byte in stream:
                # Shift the previous byte into the high position, drop the
                # byte before it, and append the new byte.
                window = ((window << 8) & 0xFF00) | byte
                counts[window] += 1

    return {
        index.to_bytes(2, byteorder="big"): count
        for index, count in enumerate(counts)
        if count
    }

0 commit comments

Comments
 (0)