Skip to content

Commit a431308

Browse files
committed
Add solutions for 1 and 2
1 parent 8158213 commit a431308

2 files changed

Lines changed: 56 additions & 6 deletions

File tree

rounds/1_histogram/solution.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88

99
def compute_histogram(path: str) -> dict[bytes, int]:
    """Frequency of every 2-byte bigram in the file at ``path``.

    The file is read fully into memory and every overlapping pair of
    adjacent bytes is counted.

    Args:
        path: Path of the file to scan; it is opened in binary mode.

    Returns:
        A dict with one entry for each of the 65536 possible 2-byte
        sequences (keys ordered from ``b"\\x00\\x00"`` to ``b"\\xff\\xff"``),
        mapping the bigram to its number of occurrences. Bigrams that never
        occur map to ``0``.
    """
    with open(path, "rb") as f:
        data = f.read()

    # counts[a][b] is how many times byte ``a`` is immediately followed by
    # byte ``b``.
    counts = [[0] * 256 for _ in range(256)]

    # Pair each byte with its successor. The memoryview gives the shifted
    # sequence without copying the whole file; zip stops at the shorter
    # input, so empty and single-byte files simply produce no pairs.
    for first, second in zip(data, memoryview(data)[1:]):
        counts[first][second] += 1

    return {
        bytes((first, second)): count
        for first, row in enumerate(counts)
        for second, count in enumerate(row)
    }

rounds/2_corruption/solution.py

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,57 @@
55
own faster implementation.
66
"""
77

8-
from .baseline import find_corruptions as _baseline
8+
from concurrent.futures import ThreadPoolExecutor
9+
10+
11+
def compare_bytes(ref: bytes, cor: bytes, offset: int) -> list[tuple[int, int]]:
    """Locate the runs of positions at which ``ref`` and ``cor`` differ.

    Args:
        ref: Reference bytes; its length decides how many positions are
            compared.
        cor: Bytes compared against ``ref`` position by position.
        offset: Value added to every reported start position.

    Returns:
        ``(offset + start, length)`` tuples, one per maximal run of
        consecutive differing positions, in ascending order.
    """
    found: list[tuple[int, int]] = []
    run_start = -1  # -1 means no run of differing bytes is currently open

    for pos, expected in enumerate(ref):
        differs = expected != cor[pos]
        if differs and run_start < 0:
            # First differing byte of a new run.
            run_start = pos
        elif not differs and run_start >= 0:
            # A matching byte closes the open run.
            found.append((offset + run_start, pos - run_start))
            run_start = -1

    # A run that reaches the end of the input is still open here.
    if run_start >= 0:
        found.append((offset + run_start, len(ref) - run_start))
    return found
924

1025

1126
def find_corruptions(ref_path: str, cor_path: str) -> list[tuple[int, int]]:
    """Return ``[(offset, length), ...]`` for every differing byte range.

    Both files are read fully into memory, split into at most 16 contiguous
    chunks, and each chunk pair is handed to ``compare_bytes`` on a thread
    pool. Ranges that touch across a chunk boundary are merged so that each
    reported range is a maximal run of differing bytes.

    Args:
        ref_path: Path of the reference file.
        cor_path: Path of the possibly corrupted file.

    Returns:
        ``(offset, length)`` tuples in ascending offset order.

    Raises:
        ValueError: If the two files differ in length.
    """
    with open(ref_path, "rb") as f:
        ref = f.read()
    with open(cor_path, "rb") as f:
        cor = f.read()
    if len(ref) != len(cor):
        raise ValueError("reference and corrupted files differ in length")

    total = len(ref)
    if total == 0:
        # Nothing to compare; this also avoids a zero chunk size below.
        return []

    workers = 16
    # Ceiling division: with floor division the trailing ``total % workers``
    # bytes fell outside every chunk and were never compared (and files
    # shorter than ``workers`` bytes were not compared at all).
    chunk_size = -(-total // workers)

    # NOTE(review): compare_bytes is a pure-Python loop, so on a GIL build
    # these threads presumably do not run in parallel -- confirm by profiling.
    with ThreadPoolExecutor(workers) as ex:
        futures = [
            ex.submit(
                compare_bytes,
                ref[start : start + chunk_size],
                cor[start : start + chunk_size],
                start,
            )
            for start in range(0, total, chunk_size)
        ]
        results = [future.result() for future in futures]

    ranges: list[tuple[int, int]] = []
    for result in results:
        if not result:
            continue
        if ranges and result[0][0] == ranges[-1][0] + ranges[-1][1]:
            # The previous range ends exactly where this chunk's first range
            # begins: one run was split by the chunk boundary, so rejoin it.
            ranges[-1] = (ranges[-1][0], ranges[-1][1] + result[0][1])
            ranges.extend(result[1:])
        else:
            ranges.extend(result)

    return ranges

0 commit comments

Comments
 (0)