Add solutions for rounds 1 (histogram) and 2 (corruption)

EmilyBZhang · EmilyBZhang · commit a431308b37d0 · 2026-05-13T12:08:20.000-07:00
diff --git a/rounds/1_histogram/solution.py b/rounds/1_histogram/solution.py
@@ -8,7 +8,17 @@
 
 def compute_histogram(path: str) -> dict[bytes, int]:
-    """Frequency of every 2-byte bigram in the file at ``path``."""
-    # TODO: remove this delegation and write your own implementation here.
-    from .baseline import compute_histogram as _baseline
+    """Count every overlapping 2-byte bigram in the file at ``path``.
+
+    The returned dict has an entry for all 65536 possible bigrams, keyed by
+    2-byte ``bytes`` objects in ascending (first byte, second byte) order;
+    bigrams that never occur map to ``0``.
+    """
+    with open(path, "rb") as src:
+        payload = src.read()
 
-    return _baseline(path)
+    # One flat table indexed by ``(first << 8) | second`` instead of a 2-D grid.
+    tally = [0] * 65536
+    # Pair each byte with its successor; the memoryview slice avoids a copy.
+    for first, second in zip(payload, memoryview(payload)[1:]):
+        tally[(first << 8) | second] += 1
+    return {bytes((code >> 8, code & 0xFF)): hits for code, hits in enumerate(tally)}
diff --git a/rounds/2_corruption/solution.py b/rounds/2_corruption/solution.py
@@ -5,10 +5,97 @@
 own faster implementation.
 """
 
-from .baseline import find_corruptions as _baseline
+from concurrent.futures import ThreadPoolExecutor
+
+
+def compare_bytes(ref: bytes, cor: bytes, offset: int) -> list[tuple[int, int]]:
+    """Return the runs of differing bytes between two equal-length chunks.
+
+    Args:
+        ref: Chunk of the reference data.
+        cor: Chunk of the possibly corrupted data, same length as ``ref``.
+        offset: Position of ``ref[0]`` within the whole file; added to every
+            reported start so the results are absolute.
+
+    Returns:
+        ``(start, length)`` tuples in ascending order, one per maximal run of
+        consecutive mismatching bytes.
+
+    Raises:
+        ValueError: If ``ref`` and ``cor`` differ in length.
+    """
+    if len(ref) != len(cor):
+        raise ValueError("reference and corrupted chunks differ in length")
+    # Fast path: one C-level comparison lets an identical chunk skip the
+    # per-byte Python loop below.
+    if ref == cor:
+        return []
+
+    ranges: list[tuple[int, int]] = []
+    start: int | None = None  # index where the current mismatch run began
+    for i, (expected, actual) in enumerate(zip(ref, cor)):
+        if expected != actual:
+            if start is None:
+                start = i
+        elif start is not None:
+            ranges.append((start + offset, i - start))
+            start = None
+    if start is not None:
+        # A run still open here extends to the end of the chunk.
+        ranges.append((start + offset, len(ref) - start))
+    return ranges
 
 
 def find_corruptions(ref_path: str, cor_path: str) -> list[tuple[int, int]]:
-    """Return ``[(offset, length), ...]`` for every differing byte range."""
-    # TODO: remove this delegation and write your own implementation here.
-    return _baseline(ref_path, cor_path)
+    """Return ``[(offset, length), ...]`` for every differing byte range.
+
+    Both files are read fully into memory, split into at most 16 chunks that
+    together cover every byte, and compared chunk by chunk on a thread pool.
+    A run of differing bytes that crosses a chunk boundary is reported as a
+    single range.
+
+    Args:
+        ref_path: Path of the reference file.
+        cor_path: Path of the possibly corrupted file.
+
+    Returns:
+        ``(offset, length)`` tuples sorted by ascending offset.
+
+    Raises:
+        ValueError: If the two files differ in length.
+    """
+    with open(ref_path, "rb") as f:
+        ref = f.read()
+    with open(cor_path, "rb") as f:
+        cor = f.read()
+    if len(ref) != len(cor):
+        raise ValueError("reference and corrupted files differ in length")
+
+    N = 16
+    # Round the chunk size up so the chunks cover the whole file; rounding down
+    # would leave the last ``len(ref) % N`` bytes unchecked. ``max`` keeps the
+    # ``range`` step non-zero for an empty file.
+    chunk_size = max(1, -(-len(ref) // N))
+
+    with ThreadPoolExecutor(N) as ex:
+        futures = [
+            ex.submit(
+                compare_bytes,
+                ref[begin : begin + chunk_size],
+                cor[begin : begin + chunk_size],
+                begin,
+            )
+            for begin in range(0, len(ref), chunk_size)
+        ]
+        results = [future.result() for future in futures]
+
+    ranges: list[tuple[int, int]] = []
+    for result in results:
+        for start, length in result:
+            # A range starting exactly where the previous one ended continues
+            # a run that was split by a chunk boundary, so merge the two.
+            if ranges and ranges[-1][0] + ranges[-1][1] == start:
+                ranges[-1] = (ranges[-1][0], ranges[-1][1] + length)
+            else:
+                ranges.append((start, length))
+    return ranges