File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1- """Your Round 1 solution — byte-pair histogram.
1+ """Your Round 1 solution — byte-pair histogram."""
22
3- **Edit this file.** It currently delegates to ``baseline.py`` so everything
4- passes out of the box. Replace the body of ``compute_histogram`` with your
5- own faster implementation.
6- """
3+ import numpy as np
4+ import mmap
75
86
def compute_histogram(path: str) -> dict[bytes, int]:
    """Return the frequency of every 2-byte bigram in the file at ``path``.

    Bigrams overlap: a file of ``n`` bytes contributes ``n - 1`` bigrams
    (bytes ``i`` and ``i + 1`` for every valid ``i``).

    Args:
        path: Filesystem path of the file to scan; it is opened in binary mode.

    Returns:
        A dict mapping each 2-byte ``bytes`` bigram that occurs at least once
        to its occurrence count. Files shorter than two bytes (including
        empty files) yield an empty dict.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, "rb") as f:
        raw = f.read()

    # Fewer than two bytes means no bigram exists. Returning early also
    # avoids the ValueError that memory-mapping an empty file would raise.
    if len(raw) < 2:
        return {}

    # Zero-copy, read-only view over the bytes object just read.
    data = np.frombuffer(raw, dtype=np.uint8)

    # Encode each adjacent pair as a 16-bit key: high byte is the first byte
    # of the bigram, low byte the second. Only the shifted operand needs
    # upcasting; the OR promotes the uint8 operand automatically.
    keys = (data[:-1].astype(np.uint16) << 8) | data[1:]

    # One slot per possible bigram (256 * 256).
    counts = np.bincount(keys, minlength=65536)

    # Convert to plain Python ints so the dict holds ``int`` values and the
    # keys can be rendered with ``int.to_bytes``.
    present = np.nonzero(counts)[0]
    return {
        key.to_bytes(2, "big"): count
        for key, count in zip(present.tolist(), counts[present].tolist())
    }
You can’t perform that action at this time.
0 commit comments