File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 44tokens) in a binary payload.
55"""
66
7+ from collections import Counter , deque
8+ from pathlib import Path
9+
710
def compute_histogram(path: str) -> dict[bytes, int]:
    """Return the frequency of every 2-byte bigram in the file at ``path``.

    The file is read in binary mode and a 2-byte window is slid across it
    one byte at a time, so windows overlap: ``b"ABCD"`` yields ``b"AB"``,
    ``b"BC"`` and ``b"CD"``.

    Args:
        path: Filesystem path of the file to analyse.

    Returns:
        A plain ``dict`` mapping each 2-byte ``bytes`` bigram to the number
        of times it occurs. Files shorter than two bytes yield ``{}``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block.
    from collections import Counter

    with open(path, "rb") as handle:
        payload = handle.read()

    # NOTE: ``deque(payload, maxlen=2)`` is NOT a sliding window -- it keeps
    # only the final two items (as ints), so it cannot be used to enumerate
    # bigrams. Slice explicitly so each key is a 2-byte ``bytes`` object.
    # ``range`` is empty when ``len(payload) < 2``, giving an empty result.
    tally = Counter(payload[start:start + 2] for start in range(len(payload) - 1))

    # Hand back a plain dict to match the annotated return type (a Counter
    # would silently return 0 for missing keys instead of raising KeyError).
    return dict(tally)
You can’t perform that action at this time.
0 commit comments