Skip to content

Commit 3617b63

Browse files
buzhash64: use own CSPRNG
1 parent bb7a464 commit 3617b63

4 files changed

Lines changed: 21 additions & 18 deletions

File tree

src/borg/chunkers/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@ def get_chunker(algo, *params, **kw):
1313
# key.chunk_seed only has 32bits
1414
seed = key.chunk_seed if key is not None else 0
1515
# for buzhash64, we want a much longer key, so we derive it from the id key
16-
bh64_key = key.derive_key(salt=b"", domain=b"buzhash64", size=32, from_id_key=True) if key is not None else b""
16+
bh64_key = (
17+
key.derive_key(salt=b"", domain=b"buzhash64", size=32, from_id_key=True) if key is not None else b"\0" * 32
18+
)
1719
if algo == "buzhash":
1820
return Chunker(seed, *params, sparse=sparse)
1921
if algo == "buzhash64":

src/borg/chunkers/buzhash64.pyx

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,15 @@
33
API_VERSION = '1.2_01'
44

55
import cython
6-
import random
76
import time
87

98
from cpython.bytes cimport PyBytes_AsString
109
from libc.stdint cimport uint8_t, uint64_t
1110
from libc.stdlib cimport malloc, free
1211
from libc.string cimport memcpy, memmove
1312

13+
from ..crypto.low_level import CSPRNG
14+
1415
from ..constants import CH_DATA, CH_ALLOC, CH_HOLE, zeros
1516
from .reader import FileReader, Chunk
1617

@@ -45,7 +46,7 @@ cdef uint64_t* buzhash64_init_table(bytes key):
4546
Balanced means that for each bit position 0..63, exactly 50% of the table values have the bit set to 1.
4647
"""
4748
# Create deterministic random number generator
48-
rng = random.Random(int.from_bytes(key, 'big'))
49+
rng = CSPRNG(key)
4950

5051
cdef int i, j, bit_pos
5152
cdef uint64_t* table = <uint64_t*>malloc(2048) # 256 * sizeof(uint64_t)

src/borg/testsuite/chunkers/buzhash64_self_test.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,50 +25,50 @@ def test_chunkify64(self):
2525
self.assert_equal(cf(ChunkerBuzHash64(key0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b""))), [])
2626
self.assert_equal(
2727
cf(ChunkerBuzHash64(key0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
28-
[b"foobarboobaz", b"foobarboobaz", b"foobarboobaz"],
28+
[b"foobarb", b"ooba", b"zf", b"oobarb", b"ooba", b"zf", b"oobarb", b"oobaz"],
2929
)
3030
self.assert_equal(
3131
cf(ChunkerBuzHash64(key1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
32-
[b"foobar", b"boob", b"az", b"foobar", b"boob", b"az", b"foobar", b"boobaz"],
32+
[b"fo", b"oba", b"rb", b"oob", b"azf", b"ooba", b"rb", b"oob", b"azf", b"ooba", b"rb", b"oobaz"],
3333
)
3434
self.assert_equal(
3535
cf(ChunkerBuzHash64(key2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
36-
[b"foobarb", b"oob", b"az", b"foobarb", b"oob", b"az", b"foobarb", b"oobaz"],
36+
[b"foobar", b"booba", b"zfoobar", b"booba", b"zfoobar", b"boobaz"],
3737
)
3838
self.assert_equal(
3939
cf(ChunkerBuzHash64(key0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
40-
[b"foobarb", b"oobazf", b"oobarb", b"oobazf", b"oobarb", b"oobaz"],
40+
[b"foobarbo", b"obaz", b"foobarbo", b"obaz", b"foobarbo", b"obaz"],
4141
)
4242
self.assert_equal(
4343
cf(ChunkerBuzHash64(key1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
44-
[b"foobarb", b"oobaz", b"foobarb", b"oobaz", b"foobarb", b"oobaz"],
44+
[b"foobarboob", b"azfoobarboob", b"azfoobarboobaz"],
4545
)
4646
self.assert_equal(
4747
cf(ChunkerBuzHash64(key2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
48-
[b"foobarbooba", b"zfoobarbooba", b"zfoobarboobaz"],
48+
[b"foob", b"arboobazfoob", b"arboobazfoob", b"arboobaz"],
4949
)
5050
self.assert_equal(
5151
cf(ChunkerBuzHash64(key0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
52-
[b"foobarboobazf", b"oobarboobazf", b"oobarboobaz"],
52+
[b"foobarbo", b"obazfoobarbo", b"obazfoobarbo", b"obaz"],
5353
)
5454
self.assert_equal(
5555
cf(ChunkerBuzHash64(key1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
56-
[b"foobarbo", b"obazfoobarb", b"oobazfoobarb", b"oobaz"],
56+
[b"foobarboob", b"azfoobarboob", b"azfoobarboobaz"],
5757
)
5858
self.assert_equal(
5959
cf(ChunkerBuzHash64(key2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
60-
[b"foobarbooba", b"zfoobarbooba", b"zfoobarboobaz"],
60+
[b"foobarboobazfoob", b"arboobazfoob", b"arboobaz"],
6161
)
6262

6363
def test_buzhash64(self):
64-
self.assert_equal(buzhash64(b"abcdefghijklmnop", key0), 15080163834872228739)
65-
self.assert_equal(buzhash64(b"abcdefghijklmnop", key1), 9505908538285923444)
64+
self.assert_equal(buzhash64(b"abcdefghijklmnop", key0), 17414563089559790077)
65+
self.assert_equal(buzhash64(b"abcdefghijklmnop", key1), 1397285894609271345)
6666
expected = buzhash64(b"abcdefghijklmnop", key0)
6767
previous = buzhash64(b"Xabcdefghijklmno", key0)
6868
this = buzhash64_update(previous, ord("X"), ord("p"), 16, key0)
6969
self.assert_equal(this, expected)
7070
# Test with more than 63 bytes to make sure our barrel_shift macro works correctly
71-
self.assert_equal(buzhash64(b"abcdefghijklmnopqrstuvwxyz" * 4, key0), 1936382207158378368)
71+
self.assert_equal(buzhash64(b"abcdefghijklmnopqrstuvwxyz" * 4, key0), 17683050804041322250)
7272

7373
def test_small_reads64(self):
7474
class SmallReadFile:

src/borg/testsuite/chunkers/buzhash64_test.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ def twist(size):
3636
if minexp >= maxexp:
3737
continue
3838
for maskbits in (4, 7, 10, 12):
39-
for key in (b"first_key", b"second_key"):
39+
for key in (key0, key1):
4040
fh = BytesIO(data)
4141
chunker = ChunkerBuzHash64(key, minexp, maxexp, maskbits, winsize)
4242
chunks = [H(c) for c in cf(chunker.chunkify(fh, -1))]
@@ -46,13 +46,13 @@ def twist(size):
4646
# Future chunker optimisations must not change this, or existing repos will bloat.
4747
overall_hash = H(b"".join(runs))
4848
print(overall_hash.hex())
49-
assert overall_hash == hex_to_bin("db4b37fbe0cb841d79cfbb52bff8ac2f11040bf83a7d389640c7afb314fc4bfb")
49+
assert overall_hash == hex_to_bin("676676133fb3621ada0f6cc1b18002c3e37016c9469217d18f8e382fadaf23fd")
5050

5151

5252
def test_buzhash64_chunksize_distribution():
5353
data = os.urandom(1048576)
5454
min_exp, max_exp, mask = 10, 16, 14 # chunk size target 16kiB, clip at 1kiB and 64kiB
55-
chunker = ChunkerBuzHash64(b"", min_exp, max_exp, mask, 4095)
55+
chunker = ChunkerBuzHash64(key0, min_exp, max_exp, mask, 4095)
5656
f = BytesIO(data)
5757
chunks = cf(chunker.chunkify(f))
5858
del chunks[-1] # get rid of the last chunk, it can be smaller than 2**min_exp

0 commit comments

Comments
 (0)