Skip to content

Commit bb7a464

Browse files
Add our own CSPRNG based on AES256-CTR and a 256-bit key/seed.
Python's stdlib "random.Random" is not cryptographically strong, and the stdlib "secrets" module cannot be seeded and does not offer shuffle.
1 parent 17a5326 commit bb7a464

2 files changed

Lines changed: 345 additions & 0 deletions

File tree

src/borg/crypto/low_level.pyx

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ from math import ceil
4040

4141
from cpython cimport PyMem_Malloc, PyMem_Free
4242
from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
43+
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AsString
44+
from libc.stdlib cimport malloc, free
45+
from libc.stdint cimport uint8_t, uint32_t, uint64_t
46+
from libc.string cimport memset, memcpy
4347

4448
# Version tag of this extension module's API.
# NOTE(review): presumably compared by the importing Python code to detect a
# stale compiled build -- confirm against the caller.
API_VERSION = '1.3_01'
4549

@@ -714,3 +718,161 @@ def blake2b_256(key, data):
714718

715719
def blake2b_128(data):
    """Return the 16-byte (128-bit) unkeyed BLAKE2b digest of *data*."""
    hasher = hashlib.blake2b(digest_size=16)
    hasher.update(data)
    return hasher.digest()
721+
722+
723+
cdef class CSPRNG:
    """
    Cryptographically Secure Pseudo-Random Number Generator based on AES-CTR mode.

    This class provides methods for generating random bytes, random integers and
    for shuffling lists using a deterministic algorithm seeded with a 256-bit key:
    the same seed always yields the same output sequence.

    The implementation uses AES-256 in CTR mode with an all-zero IV and encrypts
    all-zero plaintext, so the produced ciphertext is the cipher's keystream.
    The keystream is generated in chunks of 4096 bytes and handed out from an
    internal buffer.
    """
    cdef EVP_CIPHER_CTX *ctx
    cdef uint8_t key[32]
    cdef uint8_t iv[16]
    cdef uint8_t zeros[4096]  # all-zero plaintext fed to the cipher on every refill
    cdef uint8_t buffer[4096]  # keystream bytes that were generated but not yet handed out
    cdef size_t buffer_size  # usable size of ``buffer`` in bytes
    cdef size_t buffer_pos  # index of the next unused byte in ``buffer``

    def __cinit__(self, bytes seed_key):
        """
        Initialize the CSPRNG with a 256-bit key.

        :param seed_key: A 32-byte key used as the seed for the CSPRNG
        :raises ValueError: if seed_key is not exactly 32 bytes long
        :raises MemoryError: if the cipher context can not be allocated
        :raises CryptoError: if the AES-CTR cipher can not be initialized
        """
        if len(seed_key) != 32:
            raise ValueError("Seed key must be 32 bytes (256 bits)")

        # Initialize context
        self.ctx = EVP_CIPHER_CTX_new()
        if self.ctx == NULL:
            raise MemoryError("Failed to allocate cipher context")

        self.key = seed_key[:32]

        # Initialize to zeros
        memset(self.iv, 0, 16)
        memset(self.zeros, 0, 4096)

        self.buffer_size = 4096
        self.buffer_pos = self.buffer_size  # Force refill on first use

        # Initialize the cipher
        if not EVP_EncryptInit_ex(self.ctx, EVP_aes_256_ctr(), NULL, self.key, self.iv):
            EVP_CIPHER_CTX_free(self.ctx)
            # __dealloc__ also runs when __cinit__ raises; reset the pointer so
            # the context is not freed a second time there.
            self.ctx = NULL
            raise CryptoError("Failed to initialize AES-CTR cipher")

    def __dealloc__(self):
        """Free resources when the object is deallocated."""
        if self.ctx != NULL:
            EVP_CIPHER_CTX_free(self.ctx)
            self.ctx = NULL

    cdef _refill_buffer(self):
        """
        Refill the internal buffer with random bytes.

        Overwrites the whole buffer with the next ``buffer_size`` bytes of the
        keystream and resets the read position to the start of the buffer.

        :raises CryptoError: if the encryption call fails or does not produce
                             exactly ``buffer_size`` bytes
        """
        cdef int outlen = 0

        # Encrypt zeros to get random bytes
        if not EVP_EncryptUpdate(self.ctx, self.buffer, &outlen, self.zeros, <int> self.buffer_size):
            raise CryptoError("Failed to generate random bytes")
        # outlen is a signed int, compare explicitly against the unsigned size.
        if outlen < 0 or <size_t> outlen != self.buffer_size:
            raise CryptoError("Unexpected length of random bytes")

        self.buffer_pos = 0

    def random_bytes(self, size_t n):
        """
        Generate n random bytes.

        Bytes are taken from the internal buffer, which is refilled as often as
        needed, so consecutive calls continue the same byte stream.

        :param n: Number of bytes to generate
        :return: a bytes object containing the random bytes
        """
        # Directly create a Python bytes object of the required size and fill
        # it in place, this avoids an intermediate copy.
        cdef object py_bytes = PyBytes_FromStringAndSize(NULL, n)
        cdef uint8_t *result = <uint8_t *>PyBytes_AsString(py_bytes)
        cdef size_t remaining = n
        cdef size_t pos = 0
        cdef size_t to_copy
        cdef size_t available

        while remaining > 0:
            if self.buffer_pos >= self.buffer_size:
                self._refill_buffer()

            # Calculate how many bytes we can copy
            available = self.buffer_size - self.buffer_pos
            to_copy = remaining if remaining < available else available

            # Copy bytes from buffer to result
            memcpy(result + pos, &self.buffer[self.buffer_pos], to_copy)

            self.buffer_pos += to_copy
            pos += to_copy
            remaining -= to_copy

        return py_bytes

    def random_int(self, n):
        """
        Generate a random integer in the range [0, n).

        Uses rejection sampling: draws just enough random bits to cover n - 1
        and retries while the drawn value is not below n.

        :param n: Upper bound (exclusive)
        :return: Random integer
        :raises ValueError: if n is not positive
        """
        if n <= 0:
            raise ValueError("Upper bound must be positive")
        if n == 1:
            return 0

        # Calculate the number of bits and bytes needed to represent n - 1
        bits_needed = 0
        temp = n - 1
        while temp > 0:
            bits_needed += 1
            temp >>= 1
        bytes_needed = (bits_needed + 7) // 8

        mask = (1 << bits_needed) - 1
        max_attempts = 1000  # Prevent infinite loop

        # Rejection sampling to avoid bias
        for _ in range(max_attempts):
            random_data = self.random_bytes(bytes_needed)
            # Apply mask to get the right number of bits
            result = int.from_bytes(random_data, byteorder='big') & mask
            if result < n:
                return result

        # If we reach here, we've made too many attempts
        # Fall back to a slightly biased but guaranteed-to-terminate method
        random_data = self.random_bytes(bytes_needed)
        result = int.from_bytes(random_data, byteorder='big')
        return result % n

    def shuffle(self, list items):
        """
        Shuffle a list in-place using the Fisher-Yates algorithm.

        Lists with fewer than two elements are left untouched.

        :param items: List to shuffle
        """
        # Signed index type: for an empty list n - 1 is -1 and the loop below
        # is simply skipped, without relying on unsigned wrap-around.
        cdef Py_ssize_t n = len(items)
        cdef Py_ssize_t i, j

        for i in range(n - 1, 0, -1):
            # Generate random index j such that 0 <= j <= i
            j = self.random_int(i + 1)

            # Swap items[i] and items[j]
            items[i], items[j] = items[j], items[i]
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import pytest
2+
3+
from ...crypto.low_level import CSPRNG
4+
5+
6+
# Two distinct 256-bit seeds (32 bytes each) shared by the tests below.
key1 = bytes.fromhex(
    "0123456789abcdef0123456789abcdef"
    "0123456789abcdef0123456789abcdef"
)
key2 = bytes.fromhex(
    "fedcba9876543210fedcba9876543210"
    "fedcba9876543210fedcba9876543210"
)
9+
10+
11+
def test_deterministic_output():
    """Equal seeds must yield equal byte streams, a different seed must not."""
    first = CSPRNG(key1).random_bytes(100)
    second = CSPRNG(key1).random_bytes(100)

    # Two generators built from the same key agree byte for byte.
    assert first == second

    # A generator built from another key produces other output.
    other = CSPRNG(key2).random_bytes(100)
    assert first != other
28+
29+
30+
def test_random_bytes():
    """random_bytes must return a bytes object of exactly the requested length."""
    rng = CSPRNG(key1)

    requested_sizes = (1, 10, 100, 1000, 10000)
    for requested in requested_sizes:
        chunk = rng.random_bytes(requested)

        assert isinstance(chunk, bytes)
        assert len(chunk) == requested
43+
44+
45+
def test_random_int():
    """random_int must return a Python int inside [0, upper_bound)."""
    rng = CSPRNG(key1)

    # Upper bounds from tiny to 10**12, ten draws for each of them.
    bounds = (2, 10, 100, 10**3, 10**6, 10**9, 10**12)
    for bound in bounds:
        for _ in range(10):
            value = rng.random_int(bound)

            assert 0 <= value < bound
            assert isinstance(value, int)
60+
61+
62+
def test_random_int_edge_cases():
    """Exercise random_int with invalid, trivial and boundary upper bounds."""
    rng = CSPRNG(key1)

    # Non-positive upper bounds are rejected.
    for invalid_bound in (-1, 0):
        with pytest.raises(ValueError):
            rng.random_int(invalid_bound)

    # With an upper bound of 1 the only possible result is 0.
    assert rng.random_int(1) == 0

    # In order: the smallest useful bound, a power of 2, one less than a power
    # of 2, one more than a power of 2 and a large bound.
    for bound in (2, 256, 255, 257, 1000000000):
        for _ in range(10):
            value = rng.random_int(bound)
            assert 0 <= value < bound
104+
105+
106+
def test_shuffle():
    """shuffle must be deterministic per seed and must only permute the list."""
    reference = list(range(100))
    rng_a, rng_b = CSPRNG(key1), CSPRNG(key1)

    # Same key, same input: both generators produce the same order.
    shuffled_a = list(reference)
    shuffled_b = list(reference)
    rng_a.shuffle(shuffled_a)
    rng_b.shuffle(shuffled_b)
    assert shuffled_a == shuffled_b

    # No element is lost or duplicated.
    assert sorted(shuffled_a) == reference

    # Another key gives another order.
    shuffled_other_key = list(reference)
    CSPRNG(key2).shuffle(shuffled_other_key)
    assert shuffled_a != shuffled_other_key

    # An already used generator has advanced, so shuffling again gives another order.
    shuffled_again = list(reference)
    rng_a.shuffle(shuffled_again)
    assert shuffled_a != shuffled_again
135+
136+
137+
def test_statistical_properties():
    """Check rough uniformity of byte values and of bits in the output."""
    data = CSPRNG(key1).random_bytes(10000)

    # 10000 bytes spread over 256 values is about 39 occurrences per value;
    # a generous margin of roughly +-50% is accepted.
    for value in range(256):
        assert 19 <= data.count(value) <= 59, "Byte distribution is not uniform"

    # 10000 bytes are 80000 bits, about half of them (40000) should be set;
    # a margin of +-5% is accepted.
    bits_set = sum(bin(byte).count("1") for byte in data)
    assert 38000 <= bits_set <= 42000, "Bit distribution is not uniform"
164+
165+
166+
def test_large_shuffle():
    """Shuffling a 10000 element list must reorder it without changing its content."""
    original = list(range(10000))
    large_list = list(original)

    CSPRNG(key1).shuffle(large_list)

    # The order changed ...
    assert large_list != original
    # ... but it is still the same set of elements.
    assert sorted(large_list) == original

0 commit comments

Comments
 (0)