Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
564 changes: 407 additions & 157 deletions src/_xxhash.c

Large diffs are not rendered by default.

35 changes: 5 additions & 30 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,42 +90,17 @@ def test_xxh64_hexdigest_5b():
xxhash.xxh64_hexdigest(DATA_5B)


# ── str input (tests _get_buffer_or_str UTF-8 encoding path) ────────

DATA_STR = "hello world"


@pytest.mark.benchmark
def test_xxh32_intdigest_str():
xxhash.xxh32_intdigest(DATA_STR)


@pytest.mark.benchmark
def test_xxh64_intdigest_str():
xxhash.xxh64_intdigest(DATA_STR)


@pytest.mark.benchmark
def test_xxh3_64_intdigest_str():
xxhash.xxh3_64_intdigest(DATA_STR)


@pytest.mark.benchmark
def test_xxh3_128_intdigest_str():
xxhash.xxh3_128_intdigest(DATA_STR)


# ── type constructor (tests tp_vectorcall) ──────────────────────────


@pytest.mark.benchmark
def test_xxh32_ctor():
xxhash.xxh32(DATA_STR)
xxhash.xxh32(DATA_5B)


@pytest.mark.benchmark
def test_xxh32_ctor_seed():
xxhash.xxh32(DATA_STR, seed=SEED_32)
xxhash.xxh32(DATA_5B, seed=SEED_32)


@pytest.mark.benchmark
Expand All @@ -135,17 +110,17 @@ def test_xxh32_ctor_empty():

@pytest.mark.benchmark
def test_xxh64_ctor():
xxhash.xxh64(DATA_STR, seed=SEED_64)
xxhash.xxh64(DATA_5B, seed=SEED_64)


@pytest.mark.benchmark
def test_xxh3_64_ctor():
xxhash.xxh3_64(DATA_STR, seed=SEED_64)
xxhash.xxh3_64(DATA_5B, seed=SEED_64)


@pytest.mark.benchmark
def test_xxh3_128_ctor():
xxhash.xxh3_128(DATA_STR, seed=SEED_64)
xxhash.xxh3_128(DATA_5B, seed=SEED_64)


# ── 2MB throughput: hashing dominates, call overhead negligible ─────
Expand Down
55 changes: 43 additions & 12 deletions tests/test_fastcall.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,12 @@ def test_input_bytes(self):
self._check(a, self.data)

def test_input_str(self):
"""hashlib compatibility: str raises TypeError."""
s = self.data.decode()
for a in self.algorithms:
self._check(a, s)
for fn in self._funcs(a):
with self.assertRaises(TypeError):
fn(s)

def test_input_empty(self):
for a in self.algorithms:
Expand All @@ -64,13 +67,13 @@ def test_positional_seed_xxh3_128(self):

# ── keyword input ─────────────────────────────────────────────

def test_keyword_input(self):
def test_keyword_data(self):
for a in self.algorithms:
self._check(a, input=self.data)
self._check(a, data=self.data)

def test_keyword_input_and_seed(self):
def test_keyword_data_and_seed(self):
for a in self.algorithms:
self._check(a, input=self.data, seed=42)
self._check(a, data=self.data, seed=42)

# ── keyword seed (with positional input) ──────────────────────

Expand Down Expand Up @@ -105,6 +108,33 @@ def test_input_array(self):
for a in self.algorithms:
self._check(a, array.array('B', self.data))

def test_input_mmap(self):
import mmap, tempfile, os
with tempfile.NamedTemporaryFile(delete=False) as f:
f.write(self.data)
f.flush()
try:
with open(f.name, 'rb') as f2:
with mmap.mmap(f2.fileno(), 0, access=mmap.ACCESS_READ) as m:
for a in self.algorithms:
self._check(a, m)
finally:
os.unlink(f.name)

def test_input_pickle_buffer(self):
try:
from pickle import PickleBuffer
except ImportError:
raise self.skipTest('PickleBuffer not available')
for a in self.algorithms:
self._check(a, PickleBuffer(self.data))

def test_input_ctypes(self):
import ctypes
buf = (ctypes.c_char * len(self.data)).from_buffer_copy(self.data)
for a in self.algorithms:
self._check(a, buf)


class TestFastcallErrors(unittest.TestCase):
"""Invalid argument passing: all error cases."""
Expand Down Expand Up @@ -141,16 +171,17 @@ def test_too_many_positional(self):

# ── unknown keyword ───────────────────────────────────────────

def test_unknown_keyword(self):
self._assert_all_raise(TypeError, self.data, bad=1)
def test_unknown_keyword_input(self):
"""Old 'input' keyword is now unknown — was renamed to 'data'."""
self._assert_all_raise(TypeError, input=self.data)

def test_unknown_keyword_input_kw(self):
self._assert_all_raise(TypeError, input=self.data, bad=1)
def test_unknown_keyword_data_kw(self):
self._assert_all_raise(TypeError, data=self.data, bad=1)

# ── duplicate arguments ───────────────────────────────────────

def test_duplicate_input(self):
self._assert_all_raise(TypeError, self.data, input=self.data)
self._assert_all_raise(TypeError, self.data, data=self.data)

def test_duplicate_seed(self):
self._assert_all_raise(TypeError, self.data, 0, seed=1)
Expand All @@ -164,15 +195,15 @@ def test_invalid_seed_keyword(self):
self._assert_all_raise(TypeError, self.data, seed='bad')

def test_invalid_seed_with_input_kw(self):
self._assert_all_raise(TypeError, input=self.data, seed='bad')
self._assert_all_raise(TypeError, data=self.data, seed='bad')

# ── invalid input type (not str, not buffer) ──────────────────

def test_input_not_bytes_or_str(self):
self._assert_all_raise(TypeError, 12345)

def test_input_not_bytes_or_str_kw(self):
self._assert_all_raise(TypeError, input=12345)
self._assert_all_raise(TypeError, data=12345)


class TestFastcallSeedOverflow(unittest.TestCase):
Expand Down
164 changes: 164 additions & 0 deletions tests/test_hashlib_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
"""Tests for hashlib compatibility."""
import unittest
import xxhash


class TestHashlibCompat(unittest.TestCase):
    """Verify the hashlib-compatible interface of the xxhash module.

    Each per-algorithm check runs inside ``subTest`` so that a failure for
    one algorithm (or one one-shot function) is reported with its name and
    does not prevent the remaining algorithms from being checked.
    """

    data = b'hello world'

    # Algorithms exercised by every per-algorithm test below.
    algorithms = ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128')

    # Expected TypeError messages (used as regex patterns).
    STR_ERROR = 'Strings must be encoded before hashing'
    BUFFER_ERROR = 'object supporting the buffer API required'

    @staticmethod
    def _oneshot_funcs(algo):
        """Return the module-level digest/intdigest/hexdigest functions for *algo*."""
        return (getattr(xxhash, f'{algo}_digest'),
                getattr(xxhash, f'{algo}_intdigest'),
                getattr(xxhash, f'{algo}_hexdigest'))

    def test_algorithms_available(self):
        """``algorithms_available`` is a set containing every known name."""
        self.assertIsInstance(xxhash.algorithms_available, set)
        # 'xxh128' is checked in addition to the four names in ``algorithms``.
        for name in self.algorithms + ('xxh128',):
            with self.subTest(algo=name):
                self.assertIn(name, xxhash.algorithms_available)

    def test_algorithms_guaranteed(self):
        """``algorithms_guaranteed`` equals ``algorithms_available``."""
        self.assertEqual(xxhash.algorithms_guaranteed,
                         xxhash.algorithms_available)

    # ── str rejection ──────────────────────────────────────────────

    def test_str_rejected(self):
        """One-shot functions raise TypeError for str and for None."""
        for algo in self.algorithms:
            for fn in self._oneshot_funcs(algo):
                with self.subTest(algo=algo, fn=fn.__name__):
                    # positional str
                    with self.assertRaisesRegex(TypeError, self.STR_ERROR):
                        fn('hello')
                    # keyword str
                    with self.assertRaisesRegex(TypeError, self.STR_ERROR):
                        fn(data='hello')
                    # None
                    with self.assertRaisesRegex(TypeError, self.BUFFER_ERROR):
                        fn(None)

    def test_str_rejected_constructor(self):
        """Constructors raise TypeError for str and for None."""
        for algo in self.algorithms:
            with self.subTest(algo=algo):
                cls = getattr(xxhash, algo)
                # positional str
                with self.assertRaisesRegex(TypeError, self.STR_ERROR):
                    cls('hello')
                # keyword str
                with self.assertRaisesRegex(TypeError, self.STR_ERROR):
                    cls(data='hello')
                # None
                with self.assertRaisesRegex(TypeError, self.BUFFER_ERROR):
                    cls(None)
                with self.assertRaisesRegex(TypeError, self.BUFFER_ERROR):
                    cls(data=None)

    def test_str_rejected_update(self):
        """``update`` raises TypeError for str and None; bytes still work after."""
        for algo in self.algorithms:
            with self.subTest(algo=algo):
                obj = getattr(xxhash, algo)()
                with self.assertRaisesRegex(TypeError, self.STR_ERROR):
                    obj.update('hello')
                # also test that bytes work after
                obj.update(b'hello')
                self.assertIsInstance(obj.intdigest(), int)
                # None
                with self.assertRaisesRegex(TypeError, self.BUFFER_ERROR):
                    obj.update(None)
                with self.assertRaisesRegex(TypeError, self.BUFFER_ERROR):
                    obj.update(data=None)

    # ── unknown keyword ───────────────────────────────────────────

    def test_unknown_keyword(self):
        """An unrecognised keyword raises TypeError in constructor and update."""
        for algo in self.algorithms:
            with self.subTest(algo=algo):
                cls = getattr(xxhash, algo)
                with self.assertRaises(TypeError):
                    cls(b'hello', bad=1)
                with self.assertRaises(TypeError):
                    cls(data=b'hello', bad=1)
                obj = cls()
                with self.assertRaises(TypeError):
                    obj.update(b'hello', bad=1)
                with self.assertRaises(TypeError):
                    obj.update(data=b'hello', bad=1)

    # ── data keyword ───────────────────────────────────────────────

    def test_data_keyword(self):
        """One-shot functions called with ``data=`` match the object methods."""
        for algo in self.algorithms:
            obj = getattr(xxhash, algo)(self.data)
            for kind in ('digest', 'intdigest', 'hexdigest'):
                with self.subTest(algo=algo, kind=kind):
                    oneshot = getattr(xxhash, f'{algo}_{kind}')
                    self.assertEqual(oneshot(data=self.data),
                                     getattr(obj, kind)())

    def test_data_keyword_constructor(self):
        """A constructor called with ``data=`` matches the one-shot intdigest."""
        for algo in self.algorithms:
            with self.subTest(algo=algo):
                obj = getattr(xxhash, algo)(data=self.data)
                self.assertEqual(
                    obj.intdigest(),
                    getattr(xxhash, f'{algo}_intdigest')(self.data))

    # ── digest_size / block_size / name ────────────────────────────

    def test_digest_size(self):
        """``digest_size`` has the expected value for each algorithm."""
        expected = {'xxh32': 4, 'xxh64': 8, 'xxh3_64': 8, 'xxh3_128': 16}
        for algo, size in expected.items():
            with self.subTest(algo=algo):
                self.assertEqual(getattr(xxhash, algo)().digest_size, size)

    def test_block_size(self):
        """``block_size`` has the expected value for each algorithm."""
        expected = {'xxh32': 16, 'xxh64': 32, 'xxh3_64': 32, 'xxh3_128': 64}
        for algo, size in expected.items():
            with self.subTest(algo=algo):
                self.assertEqual(getattr(xxhash, algo)().block_size, size)

    def test_name(self):
        """``name`` has the expected value for each algorithm."""
        expected = {'xxh32': 'XXH32', 'xxh64': 'XXH64',
                    'xxh3_64': 'XXH3_64', 'xxh3_128': 'XXH3_128'}
        for algo, name in expected.items():
            with self.subTest(algo=algo):
                self.assertEqual(getattr(xxhash, algo)().name, name)

    # ── digest / hexdigest ─────────────────────────────────────────

    def test_digest(self):
        """``digest()`` is bytes of ``digest_size`` and matches the one-shot."""
        for algo in self.algorithms:
            with self.subTest(algo=algo):
                obj = getattr(xxhash, algo)(self.data)
                d_fn = getattr(xxhash, f'{algo}_digest')
                self.assertEqual(obj.digest(), d_fn(self.data))
                self.assertIsInstance(obj.digest(), bytes)
                self.assertEqual(len(obj.digest()), obj.digest_size)

    def test_hexdigest(self):
        """``hexdigest()`` is a str of ``2 * digest_size`` and matches the one-shot."""
        for algo in self.algorithms:
            with self.subTest(algo=algo):
                obj = getattr(xxhash, algo)(self.data)
                h_fn = getattr(xxhash, f'{algo}_hexdigest')
                self.assertEqual(obj.hexdigest(), h_fn(self.data))
                self.assertIsInstance(obj.hexdigest(), str)
                self.assertEqual(len(obj.hexdigest()), obj.digest_size * 2)

    # ── update ─────────────────────────────────────────────────────

    def test_update(self):
        """Feeding data through ``update`` equals passing it to the constructor."""
        for algo in self.algorithms:
            with self.subTest(algo=algo):
                a = getattr(xxhash, algo)()
                a.update(self.data)
                b = getattr(xxhash, algo)(self.data)
                self.assertEqual(a.digest(), b.digest())

    # ── copy ───────────────────────────────────────────────────────

    def test_copy(self):
        """A copy starts with the same digest and diverges after its own update."""
        for algo in self.algorithms:
            with self.subTest(algo=algo):
                a = getattr(xxhash, algo)(self.data)
                b = a.copy()
                self.assertEqual(a.digest(), b.digest())
                b.update(b'more')
                self.assertNotEqual(a.digest(), b.digest())
Loading
Loading