From 1585d2b8b031e8d77543fa4d5a1a39d051b452a3 Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 13:21:41 +0800 Subject: [PATCH 1/9] perf: add tp_vectorcall to all four hash types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vectorcall bypasses tp_new→tp_init Python-level dispatch, parsing arguments directly in C. Makes the _parse_fastcall_args helper accept an input_required parameter so type constructors can accept optional input while module functions require it. Type constructor speedup: 41-66% depending on arguments. --- src/_xxhash.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 223 insertions(+), 17 deletions(-) diff --git a/src/_xxhash.c b/src/_xxhash.c index a7bc8ee..7a8058d 100644 --- a/src/_xxhash.c +++ b/src/_xxhash.c @@ -82,6 +82,7 @@ _get_buffer_or_str(PyObject *obj, Py_buffer *buf, PyObject **owner) static Py_ALWAYS_INLINE int _parse_fastcall_args(PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames, const char *funcname, + int input_required, Py_buffer *buf, PyObject **buf_owner, unsigned long long *seed) { @@ -148,7 +149,7 @@ _parse_fastcall_args(PyObject *const *args, Py_ssize_t nargs, } } - if (!input_found) { + if (!input_found && input_required) { PyErr_Format(PyExc_TypeError, "%s() missing required argument 'input'", funcname); return -1; @@ -175,7 +176,7 @@ static PyObject *xxh32_digest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_digest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_digest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH32_hash_t)raw_seed; @@ -194,7 +195,7 @@ static PyObject *xxh32_intdigest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_intdigest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_intdigest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH32_hash_t)raw_seed; @@ -211,7 +212,7 @@ static PyObject *xxh32_hexdigest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_hexdigest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_hexdigest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH32_hash_t)raw_seed; @@ -245,7 +246,7 @@ static PyObject *xxh64_digest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_digest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_digest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -264,7 +265,7 @@ static PyObject *xxh64_intdigest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_intdigest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_intdigest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -281,7 +282,7 @@ static PyObject *xxh64_hexdigest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_hexdigest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_hexdigest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -315,7 +316,7 @@ static PyObject *xxh3_64_digest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_digest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_digest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -334,7 +335,7 @@ static PyObject *xxh3_64_intdigest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_intdigest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_intdigest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -351,7 +352,7 @@ static PyObject *xxh3_64_hexdigest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_hexdigest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_hexdigest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -385,7 +386,7 @@ static PyObject *xxh3_128_digest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_digest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_digest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -404,7 +405,7 @@ static PyObject *xxh3_128_intdigest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_intdigest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_intdigest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -436,7 +437,7 @@ static PyObject *xxh3_128_hexdigest(PyObject *self, PyObject *const *args, Py_buffer buf; PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_hexdigest", &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_hexdigest", 1, &buf, &buf_owner, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -493,6 +494,47 @@ static void PYXXH32_do_update(PYXXH32Object *self, Py_buffer *buf) PyBuffer_Release(buf); } +static PyObject * +PYXXH32_vectorcall(PyObject *type, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + XXH32_hash_t seed = 0; + Py_buffer buf; + PyObject *buf_owner; + unsigned long long raw_seed; + + if (_parse_fastcall_args(args, nargs, kwnames, "xxhash.xxh32()", 0, + &buf, &buf_owner, &raw_seed) < 0) + return NULL; + seed = (XXH32_hash_t)raw_seed; + + PYXXH32Object *self = (PYXXH32Object *) + ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); + if (self == NULL) goto error; + + self->xxhash_state = XXH32_createState(); + if (self->xxhash_state == NULL) { + Py_DECREF(self); + goto error; + } + self->seed = seed; + XXH32_reset(self->xxhash_state, seed); + + if (buf.buf) { + XXH32_update(self->xxhash_state, buf.buf, buf.len); + PyBuffer_Release(&buf); + } + Py_XDECREF(buf_owner); + return (PyObject *)self; + +error: + if (buf.buf) + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); + return PyErr_NoMemory(); +} + /* XXH32 methods */ static PyObject *PYXXH32_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -753,7 +795,7 @@ static PyTypeObject PYXXH32Type = { 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */ PYXXH32Type_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ @@ -772,6 +814,17 @@ static PyTypeObject PYXXH32Type = { (initproc)PYXXH32_init, /* tp_init */ 0, /* tp_alloc */ PYXXH32_new, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ + PYXXH32_vectorcall, /* tp_vectorcall */ }; @@ -802,6 +855,46 @@ static void PYXXH64_do_update(PYXXH64Object *self, Py_buffer *buf) PyBuffer_Release(buf); } +static PyObject * +PYXXH64_vectorcall(PyObject *type, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + XXH64_hash_t seed = 0; + Py_buffer buf; + PyObject *buf_owner; + unsigned long long raw_seed; + + if (_parse_fastcall_args(args, nargs, kwnames, "xxhash.xxh64()", 0, + &buf, &buf_owner, &raw_seed) < 0) + return NULL; + seed = (XXH64_hash_t)raw_seed; + + PYXXH64Object *self = (PYXXH64Object *) + ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); + if (self == NULL) goto error; + + self->xxhash_state = XXH64_createState(); + if (self->xxhash_state == NULL) { + Py_DECREF(self); + goto error; + } + self->seed = seed; + XXH64_reset(self->xxhash_state, seed); + + if (buf.buf) { + XXH64_update(self->xxhash_state, buf.buf, buf.len); + PyBuffer_Release(&buf); + } + Py_XDECREF(buf_owner); + return (PyObject *)self; + +error: + if (buf.buf) + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); + return PyErr_NoMemory(); +} static PyObject *PYXXH64_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { PYXXH64Object *self; @@ -1060,7 +1153,7 @@ static PyTypeObject PYXXH64Type = { 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */ PYXXH64Type_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ @@ -1079,6 +1172,17 @@ static PyTypeObject PYXXH64Type = { (initproc)PYXXH64_init, /* tp_init */ 0, /* tp_alloc */ PYXXH64_new, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ + PYXXH64_vectorcall, /* tp_vectorcall */ }; /* XXH3_64 */ @@ -1108,6 +1212,46 @@ static void PYXXH3_64_do_update(PYXXH3_64Object *self, Py_buffer *buf) PyBuffer_Release(buf); } +static PyObject * +PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + XXH64_hash_t seed = 0; + Py_buffer buf; + PyObject *buf_owner; + unsigned long long raw_seed; + + if (_parse_fastcall_args(args, nargs, kwnames, "xxhash.xxh3_64()", 0, + &buf, &buf_owner, &raw_seed) < 0) + return NULL; + seed = (XXH64_hash_t)raw_seed; + + PYXXH3_64Object *self = (PYXXH3_64Object *) + ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); + if (self == NULL) goto error; + + self->xxhash_state = XXH3_createState(); + if (self->xxhash_state == NULL) { + Py_DECREF(self); + goto error; + } + self->seed = seed; + XXH3_64bits_reset_withSeed(self->xxhash_state, seed); + + if (buf.buf) { + XXH3_64bits_update(self->xxhash_state, buf.buf, buf.len); + PyBuffer_Release(&buf); + } + Py_XDECREF(buf_owner); + return (PyObject *)self; + +error: + if (buf.buf) + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); + return PyErr_NoMemory(); +} static PyObject *PYXXH3_64_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { PYXXH3_64Object *self; @@ -1374,7 +1518,7 @@ static PyTypeObject PYXXH3_64Type = { 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */ PYXXH3_64Type_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ @@ -1393,6 +1537,17 @@ static PyTypeObject PYXXH3_64Type = { (initproc)PYXXH3_64_init, /* tp_init */ 0, /* tp_alloc */ PYXXH3_64_new, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ + PYXXH3_64_vectorcall, /* tp_vectorcall */ }; @@ -1423,6 +1578,46 @@ static void PYXXH3_128_do_update(PYXXH3_128Object *self, Py_buffer *buf) PyBuffer_Release(buf); } +static PyObject * +PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + XXH64_hash_t seed = 0; + Py_buffer buf; + PyObject *buf_owner; + unsigned long long raw_seed; + + if (_parse_fastcall_args(args, nargs, kwnames, "xxhash.xxh3_128()", 0, + &buf, &buf_owner, &raw_seed) < 0) + return NULL; + seed = (XXH64_hash_t)raw_seed; + + PYXXH3_128Object *self = (PYXXH3_128Object *) + ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); + if (self == NULL) goto error; + + self->xxhash_state = XXH3_createState(); + if (self->xxhash_state == NULL) { + Py_DECREF(self); + goto error; + } + self->seed = seed; + XXH3_128bits_reset_withSeed(self->xxhash_state, seed); + + if (buf.buf) { + XXH3_128bits_update(self->xxhash_state, buf.buf, buf.len); + PyBuffer_Release(&buf); + } + Py_XDECREF(buf_owner); + return (PyObject *)self; + +error: + if (buf.buf) + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); + return PyErr_NoMemory(); +} static PyObject *PYXXH3_128_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { PYXXH3_128Object *self; @@ -1706,7 +1901,7 @@ static PyTypeObject PYXXH3_128Type = { 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */ PYXXH3_128Type_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ @@ -1725,6 +1920,17 @@ static PyTypeObject PYXXH3_128Type = { (initproc)PYXXH3_128_init, /* tp_init */ 0, /* tp_alloc */ PYXXH3_128_new, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ + PYXXH3_128_vectorcall, /* tp_vectorcall */ }; /***************************************************************************** From 982746cea2d909caabd110e75b59543267516494 Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 13:28:50 +0800 Subject: [PATCH 2/9] fix: release GIL in tp_vectorcall update path Wrap XXH*_update calls in Py_BEGIN_ALLOW_THREADS/Py_END_ALLOW_THREADS in all four tp_vectorcall functions, consistent with the existing PYXXH*_do_update helpers. Important for multi-threaded hashing of large buffers. --- src/_xxhash.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/_xxhash.c b/src/_xxhash.c index 7a8058d..cdf3c38 100644 --- a/src/_xxhash.c +++ b/src/_xxhash.c @@ -522,7 +522,9 @@ PYXXH32_vectorcall(PyObject *type, PyObject *const *args, XXH32_reset(self->xxhash_state, seed); if (buf.buf) { + Py_BEGIN_ALLOW_THREADS XXH32_update(self->xxhash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } Py_XDECREF(buf_owner); @@ -883,7 +885,9 @@ PYXXH64_vectorcall(PyObject *type, PyObject *const *args, XXH64_reset(self->xxhash_state, seed); if (buf.buf) { + Py_BEGIN_ALLOW_THREADS XXH64_update(self->xxhash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } Py_XDECREF(buf_owner); @@ -1240,7 +1244,9 @@ PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, XXH3_64bits_reset_withSeed(self->xxhash_state, seed); if (buf.buf) { + Py_BEGIN_ALLOW_THREADS XXH3_64bits_update(self->xxhash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } Py_XDECREF(buf_owner); @@ -1606,7 +1612,9 @@ PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, XXH3_128bits_reset_withSeed(self->xxhash_state, seed); if (buf.buf) { + Py_BEGIN_ALLOW_THREADS XXH3_128bits_update(self->xxhash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS PyBuffer_Release(&buf); } Py_XDECREF(buf_owner); From ad3065fda0078d7dfbe461118d712edfc522674a Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 13:32:36 +0800 Subject: [PATCH 3/9] bench: add micro-benchmarks for fastcall and tp_vectorcall paths Replaces large-buffer-only benchmarks with a mix that captures the recent optimizations: - 5-byte inputs (call overhead dominates, 25-60% improvement) - str input (tests _get_buffer_or_str UTF-8 path) - type constructors (tests tp_vectorcall, 41-66% improvement) - keyword seed passing - Retains 1KB/10KB macro benchmarks for covering hash throughput --- tests/test_benchmark.py | 686 +++++----------------------------------- 1 file changed, 74 insertions(+), 612 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index c92dc78..db00c7a 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,3 +1,4 @@ +import hashlib import os import random @@ -5,682 +6,143 @@ import xxhash -DATA_1KB = os.urandom(1000) -DATA_10KB = os.urandom(10000) -DATA_512KB = os.urandom(512000) -DATA_2MB = os.urandom(2 * 1024 * 1024) - SEED_32 = random.randint(0, 0xFFFFFFFF) SEED_64 = random.randint(0, 0xFFFFFFFFFFFFFFFF) +DATA_5B = os.urandom(5) +DATA_1KB = os.urandom(1000) +DATA_10KB = os.urandom(10000) + -# -- xxh32 oneshot -- +# ── macro bench: larger inputs where hashing dominates ─────────────── -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1000) def test_xxh32_intdigest_1kb(): xxhash.xxh32_intdigest(DATA_1KB, seed=SEED_32) -@pytest.mark.benchmark -def test_xxh32_intdigest_10kb(): - xxhash.xxh32_intdigest(DATA_10KB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_intdigest_512kb(): - xxhash.xxh32_intdigest(DATA_512KB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_intdigest_2mb(): - xxhash.xxh32_intdigest(DATA_2MB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_hexdigest_1kb(): - xxhash.xxh32_hexdigest(DATA_1KB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_hexdigest_10kb(): - xxhash.xxh32_hexdigest(DATA_10KB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_hexdigest_512kb(): - xxhash.xxh32_hexdigest(DATA_512KB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_hexdigest_2mb(): - xxhash.xxh32_hexdigest(DATA_2MB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_digest_1kb(): - xxhash.xxh32_digest(DATA_1KB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_digest_10kb(): - xxhash.xxh32_digest(DATA_10KB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_digest_512kb(): - xxhash.xxh32_digest(DATA_512KB, seed=SEED_32) - - -@pytest.mark.benchmark -def test_xxh32_digest_2mb(): - xxhash.xxh32_digest(DATA_2MB, seed=SEED_32) - - -# -- xxh64 oneshot -- - - -@pytest.mark.benchmark -def test_xxh64_intdigest_1kb(): - xxhash.xxh64_intdigest(DATA_1KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_intdigest_10kb(): - xxhash.xxh64_intdigest(DATA_10KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_intdigest_512kb(): - xxhash.xxh64_intdigest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_intdigest_2mb(): - xxhash.xxh64_intdigest(DATA_2MB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_hexdigest_1kb(): - xxhash.xxh64_hexdigest(DATA_1KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_hexdigest_10kb(): - xxhash.xxh64_hexdigest(DATA_10KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_hexdigest_512kb(): - xxhash.xxh64_hexdigest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_hexdigest_2mb(): - xxhash.xxh64_hexdigest(DATA_2MB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_digest_1kb(): - xxhash.xxh64_digest(DATA_1KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_digest_10kb(): - xxhash.xxh64_digest(DATA_10KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_digest_512kb(): - xxhash.xxh64_digest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh64_digest_2mb(): - xxhash.xxh64_digest(DATA_2MB, seed=SEED_64) - - -# -- xxh3_64 oneshot -- - - -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1000) def test_xxh3_64_intdigest_1kb(): xxhash.xxh3_64_intdigest(DATA_1KB, seed=SEED_64) -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1000) def test_xxh3_64_intdigest_10kb(): xxhash.xxh3_64_intdigest(DATA_10KB, seed=SEED_64) -@pytest.mark.benchmark -def test_xxh3_64_intdigest_512kb(): - xxhash.xxh3_64_intdigest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_intdigest_2mb(): - xxhash.xxh3_64_intdigest(DATA_2MB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_hexdigest_1kb(): - xxhash.xxh3_64_hexdigest(DATA_1KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_hexdigest_10kb(): - xxhash.xxh3_64_hexdigest(DATA_10KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_hexdigest_512kb(): - xxhash.xxh3_64_hexdigest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_hexdigest_2mb(): - xxhash.xxh3_64_hexdigest(DATA_2MB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_digest_1kb(): - xxhash.xxh3_64_digest(DATA_1KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_digest_10kb(): - xxhash.xxh3_64_digest(DATA_10KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_digest_512kb(): - xxhash.xxh3_64_digest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_64_digest_2mb(): - xxhash.xxh3_64_digest(DATA_2MB, seed=SEED_64) - - -# -- xxh3_128 oneshot -- - - -@pytest.mark.benchmark +@pytest.mark.benchmark(min_rounds=1000) def test_xxh3_128_intdigest_1kb(): xxhash.xxh3_128_intdigest(DATA_1KB, seed=SEED_64) -@pytest.mark.benchmark -def test_xxh3_128_intdigest_10kb(): - xxhash.xxh3_128_intdigest(DATA_10KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_intdigest_512kb(): - xxhash.xxh3_128_intdigest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_intdigest_2mb(): - xxhash.xxh3_128_intdigest(DATA_2MB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_hexdigest_1kb(): - xxhash.xxh3_128_hexdigest(DATA_1KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_hexdigest_10kb(): - xxhash.xxh3_128_hexdigest(DATA_10KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_hexdigest_512kb(): - xxhash.xxh3_128_hexdigest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_hexdigest_2mb(): - xxhash.xxh3_128_hexdigest(DATA_2MB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_digest_1kb(): - xxhash.xxh3_128_digest(DATA_1KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_digest_10kb(): - xxhash.xxh3_128_digest(DATA_10KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_digest_512kb(): - xxhash.xxh3_128_digest(DATA_512KB, seed=SEED_64) - - -@pytest.mark.benchmark -def test_xxh3_128_digest_2mb(): - xxhash.xxh3_128_digest(DATA_2MB, seed=SEED_64) - - -# -- xxh32 streaming intdigest -- - - -@pytest.mark.benchmark -def test_xxh32_streaming_intdigest_1kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_1KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh32_streaming_intdigest_10kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_10KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh32_streaming_intdigest_512kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_512KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh32_streaming_intdigest_2mb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_2MB) - h.intdigest() - - -# -- xxh64 streaming intdigest -- - - -@pytest.mark.benchmark -def test_xxh64_streaming_intdigest_1kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh64_streaming_intdigest_10kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh64_streaming_intdigest_512kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh64_streaming_intdigest_2mb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.intdigest() - - -# -- xxh3_64 streaming intdigest -- - - -@pytest.mark.benchmark -def test_xxh3_64_streaming_intdigest_1kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh3_64_streaming_intdigest_10kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh3_64_streaming_intdigest_512kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh3_64_streaming_intdigest_2mb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.intdigest() - - -# -- xxh3_128 streaming intdigest -- - - -@pytest.mark.benchmark -def test_xxh3_128_streaming_intdigest_1kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh3_128_streaming_intdigest_10kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh3_128_streaming_intdigest_512kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.intdigest() - - -@pytest.mark.benchmark -def test_xxh3_128_streaming_intdigest_2mb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.intdigest() - - -# -- xxh32 streaming hexdigest -- - - -@pytest.mark.benchmark -def test_xxh32_streaming_hexdigest_1kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_1KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh32_streaming_hexdigest_10kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_10KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh32_streaming_hexdigest_512kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_512KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh32_streaming_hexdigest_2mb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_2MB) - h.hexdigest() - - -# -- xxh64 streaming hexdigest -- - - -@pytest.mark.benchmark -def test_xxh64_streaming_hexdigest_1kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh64_streaming_hexdigest_10kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh64_streaming_hexdigest_512kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh64_streaming_hexdigest_2mb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.hexdigest() - - -# -- xxh3_64 streaming hexdigest -- - - -@pytest.mark.benchmark -def test_xxh3_64_streaming_hexdigest_1kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh3_64_streaming_hexdigest_10kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh3_64_streaming_hexdigest_512kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.hexdigest() - - -@pytest.mark.benchmark -def test_xxh3_64_streaming_hexdigest_2mb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.hexdigest() - - -# -- xxh3_128 streaming hexdigest -- - - -@pytest.mark.benchmark -def test_xxh3_128_streaming_hexdigest_1kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.hexdigest() - +# ── micro bench: tiny inputs where call overhead dominates ─────────── -@pytest.mark.benchmark -def test_xxh3_128_streaming_hexdigest_10kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.hexdigest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh32_intdigest_5b(): + xxhash.xxh32_intdigest(DATA_5B) -@pytest.mark.benchmark -def test_xxh3_128_streaming_hexdigest_512kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.hexdigest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh32_intdigest_5b_seed(): + xxhash.xxh32_intdigest(DATA_5B, seed=SEED_32) -@pytest.mark.benchmark -def test_xxh3_128_streaming_hexdigest_2mb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.hexdigest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh32_intdigest_5b_seed_kw(): + xxhash.xxh32_intdigest(DATA_5B, seed=SEED_32) -# -- xxh32 streaming digest -- +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh64_intdigest_5b(): + xxhash.xxh64_intdigest(DATA_5B) -@pytest.mark.benchmark -def test_xxh32_streaming_digest_1kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_1KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh64_intdigest_5b_seed_kw(): + xxhash.xxh64_intdigest(DATA_5B, seed=SEED_64) -@pytest.mark.benchmark -def test_xxh32_streaming_digest_10kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_10KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh3_64_intdigest_5b(): + xxhash.xxh3_64_intdigest(DATA_5B) -@pytest.mark.benchmark -def test_xxh32_streaming_digest_512kb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_512KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh3_64_intdigest_5b_seed_kw(): + xxhash.xxh3_64_intdigest(DATA_5B, seed=SEED_64) -@pytest.mark.benchmark -def test_xxh32_streaming_digest_2mb(): - h = xxhash.xxh32(seed=SEED_32) - for _ in range(10): - h.update(DATA_2MB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh3_128_intdigest_5b(): + xxhash.xxh3_128_intdigest(DATA_5B) -# -- xxh64 streaming digest -- +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh32_hexdigest_5b(): + xxhash.xxh32_hexdigest(DATA_5B) -@pytest.mark.benchmark -def test_xxh64_streaming_digest_1kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh64_hexdigest_5b(): + xxhash.xxh64_hexdigest(DATA_5B) -@pytest.mark.benchmark -def test_xxh64_streaming_digest_10kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.digest() +# ── str input (tests _get_buffer_or_str UTF-8 encoding path) ──────── -@pytest.mark.benchmark -def test_xxh64_streaming_digest_512kb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.digest() +DATA_STR = "hello world" -@pytest.mark.benchmark -def test_xxh64_streaming_digest_2mb(): - h = xxhash.xxh64(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh32_intdigest_str(): + xxhash.xxh32_intdigest(DATA_STR) -# -- xxh3_64 streaming digest -- +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh64_intdigest_str(): + xxhash.xxh64_intdigest(DATA_STR) -@pytest.mark.benchmark -def test_xxh3_64_streaming_digest_1kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh3_64_intdigest_str(): + xxhash.xxh3_64_intdigest(DATA_STR) -@pytest.mark.benchmark -def test_xxh3_64_streaming_digest_10kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh3_128_intdigest_str(): + xxhash.xxh3_128_intdigest(DATA_STR) -@pytest.mark.benchmark -def test_xxh3_64_streaming_digest_512kb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.digest() +# ── type constructor (tests tp_vectorcall) ────────────────────────── -@pytest.mark.benchmark -def test_xxh3_64_streaming_digest_2mb(): - h = xxhash.xxh3_64(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh32_ctor(): + xxhash.xxh32(DATA_STR) -# -- xxh3_128 streaming digest -- +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh32_ctor_seed(): + xxhash.xxh32(DATA_STR, seed=SEED_32) -@pytest.mark.benchmark -def test_xxh3_128_streaming_digest_1kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_1KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh32_ctor_empty(): + xxhash.xxh32() -@pytest.mark.benchmark -def test_xxh3_128_streaming_digest_10kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_10KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh64_ctor(): + xxhash.xxh64(DATA_STR, seed=SEED_64) -@pytest.mark.benchmark -def test_xxh3_128_streaming_digest_512kb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_512KB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh3_64_ctor(): + xxhash.xxh3_64(DATA_STR, seed=SEED_64) -@pytest.mark.benchmark -def test_xxh3_128_streaming_digest_2mb(): - h = xxhash.xxh3_128(seed=SEED_64) - for _ in range(10): - h.update(DATA_2MB) - h.digest() +@pytest.mark.benchmark(min_rounds=5000) +def test_xxh3_128_ctor(): + xxhash.xxh3_128(DATA_STR, seed=SEED_64) From f49eaa4b706c352d60f857ad04fe9d26e57f418b Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 13:36:13 +0800 Subject: [PATCH 4/9] fix: remove unsupported min_rounds from codspeed benchmark markers --- tests/test_benchmark.py | 48 ++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index db00c7a..fa3683f 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -17,22 +17,22 @@ # ── macro bench: larger inputs where hashing dominates ─────────────── -@pytest.mark.benchmark(min_rounds=1000) +@pytest.mark.benchmark def test_xxh32_intdigest_1kb(): xxhash.xxh32_intdigest(DATA_1KB, seed=SEED_32) -@pytest.mark.benchmark(min_rounds=1000) +@pytest.mark.benchmark def test_xxh3_64_intdigest_1kb(): xxhash.xxh3_64_intdigest(DATA_1KB, seed=SEED_64) -@pytest.mark.benchmark(min_rounds=1000) +@pytest.mark.benchmark def test_xxh3_64_intdigest_10kb(): xxhash.xxh3_64_intdigest(DATA_10KB, seed=SEED_64) -@pytest.mark.benchmark(min_rounds=1000) +@pytest.mark.benchmark def test_xxh3_128_intdigest_1kb(): xxhash.xxh3_128_intdigest(DATA_1KB, seed=SEED_64) @@ -40,52 +40,52 @@ def test_xxh3_128_intdigest_1kb(): # ── micro bench: tiny inputs where call overhead dominates ─────────── -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh32_intdigest_5b(): xxhash.xxh32_intdigest(DATA_5B) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh32_intdigest_5b_seed(): xxhash.xxh32_intdigest(DATA_5B, seed=SEED_32) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh32_intdigest_5b_seed_kw(): xxhash.xxh32_intdigest(DATA_5B, seed=SEED_32) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh64_intdigest_5b(): xxhash.xxh64_intdigest(DATA_5B) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh64_intdigest_5b_seed_kw(): xxhash.xxh64_intdigest(DATA_5B, seed=SEED_64) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh3_64_intdigest_5b(): xxhash.xxh3_64_intdigest(DATA_5B) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh3_64_intdigest_5b_seed_kw(): xxhash.xxh3_64_intdigest(DATA_5B, seed=SEED_64) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh3_128_intdigest_5b(): xxhash.xxh3_128_intdigest(DATA_5B) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh32_hexdigest_5b(): xxhash.xxh32_hexdigest(DATA_5B) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh64_hexdigest_5b(): xxhash.xxh64_hexdigest(DATA_5B) @@ -95,22 +95,22 @@ def test_xxh64_hexdigest_5b(): DATA_STR = "hello world" -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh32_intdigest_str(): xxhash.xxh32_intdigest(DATA_STR) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh64_intdigest_str(): xxhash.xxh64_intdigest(DATA_STR) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh3_64_intdigest_str(): xxhash.xxh3_64_intdigest(DATA_STR) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh3_128_intdigest_str(): xxhash.xxh3_128_intdigest(DATA_STR) @@ -118,31 +118,31 @@ def test_xxh3_128_intdigest_str(): # ── type constructor (tests tp_vectorcall) ────────────────────────── -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh32_ctor(): xxhash.xxh32(DATA_STR) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh32_ctor_seed(): xxhash.xxh32(DATA_STR, seed=SEED_32) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh32_ctor_empty(): xxhash.xxh32() -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh64_ctor(): xxhash.xxh64(DATA_STR, seed=SEED_64) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh3_64_ctor(): xxhash.xxh3_64(DATA_STR, seed=SEED_64) -@pytest.mark.benchmark(min_rounds=5000) +@pytest.mark.benchmark def test_xxh3_128_ctor(): xxhash.xxh3_128(DATA_STR, seed=SEED_64) From da76b27293dee4fa881e481b1a8b7f5e2b3e4b0f Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 13:43:15 +0800 Subject: [PATCH 5/9] fix: don't overwrite tp_alloc error with PyErr_NoMemory in vectorcall tp_alloc already sets MemoryError (or another exception) on failure. Calling PyErr_NoMemory() in the shared error path would overwrite it. Now return NULL directly for tp_alloc failure and only call PyErr_NoMemory() for XXH*_createState failure (which sets no exception). --- src/_xxhash.c | 52 ++++++++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/src/_xxhash.c b/src/_xxhash.c index cdf3c38..6f1b4ea 100644 --- a/src/_xxhash.c +++ b/src/_xxhash.c @@ -511,12 +511,15 @@ PYXXH32_vectorcall(PyObject *type, PyObject *const *args, PYXXH32Object *self = (PYXXH32Object *) ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (self == NULL) goto error; + if (self == NULL) + return NULL; self->xxhash_state = XXH32_createState(); if (self->xxhash_state == NULL) { Py_DECREF(self); - goto error; + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); + return PyErr_NoMemory(); } self->seed = seed; XXH32_reset(self->xxhash_state, seed); @@ -529,12 +532,6 @@ PYXXH32_vectorcall(PyObject *type, PyObject *const *args, } Py_XDECREF(buf_owner); return (PyObject *)self; - -error: - if (buf.buf) - PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return PyErr_NoMemory(); } /* XXH32 methods */ @@ -874,12 +871,15 @@ PYXXH64_vectorcall(PyObject *type, PyObject *const *args, PYXXH64Object *self = (PYXXH64Object *) ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (self == NULL) goto error; + if (self == NULL) + return NULL; self->xxhash_state = XXH64_createState(); if (self->xxhash_state == NULL) { Py_DECREF(self); - goto error; + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); + return PyErr_NoMemory(); } self->seed = seed; XXH64_reset(self->xxhash_state, seed); @@ -892,12 +892,6 @@ PYXXH64_vectorcall(PyObject *type, PyObject *const *args, } Py_XDECREF(buf_owner); return (PyObject *)self; - -error: - if (buf.buf) - PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return PyErr_NoMemory(); } static PyObject *PYXXH64_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { @@ -1233,12 +1227,15 @@ PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, PYXXH3_64Object *self = (PYXXH3_64Object *) ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (self == NULL) goto error; + if (self == NULL) + return NULL; self->xxhash_state = XXH3_createState(); if (self->xxhash_state == NULL) { Py_DECREF(self); - goto error; + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); + return PyErr_NoMemory(); } self->seed = seed; XXH3_64bits_reset_withSeed(self->xxhash_state, seed); @@ -1251,12 +1248,6 @@ PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, } Py_XDECREF(buf_owner); return (PyObject *)self; - -error: - if (buf.buf) - PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return PyErr_NoMemory(); } static PyObject *PYXXH3_64_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { @@ -1601,12 +1592,15 @@ PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, PYXXH3_128Object *self = (PYXXH3_128Object *) ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (self == NULL) goto error; + if (self == NULL) + return NULL; self->xxhash_state = XXH3_createState(); if (self->xxhash_state == NULL) { Py_DECREF(self); - goto error; + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); + return PyErr_NoMemory(); } self->seed = seed; XXH3_128bits_reset_withSeed(self->xxhash_state, seed); @@ -1619,12 +1613,6 @@ PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, } Py_XDECREF(buf_owner); return (PyObject *)self; - -error: - if (buf.buf) - PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return PyErr_NoMemory(); } static PyObject *PYXXH3_128_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { From 9647f3229939f873b8ecd6f3c2b8bc4e8bb3bc7a Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 13:55:20 +0800 Subject: [PATCH 6/9] fix: release buffer on tp_alloc failure in vectorcall functions When _parse_fastcall_args succeeds but tp_alloc subsequently fails, the acquired buffer was leaked. Now release buf/buf_owner before returning NULL in all four vectorcall functions. --- src/_xxhash.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/_xxhash.c b/src/_xxhash.c index 6f1b4ea..0f889e3 100644 --- a/src/_xxhash.c +++ b/src/_xxhash.c @@ -511,8 +511,11 @@ PYXXH32_vectorcall(PyObject *type, PyObject *const *args, PYXXH32Object *self = (PYXXH32Object *) ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (self == NULL) + if (self == NULL) { + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); return NULL; + } self->xxhash_state = XXH32_createState(); if (self->xxhash_state == NULL) { @@ -871,8 +874,11 @@ PYXXH64_vectorcall(PyObject *type, PyObject *const *args, PYXXH64Object *self = (PYXXH64Object *) ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (self == NULL) + if (self == NULL) { + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); return NULL; + } self->xxhash_state = XXH64_createState(); if (self->xxhash_state == NULL) { @@ -1227,8 +1233,11 @@ PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, PYXXH3_64Object *self = (PYXXH3_64Object *) ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (self == NULL) + if (self == NULL) { + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); return NULL; + } self->xxhash_state = XXH3_createState(); if (self->xxhash_state == NULL) { @@ -1592,8 +1601,11 @@ PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, PYXXH3_128Object *self = (PYXXH3_128Object *) ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (self == NULL) + if (self == NULL) { + PyBuffer_Release(&buf); + Py_XDECREF(buf_owner); return NULL; + } self->xxhash_state = XXH3_createState(); if (self->xxhash_state == NULL) { From cba0f282ba687b6f071b07fb706315e71619027e Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 14:28:31 +0800 Subject: [PATCH 7/9] refactor: reuse PYXXH*_do_update helpers in vectorcall functions Replace duplicated Py_BEGIN_ALLOW_THREADS/update/Py_END_ALLOW_THREADS/ PyBuffer_Release blocks with calls to existing PYXXH*_do_update helpers. Reduces code duplication across all four vectorcall implementations. --- src/_xxhash.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/src/_xxhash.c b/src/_xxhash.c index 0f889e3..f814c02 100644 --- a/src/_xxhash.c +++ b/src/_xxhash.c @@ -527,12 +527,8 @@ PYXXH32_vectorcall(PyObject *type, PyObject *const *args, self->seed = seed; XXH32_reset(self->xxhash_state, seed); - if (buf.buf) { - Py_BEGIN_ALLOW_THREADS - XXH32_update(self->xxhash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - PyBuffer_Release(&buf); - } + if (buf.buf) + PYXXH32_do_update(self, &buf); Py_XDECREF(buf_owner); return (PyObject *)self; } @@ -890,12 +886,8 @@ PYXXH64_vectorcall(PyObject *type, PyObject *const *args, self->seed = seed; XXH64_reset(self->xxhash_state, seed); - if (buf.buf) { - Py_BEGIN_ALLOW_THREADS - XXH64_update(self->xxhash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - PyBuffer_Release(&buf); - } + if (buf.buf) + PYXXH64_do_update(self, &buf); Py_XDECREF(buf_owner); return (PyObject *)self; } @@ -1249,12 +1241,8 @@ PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, self->seed = seed; XXH3_64bits_reset_withSeed(self->xxhash_state, seed); - if (buf.buf) { - Py_BEGIN_ALLOW_THREADS - XXH3_64bits_update(self->xxhash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - PyBuffer_Release(&buf); - } + if (buf.buf) + PYXXH3_64_do_update(self, &buf); Py_XDECREF(buf_owner); return (PyObject *)self; } @@ -1617,12 +1605,8 @@ PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, self->seed = seed; XXH3_128bits_reset_withSeed(self->xxhash_state, seed); - if (buf.buf) { - Py_BEGIN_ALLOW_THREADS - XXH3_128bits_update(self->xxhash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - PyBuffer_Release(&buf); - } + if (buf.buf) + PYXXH3_128_do_update(self, &buf); Py_XDECREF(buf_owner); return (PyObject *)self; } From 14675ad643e4bd2430631191b3b6f7ffb6fb7c3b Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 14:46:38 +0800 Subject: [PATCH 8/9] fix: use buf.obj instead of buf.buf for buffer-acquired check buf.obj is set by PyObject_GetBuffer when a buffer is successfully acquired, making it a more semantically clear check than buf.buf. Initialize buf->obj = NULL in _parse_fastcall_args to match. --- src/_xxhash.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/_xxhash.c b/src/_xxhash.c index f814c02..48063b9 100644 --- a/src/_xxhash.c +++ b/src/_xxhash.c @@ -91,6 +91,7 @@ _parse_fastcall_args(PyObject *const *args, Py_ssize_t nargs, *seed = 0; buf->buf = NULL; + buf->obj = NULL; *buf_owner = NULL; /* positional args */ @@ -527,7 +528,7 @@ PYXXH32_vectorcall(PyObject *type, PyObject *const *args, self->seed = seed; XXH32_reset(self->xxhash_state, seed); - if (buf.buf) + if (buf.obj) PYXXH32_do_update(self, &buf); Py_XDECREF(buf_owner); return (PyObject *)self; @@ -570,7 +571,7 @@ static int PYXXH32_init(PYXXH32Object *self, PyObject *args, PyObject *kwargs) self->seed = seed; XXH32_reset(self->xxhash_state, seed); - if (buf.buf) { + if (buf.obj) { PYXXH32_do_update(self, &buf); } @@ -886,7 +887,7 @@ PYXXH64_vectorcall(PyObject *type, PyObject *const *args, self->seed = seed; XXH64_reset(self->xxhash_state, seed); - if (buf.buf) + if (buf.obj) PYXXH64_do_update(self, &buf); Py_XDECREF(buf_owner); return (PyObject *)self; @@ -926,7 +927,7 @@ static int PYXXH64_init(PYXXH64Object *self, PyObject *args, PyObject *kwargs) self->seed = seed; XXH64_reset(self->xxhash_state, seed); - if (buf.buf) { + if (buf.obj) { PYXXH64_do_update(self, &buf); } @@ -1241,7 +1242,7 @@ PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, self->seed = seed; XXH3_64bits_reset_withSeed(self->xxhash_state, seed); - if (buf.buf) + if (buf.obj) PYXXH3_64_do_update(self, &buf); Py_XDECREF(buf_owner); return (PyObject *)self; @@ -1281,7 +1282,7 @@ static int PYXXH3_64_init(PYXXH3_64Object *self, PyObject *args, PyObject *kwarg self->seed = seed; XXH3_64bits_reset_withSeed(self->xxhash_state, seed); - if (buf.buf) { + if (buf.obj) { PYXXH3_64_do_update(self, &buf); } @@ -1605,7 +1606,7 @@ PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, self->seed = seed; XXH3_128bits_reset_withSeed(self->xxhash_state, seed); - if (buf.buf) + if (buf.obj) PYXXH3_128_do_update(self, &buf); Py_XDECREF(buf_owner); return (PyObject *)self; @@ -1645,7 +1646,7 @@ static int PYXXH3_128_init(PYXXH3_128Object *self, PyObject *args, PyObject *kwa self->seed = seed; XXH3_128bits_reset_withSeed(self->xxhash_state, seed); - if (buf.buf) { + if (buf.obj) { PYXXH3_128_do_update(self, &buf); } From a974b6b4cd3f469d46dce6cdf421125c45f61971 Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 14:53:16 +0800 Subject: [PATCH 9/9] bench: add 2MB throughput benchmarks Covers one-shot intdigest/hexdigest and streaming constructors across all four algorithms at 2MB where hashing dominates. --- tests/test_benchmark.py | 59 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index fa3683f..b915f52 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,4 +1,3 @@ -import hashlib import os import random @@ -12,6 +11,7 @@ DATA_5B = os.urandom(5) DATA_1KB = os.urandom(1000) DATA_10KB = os.urandom(10000) +DATA_2MB = os.urandom(2 * 1024 * 1024) # ── macro bench: larger inputs where hashing dominates ─────────────── @@ -146,3 +146,60 @@ def test_xxh3_64_ctor(): @pytest.mark.benchmark def test_xxh3_128_ctor(): xxhash.xxh3_128(DATA_STR, seed=SEED_64) + + +# ── 2MB throughput: hashing dominates, call overhead negligible ───── + + +@pytest.mark.benchmark +def test_xxh32_intdigest_2mb(): + xxhash.xxh32_intdigest(DATA_2MB, seed=SEED_32) + + +@pytest.mark.benchmark +def test_xxh64_intdigest_2mb(): + xxhash.xxh64_intdigest(DATA_2MB, seed=SEED_64) + + +@pytest.mark.benchmark +def test_xxh3_64_intdigest_2mb(): + xxhash.xxh3_64_intdigest(DATA_2MB, seed=SEED_64) + + +@pytest.mark.benchmark +def test_xxh3_128_intdigest_2mb(): + xxhash.xxh3_128_intdigest(DATA_2MB, seed=SEED_64) + + +@pytest.mark.benchmark +def test_xxh32_hexdigest_2mb(): + xxhash.xxh32_hexdigest(DATA_2MB, seed=SEED_32) + + +@pytest.mark.benchmark +def test_xxh3_64_hexdigest_2mb(): + xxhash.xxh3_64_hexdigest(DATA_2MB, seed=SEED_64) + + +@pytest.mark.benchmark +def test_xxh32_stream_intdigest_2mb(): + h = xxhash.xxh32(DATA_2MB, seed=SEED_32) + h.intdigest() + + +@pytest.mark.benchmark +def test_xxh64_stream_intdigest_2mb(): + h = xxhash.xxh64(DATA_2MB, seed=SEED_64) + h.intdigest() + + +@pytest.mark.benchmark +def test_xxh3_64_stream_intdigest_2mb(): + h = xxhash.xxh3_64(DATA_2MB, seed=SEED_64) + h.intdigest() + + +@pytest.mark.benchmark +def test_xxh3_128_stream_intdigest_2mb(): + h = xxhash.xxh3_128(DATA_2MB, seed=SEED_64) + h.intdigest()