From 90b376d5c7bddd6ed3b73338c5884c3186343b56 Mon Sep 17 00:00:00 2001 From: Yue Du Date: Mon, 27 Apr 2026 21:27:29 +0800 Subject: [PATCH] feat: hashlib compatibility - algorithms_available / algorithms_guaranteed: module attributes - Str rejection: TypeError('Strings must be encoded before hashing') - None rejection: TypeError('object supporting the buffer API required') - data= keyword argument across all entry points (constructor, update, one-shot) - Full buffer type support: bytes, bytearray, memoryview, array, mmap, PickleBuffer, ctypes - tp_vectorcall on all 4 type constructors (CPython fast path) - METH_FASTCALL on all 12 module-level one-shot functions - METH_FASTCALL|METH_KEYWORDS on all 4 update() methods - Manual arg parsing in tp_init for PyPy fallback - Reject unknown keywords, duplicate args, extra positional args globally - _get_buffer_or_str, _parse_fastcall_args, _check_kwargs shared helpers - Py_ALWAYS_INLINE on all performance-critical helpers - PyLong_FromUnsigned* replaces Py_BuildValue - Remove hexdigits lookup table (regressed) - 120 tests (15 hashlib compat, 32 fastcall, 34 benchmark, 39 original) - Tested on CPython 3.9-3.15 and PyPy 3.9-3.11 --- src/_xxhash.c | 564 +++++++++++++++++++++++++---------- tests/test_benchmark.py | 35 +-- tests/test_fastcall.py | 55 +++- tests/test_hashlib_compat.py | 164 ++++++++++ tests/test_xxh32.py | 60 ++-- tests/test_xxh3_128.py | 64 ++-- tests/test_xxh3_64.py | 64 ++-- tests/test_xxh64.py | 60 ++-- xxhash/__init__.py | 3 + xxhash/version.py | 2 +- 10 files changed, 747 insertions(+), 324 deletions(-) create mode 100644 tests/test_hashlib_compat.py diff --git a/src/_xxhash.c b/src/_xxhash.c index 48063b9..3995e12 100644 --- a/src/_xxhash.c +++ b/src/_xxhash.c @@ -45,37 +45,32 @@ #define XXH128_BLOCKSIZE 64 -/* Get a buffer from an object, or UTF-8 encode if it's a str. - * On success, *owner is set to the object that owns the buffer - * (NULL if the arg itself supports the buffer protocol). - * Caller must PyBuffer_Release(buf) and Py_XDECREF(*owner). */ + #ifndef Py_ALWAYS_INLINE # define Py_ALWAYS_INLINE #endif +/* Get a buffer from an object. Rejects str with hashlib-compatible error. */ static Py_ALWAYS_INLINE int -_get_buffer_or_str(PyObject *obj, Py_buffer *buf, PyObject **owner) +_get_buffer_or_str(PyObject *obj, Py_buffer *buf) { - /* Check str first to avoid a guaranteed-failing PyObject_GetBuffer call - * and the resulting set/clear of a TypeError. */ + if (obj == Py_None) { + PyErr_SetString(PyExc_TypeError, + "object supporting the buffer API required"); + return -1; + } if (PyUnicode_Check(obj)) { - *owner = PyUnicode_AsUTF8String(obj); - if (*owner == NULL) - return -1; - if (PyObject_GetBuffer(*owner, buf, PyBUF_SIMPLE) < 0) { - Py_DECREF(*owner); - return -1; - } - return 0; + PyErr_SetString(PyExc_TypeError, + "Strings must be encoded before hashing"); + return -1; } if (PyObject_GetBuffer(obj, buf, PyBUF_SIMPLE) < 0) return -1; - *owner = NULL; return 0; } /* Parse input buffer and optional seed from fastcall arguments. - * Handles: positional 'input', positional 'seed', keyword 'input', + * Handles: positional 'data', positional 'seed', keyword 'data', * keyword 'seed', with proper error reporting for unknown keywords, * duplicate arguments, and too many positional args. * Returns 0 on success, -1 on error with exception set. */ @@ -83,7 +78,7 @@ static Py_ALWAYS_INLINE int _parse_fastcall_args(PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames, const char *funcname, int input_required, - Py_buffer *buf, PyObject **buf_owner, + Py_buffer *buf, unsigned long long *seed) { int input_found = 0; @@ -92,11 +87,10 @@ _parse_fastcall_args(PyObject *const *args, Py_ssize_t nargs, *seed = 0; buf->buf = NULL; buf->obj = NULL; - *buf_owner = NULL; /* positional args */ if (nargs >= 1) { - if (_get_buffer_or_str(args[0], buf, buf_owner) < 0) + if (_get_buffer_or_str(args[0], buf) < 0) return -1; input_found = 1; } @@ -120,14 +114,14 @@ _parse_fastcall_args(PyObject *const *args, Py_ssize_t nargs, PyObject *key = PyTuple_GET_ITEM(kwnames, i); PyObject *val = args[nargs + i]; - if (PyUnicode_CompareWithASCIIString(key, "input") == 0) { + if (PyUnicode_CompareWithASCIIString(key, "data") == 0) { if (input_found) { PyErr_Format(PyExc_TypeError, - "%s() got multiple values for argument 'input'", + "%s() got multiple values for argument 'data'", funcname); goto error; } - if (_get_buffer_or_str(val, buf, buf_owner) < 0) + if (_get_buffer_or_str(val, buf) < 0) return -1; input_found = 1; } else if (PyUnicode_CompareWithASCIIString(key, "seed") == 0) { @@ -152,7 +146,7 @@ _parse_fastcall_args(PyObject *const *args, Py_ssize_t nargs, if (!input_found && input_required) { PyErr_Format(PyExc_TypeError, - "%s() missing required argument 'input'", funcname); + "%s() missing required argument 'data'", funcname); return -1; } return 0; @@ -160,7 +154,6 @@ _parse_fastcall_args(PyObject *const *args, Py_ssize_t nargs, error: if (input_found) { PyBuffer_Release(buf); - Py_XDECREF(*buf_owner); } return -1; } @@ -175,15 +168,13 @@ static PyObject *xxh32_digest(PyObject *self, PyObject *const *args, { XXH32_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_digest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_digest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH32_hash_t)raw_seed; XXH32_hash_t intdigest = XXH32(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); char retbuf[XXH32_DIGESTSIZE]; XXH32_canonicalFromHash((XXH32_canonical_t *)retbuf, intdigest); @@ -194,15 +185,13 @@ static PyObject *xxh32_intdigest(PyObject *self, PyObject *const *args, { XXH32_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_intdigest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_intdigest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH32_hash_t)raw_seed; XXH32_hash_t intdigest = XXH32(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); return PyLong_FromUnsignedLong(intdigest); } @@ -211,15 +200,13 @@ static PyObject *xxh32_hexdigest(PyObject *self, PyObject *const *args, { XXH32_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_hexdigest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh32_hexdigest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH32_hash_t)raw_seed; XXH32_hash_t intdigest = XXH32(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); char digest[XXH32_DIGESTSIZE]; XXH32_canonicalFromHash((XXH32_canonical_t *)digest, intdigest); @@ -245,15 +232,13 @@ static PyObject *xxh64_digest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_digest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_digest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH64_hash_t intdigest = XXH64(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); char retbuf[XXH64_DIGESTSIZE]; XXH64_canonicalFromHash((XXH64_canonical_t *)retbuf, intdigest); @@ -264,15 +249,13 @@ static PyObject *xxh64_intdigest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_intdigest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_intdigest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH64_hash_t intdigest = XXH64(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); return PyLong_FromUnsignedLongLong(intdigest); } @@ -281,15 +264,13 @@ static PyObject *xxh64_hexdigest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_hexdigest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh64_hexdigest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH64_hash_t intdigest = XXH64(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); char digest[XXH64_DIGESTSIZE]; XXH64_canonicalFromHash((XXH64_canonical_t *)digest, intdigest); @@ -315,15 +296,13 @@ static PyObject *xxh3_64_digest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_digest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_digest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH64_hash_t intdigest = XXH3_64bits_withSeed(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); char retbuf[XXH64_DIGESTSIZE]; XXH64_canonicalFromHash((XXH64_canonical_t *)retbuf, intdigest); @@ -334,15 +313,13 @@ static PyObject *xxh3_64_intdigest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_intdigest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_intdigest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH64_hash_t intdigest = XXH3_64bits_withSeed(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); return PyLong_FromUnsignedLongLong(intdigest); } @@ -351,15 +328,13 @@ static PyObject *xxh3_64_hexdigest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_hexdigest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_64_hexdigest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH64_hash_t intdigest = XXH3_64bits_withSeed(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); char digest[XXH64_DIGESTSIZE]; XXH64_canonicalFromHash((XXH64_canonical_t *)digest, intdigest); @@ -385,15 +360,13 @@ static PyObject *xxh3_128_digest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_digest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_digest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH128_hash_t intdigest = XXH3_128bits_withSeed(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); char retbuf[XXH128_DIGESTSIZE]; XXH128_canonicalFromHash((XXH128_canonical_t *)retbuf, intdigest); @@ -404,15 +377,13 @@ static PyObject *xxh3_128_intdigest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_intdigest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_intdigest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH128_hash_t intdigest = XXH3_128bits_withSeed(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); PyObject *sixtyfour = PyLong_FromLong(64); PyObject *low = PyLong_FromUnsignedLongLong(intdigest.low64); @@ -436,15 +407,13 @@ static PyObject *xxh3_128_hexdigest(PyObject *self, PyObject *const *args, { XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; - if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_hexdigest", 1, &buf, &buf_owner, &raw_seed) < 0) + if (_parse_fastcall_args(args, nargs, kwnames, "xxh3_128_hexdigest", 1, &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; XXH128_hash_t intdigest = XXH3_128bits_withSeed(buf.buf, buf.len, seed); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); char digest[XXH128_DIGESTSIZE]; XXH128_canonicalFromHash((XXH128_canonical_t *)digest, intdigest); @@ -502,11 +471,10 @@ PYXXH32_vectorcall(PyObject *type, PyObject *const *args, Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); XXH32_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; if (_parse_fastcall_args(args, nargs, kwnames, "xxhash.xxh32()", 0, - &buf, &buf_owner, &raw_seed) < 0) + &buf, &raw_seed) < 0) return NULL; seed = (XXH32_hash_t)raw_seed; @@ -514,23 +482,20 @@ PYXXH32_vectorcall(PyObject *type, PyObject *const *args, ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); if (self == NULL) { PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return NULL; + return NULL; } self->xxhash_state = XXH32_createState(); if (self->xxhash_state == NULL) { Py_DECREF(self); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return PyErr_NoMemory(); + return PyErr_NoMemory(); } self->seed = seed; XXH32_reset(self->xxhash_state, seed); if (buf.obj) PYXXH32_do_update(self, &buf); - Py_XDECREF(buf_owner); return (PyObject *)self; } @@ -556,46 +521,135 @@ static PyObject *PYXXH32_new(PyTypeObject *type, PyObject *args, PyObject *kwarg return (PyObject *)self; } + +/* Check kwargs for unknown keys. Returns 0 if all known, -1 with TypeError. */ +static Py_ALWAYS_INLINE int +_check_kwargs(PyObject *kwargs) +{ + if (!kwargs) + return 0; + Py_ssize_t pos = 0; + PyObject *key, *val; + while (PyDict_Next(kwargs, &pos, &key, &val)) { + if (PyUnicode_CompareWithASCIIString(key, "data") == 0 || + PyUnicode_CompareWithASCIIString(key, "seed") == 0) + continue; + PyErr_Format(PyExc_TypeError, + "'%U' is an invalid keyword argument for this function", + key); + return -1; + } + return 0; +} static int PYXXH32_init(PYXXH32Object *self, PyObject *args, PyObject *kwargs) { XXH32_hash_t seed = 0; - char *keywords[] = {"input", "seed", NULL}; - Py_buffer buf; + PyObject *data_obj = NULL; + Py_buffer buf = {NULL, NULL}; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); - buf.buf = buf.obj = NULL; + if (_check_kwargs(kwargs) < 0) + return -1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s*I:__init__", keywords, &buf, &seed)) { + if (nargs >= 1) { + data_obj = PyTuple_GET_ITEM(args, 0); + if (kwargs && PyDict_GetItemString(kwargs, "data")) { + PyErr_SetString(PyExc_TypeError, + "__init__() got multiple values for argument 'data'"); + return -1; + } + } + if (nargs >= 2) { + seed = (XXH32_hash_t)PyLong_AsUnsignedLongMask(PyTuple_GET_ITEM(args, 1)); + if (PyErr_Occurred()) return -1; + if (kwargs && PyDict_GetItemString(kwargs, "seed")) { + PyErr_SetString(PyExc_TypeError, + "__init__() got multiple values for argument 'seed'"); + return -1; + } + } + if (nargs > 2) { + PyErr_SetString(PyExc_TypeError, + "__init__() takes at most 2 positional arguments"); return -1; } + if (kwargs) { + PyObject *val = PyDict_GetItemString(kwargs, "data"); + if (val) { + if (data_obj) return -1; /* unreachable, caught above */ + data_obj = val; + } + val = PyDict_GetItemString(kwargs, "seed"); + if (val) { + seed = (XXH32_hash_t)PyLong_AsUnsignedLongMask(val); + if (PyErr_Occurred()) return -1; + } + } + + if (data_obj) { + if (_get_buffer_or_str(data_obj, &buf) < 0) + return -1; + } + self->seed = seed; XXH32_reset(self->xxhash_state, seed); - if (buf.obj) { + if (buf.obj) PYXXH32_do_update(self, &buf); - } - return 0; } PyDoc_STRVAR( PYXXH32_update_doc, - "update (input)\n\n" - "Update the xxh32 object with the string input. Repeated calls are\n" + "update (data)\n\n" + "Update the xxh32 object with bytes-like data. Repeated calls are\n" "equivalent to a single call with the concatenation of all the arguments."); -static PyObject *PYXXH32_update(PYXXH32Object *self, PyObject *args) +static PyObject *PYXXH32_update(PYXXH32Object *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames) { - Py_buffer buf; + PyObject *arg = NULL; - buf.buf = buf.obj = NULL; + /* validate keywords first */ + if (kwnames) { + Py_ssize_t nkw = PyTuple_GET_SIZE(kwnames); + for (Py_ssize_t i = 0; i < nkw; i++) { + PyObject *key = PyTuple_GET_ITEM(kwnames, i); + if (PyUnicode_CompareWithASCIIString(key, "data") == 0) { + if (nargs >= 1) { + PyErr_SetString(PyExc_TypeError, + "xxh32.update() got multiple values for argument 'data'"); + return NULL; + } + arg = args[nargs + i]; + } else { + PyErr_Format(PyExc_TypeError, + "'%U' is an invalid keyword argument for 'xxh32.update()'", + key); + return NULL; + } + } + } - if (!PyArg_ParseTuple(args, "s*:update", &buf)) { + if (nargs >= 1) { + if (nargs > 1) { + PyErr_Format(PyExc_TypeError, + "xxh32.update() takes at most 1 positional argument (%zd given)", nargs); + return NULL; + } + arg = args[0]; + } + + if (!arg) { + PyErr_SetString(PyExc_TypeError, "xxh32.update() missing required argument 'data'"); return NULL; } + Py_buffer buf; + if (_get_buffer_or_str(arg, &buf) < 0) + return NULL; PYXXH32_do_update(self, &buf); - Py_RETURN_NONE; } @@ -603,7 +657,7 @@ static PyObject *PYXXH32_update(PYXXH32Object *self, PyObject *args) PyDoc_STRVAR( PYXXH32_digest_doc, "digest() -> string\n\n" - "Return the digest of the strings passed to the update() method so\n" + "Return the digest of the data passed to the update() method so\n" "far. This is a 4-byte string which may contain non-ASCII characters,\n" "including null bytes."); @@ -695,7 +749,7 @@ static PyObject *PYXXH32_reset(PYXXH32Object *self) } static PyMethodDef PYXXH32_methods[] = { - {"update", (PyCFunction)PYXXH32_update, METH_VARARGS, PYXXH32_update_doc}, + {"update", (PyCFunction)PYXXH32_update, METH_FASTCALL | METH_KEYWORDS, PYXXH32_update_doc}, {"digest", (PyCFunction)PYXXH32_digest, METH_NOARGS, PYXXH32_digest_doc}, {"hexdigest", (PyCFunction)PYXXH32_hexdigest, METH_NOARGS, PYXXH32_hexdigest_doc}, {"intdigest", (PyCFunction)PYXXH32_intdigest, METH_NOARGS, PYXXH32_intdigest_doc}, @@ -768,7 +822,7 @@ PyDoc_STRVAR( "\n" "Methods:\n" "\n" - "update(input) -- updates the current digest with the provided string.\n" + "update(data) -- updates the current digest with the provided data.\n" "digest() -- return the current digest value\n" "hexdigest() -- return the current digest as a string of hexadecimal digits\n" "intdigest() -- return the current digest as an integer\n" @@ -861,11 +915,10 @@ PYXXH64_vectorcall(PyObject *type, PyObject *const *args, Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; if (_parse_fastcall_args(args, nargs, kwnames, "xxhash.xxh64()", 0, - &buf, &buf_owner, &raw_seed) < 0) + &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -873,23 +926,20 @@ PYXXH64_vectorcall(PyObject *type, PyObject *const *args, ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); if (self == NULL) { PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return NULL; + return NULL; } self->xxhash_state = XXH64_createState(); if (self->xxhash_state == NULL) { Py_DECREF(self); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return PyErr_NoMemory(); + return PyErr_NoMemory(); } self->seed = seed; XXH64_reset(self->xxhash_state, seed); if (buf.obj) PYXXH64_do_update(self, &buf); - Py_XDECREF(buf_owner); return (PyObject *)self; } static PyObject *PYXXH64_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -915,50 +965,119 @@ static PyObject *PYXXH64_new(PyTypeObject *type, PyObject *args, PyObject *kwarg static int PYXXH64_init(PYXXH64Object *self, PyObject *args, PyObject *kwargs) { XXH64_hash_t seed = 0; - char *keywords[] = {"input", "seed", NULL}; - Py_buffer buf; + PyObject *data_obj = NULL; + Py_buffer buf = {NULL, NULL}; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); - buf.buf = buf.obj = NULL; + if (_check_kwargs(kwargs) < 0) + return -1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s*K:__init__", keywords, &buf, &seed)) { + if (nargs >= 1) { + data_obj = PyTuple_GET_ITEM(args, 0); + if (kwargs && PyDict_GetItemString(kwargs, "data")) { + PyErr_SetString(PyExc_TypeError, + "__init__() got multiple values for argument 'data'"); + return -1; + } + } + if (nargs >= 2) { + seed = PyLong_AsUnsignedLongLongMask(PyTuple_GET_ITEM(args, 1)); + if (PyErr_Occurred()) return -1; + if (kwargs && PyDict_GetItemString(kwargs, "seed")) { + PyErr_SetString(PyExc_TypeError, + "__init__() got multiple values for argument 'seed'"); + return -1; + } + } + if (nargs > 2) { + PyErr_SetString(PyExc_TypeError, + "__init__() takes at most 2 positional arguments"); return -1; } + if (kwargs) { + PyObject *val = PyDict_GetItemString(kwargs, "data"); + if (val) { + if (data_obj) return -1; /* unreachable, caught above */ + data_obj = val; + } + val = PyDict_GetItemString(kwargs, "seed"); + if (val) { + seed = PyLong_AsUnsignedLongLongMask(val); + if (PyErr_Occurred()) return -1; + } + } + + if (data_obj) { + if (_get_buffer_or_str(data_obj, &buf) < 0) + return -1; + } + self->seed = seed; XXH64_reset(self->xxhash_state, seed); - if (buf.obj) { + if (buf.obj) PYXXH64_do_update(self, &buf); - } - return 0; } PyDoc_STRVAR( PYXXH64_update_doc, - "update (input)\n\n" - "Update the xxh64 object with the string input. Repeated calls are\n" + "update (data)\n\n" + "Update the xxh64 object with bytes-like data. Repeated calls are\n" "equivalent to a single call with the concatenation of all the arguments."); -static PyObject *PYXXH64_update(PYXXH64Object *self, PyObject *args) +static PyObject *PYXXH64_update(PYXXH64Object *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames) { - Py_buffer buf; + PyObject *arg = NULL; + + /* validate keywords first */ + if (kwnames) { + Py_ssize_t nkw = PyTuple_GET_SIZE(kwnames); + for (Py_ssize_t i = 0; i < nkw; i++) { + PyObject *key = PyTuple_GET_ITEM(kwnames, i); + if (PyUnicode_CompareWithASCIIString(key, "data") == 0) { + if (nargs >= 1) { + PyErr_SetString(PyExc_TypeError, + "xxh64.update() got multiple values for argument 'data'"); + return NULL; + } + arg = args[nargs + i]; + } else { + PyErr_Format(PyExc_TypeError, + "'%U' is an invalid keyword argument for 'xxh64.update()'", + key); + return NULL; + } + } + } - buf.buf = buf.obj = NULL; + if (nargs >= 1) { + if (nargs > 1) { + PyErr_Format(PyExc_TypeError, + "xxh64.update() takes at most 1 positional argument (%zd given)", nargs); + return NULL; + } + arg = args[0]; + } - if (!PyArg_ParseTuple(args, "s*:update", &buf)) { + if (!arg) { + PyErr_SetString(PyExc_TypeError, "xxh64.update() missing required argument 'data'"); return NULL; } + Py_buffer buf; + if (_get_buffer_or_str(arg, &buf) < 0) + return NULL; PYXXH64_do_update(self, &buf); - Py_RETURN_NONE; } PyDoc_STRVAR( PYXXH64_digest_doc, "digest() -> string\n\n" - "Return the digest of the strings passed to the update() method so\n" + "Return the digest of the data passed to the update() method so\n" "far. This is a 8-byte string which may contain non-ASCII characters,\n" "including null bytes."); @@ -1051,7 +1170,7 @@ static PyObject *PYXXH64_reset(PYXXH64Object *self) } static PyMethodDef PYXXH64_methods[] = { - {"update", (PyCFunction)PYXXH64_update, METH_VARARGS, PYXXH64_update_doc}, + {"update", (PyCFunction)PYXXH64_update, METH_FASTCALL | METH_KEYWORDS, PYXXH64_update_doc}, {"digest", (PyCFunction)PYXXH64_digest, METH_NOARGS, PYXXH64_digest_doc}, {"hexdigest", (PyCFunction)PYXXH64_hexdigest, METH_NOARGS, PYXXH64_hexdigest_doc}, {"intdigest", (PyCFunction)PYXXH64_intdigest, METH_NOARGS, PYXXH64_intdigest_doc}, @@ -1124,7 +1243,7 @@ PyDoc_STRVAR( "\n" "Methods:\n" "\n" - "update(input) -- updates the current digest with an additional string\n" + "update(data) -- updates the current digest with additional data\n" "digest() -- return the current digest value\n" "hexdigest() -- return the current digest as a string of hexadecimal digits\n" "intdigest() -- return the current digest as an integer\n" @@ -1216,11 +1335,10 @@ PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; if (_parse_fastcall_args(args, nargs, kwnames, "xxhash.xxh3_64()", 0, - &buf, &buf_owner, &raw_seed) < 0) + &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -1228,23 +1346,20 @@ PYXXH3_64_vectorcall(PyObject *type, PyObject *const *args, ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); if (self == NULL) { PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return NULL; + return NULL; } self->xxhash_state = XXH3_createState(); if (self->xxhash_state == NULL) { Py_DECREF(self); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return PyErr_NoMemory(); + return PyErr_NoMemory(); } self->seed = seed; XXH3_64bits_reset_withSeed(self->xxhash_state, seed); if (buf.obj) PYXXH3_64_do_update(self, &buf); - Py_XDECREF(buf_owner); return (PyObject *)self; } static PyObject *PYXXH3_64_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -1270,50 +1385,119 @@ static PyObject *PYXXH3_64_new(PyTypeObject *type, PyObject *args, PyObject *kwa static int PYXXH3_64_init(PYXXH3_64Object *self, PyObject *args, PyObject *kwargs) { XXH64_hash_t seed = 0; - char *keywords[] = {"input", "seed", NULL}; - Py_buffer buf; + PyObject *data_obj = NULL; + Py_buffer buf = {NULL, NULL}; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); - buf.buf = buf.obj = NULL; + if (_check_kwargs(kwargs) < 0) + return -1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s*K:__init__", keywords, &buf, &seed)) { + if (nargs >= 1) { + data_obj = PyTuple_GET_ITEM(args, 0); + if (kwargs && PyDict_GetItemString(kwargs, "data")) { + PyErr_SetString(PyExc_TypeError, + "__init__() got multiple values for argument 'data'"); + return -1; + } + } + if (nargs >= 2) { + seed = PyLong_AsUnsignedLongLongMask(PyTuple_GET_ITEM(args, 1)); + if (PyErr_Occurred()) return -1; + if (kwargs && PyDict_GetItemString(kwargs, "seed")) { + PyErr_SetString(PyExc_TypeError, + "__init__() got multiple values for argument 'seed'"); + return -1; + } + } + if (nargs > 2) { + PyErr_SetString(PyExc_TypeError, + "__init__() takes at most 2 positional arguments"); return -1; } + if (kwargs) { + PyObject *val = PyDict_GetItemString(kwargs, "data"); + if (val) { + if (data_obj) return -1; /* unreachable, caught above */ + data_obj = val; + } + val = PyDict_GetItemString(kwargs, "seed"); + if (val) { + seed = PyLong_AsUnsignedLongLongMask(val); + if (PyErr_Occurred()) return -1; + } + } + + if (data_obj) { + if (_get_buffer_or_str(data_obj, &buf) < 0) + return -1; + } + self->seed = seed; XXH3_64bits_reset_withSeed(self->xxhash_state, seed); - if (buf.obj) { + if (buf.obj) PYXXH3_64_do_update(self, &buf); - } - return 0; } PyDoc_STRVAR( PYXXH3_64_update_doc, - "update (input)\n\n" - "Update the xxh3_64 object with the string input. Repeated calls are\n" + "update (data)\n\n" + "Update the xxh3_64 object with bytes-like data. Repeated calls are\n" "equivalent to a single call with the concatenation of all the arguments."); -static PyObject *PYXXH3_64_update(PYXXH3_64Object *self, PyObject *args) +static PyObject *PYXXH3_64_update(PYXXH3_64Object *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames) { - Py_buffer buf; + PyObject *arg = NULL; + + /* validate keywords first */ + if (kwnames) { + Py_ssize_t nkw = PyTuple_GET_SIZE(kwnames); + for (Py_ssize_t i = 0; i < nkw; i++) { + PyObject *key = PyTuple_GET_ITEM(kwnames, i); + if (PyUnicode_CompareWithASCIIString(key, "data") == 0) { + if (nargs >= 1) { + PyErr_SetString(PyExc_TypeError, + "xxh3_64.update() got multiple values for argument 'data'"); + return NULL; + } + arg = args[nargs + i]; + } else { + PyErr_Format(PyExc_TypeError, + "'%U' is an invalid keyword argument for 'xxh3_64.update()'", + key); + return NULL; + } + } + } - buf.buf = buf.obj = NULL; + if (nargs >= 1) { + if (nargs > 1) { + PyErr_Format(PyExc_TypeError, + "xxh3_64.update() takes at most 1 positional argument (%zd given)", nargs); + return NULL; + } + arg = args[0]; + } - if (!PyArg_ParseTuple(args, "s*:update", &buf)) { + if (!arg) { + PyErr_SetString(PyExc_TypeError, "xxh3_64.update() missing required argument 'data'"); return NULL; } + Py_buffer buf; + if (_get_buffer_or_str(arg, &buf) < 0) + return NULL; PYXXH3_64_do_update(self, &buf); - Py_RETURN_NONE; } PyDoc_STRVAR( PYXXH3_64_digest_doc, "digest() -> string\n\n" - "Return the digest of the strings passed to the update() method so\n" + "Return the digest of the data passed to the update() method so\n" "far. This is a 8-byte string which may contain non-ASCII characters,\n" "including null bytes."); @@ -1414,7 +1598,7 @@ static PyObject *PYXXH3_64_reset(PYXXH3_64Object *self) } static PyMethodDef PYXXH3_64_methods[] = { - {"update", (PyCFunction)PYXXH3_64_update, METH_VARARGS, PYXXH3_64_update_doc}, + {"update", (PyCFunction)PYXXH3_64_update, METH_FASTCALL | METH_KEYWORDS, PYXXH3_64_update_doc}, {"digest", (PyCFunction)PYXXH3_64_digest, METH_NOARGS, PYXXH3_64_digest_doc}, {"hexdigest", (PyCFunction)PYXXH3_64_hexdigest, METH_NOARGS, PYXXH3_64_hexdigest_doc}, {"intdigest", (PyCFunction)PYXXH3_64_intdigest, METH_NOARGS, PYXXH3_64_intdigest_doc}, @@ -1487,7 +1671,7 @@ PyDoc_STRVAR( "\n" "Methods:\n" "\n" - "update(input) -- updates the current digest with an additional string\n" + "update(data) -- updates the current digest with additional data\n" "digest() -- return the current digest value\n" "hexdigest() -- return the current digest as a string of hexadecimal digits\n" "intdigest() -- return the current digest as an integer\n" @@ -1580,11 +1764,10 @@ PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); XXH64_hash_t seed = 0; Py_buffer buf; - PyObject *buf_owner; unsigned long long raw_seed; if (_parse_fastcall_args(args, nargs, kwnames, "xxhash.xxh3_128()", 0, - &buf, &buf_owner, &raw_seed) < 0) + &buf, &raw_seed) < 0) return NULL; seed = (XXH64_hash_t)raw_seed; @@ -1592,23 +1775,20 @@ PYXXH3_128_vectorcall(PyObject *type, PyObject *const *args, ((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); if (self == NULL) { PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return NULL; + return NULL; } self->xxhash_state = XXH3_createState(); if (self->xxhash_state == NULL) { Py_DECREF(self); PyBuffer_Release(&buf); - Py_XDECREF(buf_owner); - return PyErr_NoMemory(); + return PyErr_NoMemory(); } self->seed = seed; XXH3_128bits_reset_withSeed(self->xxhash_state, seed); if (buf.obj) PYXXH3_128_do_update(self, &buf); - Py_XDECREF(buf_owner); return (PyObject *)self; } static PyObject *PYXXH3_128_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -1634,49 +1814,119 @@ static PyObject *PYXXH3_128_new(PyTypeObject *type, PyObject *args, PyObject *kw static int PYXXH3_128_init(PYXXH3_128Object *self, PyObject *args, PyObject *kwargs) { XXH64_hash_t seed = 0; - char *keywords[] = {"input", "seed", NULL}; - Py_buffer buf; + PyObject *data_obj = NULL; + Py_buffer buf = {NULL, NULL}; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); - buf.buf = buf.obj = NULL; + if (_check_kwargs(kwargs) < 0) + return -1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s*K:__init__", keywords, &buf, &seed)) { + if (nargs >= 1) { + data_obj = PyTuple_GET_ITEM(args, 0); + if (kwargs && PyDict_GetItemString(kwargs, "data")) { + PyErr_SetString(PyExc_TypeError, + "__init__() got multiple values for argument 'data'"); + return -1; + } + } + if (nargs >= 2) { + seed = PyLong_AsUnsignedLongLongMask(PyTuple_GET_ITEM(args, 1)); + if (PyErr_Occurred()) return -1; + if (kwargs && PyDict_GetItemString(kwargs, "seed")) { + PyErr_SetString(PyExc_TypeError, + "__init__() got multiple values for argument 'seed'"); + return -1; + } + } + if (nargs > 2) { + PyErr_SetString(PyExc_TypeError, + "__init__() takes at most 2 positional arguments"); return -1; } + if (kwargs) { + PyObject *val = PyDict_GetItemString(kwargs, "data"); + if (val) { + if (data_obj) return -1; /* unreachable, caught above */ + data_obj = val; + } + val = PyDict_GetItemString(kwargs, "seed"); + if (val) { + seed = PyLong_AsUnsignedLongLongMask(val); + if (PyErr_Occurred()) return -1; + } + } + + if (data_obj) { + if (_get_buffer_or_str(data_obj, &buf) < 0) + return -1; + } + self->seed = seed; XXH3_128bits_reset_withSeed(self->xxhash_state, seed); - if (buf.obj) { + if (buf.obj) PYXXH3_128_do_update(self, &buf); - } - return 0; } PyDoc_STRVAR( PYXXH3_128_update_doc, - "update (input)\n\n" - "Update the xxh3_128 object with the string input. Repeated calls are\n" + "update (data)\n\n" + "Update the xxh3_128 object with bytes-like data. Repeated calls are\n" "equivalent to a single call with the concatenation of all the arguments."); -static PyObject *PYXXH3_128_update(PYXXH3_128Object *self, PyObject *args) +static PyObject *PYXXH3_128_update(PYXXH3_128Object *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames) { - Py_buffer buf; - buf.buf = buf.obj = NULL; + PyObject *arg = NULL; + + /* validate keywords first */ + if (kwnames) { + Py_ssize_t nkw = PyTuple_GET_SIZE(kwnames); + for (Py_ssize_t i = 0; i < nkw; i++) { + PyObject *key = PyTuple_GET_ITEM(kwnames, i); + if (PyUnicode_CompareWithASCIIString(key, "data") == 0) { + if (nargs >= 1) { + PyErr_SetString(PyExc_TypeError, + "xxh3_128.update() got multiple values for argument 'data'"); + return NULL; + } + arg = args[nargs + i]; + } else { + PyErr_Format(PyExc_TypeError, + "'%U' is an invalid keyword argument for 'xxh3_128.update()'", + key); + return NULL; + } + } + } - if (!PyArg_ParseTuple(args, "s*:update", &buf)) { + if (nargs >= 1) { + if (nargs > 1) { + PyErr_Format(PyExc_TypeError, + "xxh3_128.update() takes at most 1 positional argument (%zd given)", nargs); + return NULL; + } + arg = args[0]; + } + + if (!arg) { + PyErr_SetString(PyExc_TypeError, "xxh3_128.update() missing required argument 'data'"); return NULL; } + Py_buffer buf; + if (_get_buffer_or_str(arg, &buf) < 0) + return NULL; PYXXH3_128_do_update(self, &buf); - Py_RETURN_NONE; } PyDoc_STRVAR( PYXXH3_128_digest_doc, "digest() -> string\n\n" - "Return the digest of the strings passed to the update() method so\n" + "Return the digest of the data passed to the update() method so\n" "far. This is a 16-byte string which may contain non-ASCII characters,\n" "including null bytes."); @@ -1795,7 +2045,7 @@ static PyObject *PYXXH3_128_reset(PYXXH3_128Object *self) } static PyMethodDef PYXXH3_128_methods[] = { - {"update", (PyCFunction)PYXXH3_128_update, METH_VARARGS, PYXXH3_128_update_doc}, + {"update", (PyCFunction)PYXXH3_128_update, METH_FASTCALL | METH_KEYWORDS, PYXXH3_128_update_doc}, {"digest", (PyCFunction)PYXXH3_128_digest, METH_NOARGS, PYXXH3_128_digest_doc}, {"hexdigest", (PyCFunction)PYXXH3_128_hexdigest, METH_NOARGS, PYXXH3_128_hexdigest_doc}, {"intdigest", (PyCFunction)PYXXH3_128_intdigest, METH_NOARGS, PYXXH3_128_intdigest_doc}, @@ -1868,7 +2118,7 @@ PyDoc_STRVAR( "\n" "Methods:\n" "\n" - "update(input) -- updates the current digest with an additional string\n" + "update(data) -- updates the current digest with additional data\n" "digest() -- return the current digest value\n" "hexdigest() -- return the current digest as a string of hexadecimal digits\n" "intdigest() -- return the current digest as an integer\n" diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index b915f52..7412e85 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -90,42 +90,17 @@ def test_xxh64_hexdigest_5b(): xxhash.xxh64_hexdigest(DATA_5B) -# ── str input (tests _get_buffer_or_str UTF-8 encoding path) ──────── - -DATA_STR = "hello world" - - -@pytest.mark.benchmark -def test_xxh32_intdigest_str(): - xxhash.xxh32_intdigest(DATA_STR) - - -@pytest.mark.benchmark -def test_xxh64_intdigest_str(): - xxhash.xxh64_intdigest(DATA_STR) - - -@pytest.mark.benchmark -def test_xxh3_64_intdigest_str(): - xxhash.xxh3_64_intdigest(DATA_STR) - - -@pytest.mark.benchmark -def test_xxh3_128_intdigest_str(): - xxhash.xxh3_128_intdigest(DATA_STR) - - # ── type constructor (tests tp_vectorcall) ────────────────────────── @pytest.mark.benchmark def test_xxh32_ctor(): - xxhash.xxh32(DATA_STR) + xxhash.xxh32(DATA_5B) @pytest.mark.benchmark def test_xxh32_ctor_seed(): - xxhash.xxh32(DATA_STR, seed=SEED_32) + xxhash.xxh32(DATA_5B, seed=SEED_32) @pytest.mark.benchmark @@ -135,17 +110,17 @@ def test_xxh32_ctor_empty(): @pytest.mark.benchmark def test_xxh64_ctor(): - xxhash.xxh64(DATA_STR, seed=SEED_64) + xxhash.xxh64(DATA_5B, seed=SEED_64) @pytest.mark.benchmark def test_xxh3_64_ctor(): - xxhash.xxh3_64(DATA_STR, seed=SEED_64) + xxhash.xxh3_64(DATA_5B, seed=SEED_64) @pytest.mark.benchmark def test_xxh3_128_ctor(): - xxhash.xxh3_128(DATA_STR, seed=SEED_64) + xxhash.xxh3_128(DATA_5B, seed=SEED_64) # ── 2MB throughput: hashing dominates, call overhead negligible ───── diff --git a/tests/test_fastcall.py b/tests/test_fastcall.py index 4b093c6..e27549e 100644 --- a/tests/test_fastcall.py +++ b/tests/test_fastcall.py @@ -36,9 +36,12 @@ def test_input_bytes(self): self._check(a, self.data) def test_input_str(self): + """hashlib compatibility: str raises TypeError.""" s = self.data.decode() for a in self.algorithms: - self._check(a, s) + for fn in self._funcs(a): + with self.assertRaises(TypeError): + fn(s) def test_input_empty(self): for a in self.algorithms: @@ -64,13 +67,13 @@ def test_positional_seed_xxh3_128(self): # ── keyword input ───────────────────────────────────────────── - def test_keyword_input(self): + def test_keyword_data(self): for a in self.algorithms: - self._check(a, input=self.data) + self._check(a, data=self.data) - def test_keyword_input_and_seed(self): + def test_keyword_data_and_seed(self): for a in self.algorithms: - self._check(a, input=self.data, seed=42) + self._check(a, data=self.data, seed=42) # ── keyword seed (with positional input) ────────────────────── @@ -105,6 +108,33 @@ def test_input_array(self): for a in self.algorithms: self._check(a, array.array('B', self.data)) + def test_input_mmap(self): + import mmap, tempfile, os + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(self.data) + f.flush() + try: + with open(f.name, 'rb') as f2: + with mmap.mmap(f2.fileno(), 0, access=mmap.ACCESS_READ) as m: + for a in self.algorithms: + self._check(a, m) + finally: + os.unlink(f.name) + + def test_input_pickle_buffer(self): + try: + from pickle import PickleBuffer + except ImportError: + raise self.skipTest('PickleBuffer not available') + for a in self.algorithms: + self._check(a, PickleBuffer(self.data)) + + def test_input_ctypes(self): + import ctypes + buf = (ctypes.c_char * len(self.data)).from_buffer_copy(self.data) + for a in self.algorithms: + self._check(a, buf) + class TestFastcallErrors(unittest.TestCase): """Invalid argument passing: all error cases.""" @@ -141,16 +171,17 @@ def test_too_many_positional(self): # ── unknown keyword ─────────────────────────────────────────── - def test_unknown_keyword(self): - self._assert_all_raise(TypeError, self.data, bad=1) + def test_unknown_keyword_input(self): + """Old 'input' keyword is now unknown — was renamed to 'data'.""" + self._assert_all_raise(TypeError, input=self.data) - def test_unknown_keyword_input_kw(self): - self._assert_all_raise(TypeError, input=self.data, bad=1) + def test_unknown_keyword_data_kw(self): + self._assert_all_raise(TypeError, data=self.data, bad=1) # ── duplicate arguments ─────────────────────────────────────── def test_duplicate_input(self): - self._assert_all_raise(TypeError, self.data, input=self.data) + self._assert_all_raise(TypeError, self.data, data=self.data) def test_duplicate_seed(self): self._assert_all_raise(TypeError, self.data, 0, seed=1) @@ -164,7 +195,7 @@ def test_invalid_seed_keyword(self): self._assert_all_raise(TypeError, self.data, seed='bad') def test_invalid_seed_with_input_kw(self): - self._assert_all_raise(TypeError, input=self.data, seed='bad') + self._assert_all_raise(TypeError, data=self.data, seed='bad') # ── invalid input type (not str, not buffer) ────────────────── @@ -172,7 +203,7 @@ def test_input_not_bytes_or_str(self): self._assert_all_raise(TypeError, 12345) def test_input_not_bytes_or_str_kw(self): - self._assert_all_raise(TypeError, input=12345) + self._assert_all_raise(TypeError, data=12345) class TestFastcallSeedOverflow(unittest.TestCase): diff --git a/tests/test_hashlib_compat.py b/tests/test_hashlib_compat.py new file mode 100644 index 0000000..fa21b90 --- /dev/null +++ b/tests/test_hashlib_compat.py @@ -0,0 +1,164 @@ +"""Tests for hashlib compatibility.""" +import unittest +import xxhash + + +class TestHashlibCompat(unittest.TestCase): + """Verify hashlib-compatible interface.""" + + data = b'hello world' + + def test_algorithms_available(self): + self.assertIsInstance(xxhash.algorithms_available, set) + for a in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128', 'xxh128'): + self.assertIn(a, xxhash.algorithms_available) + + def test_algorithms_guaranteed(self): + self.assertEqual(xxhash.algorithms_guaranteed, xxhash.algorithms_available) + + # ── str rejection ────────────────────────────────────────────── + + def test_str_rejected(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + for fn in (getattr(xxhash, f'{algo}_digest'), + getattr(xxhash, f'{algo}_intdigest'), + getattr(xxhash, f'{algo}_hexdigest')): + # positional str + with self.assertRaisesRegex(TypeError, + 'Strings must be encoded before hashing'): + fn('hello') + # keyword str + with self.assertRaisesRegex(TypeError, + 'Strings must be encoded before hashing'): + fn(data='hello') + # None + with self.assertRaisesRegex(TypeError, + 'object supporting the buffer API required'): + fn(None) + + def test_str_rejected_constructor(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + cls = getattr(xxhash, algo) + # positional str + with self.assertRaisesRegex(TypeError, + 'Strings must be encoded before hashing'): + cls('hello') + # keyword str + with self.assertRaisesRegex(TypeError, + 'Strings must be encoded before hashing'): + cls(data='hello') + # None + with self.assertRaisesRegex(TypeError, + 'object supporting the buffer API required'): + cls(None) + with self.assertRaisesRegex(TypeError, + 'object supporting the buffer API required'): + cls(data=None) + + def test_str_rejected_update(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + obj = getattr(xxhash, algo)() + with self.assertRaisesRegex(TypeError, + 'Strings must be encoded before hashing'): + obj.update('hello') + # also test that bytes work after + obj.update(b'hello') + self.assertIsInstance(obj.intdigest(), int) + # None + with self.assertRaisesRegex(TypeError, + 'object supporting the buffer API required'): + obj.update(None) + with self.assertRaisesRegex(TypeError, + 'object supporting the buffer API required'): + obj.update(data=None) + + # ── unknown keyword ─────────────────────────────────────────── + + def test_unknown_keyword(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + cls = getattr(xxhash, algo) + with self.assertRaises(TypeError): + cls(b'hello', bad=1) + with self.assertRaises(TypeError): + cls(data=b'hello', bad=1) + obj = cls() + with self.assertRaises(TypeError): + obj.update(b'hello', bad=1) + with self.assertRaises(TypeError): + obj.update(data=b'hello', bad=1) + + # ── data keyword ─────────────────────────────────────────────── + + def test_data_keyword(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + obj = getattr(xxhash, algo)(self.data) + d_fn = getattr(xxhash, f'{algo}_digest') + i_fn = getattr(xxhash, f'{algo}_intdigest') + h_fn = getattr(xxhash, f'{algo}_hexdigest') + self.assertEqual(d_fn(data=self.data), obj.digest()) + self.assertEqual(i_fn(data=self.data), obj.intdigest()) + self.assertEqual(h_fn(data=self.data), obj.hexdigest()) + + def test_data_keyword_constructor(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + cls = getattr(xxhash, algo) + obj = cls(data=self.data) + self.assertEqual(obj.intdigest(), + getattr(xxhash, f'{algo}_intdigest')(self.data)) + + # ── digest_size / block_size / name ──────────────────────────── + + def test_digest_size(self): + self.assertEqual(xxhash.xxh32().digest_size, 4) + self.assertEqual(xxhash.xxh64().digest_size, 8) + self.assertEqual(xxhash.xxh3_64().digest_size, 8) + self.assertEqual(xxhash.xxh3_128().digest_size, 16) + + def test_block_size(self): + self.assertEqual(xxhash.xxh32().block_size, 16) + self.assertEqual(xxhash.xxh64().block_size, 32) + self.assertEqual(xxhash.xxh3_64().block_size, 32) + self.assertEqual(xxhash.xxh3_128().block_size, 64) + + def test_name(self): + self.assertEqual(xxhash.xxh32().name, 'XXH32') + self.assertEqual(xxhash.xxh64().name, 'XXH64') + self.assertEqual(xxhash.xxh3_64().name, 'XXH3_64') + self.assertEqual(xxhash.xxh3_128().name, 'XXH3_128') + + # ── digest / hexdigest ───────────────────────────────────────── + + def test_digest(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + obj = getattr(xxhash, algo)(self.data) + d_fn = getattr(xxhash, f'{algo}_digest') + self.assertEqual(obj.digest(), d_fn(self.data)) + self.assertIsInstance(obj.digest(), bytes) + self.assertEqual(len(obj.digest()), obj.digest_size) + + def test_hexdigest(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + obj = getattr(xxhash, algo)(self.data) + h_fn = getattr(xxhash, f'{algo}_hexdigest') + self.assertEqual(obj.hexdigest(), h_fn(self.data)) + self.assertIsInstance(obj.hexdigest(), str) + self.assertEqual(len(obj.hexdigest()), obj.digest_size * 2) + + # ── update ───────────────────────────────────────────────────── + + def test_update(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + a = getattr(xxhash, algo)() + a.update(self.data) + b = getattr(xxhash, algo)(self.data) + self.assertEqual(a.digest(), b.digest()) + + # ── copy ─────────────────────────────────────────────────────── + + def test_copy(self): + for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'): + a = getattr(xxhash, algo)(self.data) + b = a.copy() + self.assertEqual(a.digest(), b.digest()) + b.update(b'more') + self.assertNotEqual(a.digest(), b.digest()) diff --git a/tests/test_xxh32.py b/tests/test_xxh32.py index c67ee03..24584a3 100644 --- a/tests/test_xxh32.py +++ b/tests/test_xxh32.py @@ -6,40 +6,40 @@ class TestXXH(unittest.TestCase): def test_xxh32(self): - self.assertEqual(xxhash.xxh32('a').intdigest(), 1426945110) - self.assertEqual(xxhash.xxh32('a', 0).intdigest(), 1426945110) - self.assertEqual(xxhash.xxh32('a', 1).intdigest(), 4111757423) - self.assertEqual(xxhash.xxh32('a', 2**32-1).intdigest(), 3443684653) + self.assertEqual(xxhash.xxh32(b'a').intdigest(), 1426945110) + self.assertEqual(xxhash.xxh32(b'a', 0).intdigest(), 1426945110) + self.assertEqual(xxhash.xxh32(b'a', 1).intdigest(), 4111757423) + self.assertEqual(xxhash.xxh32(b'a', 2**32-1).intdigest(), 3443684653) def test_xxh32_intdigest(self): - self.assertEqual(xxhash.xxh32_intdigest('a'), 1426945110) - self.assertEqual(xxhash.xxh32_intdigest('a', 0), 1426945110) - self.assertEqual(xxhash.xxh32_intdigest('a', 1), 4111757423) - self.assertEqual(xxhash.xxh32_intdigest('a', 2**32-1), 3443684653) + self.assertEqual(xxhash.xxh32_intdigest(b'a'), 1426945110) + self.assertEqual(xxhash.xxh32_intdigest(b'a', 0), 1426945110) + self.assertEqual(xxhash.xxh32_intdigest(b'a', 1), 4111757423) + self.assertEqual(xxhash.xxh32_intdigest(b'a', 2**32-1), 3443684653) def test_xxh32_update(self): x = xxhash.xxh32() - x.update('a') - self.assertEqual(xxhash.xxh32('a').digest(), x.digest()) - self.assertEqual(xxhash.xxh32_digest('a'), x.digest()) - x.update('b') - self.assertEqual(xxhash.xxh32('ab').digest(), x.digest()) - self.assertEqual(xxhash.xxh32_digest('ab'), x.digest()) - x.update('c') - self.assertEqual(xxhash.xxh32('abc').digest(), x.digest()) - self.assertEqual(xxhash.xxh32_digest('abc'), x.digest()) + x.update(b'a') + self.assertEqual(xxhash.xxh32(b'a').digest(), x.digest()) + self.assertEqual(xxhash.xxh32_digest(b'a'), x.digest()) + x.update(b'b') + self.assertEqual(xxhash.xxh32(b'ab').digest(), x.digest()) + self.assertEqual(xxhash.xxh32_digest(b'ab'), x.digest()) + x.update(b'c') + self.assertEqual(xxhash.xxh32(b'abc').digest(), x.digest()) + self.assertEqual(xxhash.xxh32_digest(b'abc'), x.digest()) seed = random.randint(0, 2**32) x = xxhash.xxh32(seed=seed) - x.update('a') - self.assertEqual(xxhash.xxh32('a', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh32_digest('a', seed), x.digest()) - x.update('b') - self.assertEqual(xxhash.xxh32('ab', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh32_digest('ab', seed), x.digest()) - x.update('c') - self.assertEqual(xxhash.xxh32('abc', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh32_digest('abc', seed), x.digest()) + x.update(b'a') + self.assertEqual(xxhash.xxh32(b'a', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh32_digest(b'a', seed), x.digest()) + x.update(b'b') + self.assertEqual(xxhash.xxh32(b'ab', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh32_digest(b'ab', seed), x.digest()) + x.update(b'c') + self.assertEqual(xxhash.xxh32(b'abc', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh32_digest(b'abc', seed), x.digest()) def test_xxh32_reset(self): x = xxhash.xxh32() @@ -54,25 +54,25 @@ def test_xxh32_reset(self): def test_xxh32_copy(self): a = xxhash.xxh32() - a.update('xxhash') + a.update(b'xxhash') b = a.copy() self.assertEqual(a.digest(), b.digest()) self.assertEqual(a.intdigest(), b.intdigest()) self.assertEqual(a.hexdigest(), b.hexdigest()) - b.update('xxhash') + b.update(b'xxhash') self.assertNotEqual(a.digest(), b.digest()) self.assertNotEqual(a.intdigest(), b.intdigest()) self.assertNotEqual(a.hexdigest(), b.hexdigest()) - a.update('xxhash') + a.update(b'xxhash') self.assertEqual(a.digest(), b.digest()) self.assertEqual(a.intdigest(), b.intdigest()) self.assertEqual(a.hexdigest(), b.hexdigest()) def test_xxh32_overflow(self): - s = 'I want an unsigned 32-bit seed!' + s = b'I want an unsigned 32-bit seed!' a = xxhash.xxh32(s, seed=0) b = xxhash.xxh32(s, seed=2**32) self.assertEqual(a.seed, b.seed) diff --git a/tests/test_xxh3_128.py b/tests/test_xxh3_128.py index a695a10..435152c 100644 --- a/tests/test_xxh3_128.py +++ b/tests/test_xxh3_128.py @@ -6,45 +6,45 @@ class TestXXH(unittest.TestCase): def test_xxh3_128(self): - self.assertEqual(xxhash.xxh3_128('a').intdigest(), 225219434562328483135862406050043285023) - self.assertEqual(xxhash.xxh3_128('a', 0).intdigest(), 225219434562328483135862406050043285023) - self.assertEqual(xxhash.xxh3_128('a', 1).intdigest(), 337425133163118381928709500770786453280) - self.assertEqual(xxhash.xxh3_128('a', 2**64-1).intdigest(), 198297796855923085494266857744987477846) + self.assertEqual(xxhash.xxh3_128(b'a').intdigest(), 225219434562328483135862406050043285023) + self.assertEqual(xxhash.xxh3_128(b'a', 0).intdigest(), 225219434562328483135862406050043285023) + self.assertEqual(xxhash.xxh3_128(b'a', 1).intdigest(), 337425133163118381928709500770786453280) + self.assertEqual(xxhash.xxh3_128(b'a', 2**64-1).intdigest(), 198297796855923085494266857744987477846) def test_xxh3_128_intdigest(self): - self.assertEqual(xxhash.xxh3_128_intdigest('a'), 225219434562328483135862406050043285023) - self.assertEqual(xxhash.xxh3_128_intdigest('a', 0), 225219434562328483135862406050043285023) - self.assertEqual(xxhash.xxh3_128_intdigest('a', 1), 337425133163118381928709500770786453280) - self.assertEqual(xxhash.xxh3_128_intdigest('a', 2**64-1), 198297796855923085494266857744987477846) + self.assertEqual(xxhash.xxh3_128_intdigest(b'a'), 225219434562328483135862406050043285023) + self.assertEqual(xxhash.xxh3_128_intdigest(b'a', 0), 225219434562328483135862406050043285023) + self.assertEqual(xxhash.xxh3_128_intdigest(b'a', 1), 337425133163118381928709500770786453280) + self.assertEqual(xxhash.xxh3_128_intdigest(b'a', 2**64-1), 198297796855923085494266857744987477846) def test_xxh3_128_update(self): x = xxhash.xxh3_128() - x.update('a') - self.assertEqual(xxhash.xxh3_128('a').digest(), x.digest()) - self.assertEqual(xxhash.xxh3_128_digest('a'), x.digest()) - x.update('b') - self.assertEqual(xxhash.xxh3_128('ab').digest(), x.digest()) - self.assertEqual(xxhash.xxh3_128_digest('ab'), x.digest()) - x.update('c') - self.assertEqual(xxhash.xxh3_128('abc').digest(), x.digest()) - self.assertEqual(xxhash.xxh3_128_digest('abc'), x.digest()) + x.update(b'a') + self.assertEqual(xxhash.xxh3_128(b'a').digest(), x.digest()) + self.assertEqual(xxhash.xxh3_128_digest(b'a'), x.digest()) + x.update(b'b') + self.assertEqual(xxhash.xxh3_128(b'ab').digest(), x.digest()) + self.assertEqual(xxhash.xxh3_128_digest(b'ab'), x.digest()) + x.update(b'c') + self.assertEqual(xxhash.xxh3_128(b'abc').digest(), x.digest()) + self.assertEqual(xxhash.xxh3_128_digest(b'abc'), x.digest()) seed = random.randint(0, 2**64) x = xxhash.xxh3_128(seed=seed) - x.update('a') - self.assertEqual(xxhash.xxh3_128('a', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh3_128_digest('a', seed), x.digest()) - x.update('b') - self.assertEqual(xxhash.xxh3_128('ab', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh3_128_digest('ab', seed), x.digest()) - x.update('c') - self.assertEqual(xxhash.xxh3_128('abc', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh3_128_digest('abc', seed), x.digest()) + x.update(b'a') + self.assertEqual(xxhash.xxh3_128(b'a', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh3_128_digest(b'a', seed), x.digest()) + x.update(b'b') + self.assertEqual(xxhash.xxh3_128(b'ab', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh3_128_digest(b'ab', seed), x.digest()) + x.update(b'c') + self.assertEqual(xxhash.xxh3_128(b'abc', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh3_128_digest(b'abc', seed), x.digest()) def test_xxh3_128_reset(self): x = xxhash.xxh3_128() h = x.intdigest() - x.update('x' * 10240) + x.update(b'x' * 10240) x.reset() self.assertEqual(h, x.intdigest()) @@ -52,7 +52,7 @@ def test_xxh3_128_seed_reset(self): seed = random.randint(0, 2**64-1) x = xxhash.xxh3_128(seed=seed) h = x.intdigest() - x.update('x' * 10240) + x.update(b'x' * 10240) x.reset() self.assertEqual(h, x.intdigest()) @@ -97,25 +97,25 @@ def test_xxh3_128_seed_reset_more(self): def test_xxh3_128_copy(self): a = xxhash.xxh3_128() - a.update('xxhash') + a.update(b'xxhash') b = a.copy() self.assertEqual(a.digest(), b.digest()) self.assertEqual(a.intdigest(), b.intdigest()) self.assertEqual(a.hexdigest(), b.hexdigest()) - b.update('xxhash') + b.update(b'xxhash') self.assertNotEqual(a.digest(), b.digest()) self.assertNotEqual(a.intdigest(), b.intdigest()) self.assertNotEqual(a.hexdigest(), b.hexdigest()) - a.update('xxhash') + a.update(b'xxhash') self.assertEqual(a.digest(), b.digest()) self.assertEqual(a.intdigest(), b.intdigest()) self.assertEqual(a.hexdigest(), b.hexdigest()) def test_xxh3_128_overflow(self): - s = 'I want an unsigned 64-bit seed!' + s = b'I want an unsigned 64-bit seed!' a = xxhash.xxh3_128(s, seed=0) b = xxhash.xxh3_128(s, seed=2**64) self.assertEqual(a.seed, b.seed) diff --git a/tests/test_xxh3_64.py b/tests/test_xxh3_64.py index 3ff13ee..70b7d27 100644 --- a/tests/test_xxh3_64.py +++ b/tests/test_xxh3_64.py @@ -8,46 +8,46 @@ class TestXXH(unittest.TestCase): def test_xxh3_64(self): - self.assertEqual(xxhash.xxh3_64('a').intdigest(), 16629034431890738719) - self.assertEqual(xxhash.xxh3_64('a', 0).intdigest(), 16629034431890738719) - self.assertEqual(xxhash.xxh3_64('a', 1).intdigest(), 15201566949650179872) - self.assertEqual(xxhash.xxh3_64('a', 2**64-1).intdigest(), 4875116479388997462) + self.assertEqual(xxhash.xxh3_64(b'a').intdigest(), 16629034431890738719) + self.assertEqual(xxhash.xxh3_64(b'a', 0).intdigest(), 16629034431890738719) + self.assertEqual(xxhash.xxh3_64(b'a', 1).intdigest(), 15201566949650179872) + self.assertEqual(xxhash.xxh3_64(b'a', 2**64-1).intdigest(), 4875116479388997462) def test_xxh3_64_intdigest(self): - self.assertEqual(xxhash.xxh3_64_intdigest('a'), 16629034431890738719) - self.assertEqual(xxhash.xxh3_64_intdigest('a', 0), 16629034431890738719) - self.assertEqual(xxhash.xxh3_64_intdigest('a', 1), 15201566949650179872) - self.assertEqual(xxhash.xxh3_64_intdigest('a', 2**64-1), 4875116479388997462) + self.assertEqual(xxhash.xxh3_64_intdigest(b'a'), 16629034431890738719) + self.assertEqual(xxhash.xxh3_64_intdigest(b'a', 0), 16629034431890738719) + self.assertEqual(xxhash.xxh3_64_intdigest(b'a', 1), 15201566949650179872) + self.assertEqual(xxhash.xxh3_64_intdigest(b'a', 2**64-1), 4875116479388997462) def test_xxh3_64_update(self): x = xxhash.xxh3_64() - x.update('a') - self.assertEqual(xxhash.xxh3_64('a').digest(), x.digest()) - self.assertEqual(xxhash.xxh3_64_digest('a'), x.digest()) - x.update('b') - self.assertEqual(xxhash.xxh3_64('ab').digest(), x.digest()) - self.assertEqual(xxhash.xxh3_64_digest('ab'), x.digest()) - x.update('c') - self.assertEqual(xxhash.xxh3_64('abc').digest(), x.digest()) - self.assertEqual(xxhash.xxh3_64_digest('abc'), x.digest()) + x.update(b'a') + self.assertEqual(xxhash.xxh3_64(b'a').digest(), x.digest()) + self.assertEqual(xxhash.xxh3_64_digest(b'a'), x.digest()) + x.update(b'b') + self.assertEqual(xxhash.xxh3_64(b'ab').digest(), x.digest()) + self.assertEqual(xxhash.xxh3_64_digest(b'ab'), x.digest()) + x.update(b'c') + self.assertEqual(xxhash.xxh3_64(b'abc').digest(), x.digest()) + self.assertEqual(xxhash.xxh3_64_digest(b'abc'), x.digest()) seed = random.randint(0, 2**64) x = xxhash.xxh3_64(seed=seed) - x.update('a') - self.assertEqual(xxhash.xxh3_64('a', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh3_64_digest('a', seed), x.digest()) - x.update('b') - self.assertEqual(xxhash.xxh3_64('ab', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh3_64_digest('ab', seed), x.digest()) - x.update('c') - self.assertEqual(xxhash.xxh3_64('abc', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh3_64_digest('abc', seed), x.digest()) + x.update(b'a') + self.assertEqual(xxhash.xxh3_64(b'a', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh3_64_digest(b'a', seed), x.digest()) + x.update(b'b') + self.assertEqual(xxhash.xxh3_64(b'ab', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh3_64_digest(b'ab', seed), x.digest()) + x.update(b'c') + self.assertEqual(xxhash.xxh3_64(b'abc', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh3_64_digest(b'abc', seed), x.digest()) def test_xxh3_64_reset(self): x = xxhash.xxh3_64() h = x.intdigest() - x.update('x' * 10240) + x.update(b'x' * 10240) x.reset() self.assertEqual(h, x.intdigest()) @@ -56,7 +56,7 @@ def test_xxh3_64_seed_reset(self): seed = random.randint(0, 2**64-1) x = xxhash.xxh3_64(seed=seed) h = x.intdigest() - x.update('x' * 10240) + x.update(b'x' * 10240) x.reset() self.assertEqual(h, x.intdigest()) @@ -105,25 +105,25 @@ def test_xxh3_64_seed_reset_more(self): def test_xxh3_64_copy(self): a = xxhash.xxh3_64() - a.update('xxhash') + a.update(b'xxhash') b = a.copy() self.assertEqual(a.digest(), b.digest()) self.assertEqual(a.intdigest(), b.intdigest()) self.assertEqual(a.hexdigest(), b.hexdigest()) - b.update('xxhash') + b.update(b'xxhash') self.assertNotEqual(a.digest(), b.digest()) self.assertNotEqual(a.intdigest(), b.intdigest()) self.assertNotEqual(a.hexdigest(), b.hexdigest()) - a.update('xxhash') + a.update(b'xxhash') self.assertEqual(a.digest(), b.digest()) self.assertEqual(a.intdigest(), b.intdigest()) self.assertEqual(a.hexdigest(), b.hexdigest()) def test_xxh3_64_overflow(self): - s = 'I want an unsigned 64-bit seed!' + s = b'I want an unsigned 64-bit seed!' a = xxhash.xxh3_64(s, seed=0) b = xxhash.xxh3_64(s, seed=2**64) self.assertEqual(a.seed, b.seed) diff --git a/tests/test_xxh64.py b/tests/test_xxh64.py index e8ed707..1bb12ac 100644 --- a/tests/test_xxh64.py +++ b/tests/test_xxh64.py @@ -5,40 +5,40 @@ class TestXXH(unittest.TestCase): def test_xxh64(self): - self.assertEqual(xxhash.xxh64('a').intdigest(), 15154266338359012955) - self.assertEqual(xxhash.xxh64('a', 0).intdigest(), 15154266338359012955) - self.assertEqual(xxhash.xxh64('a', 1).intdigest(), 16051599287423682246) - self.assertEqual(xxhash.xxh64('a', 2**64-1).intdigest(), 6972758980737027682) + self.assertEqual(xxhash.xxh64(b'a').intdigest(), 15154266338359012955) + self.assertEqual(xxhash.xxh64(b'a', 0).intdigest(), 15154266338359012955) + self.assertEqual(xxhash.xxh64(b'a', 1).intdigest(), 16051599287423682246) + self.assertEqual(xxhash.xxh64(b'a', 2**64-1).intdigest(), 6972758980737027682) def test_xxh64_intdigest(self): - self.assertEqual(xxhash.xxh64_intdigest('a'), 15154266338359012955) - self.assertEqual(xxhash.xxh64_intdigest('a', 0), 15154266338359012955) - self.assertEqual(xxhash.xxh64_intdigest('a', 1), 16051599287423682246) - self.assertEqual(xxhash.xxh64_intdigest('a', 2**64-1), 6972758980737027682) + self.assertEqual(xxhash.xxh64_intdigest(b'a'), 15154266338359012955) + self.assertEqual(xxhash.xxh64_intdigest(b'a', 0), 15154266338359012955) + self.assertEqual(xxhash.xxh64_intdigest(b'a', 1), 16051599287423682246) + self.assertEqual(xxhash.xxh64_intdigest(b'a', 2**64-1), 6972758980737027682) def test_xxh64_update(self): x = xxhash.xxh64() - x.update('a') - self.assertEqual(xxhash.xxh64('a').digest(), x.digest()) - self.assertEqual(xxhash.xxh64_digest('a'), x.digest()) - x.update('b') - self.assertEqual(xxhash.xxh64('ab').digest(), x.digest()) - self.assertEqual(xxhash.xxh64_digest('ab'), x.digest()) - x.update('c') - self.assertEqual(xxhash.xxh64('abc').digest(), x.digest()) - self.assertEqual(xxhash.xxh64_digest('abc'), x.digest()) + x.update(b'a') + self.assertEqual(xxhash.xxh64(b'a').digest(), x.digest()) + self.assertEqual(xxhash.xxh64_digest(b'a'), x.digest()) + x.update(b'b') + self.assertEqual(xxhash.xxh64(b'ab').digest(), x.digest()) + self.assertEqual(xxhash.xxh64_digest(b'ab'), x.digest()) + x.update(b'c') + self.assertEqual(xxhash.xxh64(b'abc').digest(), x.digest()) + self.assertEqual(xxhash.xxh64_digest(b'abc'), x.digest()) seed = random.randint(0, 2**64) x = xxhash.xxh64(seed=seed) - x.update('a') - self.assertEqual(xxhash.xxh64('a', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh64_digest('a', seed), x.digest()) - x.update('b') - self.assertEqual(xxhash.xxh64('ab', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh64_digest('ab', seed), x.digest()) - x.update('c') - self.assertEqual(xxhash.xxh64('abc', seed).digest(), x.digest()) - self.assertEqual(xxhash.xxh64_digest('abc', seed), x.digest()) + x.update(b'a') + self.assertEqual(xxhash.xxh64(b'a', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh64_digest(b'a', seed), x.digest()) + x.update(b'b') + self.assertEqual(xxhash.xxh64(b'ab', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh64_digest(b'ab', seed), x.digest()) + x.update(b'c') + self.assertEqual(xxhash.xxh64(b'abc', seed).digest(), x.digest()) + self.assertEqual(xxhash.xxh64_digest(b'abc', seed), x.digest()) def test_xxh64_reset(self): x = xxhash.xxh64() @@ -53,25 +53,25 @@ def test_xxh64_reset(self): def test_xxh64_copy(self): a = xxhash.xxh64() - a.update('xxhash') + a.update(b'xxhash') b = a.copy() self.assertEqual(a.digest(), b.digest()) self.assertEqual(a.intdigest(), b.intdigest()) self.assertEqual(a.hexdigest(), b.hexdigest()) - b.update('xxhash') + b.update(b'xxhash') self.assertNotEqual(a.digest(), b.digest()) self.assertNotEqual(a.intdigest(), b.intdigest()) self.assertNotEqual(a.hexdigest(), b.hexdigest()) - a.update('xxhash') + a.update(b'xxhash') self.assertEqual(a.digest(), b.digest()) self.assertEqual(a.intdigest(), b.intdigest()) self.assertEqual(a.hexdigest(), b.hexdigest()) def test_xxh64_overflow(self): - s = 'I want an unsigned 64-bit seed!' + s = b'I want an unsigned 64-bit seed!' a = xxhash.xxh64(s, seed=0) b = xxhash.xxh64(s, seed=2**64) self.assertEqual(a.seed, b.seed) diff --git a/xxhash/__init__.py b/xxhash/__init__.py index 043c486..5fe0087 100644 --- a/xxhash/__init__.py +++ b/xxhash/__init__.py @@ -34,6 +34,8 @@ "xxh3_128", ]) +algorithms_guaranteed = algorithms_available + __all__ = [ "xxh32", @@ -60,4 +62,5 @@ "VERSION_TUPLE", "XXHASH_VERSION", "algorithms_available", + "algorithms_guaranteed", ] diff --git a/xxhash/version.py b/xxhash/version.py index 83f532c..923f897 100644 --- a/xxhash/version.py +++ b/xxhash/version.py @@ -1,3 +1,3 @@ -VERSION = "3.8.0.dev3" +VERSION = "3.8.0.dev6" #: Deprecated, will be removed in the next major release VERSION_TUPLE = (3, 8, 0)