Skip to content

Commit fba326c

Browse files
committed
fix: use _get_buffer_or_str in tp_init for consistent str error
Parse `data` as a `PyObject*` using the `O` format unit instead of `y*`, then validate it via `_get_buffer_or_str`. This ensures that a `str` passed either positionally or by keyword produces the same 'Strings must be encoded before hashing' error on all platforms, including PyPy, where `tp_vectorcall` falls back to `tp_init`.
1 parent 3394813 commit fba326c

1 file changed

Lines changed: 32 additions & 20 deletions

File tree

src/_xxhash.c

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -550,18 +550,21 @@ static PyObject *PYXXH32_new(PyTypeObject *type, PyObject *args, PyObject *kwarg
550550
static int PYXXH32_init(PYXXH32Object *self, PyObject *args, PyObject *kwargs)
551551
{
552552
XXH32_hash_t seed = 0;
553+
PyObject *data_obj = NULL;
553554
char *keywords[] = {"data", "seed", NULL};
554555
Py_buffer buf;
555556

556557
buf.buf = buf.obj = NULL;
557558

558-
if (PyTuple_GET_SIZE(args) >= 1 && PyUnicode_Check(PyTuple_GET_ITEM(args, 0))) {
559-
PyErr_SetString(PyExc_TypeError,
560-
"Strings must be encoded before hashing");
559+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OI:__init__", keywords, &data_obj, &seed)) {
561560
return -1;
562561
}
563-
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|y*I:__init__", keywords, &buf, &seed)) {
564-
return -1;
562+
563+
if (data_obj && data_obj != Py_None) {
564+
PyObject *buf_owner;
565+
if (_get_buffer_or_str(data_obj, &buf, &buf_owner) < 0)
566+
return -1;
567+
Py_XDECREF(buf_owner);
565568
}
566569

567570
self->seed = seed;
@@ -910,18 +913,21 @@ static PyObject *PYXXH64_new(PyTypeObject *type, PyObject *args, PyObject *kwarg
910913
static int PYXXH64_init(PYXXH64Object *self, PyObject *args, PyObject *kwargs)
911914
{
912915
XXH64_hash_t seed = 0;
916+
PyObject *data_obj = NULL;
913917
char *keywords[] = {"data", "seed", NULL};
914918
Py_buffer buf;
915919

916920
buf.buf = buf.obj = NULL;
917921

918-
if (PyTuple_GET_SIZE(args) >= 1 && PyUnicode_Check(PyTuple_GET_ITEM(args, 0))) {
919-
PyErr_SetString(PyExc_TypeError,
920-
"Strings must be encoded before hashing");
922+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OK:__init__", keywords, &data_obj, &seed)) {
921923
return -1;
922924
}
923-
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|y*K:__init__", keywords, &buf, &seed)) {
924-
return -1;
925+
926+
if (data_obj && data_obj != Py_None) {
927+
PyObject *buf_owner;
928+
if (_get_buffer_or_str(data_obj, &buf, &buf_owner) < 0)
929+
return -1;
930+
Py_XDECREF(buf_owner);
925931
}
926932

927933
self->seed = seed;
@@ -1269,18 +1275,21 @@ static PyObject *PYXXH3_64_new(PyTypeObject *type, PyObject *args, PyObject *kwa
12691275
static int PYXXH3_64_init(PYXXH3_64Object *self, PyObject *args, PyObject *kwargs)
12701276
{
12711277
XXH64_hash_t seed = 0;
1278+
PyObject *data_obj = NULL;
12721279
char *keywords[] = {"data", "seed", NULL};
12731280
Py_buffer buf;
12741281

12751282
buf.buf = buf.obj = NULL;
12761283

1277-
if (PyTuple_GET_SIZE(args) >= 1 && PyUnicode_Check(PyTuple_GET_ITEM(args, 0))) {
1278-
PyErr_SetString(PyExc_TypeError,
1279-
"Strings must be encoded before hashing");
1284+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OK:__init__", keywords, &data_obj, &seed)) {
12801285
return -1;
12811286
}
1282-
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|y*K:__init__", keywords, &buf, &seed)) {
1283-
return -1;
1287+
1288+
if (data_obj && data_obj != Py_None) {
1289+
PyObject *buf_owner;
1290+
if (_get_buffer_or_str(data_obj, &buf, &buf_owner) < 0)
1291+
return -1;
1292+
Py_XDECREF(buf_owner);
12841293
}
12851294

12861295
self->seed = seed;
@@ -1637,18 +1646,21 @@ static PyObject *PYXXH3_128_new(PyTypeObject *type, PyObject *args, PyObject *kw
16371646
static int PYXXH3_128_init(PYXXH3_128Object *self, PyObject *args, PyObject *kwargs)
16381647
{
16391648
XXH64_hash_t seed = 0;
1649+
PyObject *data_obj = NULL;
16401650
char *keywords[] = {"data", "seed", NULL};
16411651
Py_buffer buf;
16421652

16431653
buf.buf = buf.obj = NULL;
16441654

1645-
if (PyTuple_GET_SIZE(args) >= 1 && PyUnicode_Check(PyTuple_GET_ITEM(args, 0))) {
1646-
PyErr_SetString(PyExc_TypeError,
1647-
"Strings must be encoded before hashing");
1655+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OK:__init__", keywords, &data_obj, &seed)) {
16481656
return -1;
16491657
}
1650-
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|y*K:__init__", keywords, &buf, &seed)) {
1651-
return -1;
1658+
1659+
if (data_obj && data_obj != Py_None) {
1660+
PyObject *buf_owner;
1661+
if (_get_buffer_or_str(data_obj, &buf, &buf_owner) < 0)
1662+
return -1;
1663+
Py_XDECREF(buf_owner);
16521664
}
16531665

16541666
self->seed = seed;

0 commit comments

Comments
 (0)