Skip to content

Commit 290258a

Browse files
committed
fix: validate keywords first in update() for correct error reporting
Check unknown keywords before arg parsing in all 4 update() functions. Fixes cases where update(b'a', extra=1) reported wrong error. Also reject unknown keywords in tp_init via shared _check_kwargs helper.
1 parent 36fb95d commit 290258a

2 files changed

Lines changed: 151 additions & 68 deletions

File tree

src/_xxhash.c

Lines changed: 136 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -521,14 +521,36 @@ static PyObject *PYXXH32_new(PyTypeObject *type, PyObject *args, PyObject *kwarg
521521
return (PyObject *)self;
522522
}
523523

524+
525+
/* Check kwargs for unknown keys. Returns 0 if all known, -1 with TypeError. */
526+
static Py_ALWAYS_INLINE int
527+
_check_kwargs(PyObject *kwargs)
528+
{
529+
if (!kwargs)
530+
return 0;
531+
Py_ssize_t pos = 0;
532+
PyObject *key, *val;
533+
while (PyDict_Next(kwargs, &pos, &key, &val)) {
534+
if (PyUnicode_CompareWithASCIIString(key, "data") == 0 ||
535+
PyUnicode_CompareWithASCIIString(key, "seed") == 0)
536+
continue;
537+
PyErr_Format(PyExc_TypeError,
538+
"'%U' is an invalid keyword argument for this function",
539+
key);
540+
return -1;
541+
}
542+
return 0;
543+
}
524544
static int PYXXH32_init(PYXXH32Object *self, PyObject *args, PyObject *kwargs)
525545
{
526546
XXH32_hash_t seed = 0;
527547
PyObject *data_obj = NULL;
528548
Py_buffer buf = {NULL, NULL};
529549
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
530550

531-
/* positional args: data, then optional seed */
551+
if (_check_kwargs(kwargs) < 0)
552+
return -1;
553+
532554
if (nargs >= 1) {
533555
data_obj = PyTuple_GET_ITEM(args, 0);
534556
if (kwargs && PyDict_GetItemString(kwargs, "data")) {
@@ -552,15 +574,10 @@ static int PYXXH32_init(PYXXH32Object *self, PyObject *args, PyObject *kwargs)
552574
return -1;
553575
}
554576

555-
/* keyword args */
556577
if (kwargs) {
557578
PyObject *val = PyDict_GetItemString(kwargs, "data");
558579
if (val) {
559-
if (data_obj) {
560-
PyErr_SetString(PyExc_TypeError,
561-
"__init__() got multiple values for argument 'data'");
562-
return -1;
563-
}
580+
if (data_obj) return -1; /* unreachable, caught above */
564581
data_obj = val;
565582
}
566583
val = PyDict_GetItemString(kwargs, "seed");
@@ -592,7 +609,28 @@ PyDoc_STRVAR(
592609
static PyObject *PYXXH32_update(PYXXH32Object *self, PyObject *const *args,
593610
Py_ssize_t nargs, PyObject *kwnames)
594611
{
595-
PyObject *arg;
612+
PyObject *arg = NULL;
613+
614+
/* validate keywords first */
615+
if (kwnames) {
616+
Py_ssize_t nkw = PyTuple_GET_SIZE(kwnames);
617+
for (Py_ssize_t i = 0; i < nkw; i++) {
618+
PyObject *key = PyTuple_GET_ITEM(kwnames, i);
619+
if (PyUnicode_CompareWithASCIIString(key, "data") == 0) {
620+
if (nargs >= 1) {
621+
PyErr_SetString(PyExc_TypeError,
622+
"xxh32.update() got multiple values for argument 'data'");
623+
return NULL;
624+
}
625+
arg = args[nargs + i];
626+
} else {
627+
PyErr_Format(PyExc_TypeError,
628+
"'%U' is an invalid keyword argument for 'xxh32.update()'",
629+
key);
630+
return NULL;
631+
}
632+
}
633+
}
596634

597635
if (nargs >= 1) {
598636
if (nargs > 1) {
@@ -601,15 +639,9 @@ static PyObject *PYXXH32_update(PYXXH32Object *self, PyObject *const *args,
601639
return NULL;
602640
}
603641
arg = args[0];
604-
if (kwnames && PyTuple_GET_SIZE(kwnames) > 0) {
605-
PyErr_SetString(PyExc_TypeError,
606-
"xxh32.update() got multiple values for argument 'data'");
607-
return NULL;
608-
}
609-
} else if (kwnames && PyTuple_GET_SIZE(kwnames) == 1 &&
610-
PyUnicode_CompareWithASCIIString(PyTuple_GET_ITEM(kwnames, 0), "data") == 0) {
611-
arg = args[0];
612-
} else {
642+
}
643+
644+
if (!arg) {
613645
PyErr_SetString(PyExc_TypeError, "xxh32.update() missing required argument 'data'");
614646
return NULL;
615647
}
@@ -937,7 +969,9 @@ static int PYXXH64_init(PYXXH64Object *self, PyObject *args, PyObject *kwargs)
937969
Py_buffer buf = {NULL, NULL};
938970
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
939971

940-
/* positional args: data, then optional seed */
972+
if (_check_kwargs(kwargs) < 0)
973+
return -1;
974+
941975
if (nargs >= 1) {
942976
data_obj = PyTuple_GET_ITEM(args, 0);
943977
if (kwargs && PyDict_GetItemString(kwargs, "data")) {
@@ -961,15 +995,10 @@ static int PYXXH64_init(PYXXH64Object *self, PyObject *args, PyObject *kwargs)
961995
return -1;
962996
}
963997

964-
/* keyword args */
965998
if (kwargs) {
966999
PyObject *val = PyDict_GetItemString(kwargs, "data");
9671000
if (val) {
968-
if (data_obj) {
969-
PyErr_SetString(PyExc_TypeError,
970-
"__init__() got multiple values for argument 'data'");
971-
return -1;
972-
}
1001+
if (data_obj) return -1; /* unreachable, caught above */
9731002
data_obj = val;
9741003
}
9751004
val = PyDict_GetItemString(kwargs, "seed");
@@ -1001,7 +1030,28 @@ PyDoc_STRVAR(
10011030
static PyObject *PYXXH64_update(PYXXH64Object *self, PyObject *const *args,
10021031
Py_ssize_t nargs, PyObject *kwnames)
10031032
{
1004-
PyObject *arg;
1033+
PyObject *arg = NULL;
1034+
1035+
/* validate keywords first */
1036+
if (kwnames) {
1037+
Py_ssize_t nkw = PyTuple_GET_SIZE(kwnames);
1038+
for (Py_ssize_t i = 0; i < nkw; i++) {
1039+
PyObject *key = PyTuple_GET_ITEM(kwnames, i);
1040+
if (PyUnicode_CompareWithASCIIString(key, "data") == 0) {
1041+
if (nargs >= 1) {
1042+
PyErr_SetString(PyExc_TypeError,
1043+
"xxh64.update() got multiple values for argument 'data'");
1044+
return NULL;
1045+
}
1046+
arg = args[nargs + i];
1047+
} else {
1048+
PyErr_Format(PyExc_TypeError,
1049+
"'%U' is an invalid keyword argument for 'xxh64.update()'",
1050+
key);
1051+
return NULL;
1052+
}
1053+
}
1054+
}
10051055

10061056
if (nargs >= 1) {
10071057
if (nargs > 1) {
@@ -1010,15 +1060,9 @@ static PyObject *PYXXH64_update(PYXXH64Object *self, PyObject *const *args,
10101060
return NULL;
10111061
}
10121062
arg = args[0];
1013-
if (kwnames && PyTuple_GET_SIZE(kwnames) > 0) {
1014-
PyErr_SetString(PyExc_TypeError,
1015-
"xxh64.update() got multiple values for argument 'data'");
1016-
return NULL;
1017-
}
1018-
} else if (kwnames && PyTuple_GET_SIZE(kwnames) == 1 &&
1019-
PyUnicode_CompareWithASCIIString(PyTuple_GET_ITEM(kwnames, 0), "data") == 0) {
1020-
arg = args[0];
1021-
} else {
1063+
}
1064+
1065+
if (!arg) {
10221066
PyErr_SetString(PyExc_TypeError, "xxh64.update() missing required argument 'data'");
10231067
return NULL;
10241068
}
@@ -1345,7 +1389,9 @@ static int PYXXH3_64_init(PYXXH3_64Object *self, PyObject *args, PyObject *kwarg
13451389
Py_buffer buf = {NULL, NULL};
13461390
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
13471391

1348-
/* positional args: data, then optional seed */
1392+
if (_check_kwargs(kwargs) < 0)
1393+
return -1;
1394+
13491395
if (nargs >= 1) {
13501396
data_obj = PyTuple_GET_ITEM(args, 0);
13511397
if (kwargs && PyDict_GetItemString(kwargs, "data")) {
@@ -1369,15 +1415,10 @@ static int PYXXH3_64_init(PYXXH3_64Object *self, PyObject *args, PyObject *kwarg
13691415
return -1;
13701416
}
13711417

1372-
/* keyword args */
13731418
if (kwargs) {
13741419
PyObject *val = PyDict_GetItemString(kwargs, "data");
13751420
if (val) {
1376-
if (data_obj) {
1377-
PyErr_SetString(PyExc_TypeError,
1378-
"__init__() got multiple values for argument 'data'");
1379-
return -1;
1380-
}
1421+
if (data_obj) return -1; /* unreachable, caught above */
13811422
data_obj = val;
13821423
}
13831424
val = PyDict_GetItemString(kwargs, "seed");
@@ -1409,7 +1450,28 @@ PyDoc_STRVAR(
14091450
static PyObject *PYXXH3_64_update(PYXXH3_64Object *self, PyObject *const *args,
14101451
Py_ssize_t nargs, PyObject *kwnames)
14111452
{
1412-
PyObject *arg;
1453+
PyObject *arg = NULL;
1454+
1455+
/* validate keywords first */
1456+
if (kwnames) {
1457+
Py_ssize_t nkw = PyTuple_GET_SIZE(kwnames);
1458+
for (Py_ssize_t i = 0; i < nkw; i++) {
1459+
PyObject *key = PyTuple_GET_ITEM(kwnames, i);
1460+
if (PyUnicode_CompareWithASCIIString(key, "data") == 0) {
1461+
if (nargs >= 1) {
1462+
PyErr_SetString(PyExc_TypeError,
1463+
"xxh3_64.update() got multiple values for argument 'data'");
1464+
return NULL;
1465+
}
1466+
arg = args[nargs + i];
1467+
} else {
1468+
PyErr_Format(PyExc_TypeError,
1469+
"'%U' is an invalid keyword argument for 'xxh3_64.update()'",
1470+
key);
1471+
return NULL;
1472+
}
1473+
}
1474+
}
14131475

14141476
if (nargs >= 1) {
14151477
if (nargs > 1) {
@@ -1418,15 +1480,9 @@ static PyObject *PYXXH3_64_update(PYXXH3_64Object *self, PyObject *const *args,
14181480
return NULL;
14191481
}
14201482
arg = args[0];
1421-
if (kwnames && PyTuple_GET_SIZE(kwnames) > 0) {
1422-
PyErr_SetString(PyExc_TypeError,
1423-
"xxh3_64.update() got multiple values for argument 'data'");
1424-
return NULL;
1425-
}
1426-
} else if (kwnames && PyTuple_GET_SIZE(kwnames) == 1 &&
1427-
PyUnicode_CompareWithASCIIString(PyTuple_GET_ITEM(kwnames, 0), "data") == 0) {
1428-
arg = args[0];
1429-
} else {
1483+
}
1484+
1485+
if (!arg) {
14301486
PyErr_SetString(PyExc_TypeError, "xxh3_64.update() missing required argument 'data'");
14311487
return NULL;
14321488
}
@@ -1762,7 +1818,9 @@ static int PYXXH3_128_init(PYXXH3_128Object *self, PyObject *args, PyObject *kwa
17621818
Py_buffer buf = {NULL, NULL};
17631819
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
17641820

1765-
/* positional args: data, then optional seed */
1821+
if (_check_kwargs(kwargs) < 0)
1822+
return -1;
1823+
17661824
if (nargs >= 1) {
17671825
data_obj = PyTuple_GET_ITEM(args, 0);
17681826
if (kwargs && PyDict_GetItemString(kwargs, "data")) {
@@ -1786,15 +1844,10 @@ static int PYXXH3_128_init(PYXXH3_128Object *self, PyObject *args, PyObject *kwa
17861844
return -1;
17871845
}
17881846

1789-
/* keyword args */
17901847
if (kwargs) {
17911848
PyObject *val = PyDict_GetItemString(kwargs, "data");
17921849
if (val) {
1793-
if (data_obj) {
1794-
PyErr_SetString(PyExc_TypeError,
1795-
"__init__() got multiple values for argument 'data'");
1796-
return -1;
1797-
}
1850+
if (data_obj) return -1; /* unreachable, caught above */
17981851
data_obj = val;
17991852
}
18001853
val = PyDict_GetItemString(kwargs, "seed");
@@ -1826,7 +1879,28 @@ PyDoc_STRVAR(
18261879
static PyObject *PYXXH3_128_update(PYXXH3_128Object *self, PyObject *const *args,
18271880
Py_ssize_t nargs, PyObject *kwnames)
18281881
{
1829-
PyObject *arg;
1882+
PyObject *arg = NULL;
1883+
1884+
/* validate keywords first */
1885+
if (kwnames) {
1886+
Py_ssize_t nkw = PyTuple_GET_SIZE(kwnames);
1887+
for (Py_ssize_t i = 0; i < nkw; i++) {
1888+
PyObject *key = PyTuple_GET_ITEM(kwnames, i);
1889+
if (PyUnicode_CompareWithASCIIString(key, "data") == 0) {
1890+
if (nargs >= 1) {
1891+
PyErr_SetString(PyExc_TypeError,
1892+
"xxh3_128.update() got multiple values for argument 'data'");
1893+
return NULL;
1894+
}
1895+
arg = args[nargs + i];
1896+
} else {
1897+
PyErr_Format(PyExc_TypeError,
1898+
"'%U' is an invalid keyword argument for 'xxh3_128.update()'",
1899+
key);
1900+
return NULL;
1901+
}
1902+
}
1903+
}
18301904

18311905
if (nargs >= 1) {
18321906
if (nargs > 1) {
@@ -1835,15 +1909,9 @@ static PyObject *PYXXH3_128_update(PYXXH3_128Object *self, PyObject *const *args
18351909
return NULL;
18361910
}
18371911
arg = args[0];
1838-
if (kwnames && PyTuple_GET_SIZE(kwnames) > 0) {
1839-
PyErr_SetString(PyExc_TypeError,
1840-
"xxh3_128.update() got multiple values for argument 'data'");
1841-
return NULL;
1842-
}
1843-
} else if (kwnames && PyTuple_GET_SIZE(kwnames) == 1 &&
1844-
PyUnicode_CompareWithASCIIString(PyTuple_GET_ITEM(kwnames, 0), "data") == 0) {
1845-
arg = args[0];
1846-
} else {
1912+
}
1913+
1914+
if (!arg) {
18471915
PyErr_SetString(PyExc_TypeError, "xxh3_128.update() missing required argument 'data'");
18481916
return NULL;
18491917
}

tests/test_hashlib_compat.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,21 @@ def test_str_rejected_update(self):
7272
'object supporting the buffer API required'):
7373
obj.update(data=None)
7474

75+
# ── unknown keyword ───────────────────────────────────────────
76+
77+
def test_unknown_keyword(self):
78+
for algo in ('xxh32', 'xxh64', 'xxh3_64', 'xxh3_128'):
79+
cls = getattr(xxhash, algo)
80+
with self.assertRaises(TypeError):
81+
cls(b'hello', bad=1)
82+
with self.assertRaises(TypeError):
83+
cls(data=b'hello', bad=1)
84+
obj = cls()
85+
with self.assertRaises(TypeError):
86+
obj.update(b'hello', bad=1)
87+
with self.assertRaises(TypeError):
88+
obj.update(data=b'hello', bad=1)
89+
7590
# ── data keyword ───────────────────────────────────────────────
7691

7792
def test_data_keyword(self):

0 commit comments

Comments
 (0)