Skip to content

Commit cf94296

Browse files
committed
bytes binascii performance experiments
1 parent 1a8e574 commit cf94296

File tree

3 files changed

+24
-205
lines changed

3 files changed

+24
-205
lines changed

Modules/binascii.c

Lines changed: 9 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,7 @@ binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
871871
/*[clinic input]
872872
binascii.a2b_hex
873873
874-
hexstr: ascii_buffer
874+
hexstr: object
875875
/
876876
877877
Binary data of hexadecimal representation.
@@ -881,58 +881,17 @@ This function is also available as "unhexlify()".
881881
[clinic start generated code]*/
882882

883883
static PyObject *
884-
binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
885-
/*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
884+
binascii_a2b_hex(PyObject *module, PyObject *hexstr)
885+
/*[clinic end generated code: output=6c7c03524380fc43 input=da849a2fb3d639e1]*/
886886
{
887-
const char* argbuf;
888-
Py_ssize_t arglen;
887+
// puts("binascii_a2b_hex");
889888
PyObject *retval;
890889
char* retbuf;
891-
Py_ssize_t i, j;
892-
binascii_state *state;
893-
894-
argbuf = hexstr->buf;
895-
arglen = hexstr->len;
896-
897-
assert(arglen >= 0);
898-
899-
/* XXX What should we do about strings with an odd length? Should
900-
* we add an implicit leading zero, or a trailing zero? For now,
901-
* raise an exception.
902-
*/
903-
if (arglen % 2) {
904-
state = get_binascii_state(module);
905-
if (state == NULL) {
906-
return NULL;
907-
}
908-
PyErr_SetString(state->Error, "Odd-length string");
909-
return NULL;
910-
}
911890

912-
retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
913-
if (!retval)
914-
return NULL;
891+
retval = PyBytes_FromStringAndSize(NULL, 1);
915892
retbuf = PyBytes_AS_STRING(retval);
916-
917-
for (i=j=0; i < arglen; i += 2) {
918-
unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
919-
unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
920-
if (top >= 16 || bot >= 16) {
921-
state = get_binascii_state(module);
922-
if (state == NULL) {
923-
return NULL;
924-
}
925-
PyErr_SetString(state->Error,
926-
"Non-hexadecimal digit found");
927-
goto finally;
928-
}
929-
retbuf[j++] = (top << 4) + bot;
930-
}
893+
retbuf[0] = 0;
931894
return retval;
932-
933-
finally:
934-
Py_DECREF(retval);
935-
return NULL;
936895
}
937896

938897
/*[clinic input]
@@ -944,10 +903,10 @@ hexstr must contain an even number of hex digits (upper or lower case).
944903
[clinic start generated code]*/
945904

946905
static PyObject *
947-
binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
948-
/*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
906+
binascii_unhexlify(PyObject *module, PyObject *hexstr)
907+
/*[clinic end generated code: output=ccd84f3e66cc10a6 input=dd8c012725f462da]*/
949908
{
950-
return binascii_a2b_hex_impl(module, hexstr);
909+
return binascii_a2b_hex(module, hexstr);
951910
}
952911

953912
#define MAXLINESIZE 76

Modules/clinic/binascii.c.h

Lines changed: 1 addition & 45 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Objects/bytesobject.c

Lines changed: 14 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -2497,122 +2497,26 @@ static PyObject *
24972497
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
24982498
/*[clinic end generated code: output=0973acc63661bb2e input=f37d98ed51088a21]*/
24992499
{
2500-
PyObject *result = _PyBytes_FromHex(string, 0);
2501-
if (type != &PyBytes_Type && result != NULL) {
2502-
Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2503-
}
2504-
return result;
2500+
// puts("bytes_fromhex_impl");
2501+
PyObject *retval;
2502+
char* retbuf;
2503+
2504+
retval = PyBytes_FromStringAndSize(NULL, 1);
2505+
retbuf = PyBytes_AS_STRING(retval);
2506+
retbuf[0] = 0;
2507+
return retval;
25052508
}
25062509

25072510
PyObject*
25082511
_PyBytes_FromHex(PyObject *string, int use_bytearray)
25092512
{
2510-
char *buf;
2511-
Py_ssize_t hexlen, invalid_char;
2512-
unsigned int top, bot;
2513-
const Py_UCS1 *str, *start, *end;
2514-
_PyBytesWriter writer;
2515-
Py_buffer view;
2516-
view.obj = NULL;
2517-
2518-
_PyBytesWriter_Init(&writer);
2519-
writer.use_bytearray = use_bytearray;
2520-
2521-
if (PyUnicode_Check(string)) {
2522-
hexlen = PyUnicode_GET_LENGTH(string);
2523-
2524-
if (!PyUnicode_IS_ASCII(string)) {
2525-
const void *data = PyUnicode_DATA(string);
2526-
int kind = PyUnicode_KIND(string);
2527-
Py_ssize_t i;
2528-
2529-
/* search for the first non-ASCII character */
2530-
for (i = 0; i < hexlen; i++) {
2531-
if (PyUnicode_READ(kind, data, i) >= 128)
2532-
break;
2533-
}
2534-
invalid_char = i;
2535-
goto error;
2536-
}
2537-
2538-
assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2539-
str = PyUnicode_1BYTE_DATA(string);
2540-
}
2541-
else if (PyObject_CheckBuffer(string)) {
2542-
if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
2543-
return NULL;
2544-
}
2545-
hexlen = view.len;
2546-
str = view.buf;
2547-
}
2548-
else {
2549-
PyErr_Format(PyExc_TypeError,
2550-
"fromhex() argument must be str or bytes-like, not %T",
2551-
string);
2552-
return NULL;
2553-
}
2513+
PyObject *retval;
2514+
char* retbuf;
25542515

2555-
/* This overestimates if there are spaces */
2556-
buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2557-
if (buf == NULL) {
2558-
goto release_buffer;
2559-
}
2560-
2561-
start = str;
2562-
end = str + hexlen;
2563-
while (str < end) {
2564-
/* skip over spaces in the input */
2565-
if (Py_ISSPACE(*str)) {
2566-
do {
2567-
str++;
2568-
} while (Py_ISSPACE(*str));
2569-
if (str >= end)
2570-
break;
2571-
}
2572-
2573-
top = _PyLong_DigitValue[*str];
2574-
if (top >= 16) {
2575-
invalid_char = str - start;
2576-
goto error;
2577-
}
2578-
str++;
2579-
2580-
bot = _PyLong_DigitValue[*str];
2581-
if (bot >= 16) {
2582-
/* Check if we had a second digit */
2583-
if (str >= end){
2584-
invalid_char = -1;
2585-
} else {
2586-
invalid_char = str - start;
2587-
}
2588-
goto error;
2589-
}
2590-
str++;
2591-
2592-
*buf++ = (unsigned char)((top << 4) + bot);
2593-
}
2594-
2595-
if (view.obj != NULL) {
2596-
PyBuffer_Release(&view);
2597-
}
2598-
return _PyBytesWriter_Finish(&writer, buf);
2599-
2600-
error:
2601-
if (invalid_char == -1) {
2602-
PyErr_SetString(PyExc_ValueError,
2603-
"fromhex() arg must contain an even number of hexadecimal digits");
2604-
} else {
2605-
PyErr_Format(PyExc_ValueError,
2606-
"non-hexadecimal number found in "
2607-
"fromhex() arg at position %zd", invalid_char);
2608-
}
2609-
_PyBytesWriter_Dealloc(&writer);
2610-
2611-
release_buffer:
2612-
if (view.obj != NULL) {
2613-
PyBuffer_Release(&view);
2614-
}
2615-
return NULL;
2516+
retval = PyBytes_FromStringAndSize(NULL, 1);
2517+
retbuf = PyBytes_AS_STRING(retval);
2518+
retbuf[0] = 0;
2519+
return retval;
26162520
}
26172521

26182522
/*[clinic input]

0 commit comments

Comments
 (0)