Skip to content

Commit 0183a21

Browse files
authored
[mypyc] Add primitive for int to ascii conversion (#21036)
Added a primitive to convert `int`s into `bytes` objects suitable for bytes format strings, e.g. `b'%d' % my_int`. It works the same as the existing `CPyTagged_Str` primitive used for formatting `str` objects, except that it returns a `bytes` object instead of `str`. Currently only the bare `%d` specifier is supported; other integer specifiers and `%d` with options go through the regular CPython handling of format strings.

This results in around 23% reduced runtime in this microbenchmark:

```python
import time

t0 = time.time()
for i in range(10_000_000):
    val = b'%d' % i
print(time.time() - t0)
```

Results:

| setup | runtime (s) |
| - | - |
| interpreted | 0.90 |
| mypyc master | 0.92 |
| mypyc PR | 0.69 |
1 parent 72a2431 commit 0183a21

File tree

6 files changed

+66
-18
lines changed

6 files changed

+66
-18
lines changed

mypyc/irbuild/format_str_tokenizer.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from mypyc.irbuild.builder import IRBuilder
2626
from mypyc.irbuild.constant_fold import constant_fold_expr
2727
from mypyc.primitives.bytes_ops import bytes_build_op
28-
from mypyc.primitives.int_ops import int_to_str_op
28+
from mypyc.primitives.int_ops import int_to_ascii_op, int_to_str_op
2929
from mypyc.primitives.str_ops import str_build_op, str_op
3030

3131

@@ -225,8 +225,13 @@ def convert_format_expr_to_bytes(
225225
var_bytes = builder.accept(x)
226226
else:
227227
return None
228-
else:
229-
return None
228+
elif format_op == FormatOp.INT:
229+
if isinstance(folded := constant_fold_expr(builder, x), int):
230+
var_bytes = builder.load_literal_value(str(folded).encode("ascii"))
231+
elif is_int_rprimitive(node_type) or is_short_int_rprimitive(node_type):
232+
var_bytes = builder.call_c(int_to_ascii_op, [builder.accept(x)], line)
233+
else:
234+
return None
230235
converted.append(var_bytes)
231236
return converted
232237

mypyc/lib-rt/CPy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ PyObject *CPyTagged_ToBigEndianBytes(CPyTagged self, Py_ssize_t length, int sign
154154
PyObject *CPyTagged_ToLittleEndianBytes(CPyTagged self, Py_ssize_t length, int signed_flag);
155155

156156
PyObject *CPyTagged_Str(CPyTagged n);
157+
PyObject *CPyTagged_AsciiBytes(CPyTagged n);
157158
CPyTagged CPyTagged_FromFloat(double f);
158159
PyObject *CPyLong_FromStrWithBase(PyObject *o, CPyTagged base);
159160
PyObject *CPyLong_FromStr(PyObject *o);

mypyc/lib-rt/misc_ops.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,24 @@ PyObject *CPyTagged_Str(CPyTagged n) {
564564
}
565565
}
566566

567+
// Build a bytes object holding the decimal text of a short (untagged) integer.
// A buffer of MAX_INT_CHARS bytes is allocated up front, filled by
// fmt_ssize_t, and the object's size is then set to the length it reports.
// Returns NULL if the bytes object cannot be allocated.
static PyObject *CPyTagged_ShortToAsciiBytes(Py_ssize_t n) {
    PyObject *result = PyBytes_FromStringAndSize(NULL, MAX_INT_CHARS);
    if (result == NULL) {
        return NULL;
    }
    char *buffer = PyBytes_AsString(result);
    int written = fmt_ssize_t(buffer, n);
    Py_SET_SIZE(result, written);
    return result;
}
574+
575+
// Convert a tagged integer to a bytes object containing its decimal ASCII
// representation (the bytes counterpart of CPyTagged_Str).
//
// Short integers are formatted directly into a new bytes object; other values
// are converted via str() and then encoded as ASCII.
// Returns NULL if any of the underlying calls fails.
PyObject *CPyTagged_AsciiBytes(CPyTagged n) {
    if (CPyTagged_CheckShort(n)) {
        return CPyTagged_ShortToAsciiBytes(CPyTagged_ShortAsSsize_t(n));
    }
    // NOTE(review): if CPyTagged_AsObject returns a new reference, it is
    // never released on this path -- confirm its ownership semantics.
    PyObject *str = PyObject_Str(CPyTagged_AsObject(n));
    if (str == NULL) {
        // Propagate the failure instead of passing NULL to the encoder and
        // to CPy_DECREF below.
        return NULL;
    }
    PyObject *bytes = PyUnicode_AsASCIIString(str);
    CPy_DECREF(str);
    return bytes;
}
584+
567585
void CPyDebug_Print(const char *msg) {
568586
printf("%s\n", msg);
569587
fflush(stdout);

mypyc/primitives/int_ops.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,13 @@
105105
priority=3,
106106
)
107107

108+
# Convert an int to a bytes object by calling the C function
# CPyTagged_AsciiBytes (used when building b'%d' format results).
int_to_ascii_op = custom_op(
    arg_types=[int_rprimitive],
    return_type=bytes_rprimitive,
    c_function_name="CPyTagged_AsciiBytes",
    error_kind=ERR_MAGIC,
)
114+
108115

109116
def int_binary_primitive(
110117
op: str, primitive_name: str, return_type: RType = int_rprimitive, error_kind: int = ERR_NEVER

mypyc/test-data/irbuild-bytes.test

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -163,31 +163,39 @@ def f(var: bytes, num: int) -> None:
163163
b2 = b'aaaa%bbbbb%s%d' % (var, var, num)
164164
b3 = b'%b' % var
165165
b4 = b'%ssss' % var
166+
b5 = b'%d' % num
167+
b6 = b'%d' % 42
168+
b7 = b'%d' % (67 + 2)
166169
[typing fixtures/typing-full.pyi]
167170
[out]
168171
def f(var, num):
169172
var :: bytes
170173
num :: int
171-
r0, r1, r2, b1, r3 :: bytes
172-
r4 :: tuple[bytes, bytes, int]
173-
r5, r6 :: object
174-
r7, b2, r8, b3, r9, r10, b4 :: bytes
174+
r0, r1, r2, b1, r3, r4, r5, r6, b2, r7, b3, r8, r9, b4, r10, r11, b5, r12, r13, b6, r14, r15, b7 :: bytes
175175
L0:
176176
r0 = b'aaaa'
177177
r1 = b'bbbb'
178178
r2 = CPyBytes_Build(4, r0, var, r1, var)
179179
b1 = r2
180-
r3 = b'aaaa%bbbbb%s%d'
181-
r4 = (var, var, num)
182-
r5 = box(tuple[bytes, bytes, int], r4)
183-
r6 = PyNumber_Remainder(r3, r5)
184-
r7 = cast(bytes, r6)
185-
b2 = r7
186-
r8 = CPyBytes_Build(1, var)
187-
b3 = r8
188-
r9 = b'sss'
189-
r10 = CPyBytes_Build(2, var, r9)
190-
b4 = r10
180+
r3 = CPyTagged_AsciiBytes(num)
181+
r4 = b'aaaa'
182+
r5 = b'bbbb'
183+
r6 = CPyBytes_Build(5, r4, var, r5, var, r3)
184+
b2 = r6
185+
r7 = CPyBytes_Build(1, var)
186+
b3 = r7
187+
r8 = b'sss'
188+
r9 = CPyBytes_Build(2, var, r8)
189+
b4 = r9
190+
r10 = CPyTagged_AsciiBytes(num)
191+
r11 = CPyBytes_Build(1, r10)
192+
b5 = r11
193+
r12 = b'42'
194+
r13 = CPyBytes_Build(1, r12)
195+
b6 = r13
196+
r14 = b'69'
197+
r15 = CPyBytes_Build(1, r14)
198+
b7 = r15
191199
return 1
192200

193201
[case testOptionalBytesEquality]

mypyc/test-data/run-bytes.test

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,15 @@ def test_bytes_formatting() -> None:
412412
assert b'%4x' % val == b' a'
413413
assert b'%#4x' % val == b' 0xa'
414414
assert b'%04X' % val == b'000A'
415+
assert b'%d' % val == b'10'
416+
assert b'%d' % (val + 2) == b'12'
417+
assert b'%d' % 0 == b'0'
418+
assert b'%d' % -3 == b'-3'
419+
assert b'folded: %d' % (4 - 2) == b'folded: 2'
420+
421+
large_num = 2**65
422+
assert b'number: %d' % large_num == b'number: 36893488147419103232'
423+
assert b'negative integer: %d' % (-large_num) == b'negative integer: -36893488147419103232'
415424

416425
assert b'%c' % 48 == b'0'
417426
assert b'%c' % b'a' == b'a'

0 commit comments

Comments
 (0)