Skip to content

Commit e74d4aa

Browse files
authored
[mypyc] Add str.isspace primitive (#20842)
Benchmarked this locally: for 500 million `isspace()` calls, mypyc took 7.5 seconds vs. Python's 10 seconds.
1 parent 00b5064 commit e74d4aa

File tree

6 files changed

+57
-1
lines changed

6 files changed

+57
-1
lines changed

mypyc/lib-rt/CPy.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,7 @@ Py_ssize_t CPyStr_Count(PyObject *unicode, PyObject *substring, CPyTagged start)
780780
Py_ssize_t CPyStr_CountFull(PyObject *unicode, PyObject *substring, CPyTagged start, CPyTagged end);
781781
CPyTagged CPyStr_Ord(PyObject *obj);
782782
PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count);
783-
783+
bool CPyStr_IsSpace(PyObject *str);
784784

785785
// Bytes operations
786786

mypyc/lib-rt/str_ops.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,3 +630,27 @@ PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count) {
630630
}
631631
return PySequence_Repeat(str, temp_count);
632632
}
633+
634+
635+
// Return true if `str` is non-empty and every character in it is whitespace;
// return false otherwise (including for the empty string).
// NOTE(review): no type check is performed here -- presumably callers
// guarantee that `str` is a unicode object; confirm against the call sites.
bool CPyStr_IsSpace(PyObject *str) {
636+
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
637+
// An empty string is never reported as whitespace.
if (len == 0) return false;
638+
639+
// ASCII-only strings: look each byte up in the _Py_ascii_whitespace table.
if (PyUnicode_IS_ASCII(str)) {
640+
const Py_UCS1 *data = PyUnicode_1BYTE_DATA(str);
641+
for (Py_ssize_t i = 0; i < len; i++) {
642+
if (!_Py_ascii_whitespace[data[i]])
643+
return false;
644+
}
645+
return true;
646+
}
647+
648+
// Non-ASCII strings: read each code point according to the string's kind
// and test it with Py_UNICODE_ISSPACE, stopping at the first non-space.
int kind = PyUnicode_KIND(str);
649+
const void *data = PyUnicode_DATA(str);
650+
for (Py_ssize_t i = 0; i < len; i++) {
651+
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
652+
if (!Py_UNICODE_ISSPACE(ch))
653+
return false;
654+
}
655+
return true;
656+
}

mypyc/primitives/str_ops.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,14 @@
397397
error_kind=ERR_NEG_INT,
398398
)
399399

400+
# str.isspace()
method_op(
401+
name="isspace",
402+
arg_types=[str_rprimitive],
403+
return_type=bool_rprimitive,
404+
c_function_name="CPyStr_IsSpace",
405+
error_kind=ERR_NEVER,
406+
)
407+
400408
# obj.decode()
401409
method_op(
402410
name="decode",

mypyc/test-data/fixtures/ir.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ def removeprefix(self, prefix: str, /) -> str: ...
131131
def removesuffix(self, suffix: str, /) -> str: ...
132132
def islower(self) -> bool: ...
133133
def count(self, substr: str, start: Optional[int] = None, end: Optional[int] = None) -> int: pass
134+
def isspace(self) -> bool: ...
134135

135136
class float:
136137
def __init__(self, x: object) -> None: pass

mypyc/test-data/irbuild-str.test

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,3 +972,14 @@ def i_times_s(s, n):
972972
L0:
973973
r0 = CPyStr_Multiply(s, n)
974974
return r0
975+
976+
[case testStrIsSpace]
977+
def is_space(x: str) -> bool:
978+
return x.isspace()
979+
[out]
980+
def is_space(x):
981+
x :: str
982+
r0 :: bool
983+
L0:
984+
r0 = CPyStr_IsSpace(x)
985+
return r0

mypyc/test-data/run-strings.test

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,3 +1257,15 @@ FMT: Final = "{} {}"
12571257

12581258
def test_format() -> None:
12591259
assert FMT.format(400 + 20, "roll" + "up") == "420 rollup"
1260+
1261+
[case testIsSpace]
1262+
from typing import Any
1263+
1264+
def test_isspace() -> None:
1265+
# Verify correctness across all Unicode codepoints.
1266+
# Exercises UCS-1 (0x00-0xFF), UCS-2 (0x100-0xFFFF), and UCS-4 (0x10000-0x10FFFF inclusive) string kinds.
1267+
# Typing the value as Any forces generic dispatch, so we compare our primitive against the stdlib's implementation.
1268+
for i in range(0x110000):
1269+
c = chr(i)
1270+
a: Any = c
1271+
assert c.isspace() == a.isspace()

0 commit comments

Comments
 (0)