Skip to content

Commit bbeb275

Browse files
committed
PR Feedback 1: Better testing, ASCII fast path
1 parent c33260c commit bbeb275

2 files changed

Lines changed: 21 additions & 21 deletions

File tree

mypyc/lib-rt/str_ops.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,10 +636,20 @@ bool CPyStr_IsSpace(PyObject *str) {
636636
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
637637
if (len == 0) return false;
638638

639+
if (PyUnicode_IS_ASCII(str)) {
640+
const Py_UCS1 *data = PyUnicode_1BYTE_DATA(str);
641+
for (Py_ssize_t i = 0; i < len; i++) {
642+
if (!_Py_ascii_whitespace[data[i]])
643+
return false;
644+
}
645+
return true;
646+
}
647+
639648
int kind = PyUnicode_KIND(str);
640649
const void *data = PyUnicode_DATA(str);
641650
for (Py_ssize_t i = 0; i < len; i++) {
642-
if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i)))
651+
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
652+
if (!Py_UNICODE_ISSPACE(ch))
643653
return false;
644654
}
645655
return true;

mypyc/test-data/run-strings.test

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,23 +1259,13 @@ def test_format() -> None:
12591259
assert FMT.format(400 + 20, "roll" + "up") == "420 rollup"
12601260

12611261
[case testIsSpace]
1262-
def test_isspace_basic() -> None:
1263-
assert " ".isspace()
1264-
assert "\t".isspace()
1265-
assert "\n".isspace()
1266-
assert "\r".isspace()
1267-
assert "\f".isspace()
1268-
assert "\v".isspace()
1269-
assert " \t\n".isspace()
1270-
assert not "".isspace()
1271-
assert not "a".isspace()
1272-
assert not " a".isspace()
1273-
assert not "a ".isspace()
1274-
assert not "hello".isspace()
1275-
1276-
def test_isspace_unicode() -> None:
1277-
assert "\u00A0".isspace()
1278-
assert "\u2000".isspace()
1279-
assert "\u200A".isspace()
1280-
assert "\u3000".isspace()
1281-
assert not "\u0041".isspace()
1262+
from typing import Any
1263+
1264+
def test_isspace() -> None:
1265+
# Verify correctness across all Unicode codepoints.
1266+
# Exercises UCS-1 (0x00-0xFF), UCS-2 (0x100-0xFFFF), and UCS-4 (0x10000-0x10FFFF inclusive) string kinds.
1267+
# Any forces generic dispatch so we compare our primitive against stdlib's
1268+
for i in range(0x110000):
1269+
c = chr(i)
1270+
a: Any = c
1271+
assert c.isspace() == a.isspace()

0 commit comments

Comments
 (0)