Skip to content

Commit 50d5739

Browse files
committed
gh-147991: Speed up tomllib import time
Defer the regular expressions import until the first datetime, localtime or non-trivial number (anything other than just decimal digits) is encountered.
1 parent c32e264 commit 50d5739

File tree

3 files changed

+69
-9
lines changed

3 files changed

+69
-9
lines changed

Lib/test/test_tomllib/test_misc.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,18 @@ def test_types_import(self):
124124
never imported by tests.
125125
"""
126126
importlib.import_module(f"{tomllib.__name__}._types")
127+
128+
def test_parse_simple_number(self):
    """Exercise the regex-free integer parser with accepted and rejected inputs."""
    parse = tomllib._parser._parse_simple_number

    # Inputs made only of decimal digits, optionally followed by a newline:
    # the expected result is (position after the digits, integer value).
    accepted = (
        ("123", (3, 123)),
        ("123\n", (3, 123)),
        ("0\n", (1, 0)),
    )
    for text, expected in accepted:
        self.assertEqual(parse(text, 0), expected)

    # Inputs the simple parser must decline by returning None: a leading
    # zero, digits followed by "-", ":", "." or "_", and text that starts
    # with a letter.
    rejected = (
        "0123\n",
        "123-456\n",
        "123:456\n",
        "1.0\n",
        "1_000\n",
        "x123\n",
        "o123\n",
        "b100\n",
    )
    for text in rejected:
        self.assertIsNone(parse(text, 0))

Lib/tomllib/_parser.py

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,26 @@
66

77
from types import MappingProxyType
88

9-
from ._re import (
10-
RE_DATETIME,
11-
RE_LOCALTIME,
12-
RE_NUMBER,
13-
match_to_datetime,
14-
match_to_localtime,
15-
match_to_number,
16-
)
17-
189
TYPE_CHECKING = False
1910
if TYPE_CHECKING:
2011
from collections.abc import Iterable
2112
from typing import IO, Any, Final
2213

2314
from ._types import Key, ParseFloat, Pos
2415

16+
_REGEX_IMPORTED = True
17+
from ._re import (
18+
RE_DATETIME,
19+
RE_LOCALTIME,
20+
RE_NUMBER,
21+
match_to_datetime,
22+
match_to_localtime,
23+
match_to_number,
24+
)
25+
else:
26+
# Regular expressions are lazily imported to speed up startup time
27+
_REGEX_IMPORTED = False
28+
2529
ASCII_CTRL: Final = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
2630

2731
# Neither of these sets include quotation mark or backslash. They are
@@ -41,6 +45,7 @@
4145
)
4246
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'")
4347
HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789")
48+
_DECDIGIT_CHARS: Final = frozenset("0123456789")
4449

4550
BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
4651
{
@@ -665,6 +670,25 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
665670
pos += 1
666671

667672

673+
def _parse_simple_number(
674+
src: str, pos: Pos
675+
) -> None | tuple[Pos, int]:
676+
start = pos
677+
src = src.rstrip()
678+
end = len(src)
679+
while src[pos] in _DECDIGIT_CHARS:
680+
pos += 1
681+
if pos >= end:
682+
break
683+
else:
684+
if src[pos] != "\n":
685+
return None
686+
digits = src[start:pos]
687+
if digits.startswith("0") and len(digits) > 1:
688+
return None
689+
return pos, int(digits)
690+
691+
668692
def parse_value(
669693
src: str, pos: Pos, parse_float: ParseFloat
670694
) -> tuple[Pos, Any]:
@@ -703,6 +727,25 @@ def parse_value(
703727
if char == "{":
704728
return parse_inline_table(src, pos, parse_float)
705729

730+
global _REGEX_IMPORTED, RE_DATETIME, RE_LOCALTIME, RE_NUMBER
731+
global match_to_datetime, match_to_localtime, match_to_number
732+
if not _REGEX_IMPORTED:
733+
# Simple number parser avoiding regex
734+
if char in _DECDIGIT_CHARS:
735+
res = _parse_simple_number(src, pos)
736+
if res is not None:
737+
return res
738+
739+
from ._re import (
740+
RE_DATETIME,
741+
RE_LOCALTIME,
742+
RE_NUMBER,
743+
match_to_datetime,
744+
match_to_localtime,
745+
match_to_number,
746+
)
747+
_REGEX_IMPORTED = True
748+
706749
# Dates and times
707750
datetime_match = RE_DATETIME.match(src, pos)
708751
if datetime_match:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improve :mod:`tomllib` import time (up to 10x faster). Patch by Victor
2+
Stinner.

0 commit comments

Comments
 (0)