Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Lib/test/test_tomllib/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,18 @@ def test_types_import(self):
never imported by tests.
"""
importlib.import_module(f"{tomllib.__name__}._types")

def test_parse_simple_number(self):
    """Exercise the private ``_parse_simple_number`` helper directly."""
    parse = tomllib._parser._parse_simple_number

    # Inputs the helper must accept, paired with the expected
    # ``(end position, value)`` result.
    accepted = (
        ("123", (3, 123)),
        ("123\n", (3, 123)),
        ("0\n", (1, 0)),
    )
    for text, expected in accepted:
        self.assertEqual(parse(text, 0), expected)

    # Inputs for which the helper must return None: a leading zero,
    # digits followed by "-", ":", "." or "_", and text whose first
    # character is not a decimal digit.
    rejected = (
        "0123\n",
        "123-456\n",
        "123:456\n",
        "1.0\n",
        "1_000\n",
        "x123\n",
        "o123\n",
        "b100\n",
    )
    for text in rejected:
        self.assertIsNone(parse(text, 0))
61 changes: 52 additions & 9 deletions Lib/tomllib/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,26 @@

from types import MappingProxyType

from ._re import (
RE_DATETIME,
RE_LOCALTIME,
RE_NUMBER,
match_to_datetime,
match_to_localtime,
match_to_number,
)

TYPE_CHECKING = False
if TYPE_CHECKING:
from collections.abc import Iterable
from typing import IO, Any, Final

from ._types import Key, ParseFloat, Pos

_REGEX_IMPORTED = True
from ._re import (
RE_DATETIME,
RE_LOCALTIME,
RE_NUMBER,
match_to_datetime,
match_to_localtime,
match_to_number,
)
else:
# Regular expressions are lazy imported to speed up startup time
_REGEX_IMPORTED = False

ASCII_CTRL: Final = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))

# Neither of these sets include quotation mark or backslash. They are
Expand All @@ -41,6 +45,7 @@
)
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'")
HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789")
_DECDIGIT_CHARS: Final = frozenset("0123456789")

BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
{
Expand Down Expand Up @@ -665,6 +670,25 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
pos += 1


def _parse_simple_number(
    src: str, pos: Pos
) -> None | tuple[Pos, int]:
    """Parse a plain decimal integer starting at *pos* without regexes.

    Only a run of ASCII decimal digits is recognised, and only when that
    run is followed by a newline or by nothing but whitespace up to the
    end of *src*.  A multi-digit run starting with "0" is rejected.

    Returns ``(new_pos, value)`` where *new_pos* is the index just past
    the last digit, or ``None`` when the text at *pos* is not such a
    number.  ``None`` is not an error: it tells the caller to use its
    general value-parsing path instead.
    """
    end = len(src)
    start = pos
    # Scan in place with an explicit bound instead of ``src.rstrip()``:
    # stripping may copy the whole document on every call, and indexing
    # the stripped text raised IndexError for empty input.
    while pos < end and src[pos] in _DECDIGIT_CHARS:
        pos += 1
    if pos == start:
        # No digit at all (including *pos* at or past the end of *src*);
        # without this guard int("") below would raise ValueError.
        return None

    # What follows the digits decides whether this really is a plain
    # integer.  For example ``1979-05-27`` stops at "-": that is a date,
    # which this helper cannot parse, so None must be returned.
    if pos < end and src[pos] != "\n":
        # Still accept digits followed only by whitespace up to the end
        # of *src*, which is what stripping the input used to allow.
        tail = pos
        while tail < end and src[tail].isspace():
            tail += 1
        if tail < end:
            return None

    digits = src[start:pos]
    if len(digits) > 1 and digits[0] == "0":
        # Leading zeros are rejected here.
        return None
    return pos, int(digits)


def parse_value(
src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, Any]:
Expand Down Expand Up @@ -703,6 +727,25 @@ def parse_value(
if char == "{":
return parse_inline_table(src, pos, parse_float)

global _REGEX_IMPORTED, RE_DATETIME, RE_LOCALTIME, RE_NUMBER
global match_to_datetime, match_to_localtime, match_to_number
if not _REGEX_IMPORTED:
# Simple number parser avoiding regex
if char in _DECDIGIT_CHARS:
res = _parse_simple_number(src, pos)
if res is not None:
return res

from ._re import (
RE_DATETIME,
RE_LOCALTIME,
RE_NUMBER,
match_to_datetime,
match_to_localtime,
match_to_number,
)
_REGEX_IMPORTED = True

# Dates and times
datetime_match = RE_DATETIME.match(src, pos)
if datetime_match:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve :mod:`tomllib` import time (up to 10x faster). Patch by Victor
Stinner.
Loading