Skip to content

Commit 415a102

Browse files
Add cgtrc configuration parser
1 parent 1b8189b commit 415a102

3 files changed

Lines changed: 380 additions & 0 deletions

File tree

source/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""Utilities exposed for the unit tests."""
2+
3+
from .cgtrc_parser import CgtrcParseError, Setting, parse_cgtrc
4+
5+
# Names exported by ``from <package> import *``; mirrors the import above.
__all__ = ["CgtrcParseError", "Setting", "parse_cgtrc"]

source/cgtrc_parser.py

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
"""Utilities for parsing ``cgtrc`` configuration files.
2+
3+
The real project that inspired this kata stores its configuration in
4+
files that look like ``cgtrc``. A file contains one assignment per line
5+
and uses a small DSL to declare the available options. The goal of this
6+
module is to interpret that DSL so that other code can work with a Python
7+
representation of the configuration.
8+
9+
The format is intentionally tiny: each non-empty line is either a comment
10+
(starting with ``#``) or an assignment of the form ``name = value``. The
11+
``value`` can be expressed in a few ways:
12+
13+
* ``boolean(default=True)`` or ``boolean(False)``
14+
* ``integer(default=4)``
15+
* ``string(default=single)``
16+
* ``option("python", "native", default="python")``
17+
* a bare literal such as ``False`` or ``~/path``
18+
19+
For the purposes of the exercises the default values are the only
20+
information that we need. Parsing is strict so that mistakes in a
21+
configuration file are reported with useful error messages. The parser
22+
returns :class:`Setting` objects that expose the inferred type, the default
23+
value, and (when applicable) the available choices.
24+
"""
25+
from __future__ import annotations
26+
27+
from dataclasses import dataclass
28+
import ast
29+
from pathlib import Path
30+
from typing import Iterable, Iterator, Sequence
31+
32+
33+
class CgtrcParseError(ValueError):
    """Raised when a ``cgtrc`` configuration cannot be parsed.

    Subclasses :class:`ValueError`, so code that already catches
    ``ValueError`` also catches parse failures.
    """
35+
36+
37+
@dataclass(frozen=True)
class Setting:
    """Immutable description of one option declared in a ``cgtrc`` file."""

    # Option name as written on the left-hand side of the assignment.
    name: str
    # Type label for the option (for example ``"boolean"`` or ``"option"``).
    type: str
    # Default value parsed from the declaration.
    default: object
    # Allowed values; ``None`` unless the declaration supplies choices.
    choices: tuple[object, ...] | None = None

    def as_dict(self) -> dict[str, object]:
        """Return the type, default and (if any) choices as a plain dict.

        The ``"choices"`` key is only present when the setting has a
        non-empty ``choices`` tuple; it is emitted as a list.
        """
        payload: dict[str, object] = dict(type=self.type, default=self.default)
        if not self.choices:
            return payload
        payload["choices"] = [*self.choices]
        return payload
53+
54+
55+
def parse_cgtrc(source: str | Path | Iterable[str]) -> dict[str, Setting]:
    """Parse *source* into a mapping of option name to :class:`Setting`.

    Parameters
    ----------
    source:
        The text of a ``cgtrc`` file, an iterable of its lines, or a path
        to a file on disk.

    Returns
    -------
    dict
        One :class:`Setting` per assignment, keyed by option name. When a
        name is assigned more than once the last assignment wins.

    Raises
    ------
    CgtrcParseError
        If a non-empty, non-comment line is not an assignment, has no
        option name, or carries a value that cannot be parsed.
    """

    settings: dict[str, Setting] = {}
    for lineno, raw_line in enumerate(_iter_lines(source), start=1):
        stripped = _strip_inline_comment(raw_line).strip()
        if not stripped:
            # Blank line or a line holding only a comment.
            continue
        key, separator, remainder = stripped.partition("=")
        if not separator:
            raise CgtrcParseError(
                f"Line {lineno}: expected an assignment but got {raw_line!r}"
            )
        option_name = key.strip()
        if not option_name:
            raise CgtrcParseError(f"Line {lineno}: missing option name")
        settings[option_name] = _parse_value(option_name, remainder.strip(), lineno)
    return settings
81+
82+
83+
def _iter_lines(source: str | Path | Iterable[str]) -> Iterator[str]:
84+
if isinstance(source, Path):
85+
text = source.read_text(encoding="utf-8")
86+
yield from text.splitlines()
87+
elif isinstance(source, str):
88+
if "\n" not in source and Path(source).is_file():
89+
text = Path(source).read_text(encoding="utf-8")
90+
yield from text.splitlines()
91+
else:
92+
yield from source.splitlines()
93+
else:
94+
yield from source
95+
96+
97+
def _strip_inline_comment(line: str) -> str:
98+
"""Remove comments from *line* while respecting quoted strings."""
99+
100+
result: list[str] = []
101+
in_single = False
102+
in_double = False
103+
escaped = False
104+
for char in line:
105+
if escaped:
106+
result.append(char)
107+
escaped = False
108+
continue
109+
if char == "\\":
110+
result.append(char)
111+
escaped = True
112+
continue
113+
if char == "'" and not in_double:
114+
in_single = not in_single
115+
result.append(char)
116+
continue
117+
if char == '"' and not in_single:
118+
in_double = not in_double
119+
result.append(char)
120+
continue
121+
if char == "#" and not in_single and not in_double:
122+
break
123+
result.append(char)
124+
return "".join(result)
125+
126+
127+
def _parse_value(name: str, expr: str, lineno: int) -> Setting:
    """Turn the right-hand side of an assignment into a :class:`Setting`.

    Expressions shaped like ``func(args)`` are handed to ``_parse_call``;
    anything else is treated as a bare literal whose value and type are
    inferred from its text.
    """
    parsed_call = _match_call(expr)
    if parsed_call is None:
        literal = _auto_coerce(expr.strip())
        return Setting(name=name, type=_infer_type(literal), default=literal)
    function, raw_arguments = parsed_call
    return _parse_call(name, function, raw_arguments, lineno)
135+
136+
137+
def _match_call(expr: str) -> tuple[str, str] | None:
138+
expr = expr.strip()
139+
if not expr.endswith(")") or "(" not in expr:
140+
return None
141+
open_paren = expr.find("(")
142+
func_name = expr[:open_paren].strip()
143+
if not func_name.isidentifier():
144+
return None
145+
inner = expr[open_paren + 1 : -1]
146+
return func_name, inner
147+
148+
149+
def _parse_call(name: str, func_name: str, arg_string: str, lineno: int) -> Setting:
    """Build a :class:`Setting` from a ``func_name(arg_string)`` declaration.

    ``boolean``, ``integer`` and ``string`` accept exactly one value, given
    either positionally or as ``default=``. ``option`` accepts one or more
    positional choices plus an optional ``default=``; without it the first
    choice becomes the default.

    Raises
    ------
    CgtrcParseError
        For an unsupported function name, unexpected or duplicate
        arguments, an unparsable value, a missing choice list, or an
        ``option`` default that is not one of the choices.
    """
    # The three scalar declarations differ only in how the value is parsed.
    scalar_parsers = {
        "boolean": _parse_boolean,
        "integer": _parse_integer,
        "string": _parse_string,
    }
    is_option = func_name == "option"
    if not is_option and func_name not in scalar_parsers:
        raise CgtrcParseError(
            f"Line {lineno}: unsupported function {func_name!r} for option {name}"
        )

    try:
        default_text, positional, extras = _parse_arguments(
            _split_args(arg_string), not is_option
        )
    except CgtrcParseError as exc:
        # _parse_arguments has no line information; add it here so this
        # error is as locatable as every other parse error.
        raise CgtrcParseError(
            f"Line {lineno}: {exc} in {func_name}() for {name}"
        ) from exc

    if not is_option:
        if positional or extras:
            raise CgtrcParseError(
                f"Line {lineno}: unexpected arguments to {func_name}() for {name}"
            )
        default = scalar_parsers[func_name](default_text, name, lineno)
        return Setting(name=name, type=func_name, default=default)

    if extras:
        raise CgtrcParseError(
            f"Line {lineno}: unexpected keyword arguments in option() for {name}"
        )
    if not positional:
        raise CgtrcParseError(
            f"Line {lineno}: option() requires at least one choice for {name}"
        )
    choices = tuple(_parse_string(arg, name, lineno) for arg in positional)
    if default_text is None:
        default_value = choices[0]
    else:
        default_value = _parse_string(default_text, name, lineno)
    if default_value not in choices:
        raise CgtrcParseError(
            f"Line {lineno}: default {default_value!r} not in choices {choices}"
        )
    return Setting(name=name, type="option", default=default_value, choices=choices)
198+
199+
200+
def _split_args(arg_string: str) -> list[str]:
201+
args: list[str] = []
202+
start = 0
203+
depth = 0
204+
in_single = False
205+
in_double = False
206+
escaped = False
207+
for index, char in enumerate(arg_string):
208+
if escaped:
209+
escaped = False
210+
continue
211+
if char == "\\":
212+
escaped = True
213+
continue
214+
if char == "'" and not in_double:
215+
in_single = not in_single
216+
continue
217+
if char == '"' and not in_single:
218+
in_double = not in_double
219+
continue
220+
if char == "(" and not in_single and not in_double:
221+
depth += 1
222+
continue
223+
if char == ")" and not in_single and not in_double:
224+
depth -= 1
225+
continue
226+
if char == "," and not in_single and not in_double and depth == 0:
227+
args.append(arg_string[start:index].strip())
228+
start = index + 1
229+
last = arg_string[start:].strip()
230+
if last:
231+
args.append(last)
232+
return args
233+
234+
235+
def _parse_arguments(
236+
args: Sequence[str], allow_positional_default: bool
237+
) -> tuple[str | None, list[str], dict[str, str]]:
238+
default: str | None = None
239+
positional: list[str] = []
240+
extras: dict[str, str] = {}
241+
for arg in args:
242+
if "=" in arg:
243+
key, value = (part.strip() for part in arg.split("=", 1))
244+
if key == "default":
245+
if default is not None:
246+
raise CgtrcParseError("duplicate default specification")
247+
default = value
248+
else:
249+
extras[key] = value
250+
else:
251+
positional.append(arg)
252+
if allow_positional_default and default is None and positional:
253+
default = positional.pop(0)
254+
return default, positional, extras
255+
256+
257+
def _parse_boolean(text: str | None, name: str, lineno: int) -> bool:
258+
if text is None:
259+
raise CgtrcParseError(f"Line {lineno}: boolean() for {name} requires a value")
260+
normalized = text.strip().lower()
261+
if normalized in {"true", "1", "yes", "on"}:
262+
return True
263+
if normalized in {"false", "0", "no", "off"}:
264+
return False
265+
raise CgtrcParseError(
266+
f"Line {lineno}: could not parse boolean value {text!r} for {name}"
267+
)
268+
269+
270+
def _parse_integer(text: str | None, name: str, lineno: int) -> int:
271+
if text is None:
272+
raise CgtrcParseError(f"Line {lineno}: integer() for {name} requires a value")
273+
try:
274+
return int(text, 0)
275+
except ValueError as exc:
276+
raise CgtrcParseError(
277+
f"Line {lineno}: could not parse integer value {text!r} for {name}"
278+
) from exc
279+
280+
281+
def _parse_string(text: str | None, name: str, lineno: int) -> str:
282+
if text is None:
283+
raise CgtrcParseError(f"Line {lineno}: string() for {name} requires a value")
284+
text = text.strip()
285+
if not text:
286+
return ""
287+
if text[0] in {'"', "'"} and text[-1] == text[0]:
288+
try:
289+
return ast.literal_eval(text)
290+
except (ValueError, SyntaxError) as exc:
291+
raise CgtrcParseError(
292+
f"Line {lineno}: invalid quoted string {text!r} for {name}"
293+
) from exc
294+
return text
295+
296+
297+
def _auto_coerce(text: str) -> object:
298+
text = text.strip()
299+
lowered = text.lower()
300+
if lowered in {"true", "1", "yes", "on"}:
301+
return True
302+
if lowered in {"false", "0", "no", "off"}:
303+
return False
304+
try:
305+
return int(text, 0)
306+
except ValueError:
307+
pass
308+
try:
309+
return float(text)
310+
except ValueError:
311+
pass
312+
if text and text[0] in {'"', "'"} and text[-1] == text[0]:
313+
try:
314+
return ast.literal_eval(text)
315+
except (ValueError, SyntaxError):
316+
return text[1:-1]
317+
return text
318+
319+
320+
def _infer_type(value: object) -> str:
321+
if isinstance(value, bool):
322+
return "boolean"
323+
if isinstance(value, int):
324+
return "integer"
325+
if isinstance(value, float):
326+
return "float"
327+
return type(value).__name__

source/test_cgtrc_parser.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from __future__ import annotations
2+
3+
import textwrap
4+
5+
import pytest
6+
7+
from . import CgtrcParseError, parse_cgtrc
8+
9+
10+
# Shared fixture: typed declarations, an ``option`` followed by an inline
# comment, bare literals, and a quoted value that contains ``#``.
SAMPLE_CGTRC = textwrap.dedent(
    """
    # Comments and blank lines should be ignored
    debug = boolean(default=False)
    precision = string(default=single)
    backend = option("python", "native", default="python")  # inline comment
    cache_dir = string(default="~/.cgt_cache")
    parallel = boolean(default=False)
    num_threads = integer(default=4)
    custom_flag = True
    custom_count = 7
    custom_path = "#not a comment"
    """
)
24+
25+
26+
def test_parse_cgtrc_defaults() -> None:
    """Every option in ``SAMPLE_CGTRC`` parses to its expected default."""
    parsed = parse_cgtrc(SAMPLE_CGTRC)

    # Booleans are checked by identity so that ``0``/``1`` cannot pass.
    assert parsed["debug"].default is False
    assert parsed["custom_flag"].default is True

    expected_defaults = {
        "precision": "single",
        "backend": "python",
        "cache_dir": "~/.cgt_cache",
        "num_threads": 4,
        "custom_count": 7,
        "custom_path": "#not a comment",
    }
    for option, expected in expected_defaults.items():
        assert parsed[option].default == expected

    assert parsed["backend"].choices == ("python", "native")
38+
39+
40+
def test_invalid_boolean_value() -> None:
    """A ``boolean()`` default that is not a boolean word is rejected."""
    bad_declaration = "debug = boolean(default=maybe)"
    with pytest.raises(CgtrcParseError):
        parse_cgtrc(bad_declaration)
43+
44+
45+
def test_option_default_must_be_choice() -> None:
    """An ``option()`` default outside the listed choices is rejected."""
    declaration = 'backend = option("python", default="native")'
    with pytest.raises(CgtrcParseError):
        parse_cgtrc(declaration)

0 commit comments

Comments
 (0)