Skip to content

Commit 692a2db

Browse files
Fix parsing algorithm in ValueParser (#522)
Fix parsing algorithm in `ValueParser` Reviewed-by: gemini-code-assist[bot] Reviewed-by: Nikola Forró Reviewed-by: Laura Barcziová
2 parents 8c3d7fe + 6d2f4e0 commit 692a2db

5 files changed

Lines changed: 109 additions & 49 deletions

File tree

specfile/sanitizer.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88

99
from specfile.exceptions import UnterminatedMacroException
1010
from specfile.value_parser import (
11-
BuiltinMacro,
1211
ConditionalMacroExpansion,
1312
EnclosedMacroSubstitution,
1413
ExpressionExpansion,
1514
MacroSubstitution,
1615
ShellExpansion,
16+
SingleArgEnclosedMacroSubstitution,
1717
ValueParser,
1818
)
1919

@@ -968,7 +968,12 @@ def sanitize_nodes(nodes):
968968
while i < len(nodes):
969969
node = nodes[i]
970970
if isinstance(
971-
node, (MacroSubstitution, EnclosedMacroSubstitution, BuiltinMacro)
971+
node,
972+
(
973+
MacroSubstitution,
974+
EnclosedMacroSubstitution,
975+
SingleArgEnclosedMacroSubstitution,
976+
),
972977
) and node.name in ("include", "load", "uncompress"):
973978
removed += 1
974979
# %include/%load followed by whitespace and argument
@@ -1019,18 +1024,18 @@ def sanitize_nodes(nodes):
10191024
else:
10201025
converted += 1
10211026
result.append(replacement)
1022-
elif isinstance(node, BuiltinMacro):
1027+
elif isinstance(node, SingleArgEnclosedMacroSubstitution):
10231028
if not is_name_safe(node.name):
10241029
removed += 1
10251030
result.append("%{nil}")
10261031
elif node.name == "lua":
1027-
if not cls.is_lua_safe(node.body):
1032+
if not cls.is_lua_safe(node.arg):
10281033
removed += 1
10291034
result.append("%{nil}")
10301035
else:
10311036
result.append(str(node))
10321037
else:
1033-
sanitized_body, c, r = cls.sanitize(node.body, _depth + 1)
1038+
sanitized_body, c, r = cls.sanitize(node.arg, _depth + 1)
10341039
converted += c
10351040
removed += r
10361041
result.append(f"%{{{node.name}:{sanitized_body}}}")

specfile/spec_parser.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,11 @@
2121
from specfile.sections import Section
2222
from specfile.tags import Tags
2323
from specfile.utils import get_filename_from_location
24-
from specfile.value_parser import BuiltinMacro, ShellExpansion, ValueParser
24+
from specfile.value_parser import (
25+
ShellExpansion,
26+
SingleArgEnclosedMacroSubstitution,
27+
ValueParser,
28+
)
2529

2630
logger = logging.getLogger(__name__)
2731

@@ -286,10 +290,13 @@ def collect_loaded_sources(content):
286290
# collect sources loaded using %{load:...}
287291
sources = set()
288292
for node in ValueParser.flatten(ValueParser.parse(content)):
289-
if isinstance(node, BuiltinMacro) and node.name == "load":
293+
if (
294+
isinstance(node, SingleArgEnclosedMacroSubstitution)
295+
and node.name == "load"
296+
):
290297
# we can expand macros here because the first non-build parse,
291298
# even though it failed, populated the macro context
292-
source = Path(Macros.expand(node.body))
299+
source = Path(Macros.expand(node.arg))
293300
# ignore files outside of sourcedir
294301
if source.parent.samefile(self.sourcedir):
295302
sources.add(source.name)

specfile/value_parser.py

Lines changed: 71 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def __eq__(self, other: object) -> bool:
4242
return self.value == other.value
4343

4444

45-
class ShellExpansion(Node):
46-
"""Node representing shell expansion, e.g. _%(whoami)_."""
45+
class Expansion(Node):
46+
"""Abstract base class for expansion nodes."""
4747

4848
def __init__(self, body: str) -> None:
4949
self.body = body
@@ -54,16 +54,20 @@ def __repr__(self) -> str:
5454
# don't have to reimplement __repr__()
5555
return f"{self.__class__.__name__}({self.body!r})"
5656

57-
def __str__(self) -> str:
58-
return f"%({self.body})"
59-
6057
def __eq__(self, other: object) -> bool:
6158
if not isinstance(other, self.__class__):
6259
return NotImplemented
6360
return self.body == other.body
6461

6562

66-
class ExpressionExpansion(ShellExpansion):
63+
class ShellExpansion(Expansion):
64+
"""Node representing shell expansion, e.g. _%(whoami)_."""
65+
66+
def __str__(self) -> str:
67+
return f"%({self.body})"
68+
69+
70+
class ExpressionExpansion(Expansion):
6771
"""Node representing expression expansion, e.g. _%[1+1]_."""
6872

6973
def __str__(self) -> str:
@@ -125,6 +129,29 @@ def __eq__(self, other: object) -> bool:
125129
)
126130

127131

132+
class SingleArgEnclosedMacroSubstitution(Node):
133+
"""
134+
Node representing single-argument bracket-enclosed macro substitution,
135+
e.g. _%{quote:Ancient Greek}_.
136+
"""
137+
138+
def __init__(self, name: str, arg: str) -> None:
139+
self.name = name
140+
self.arg = arg
141+
142+
@formatted
143+
def __repr__(self) -> str:
144+
return f"SingleArgEnclosedMacroSubstitution({self.name!r}, {self.arg!r})"
145+
146+
def __str__(self) -> str:
147+
return f"%{{{self.name}:{self.arg}}}"
148+
149+
def __eq__(self, other: object) -> bool:
150+
if not isinstance(other, self.__class__):
151+
return NotImplemented
152+
return self.name == other.name and self.arg == other.arg
153+
154+
128155
class ConditionalMacroExpansion(Node):
129156
"""Node representing conditional macro expansion, e.g. _%{?prerel:0.}_."""
130157

@@ -156,26 +183,6 @@ def __eq__(self, other: object) -> bool:
156183
)
157184

158185

159-
class BuiltinMacro(Node):
160-
"""Node representing built-in macro, e.g. _%{quote:Ancient Greek}_."""
161-
162-
def __init__(self, name: str, body: str) -> None:
163-
self.name = name
164-
self.body = body
165-
166-
@formatted
167-
def __repr__(self) -> str:
168-
return f"BuiltinMacro({self.name!r}, {self.body!r})"
169-
170-
def __str__(self) -> str:
171-
return f"%{{{self.name}:{self.body}}}"
172-
173-
def __eq__(self, other: object) -> bool:
174-
if not isinstance(other, self.__class__):
175-
return NotImplemented
176-
return self.name == other.name and self.body == other.body
177-
178-
179186
class ValueParser:
180187
@classmethod
181188
def flatten(cls, nodes: List[Node]) -> Generator[Node, None, None]:
@@ -243,7 +250,7 @@ def find_macro_end(index):
243250
return i
244251

245252
result: List[Node] = []
246-
start = 0
253+
start = start0 = 0
247254
offset = 0
248255
while start < len(value):
249256
try:
@@ -258,8 +265,6 @@ def find_macro_end(index):
258265
end = None
259266
if end is None:
260267
break
261-
if end > start:
262-
result.append(StringLiteral(value[start:end]))
263268
start = end
264269
end = find_macro_end(start + 1)
265270
if end is None:
@@ -269,24 +274,46 @@ def find_macro_end(index):
269274
elif value[start + 1] == "[":
270275
result.append(ExpressionExpansion(value[start + 2 : end - 1]))
271276
elif value[start + 1] == "{":
272-
if ":" in value[start:end]:
273-
condition, body = value[start + 2 : end - 1].split(":", maxsplit=1)
277+
index, delimiter = next(
278+
(
279+
(i, c)
280+
for i, c in enumerate(value[start + 2 : end - 1])
281+
if c in " :}"
282+
),
283+
(-1, None),
284+
)
285+
if delimiter == ":":
286+
condition = value[start + 2 : start + index + 2]
287+
body = value[start + index + 3 : end - 1]
274288
tokens = re.split(r"^([?!]+)", condition, maxsplit=1)
275289
prefix = "" if len(tokens) == 1 else tokens[1]
276290
if "?" in prefix:
277291
result.append(
278292
ConditionalMacroExpansion(condition, cls.parse(body))
279293
)
280294
else:
281-
result.append(BuiltinMacro(condition, body))
295+
result.append(
296+
SingleArgEnclosedMacroSubstitution(condition, body)
297+
)
282298
else:
283-
result.append(EnclosedMacroSubstitution(value[start + 2 : end - 1]))
299+
body = value[start + 2 : end - 1]
300+
name = (
301+
value[start + 2 : start + index + 2]
302+
if delimiter is not None
303+
else body
304+
)
305+
if not re.match(r"[A-Za-z_]\w*$", name.lstrip("?!"), re.ASCII):
306+
start += 2
307+
continue
308+
result.append(EnclosedMacroSubstitution(body))
284309
else:
285310
result.append(MacroSubstitution(value[start + 1 : end]))
286-
start = end
311+
if start > start0:
312+
result.insert(-1, StringLiteral(value[start0:start]))
313+
start = start0 = end
287314
offset = 0
288-
if value[start:]:
289-
result.append(StringLiteral(value[start:]))
315+
if value[start0:]:
316+
result.append(StringLiteral(value[start0:]))
290317
return result
291318

292319
@classmethod
@@ -387,7 +414,14 @@ def evaluate(node):
387414
tokens.append(("c", "", node))
388415
elif isinstance(node, StringLiteral):
389416
tokens.append(("v", node.value, ""))
390-
elif isinstance(node, (ShellExpansion, ExpressionExpansion, BuiltinMacro)):
417+
elif isinstance(
418+
node,
419+
(
420+
ShellExpansion,
421+
ExpressionExpansion,
422+
SingleArgEnclosedMacroSubstitution,
423+
),
424+
):
391425
const = expand(str(node))
392426
tokens.append(("c", const, str(node)))
393427
elif isinstance(node, MacroSubstitution):

tests/unit/test_sanitizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,9 +1215,9 @@ def test_expression_expansion_sanitized(value, expected_sanitized):
12151215
"value, expected_sanitized",
12161216
[
12171217
# EnclosedMacroSubstitution without args - unsafe name
1218-
("%{%(whoami)}", "%{nil}"),
1218+
("%{%(whoami)}", "%{%{nil}}"),
12191219
# EnclosedMacroSubstitution with args - unsafe name
1220-
("%{%(whoami) arg1 arg2}", "%{nil}"),
1220+
("%{%(whoami) arg1 arg2}", "%{%{nil} arg1 arg2}"),
12211221
# BuiltinMacro - unsafe name
12221222
("%{%(whoami):body}", "%{nil}"),
12231223
# BuiltinMacro - nested macro in name (splits at : giving unterminated name)

tests/unit/test_value_parser.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
from flexmock import flexmock
66

77
from specfile.value_parser import (
8-
BuiltinMacro,
98
ConditionalMacroExpansion,
109
EnclosedMacroSubstitution,
1110
Macros,
1211
MacroSubstitution,
1312
ShellExpansion,
13+
SingleArgEnclosedMacroSubstitution,
1414
StringLiteral,
1515
ValueParser,
1616
)
@@ -50,12 +50,26 @@
5050
(
5151
'%{lua:ver = string.gsub(rpm.expand("%{ver}"), "-", "~"); print(string.lower(ver))}',
5252
[
53-
BuiltinMacro(
53+
SingleArgEnclosedMacroSubstitution(
5454
"lua",
5555
'ver = string.gsub(rpm.expand("%{ver}"), "-", "~"); print(string.lower(ver))',
5656
)
5757
],
5858
),
59+
("%{%{28}}", [StringLiteral("%{%{28}}")]),
60+
(
61+
"%{%{macro}}",
62+
[
63+
StringLiteral("%{"),
64+
EnclosedMacroSubstitution("macro"),
65+
StringLiteral("}"),
66+
],
67+
),
68+
("%{upper:%{name}}", [SingleArgEnclosedMacroSubstitution("upper", "%{name}")]),
69+
(
70+
"%{version_no_tilde %{quote:nil}}",
71+
[EnclosedMacroSubstitution("version_no_tilde %{quote:nil}")],
72+
),
5973
],
6074
)
6175
def test_parse(value, nodes):

0 commit comments

Comments
 (0)