Skip to content

Commit 1ac248d

Browse files
Fix sanitizations that result in a macro with arguments (#525)
Fix sanitizations that result in a macro with arguments. Reviewed-by: gemini-code-assist[bot]; Reviewed-by: Nikola Forró; Reviewed-by: Laura Barcziová
2 parents 9975d47 + de4fbe8 commit 1ac248d

2 files changed

Lines changed: 147 additions & 148 deletions

File tree

specfile/sanitizer.py

Lines changed: 124 additions & 126 deletions
Original file line number | Diff line number | Diff line change
@@ -91,59 +91,6 @@
9191
_LUA_STRING_LITERAL_RE = re.compile(r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'')
9292

9393

94-
def _strip_lua_comments(code):
    """Strip Lua comments while preserving string literals.

    Processes code left-to-right so that ``--`` inside a string literal
    is never mistaken for a comment start.  Quoted strings (``"..."`` and
    ``'...'``) as well as long-bracket strings (``[[...]]``,
    ``[==[...]==]``) are copied through verbatim.

    Every comment, line or block, is replaced by a single space; the
    newline that terminates a line comment is kept.  An unterminated
    string or block comment extends to the end of the input.

    Args:
        code: Lua source text.

    Returns:
        The source text with comments replaced by single spaces.
    """
    size = len(code)

    def long_bracket_end(pos):
        """Return the index just past the long bracket opening at *pos*.

        Returns ``None`` when ``code[pos:]`` does not start with ``[``,
        any number of ``=``, then ``[``.  When the matching closing
        delimiter is missing, the end of the input is returned.
        """
        k = pos + 1
        while k < size and code[k] == "=":
            k += 1
        if k >= size or code[k] != "[":
            return None
        closer = "]" + "=" * (k - pos - 1) + "]"
        found = code.find(closer, k + 1)
        return size if found == -1 else found + len(closer)

    result = []
    i = 0
    while i < size:
        ch = code[i]
        if ch in ('"', "'"):
            # Quoted string: copy through to the matching, unescaped quote.
            start = i
            i += 1
            while i < size and code[i] != ch:
                # A backslash escapes the next character (if there is one).
                i += 2 if code[i] == "\\" and i + 1 < size else 1
            if i < size:
                i += 1  # include the closing quote
            result.append(code[start:i])
        elif ch == "[":
            # Long-bracket string: "--" inside it is data, not a comment.
            end = long_bracket_end(i)
            if end is None:
                result.append(ch)
                i += 1
            else:
                result.append(code[i:end])
                i = end
        elif code.startswith("--", i):
            end = None
            if code.startswith("[", i + 2):
                end = long_bracket_end(i + 2)
            if end is None:
                # Line comment: stop in front of the newline so it is kept.
                newline = code.find("\n", i)
                end = size if newline == -1 else newline
            result.append(" ")
            i = end
        else:
            result.append(ch)
            i += 1
    return "".join(result)
145-
146-
14794
_UNSAFE_LUA_BRACKET_RE = re.compile(r"\[\s*(?![#\d])")
14895

14996
_UNSAFE_LUA_STRING_CONTENT_RE = re.compile(
@@ -154,67 +101,6 @@ def _strip_lua_comments(code):
154101
_EXPRESSION_LUA_PREFIX_RE = re.compile(r"lua\s*:")
155102

156103

157-
def _decode_lua_escapes(s):
    r"""Decode Lua string escape sequences to their character values.

    Recognized escapes:

    - single-character escapes ``\a \b \f \n \r \t \v \\ \" \'``
    - ``\z``, which is dropped together with the whitespace that follows
    - ``\xXX`` with exactly two ASCII hexadecimal digits
    - ``\u{XXX}`` with one or more ASCII hexadecimal digits
    - ``\ddd`` with one to three ASCII decimal digits (value modulo 256)

    Anything else is not an escape: the backslash is copied literally and
    decoding resumes at the following character.  Digits are matched
    against explicit ASCII sets because ``str.isdigit()`` and ``int()``
    also accept other Unicode digits (and ``int()`` accepts signs,
    whitespace and underscores), which either raised ``ValueError`` or
    decoded sequences that are not escapes at all.

    Args:
        s: Contents of a Lua string literal, without the enclosing quotes.

    Returns:
        The string with all well-formed escape sequences decoded.
    """
    simple = {
        "a": "\a",
        "b": "\b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
        "'": "'",
    }
    dec_digits = "0123456789"
    hex_digits = "0123456789abcdefABCDEF"

    size = len(s)
    result = []
    i = 0
    while i < size:
        if s[i] != "\\" or i + 1 >= size:
            # Ordinary character, or a lone backslash at the very end.
            result.append(s[i])
            i += 1
            continue
        c = s[i + 1]
        if c in simple:
            result.append(simple[c])
            i += 2
        elif c == "z":
            i += 2
            while i < size and s[i] in " \t\n\r":
                i += 1
        elif c == "x" and i + 3 < size:
            pair = s[i + 2 : i + 4]
            if all(d in hex_digits for d in pair):
                result.append(chr(int(pair, 16)))
                i += 4
            else:
                # Malformed: keep the backslash, rescan from the next char.
                result.append(s[i])
                i += 1
        elif c == "u" and i + 2 < size and s[i + 2] == "{":
            end = s.find("}", i + 3)
            digits = s[i + 3 : end] if end != -1 else ""
            char = None
            if digits and all(d in hex_digits for d in digits):
                try:
                    char = chr(int(digits, 16))
                except (ValueError, OverflowError):
                    char = None  # code point outside the range chr() accepts
            if char is None:
                result.append(s[i])
                i += 1
            else:
                result.append(char)
                i = end + 1
        elif c in dec_digits:
            j = i + 1
            while j < size and j < i + 4 and s[j] in dec_digits:
                j += 1
            result.append(chr(int(s[i + 1 : j]) % 256))
            i = j
        else:
            result.append(s[i])
            i += 1
    return "".join(result)
216-
217-
218104
_UNSAFE_LUA_IDENTIFIERS = frozenset(
219105
{
220106
"_G",
@@ -287,7 +173,7 @@ def sanitize_shell_expansion(body: str) -> str:
287173
or Lua expressions. Covered patterns are:
288174
289175
- Substring extraction:
290-
`%(c=%{commit}; echo ${c:0:7})` → `%{sub %{commit}, 1, 7}`
176+
`%(c=%{commit}; echo ${c:0:7})` → `%{sub %{commit} 1 7}`
291177
- Bash string replacement:
292178
`%(v=%{version}; echo ${v//./_})` → `%{lua:
293179
print((rpm.expand("%{version}"):gsub("%.", "_")))}`
@@ -331,7 +217,7 @@ def sanitize_shell_expansion(body: str) -> str:
331217
`%(test "%{_libdir}" != "%{_prefix}/lib" && echo 1 || echo 0)` → `%{lua:
332218
print(rpm.expand("%{_libdir}") ~= rpm.expand("%{_prefix}/lib") and "1" or "0")}`
333219
- Printf truncation:
334-
`%(printf %%.7s %commit)` → `%{sub %{commit}, 1, 7}`
220+
`%(printf %%.7s %commit)` → `%{sub %{commit} 1 7}`
335221
- Printf float formatting:
336222
`%(LANG=C printf "%.4f" %{cpan_ver})` → `%{lua:
337223
print(string.format("%%.4f", tonumber(rpm.expand("%{cpan_ver}"))))}`
@@ -567,8 +453,8 @@ def convert_string_op(expr, cmd):
567453
mode, start, end, delim = cut
568454
if mode == "bytes":
569455
if end is not None:
570-
return f"%{{sub {expr}, {start}, {end}}}"
571-
return f"%{{sub {expr}, {start}}}"
456+
return f"%{{sub {expr} {start} {end}}}"
457+
return f"%{{sub {expr} {start}}}"
572458
elif mode == "field":
573459
return build_lua_field(expr, delim, start)
574460
elif mode == "range":
@@ -700,14 +586,14 @@ def convert_glob_removal(expr, op, pat):
700586
start = offset + 1
701587
if length_raw is not None:
702588
if length_raw.isdigit():
703-
return f"%{{sub {expr}, {start}, {offset + int(length_raw)}}}"
589+
return f"%{{sub {expr} {start} {offset + int(length_raw)}}}"
704590
length_macro = normalize_macro(length_raw)
705591
if length_macro is not None:
706592
if offset == 0:
707-
return f"%{{sub {expr}, {start}, {length_macro}}}"
708-
return f"%{{sub {expr}, {start}, %[{length_macro} + {offset}]}}"
593+
return f"%{{sub {expr} {start} {length_macro}}}"
594+
return f"%{{sub {expr} {start} %[{length_macro} + {offset}]}}"
709595
else:
710-
return f"%{{sub {expr}, {start}}}"
596+
return f"%{{sub {expr} {start}}}"
711597

712598
# --- var=macro; echo ${var//PAT/REPL} → Lua gsub ---
713599
m = _RE_BASH_REPLACE.match(body)
@@ -850,13 +736,13 @@ def convert_glob_removal(expr, op, pat):
850736
f' and "{lua_string_escape(a)}" or "{lua_string_escape(b)}")}}'
851737
)
852738

853-
# --- printf %.Ns MACRO → %{sub MACRO, 1, N} ---
739+
# --- printf %.Ns MACRO → %{sub MACRO 1 N} ---
854740
m = _RE_PRINTF_TRUNC.match(body)
855741
if m:
856742
n = int(m.group(1))
857743
expr = normalize_macro(m.group(2))
858744
if expr is not None:
859-
return f"%{{sub {expr}, 1, {n}}}"
745+
return f"%{{sub {expr} 1 {n}}}"
860746

861747
# --- printf %.Nf MACRO → Lua string.format ---
862748
m = _RE_PRINTF_FLOAT.match(body)
@@ -890,6 +776,118 @@ def convert_glob_removal(expr, op, pat):
890776

891777
@staticmethod
892778
def is_lua_safe(code):
779+
def strip_lua_comments(code):
    """Strip Lua comments while preserving string literals.

    Processes code left-to-right so that ``--`` inside a string literal
    is never mistaken for a comment start.  Quoted strings (``"..."`` and
    ``'...'``) as well as long-bracket strings (``[[...]]``,
    ``[==[...]==]``) are copied through verbatim.

    Every comment, line or block, is replaced by a single space; the
    newline that terminates a line comment is kept.  An unterminated
    string or block comment extends to the end of the input.

    Args:
        code: Lua source text.

    Returns:
        The source text with comments replaced by single spaces.
    """
    size = len(code)

    def long_bracket_end(pos):
        """Return the index just past the long bracket opening at *pos*.

        Returns ``None`` when ``code[pos:]`` does not start with ``[``,
        any number of ``=``, then ``[``.  When the matching closing
        delimiter is missing, the end of the input is returned.
        """
        k = pos + 1
        while k < size and code[k] == "=":
            k += 1
        if k >= size or code[k] != "[":
            return None
        closer = "]" + "=" * (k - pos - 1) + "]"
        found = code.find(closer, k + 1)
        return size if found == -1 else found + len(closer)

    result = []
    i = 0
    while i < size:
        ch = code[i]
        if ch in ('"', "'"):
            # Quoted string: copy through to the matching, unescaped quote.
            start = i
            i += 1
            while i < size and code[i] != ch:
                # A backslash escapes the next character (if there is one).
                i += 2 if code[i] == "\\" and i + 1 < size else 1
            if i < size:
                i += 1  # include the closing quote
            result.append(code[start:i])
        elif ch == "[":
            # Long-bracket string: "--" inside it is data, not a comment.
            end = long_bracket_end(i)
            if end is None:
                result.append(ch)
                i += 1
            else:
                result.append(code[i:end])
                i = end
        elif code.startswith("--", i):
            end = None
            if code.startswith("[", i + 2):
                end = long_bracket_end(i + 2)
            if end is None:
                # Line comment: stop in front of the newline so it is kept.
                newline = code.find("\n", i)
                end = size if newline == -1 else newline
            result.append(" ")
            i = end
        else:
            result.append(ch)
            i += 1
    return "".join(result)
830+
831+
def decode_lua_escapes(s):
    r"""Decode Lua string escape sequences to their character values.

    Recognized escapes:

    - single-character escapes ``\a \b \f \n \r \t \v \\ \" \'``
    - ``\z``, which is dropped together with the whitespace that follows
    - ``\xXX`` with exactly two ASCII hexadecimal digits
    - ``\u{XXX}`` with one or more ASCII hexadecimal digits
    - ``\ddd`` with one to three ASCII decimal digits (value modulo 256)

    Anything else is not an escape: the backslash is copied literally and
    decoding resumes at the following character.  Digits are matched
    against explicit ASCII sets because ``str.isdigit()`` and ``int()``
    also accept other Unicode digits (and ``int()`` accepts signs,
    whitespace and underscores), which either raised ``ValueError`` or
    decoded sequences that are not escapes at all.

    Args:
        s: Contents of a Lua string literal, without the enclosing quotes.

    Returns:
        The string with all well-formed escape sequences decoded.
    """
    simple = {
        "a": "\a",
        "b": "\b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
        "'": "'",
    }
    dec_digits = "0123456789"
    hex_digits = "0123456789abcdefABCDEF"

    size = len(s)
    result = []
    i = 0
    while i < size:
        if s[i] != "\\" or i + 1 >= size:
            # Ordinary character, or a lone backslash at the very end.
            result.append(s[i])
            i += 1
            continue
        c = s[i + 1]
        if c in simple:
            result.append(simple[c])
            i += 2
        elif c == "z":
            i += 2
            while i < size and s[i] in " \t\n\r":
                i += 1
        elif c == "x" and i + 3 < size:
            pair = s[i + 2 : i + 4]
            if all(d in hex_digits for d in pair):
                result.append(chr(int(pair, 16)))
                i += 4
            else:
                # Malformed: keep the backslash, rescan from the next char.
                result.append(s[i])
                i += 1
        elif c == "u" and i + 2 < size and s[i + 2] == "{":
            end = s.find("}", i + 3)
            digits = s[i + 3 : end] if end != -1 else ""
            char = None
            if digits and all(d in hex_digits for d in digits):
                try:
                    char = chr(int(digits, 16))
                except (ValueError, OverflowError):
                    char = None  # code point outside the range chr() accepts
            if char is None:
                result.append(s[i])
                i += 1
            else:
                result.append(char)
                i = end + 1
        elif c in dec_digits:
            j = i + 1
            while j < size and j < i + 4 and s[j] in dec_digits:
                j += 1
            result.append(chr(int(s[i + 1 : j]) % 256))
            i = j
        else:
            result.append(s[i])
            i += 1
    return "".join(result)
890+
893891
def has_safe_format_specs(fmt):
894892
"""
895893
Check that a format string only uses safe specifiers.
@@ -903,13 +901,13 @@ def has_safe_format_specs(fmt):
903901
cleaned = _SAFE_FORMAT_SPEC_RE.sub("", expanded)
904902
return "%" not in cleaned
905903

906-
stripped = _strip_lua_comments(code)
904+
stripped = strip_lua_comments(code)
907905
string_spans = []
908906
for m in _LUA_STRING_LITERAL_RE.finditer(stripped):
909907
content = m.group(0)[1:-1]
910908
if _UNSAFE_LUA_STRING_CONTENT_RE.search(content):
911909
return False
912-
decoded = _decode_lua_escapes(content)
910+
decoded = decode_lua_escapes(content)
913911
if _UNSAFE_LUA_STRING_CONTENT_RE.search(decoded):
914912
return False
915913
if decoded.endswith("%"):

0 commit comments

Comments
 (0)