Skip to content

Commit b86ce3a

Browse files
committed
Avoid repeated concatenation when building large strings.
This will hopefully avoid some awk bugs that cause memory corruption.
1 parent 8b2fcab commit b86ce3a

File tree

2 files changed

+266
-162
lines changed

2 files changed

+266
-162
lines changed

Tools/configure/transpiler/pyconf.awk

Lines changed: 134 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -287,55 +287,79 @@ function _arr_join_quoted(arr, sep, n, i, result) {
287287
# back to ENVIRON[]. Mirrors what Python's pyconf.run() does with its
288288
# vars= parameter: the command template contains shell-style variable
289289
# references that must be resolved before execution.
290-
function _expand_cmd_vars(cmd, result, i, n, c, varname, brace) {
291-
result = ""
292-
n = length(cmd)
293-
i = 1
294-
while (i <= n) {
295-
c = substr(cmd, i, 1)
296-
if (c == "$" && i < n) {
297-
# Check for ${VAR} or $VAR
298-
if (substr(cmd, i + 1, 1) == "{") {
299-
brace = 1
300-
i += 2
301-
varname = ""
302-
while (i <= n && substr(cmd, i, 1) != "}") {
303-
varname = varname substr(cmd, i, 1)
304-
i++
305-
}
306-
if (i <= n) i++ # skip closing }
307-
} else {
308-
brace = 0
309-
i++
310-
varname = ""
311-
while (i <= n && substr(cmd, i, 1) ~ /[A-Za-z0-9_]/) {
312-
varname = varname substr(cmd, i, 1)
313-
i++
314-
}
315-
}
316-
if (varname == "") {
317-
result = result "$"
318-
if (brace) result = result "{"
319-
} else if (varname in V) {
320-
result = result V[varname]
321-
} else if (varname in ENVIRON) {
322-
result = result ENVIRON[varname]
323-
}
324-
# else: unknown var expands to empty (matches shell behaviour)
325-
} else {
326-
result = result c
327-
i++
328-
}
329-
}
330-
return result
331-
}
332-
333-
function _cmd_output(cmd, line, result) {
334-
result = ""
335-
while ((cmd | getline line) > 0)
336-
result = result (result != "" ? "\n" : "") line
337-
close(cmd)
338-
return result
290+
function _expand_cmd_vars(cmd, n, i, c, varname, brace, parts, pc) {
291+
# Expand $VAR / ${VAR} in cmd: V[] is consulted first, then ENVIRON[]; an empty name keeps the literal "$" (or "${").
292+
# Pieces go into parts[] and are joined by _join_parts(); meant as a workaround for suspected mawk concatenation corruption.
293+
n = length(cmd)
294+
i = 1
295+
pc = 0 # parts count
296+
while (i <= n) {
297+
c = substr(cmd, i, 1)
298+
if (c == "$" && i < n) {
299+
# Check for ${VAR} or $VAR
300+
if (substr(cmd, i + 1, 1) == "{") {
301+
brace = 1
302+
i += 2
303+
varname = ""
304+
while (i <= n && substr(cmd, i, 1) != "}") {
305+
varname = varname substr(cmd, i, 1)
306+
i++
307+
}
308+
if (i <= n) i++ # skip closing }
309+
} else {
310+
brace = 0
311+
i++
312+
varname = ""
313+
while (i <= n && substr(cmd, i, 1) ~ /[A-Za-z0-9_]/) {
314+
varname = varname substr(cmd, i, 1)
315+
i++
316+
}
317+
}
318+
if (varname == "") {
319+
parts[++pc] = "$"
320+
if (brace) parts[++pc] = "{"
321+
} else if (varname in V) {
322+
parts[++pc] = V[varname]
323+
} else if (varname in ENVIRON) {
324+
parts[++pc] = ENVIRON[varname]
325+
}
326+
# else: unknown var expands to empty (matches shell behaviour)
327+
} else {
328+
parts[++pc] = c
329+
i++
330+
}
331+
}
332+
# Hand the collected pieces to _join_parts() to build the final string
333+
return _join_parts(parts, pc)
334+
}
335+
336+
function _join_parts(arr, n, i, result) {
337+
# Return arr[1] .. arr[n] concatenated in index order with no separator.
338+
# NOTE(review): the loop below still appends one element per iteration, so the result is built incrementally.
339+
result = ""
340+
for (i = 1; i <= n; i++)
341+
result = result arr[i]
342+
return result
343+
}
344+
345+
function _cmd_output(cmd, line, lines, ln) {
346+
# Run cmd through a pipe, store each output line in lines[], then close the pipe.
347+
# Returns the lines joined with "\n" by _join_lines(), i.e. without a trailing newline.
348+
ln = 0
349+
while ((cmd | getline line) > 0)
350+
lines[++ln] = line
351+
close(cmd)
352+
# Build the return value from the collected lines
353+
return _join_lines(lines, ln)
354+
}
355+
356+
function _join_lines(arr, n, i, result) {
357+
# Return arr[1] .. arr[n] joined with "\n" between elements (no leading or trailing newline).
358+
# NOTE(review): the result is still built by appending one line per iteration.
359+
result = ""
360+
for (i = 1; i <= n; i++)
361+
result = result (i > 1 ? "\n" : "") arr[i]
362+
return result
339363
}
340364

341365
function _cmd_output_oneline(cmd, line) {
@@ -1404,14 +1428,27 @@ function pyconf_use_system_extensions() {
14041428
# Environment save/restore
14051429
# ---------------------------------------------------------------------------
14061430

1407-
function pyconf_save_env( k) {
1408-
_saved_env_depth++
1409-
for (k in V)
1410-
_saved_env_stack[_saved_env_depth, k] = V[k]
1411-
# Store the set of keys so we can restore exactly
1412-
_saved_env_keys[_saved_env_depth] = ""
1413-
for (k in V)
1414-
_saved_env_keys[_saved_env_depth] = _saved_env_keys[_saved_env_depth] k "\036"
1431+
function pyconf_save_env( k, keys, kc) {
1432+
_saved_env_depth++
1433+
for (k in V)
1434+
_saved_env_stack[_saved_env_depth, k] = V[k]
1435+
# Store the set of keys so we can restore exactly.
1436+
# Keys are gathered in keys[] first and then joined by _join_keys(), which appends
1437+
# "\036" after every key; intended to sidestep suspected mawk corruption from repeated concatenation.
1438+
kc = 0
1439+
for (k in V)
1440+
keys[++kc] = k
1441+
# Record the joined key list for this save depth
1442+
_saved_env_keys[_saved_env_depth] = _join_keys(keys, kc)
1443+
}
1444+
1445+
function _join_keys(arr, n, i, result) {
1446+
# Return arr[1] .. arr[n] with "\036" appended after every element, including the last.
1447+
# NOTE(review): the result is still built by appending one key per iteration.
1448+
result = ""
1449+
for (i = 1; i <= n; i++)
1450+
result = result arr[i] "\036"
1451+
return result
14151452
}
14161453

14171454
function pyconf_restore_env( k, n, keys, i) {
@@ -1738,23 +1775,23 @@ function pyconf_output( i) {
17381775
pyconf_cleanup()
17391776
}
17401777

1741-
function _pyconf_build_module_block( i, key, uname, state, block, sep) {
1742-
block = ""
1743-
sep = ""
1778+
function _pyconf_build_module_block( i, key, uname, state) {
1779+
# Fill the global _module_block_lines[1.._module_block_n] with one "KEY=value" line per entry instead of one big string.
1780+
# Each module gets MODULE_<name>_STATE, plus _CFLAGS/_LDFLAGS when flagged "yes" and state is not disabled, n/a or missing.
1781+
# NOTE(review): the new code does not assign SUBST["MODULE_BLOCK"]; confirm every consumer reads the array instead.
1782+
_module_block_n = 0
17441783
for (i = 1; i <= _stdlib_mod_count; i++) {
17451784
uname = _stdlib_mod_names[i]
17461785
key = "MODULE_" uname
17471786
state = SUBST[key "_STATE"]
1748-
block = block sep key "_STATE=" state
1749-
sep = "\n"
1787+
_module_block_lines[++_module_block_n] = key "_STATE=" state
17501788
if (_stdlib_mod_has_cflags[uname] == "yes" && state != "disabled" && state != "n/a" && state != "missing") {
1751-
block = block sep key "_CFLAGS=" SUBST[key "_CFLAGS"]
1789+
_module_block_lines[++_module_block_n] = key "_CFLAGS=" SUBST[key "_CFLAGS"]
17521790
}
17531791
if (_stdlib_mod_has_ldflags[uname] == "yes" && state != "disabled" && state != "n/a" && state != "missing") {
1754-
block = block sep key "_LDFLAGS=" SUBST[key "_LDFLAGS"]
1792+
_module_block_lines[++_module_block_n] = key "_LDFLAGS=" SUBST[key "_LDFLAGS"]
17551793
}
17561794
}
1757-
SUBST["MODULE_BLOCK"] = block
17581795
}
17591796

17601797
function _pyconf_resolve_exports( k) {
@@ -1995,27 +2032,42 @@ function _last_index(s, ch, i, last) {
19952032
return last
19962033
}
19972034

1998-
function _pyconf_subst_file(inf, outf, line, k, pat, val, pos, before, after, skip) {
2035+
function _pyconf_subst_file(inf, outf, line, k, pat, val, pos, before, after, skip, i, b, a) {
19992036
while ((getline line < inf) > 0) {
20002037
skip = 0
2001-
# Replace @VAR@ patterns with SUBST values
2002-
for (k in SUBST) {
2003-
pat = "@" k "@"
2004-
while (index(line, pat) > 0) {
2005-
val = SUBST[k]
2006-
pos = index(line, pat)
2007-
before = substr(line, 1, pos - 1)
2008-
after = substr(line, pos + length(pat))
2009-
# If the value contains newlines, emit directly and skip normal print
2010-
if (index(val, "\n") > 0) {
2011-
printf "%s%s%s\n", before, val, after > outf
2012-
skip = 1
2013-
break
2014-
} else {
2015-
line = before val after
2038+
# Handle @MODULE_BLOCK@ specially: emit line-by-line to avoid
2039+
# building a large concatenated string in AWK memory (mawk safety).
2040+
if (index(line, "@MODULE_BLOCK@") > 0) {
2041+
pos = index(line, "@MODULE_BLOCK@")
2042+
before = substr(line, 1, pos - 1)
2043+
after = substr(line, pos + length("@MODULE_BLOCK@"))
2044+
for (i = 1; i <= _module_block_n; i++) {
2045+
b = (i == 1) ? before : ""
2046+
a = (i == _module_block_n) ? after : ""
2047+
printf "%s%s%s\n", b, _module_block_lines[i], a > outf
2048+
}
2049+
skip = 1
2050+
}
2051+
if (!skip) {
2052+
# Replace @VAR@ patterns with SUBST values
2053+
for (k in SUBST) {
2054+
pat = "@" k "@"
2055+
while (index(line, pat) > 0) {
2056+
val = SUBST[k]
2057+
pos = index(line, pat)
2058+
before = substr(line, 1, pos - 1)
2059+
after = substr(line, pos + length(pat))
2060+
# If the value contains newlines, emit directly and skip normal print
2061+
if (index(val, "\n") > 0) {
2062+
printf "%s%s%s\n", before, val, after > outf
2063+
skip = 1
2064+
break
2065+
} else {
2066+
line = before val after
2067+
}
20162068
}
2069+
if (skip) break
20172070
}
2018-
if (skip) break
20192071
}
20202072
if (!skip) {
20212073
# Neutralise VPATH when srcdir == "." (in-tree build):

0 commit comments

Comments
 (0)