Skip to content

Commit b62e1ed

Browse files
committed
fix: address final regex DoS security hotspots
1 parent: 41c52c1 · commit: b62e1ed

5 files changed

Lines changed: 19 additions & 17 deletions

File tree

src/treemapper/diffctx/edges/semantic/c_family.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
re.MULTILINE,
2424
)
2525

26-
_CLASS_RE = re.compile(r"^\s*(?:template\s*<[^>]*>\s*)?(?:class|struct)\s+(\w+)", re.MULTILINE)
27-
_TYPEDEF_RE = re.compile(r"^\s*typedef\s+.*?\s+(\w+)\s*;", re.MULTILINE)
28-
_USING_TYPE_RE = re.compile(r"^\s*using\s+(\w+)\s*=", re.MULTILINE)
26+
_CLASS_RE = re.compile(r"^\s{0,20}(?:template\s{0,5}<[^>]{0,200}>\s{0,5})?(?:class|struct)\s+(\w+)", re.MULTILINE)
27+
_TYPEDEF_RE = re.compile(r"^\s{0,20}typedef\s{1,10}[^\n;]{1,500}\s{1,10}(\w+)\s{0,10};", re.MULTILINE)
28+
_USING_TYPE_RE = re.compile(r"^\s{0,20}using\s+(\w+)\s{0,10}=", re.MULTILINE)
2929
_ENUM_RE = re.compile(r"^\s*enum\s+(?:class\s+)?(\w+)", re.MULTILINE)
3030
_NAMESPACE_RE = re.compile(r"^\s*namespace\s+(\w+)", re.MULTILINE)
3131

src/treemapper/diffctx/edges/semantic/dotnet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
_CS_USING_RE = re.compile(r"^\s*using\s+(?:static\s+)?([A-Z][a-zA-Z0-9_.]*);", re.MULTILINE)
1515
_CS_NAMESPACE_RE = re.compile(r"^\s*namespace\s+([A-Z][a-zA-Z0-9_.]*)", re.MULTILINE)
1616
_CS_CLASS_RE = re.compile(
17-
r"^\s*(?:public|private|protected|internal)?\s*(?:static|sealed|abstract|partial)?\s*(?:class|interface|struct|record|enum)\s+([A-Z]\w*)",
17+
r"^\s{0,20}(?:(?:public|private|protected|internal)\s{1,10})?(?:(?:static|sealed|abstract|partial)\s{1,10})?(?:class|interface|struct|record|enum)\s+([A-Z]\w{0,100})",
1818
re.MULTILINE,
1919
)
2020
_CS_INHERIT_RE = re.compile(r"(?:class|struct|record)\s+\w+[^:\n]{0,200}:\s*([A-Z]\w*(?:,\s*[A-Z]\w*)*)")

src/treemapper/diffctx/edges/semantic/javascript.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
_JS_SYMBOL_REF_WEIGHT = 0.75
1515
_JS_TYPE_REF_WEIGHT = 0.65
1616

17-
_JS_IMPORT_STATIC_RE = re.compile(r"""import\s+[^'"]{0,500}['"]([^'"]+)['"]""")
18-
_JS_REQUIRE_RE = re.compile(r"""require\s*\(\s*['"]([^'"]+)['"]\s*\)""")
19-
_JS_EXPORT_FROM_RE = re.compile(r"""export\s+[^'"]{0,500}\s+from\s+['"]([^'"]+)['"]""")
17+
_JS_IMPORT_STATIC_RE = re.compile(r"""import\s{1,10}[^'"]{0,500}['"]([^'"]{1,500})['"]""")
18+
_JS_REQUIRE_RE = re.compile(r"""require\s{0,10}\(\s{0,10}['"]([^'"]{1,500})['"]\s{0,10}\)""")
19+
_JS_EXPORT_FROM_RE = re.compile(r"""export\s{1,10}[^'"]{0,500}\s{1,10}from\s{1,10}['"]([^'"]{1,500})['"]""")
2020

2121

2222
def _is_js_file(path: Path) -> bool:

src/treemapper/diffctx/edges/semantic/shell.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010
_POWERSHELL_EXTS = {".ps1", ".psm1", ".psd1"}
1111
_ALL_SHELL = _SHELL_EXTS | _POWERSHELL_EXTS
1212

13-
_SOURCE_RE = re.compile(r"^\s*(?:source|\.)\s+['\"]?([^'\"#\s]+)", re.MULTILINE)
14-
_BASH_FUNC_RE = re.compile(r"^\s*(?:function\s+)?(\w+)\s*\(\s*\)", re.MULTILINE)
13+
_SOURCE_RE = re.compile(r"^\s{0,20}(?:source|\.)\s{1,10}['\"]?([^'\"#\s]{1,300})", re.MULTILINE)
14+
_BASH_FUNC_RE = re.compile(r"^\s{0,20}(?:function\s{1,10})?(\w{1,100})\s{0,10}\(\s{0,10}\)", re.MULTILINE)
1515

16-
_SCRIPT_CALL_RE = re.compile(r"(?:bash|sh|zsh|python|python3|node|ruby|perl)\s+['\"]?([^\s'\"]+)", re.MULTILINE)
17-
_EXEC_CALL_RE = re.compile(r"(?:\./|scripts/|bin/)([a-zA-Z0-9_.-]+(?:\.(?:sh|py|rb|pl))?)", re.MULTILINE)
18-
_ENV_FILE_RE = re.compile(r"^\s*(?:source|\.)\s+[^\n]*\.env", re.MULTILINE)
16+
_SCRIPT_CALL_RE = re.compile(r"(?:bash|sh|zsh|python|python3|node|ruby|perl)\s{1,10}['\"]?([^\s'\"]{1,300})", re.MULTILINE)
17+
_EXEC_CALL_RE = re.compile(r"(?:\./|scripts/|bin/)([a-zA-Z0-9_.-]{1,100}(?:\.(?:sh|py|rb|pl))?)", re.MULTILINE)
18+
_ENV_FILE_RE = re.compile(r"^\s{0,20}(?:source|\.)\s{1,10}[^\n]{0,500}\.env", re.MULTILINE)
1919

2020
_PS_IMPORT_RE = re.compile(r"Import-Module\s+['\"]?([^\s'\"]+)", re.IGNORECASE)
2121
_PS_DOT_SOURCE_RE = re.compile(r"\.\s+['\"]?([^\s'\"]+\.ps[m1d]?1)", re.IGNORECASE)

src/treemapper/diffctx/edges/semantic/swift.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
17 17
_SWIFT_FUNC_RE = re.compile(r"^\s*(?:\w+\s+|@\w+\s+)*func\s+(\w+)\s*[(]", re.MULTILINE)
18 18
_SWIFT_TYPEALIAS_RE = re.compile(r"^\s*(?:public\s+|private\s+|internal\s+|fileprivate\s+)*typealias\s+(\w+)", re.MULTILINE)
19 19

20-
_SWIFT_CONFORMANCE_RE = re.compile(r"(?:class|struct|enum)\s+\w+\s*(?:<[^>]+>)?\s*:\s*([^{\n]{1,500})", re.MULTILINE)
21-
_SWIFT_TYPE_REF_RE = re.compile(r"(?<![a-z_])([A-Z]\w*)\b")
22-
_SWIFT_FUNC_CALL_RE = re.compile(r"(?<!\w)([a-z]\w*)\s*\(")
20+
_SWIFT_CONFORMANCE_RE = re.compile(
21+
r"(?:class|struct|enum)\s{1,10}\w{1,100}\s{0,10}(?:<[^>]{1,200}>)?\s{0,10}:\s{0,10}([^{\n]{1,500})", re.MULTILINE
22+
)
23+
_SWIFT_TYPE_REF_RE = re.compile(r"(?<![a-z_])([A-Z]\w{0,100})\b")
24+
_SWIFT_FUNC_CALL_RE = re.compile(r"(?<!\w)([a-z]\w{0,100})\s{0,10}\(")
23 25
_SWIFT_DOT_CALL_RE = re.compile(r"(\w+)\.([a-z][a-zA-Z0-9]*)\s*\(")
24 26

25 27

@@ -51,8 +53,8 @@ def _extract_conformances(content: str) -> set[str]:
51 53
inheritance = match.group(1)
52 54
for part in inheritance.split(","):
53 55
part = part.strip()
54-
part = re.sub(r"<[^>]+>", "", part)
55-
part = re.sub(r"\s+where\s+[^\n]*", "", part)
56+
part = re.sub(r"<[^>]{1,200}>", "", part)
57+
part = re.sub(r"\s{1,20}where\s{1,20}[^\n]{0,300}", "", part)
56 58
if part and part[0].isupper():
57 59
conformances.add(part.strip())
58 60
return conformances

0 commit comments

Comments
 (0)