Skip to content

Commit 41c52c1

Browse files
committed
fix: address remaining regex DoS security hotspots
1 parent d10c6ca commit 41c52c1

3 files changed

Lines changed: 45 additions & 38 deletions

File tree

src/treemapper/diffctx/edges/config/build.py

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,20 +9,21 @@
9 9
_MAKEFILE_NAMES = {"makefile", "gnumakefile"}
10 10
_MAKEFILE_EXTS = {".mk", ".mak", ".make"}
11 11

12-
_MAKE_TARGET_RE = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_.-]*)\s*:(?!=)", re.MULTILINE)
13-
_MAKE_INCLUDE_RE = re.compile(r"^(?:-)?include\s+([^\n]+)$", re.MULTILINE)
14-
_MAKE_VAR_RE = re.compile(r"^\s*([A-Z_][A-Z0-9_]*)\s*[:?]?=", re.MULTILINE)
15-
_MAKE_RECIPE_RE = re.compile(r"^\t(.+)$", re.MULTILINE)
12+
_MAKE_TARGET_RE = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_.-]{0,100})\s{0,20}:(?!=)", re.MULTILINE)
13+
_MAKE_INCLUDE_RE = re.compile(r"^(?:-)?include\s+([^\n]{1,500})$", re.MULTILINE)
14+
_MAKE_VAR_RE = re.compile(r"^\s{0,20}([A-Z_][A-Z0-9_]{0,100})\s{0,20}[:?]?=", re.MULTILINE)
15+
_MAKE_RECIPE_RE = re.compile(r"^\t([^\n]{1,1000})$", re.MULTILINE)
16 16

17-
_CMAKE_ADD_EXE_RE = re.compile(r"add_executable\s*\(\s*(\w+)", re.IGNORECASE)
18-
_CMAKE_ADD_LIB_RE = re.compile(r"add_library\s*\(\s*(\w+)", re.IGNORECASE)
17+
_CMAKE_ADD_EXE_RE = re.compile(r"add_executable\s{0,10}\(\s{0,10}(\w{1,100})", re.IGNORECASE)
18+
_CMAKE_ADD_LIB_RE = re.compile(r"add_library\s{0,10}\(\s{0,10}(\w{1,100})", re.IGNORECASE)
19 19
_CMAKE_TARGET_LINK_RE = re.compile(
20-
r"target_link_libraries\s*\(\s*(\w+)\s+(?:PUBLIC|PRIVATE|INTERFACE)?\s*([^)]+)\)", re.IGNORECASE
20+
r"target_link_libraries\s{0,10}\(\s{0,10}(\w{1,100})\s{1,20}(?:PUBLIC|PRIVATE|INTERFACE)?\s{0,10}([^)]{1,500})\)",
21+
re.IGNORECASE,
21 22
)
22-
_CMAKE_INCLUDE_RE = re.compile(r"include\s*\(\s*([^)]+)\)", re.IGNORECASE)
23-
_CMAKE_ADD_SUBDIR_RE = re.compile(r"add_subdirectory\s*\(\s*([^\)\s]+)", re.IGNORECASE)
24-
_CMAKE_FIND_PKG_RE = re.compile(r"find_package\s*\(\s*(\w+)", re.IGNORECASE)
25-
_CMAKE_SET_RE = re.compile(r"set\s*\(\s*([A-Z_][A-Z0-9_]*)", re.IGNORECASE)
23+
_CMAKE_INCLUDE_RE = re.compile(r"include\s{0,10}\(\s{0,10}([^)]{1,300})\)", re.IGNORECASE)
24+
_CMAKE_ADD_SUBDIR_RE = re.compile(r"add_subdirectory\s{0,10}\(\s{0,10}([^\)\s]{1,200})", re.IGNORECASE)
25+
_CMAKE_FIND_PKG_RE = re.compile(r"find_package\s{0,10}\(\s{0,10}(\w{1,100})", re.IGNORECASE)
26+
_CMAKE_SET_RE = re.compile(r"set\s{0,10}\(\s{0,10}([A-Z_][A-Z0-9_]{0,100})", re.IGNORECASE)
26 27

27 28
_SCRIPT_CALL_RE = re.compile(r"(?:bash|sh|python|python3|\.\/scripts\/|\.\/bin\/)([a-zA-Z0-9_.-]+)")
28 29
_SOURCE_FILE_RE = re.compile(r"\b([a-zA-Z_]\w*\.(?:c|cpp|cc|cxx|h|hpp|hxx|py|sh|go|rs|java))\b")

src/treemapper/diffctx/edges/config/cicd.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,14 @@
6 6
from ...types import Fragment, FragmentId
7 7
from ..base import EdgeBuilder, EdgeDict, FragmentIndex, discover_files_by_refs
8 8

9-
_GHA_RUN_RE = re.compile(r"^\s*-?\s*run:\s*[|>]?\s*([^\n]+)", re.MULTILINE)
10-
11-
_GITLAB_SCRIPT_RE = re.compile(r"^\s*(?:script|before_script|after_script):\s*\n((?:\s+-\s*.+\n)+)", re.MULTILINE)
12-
_GITLAB_INCLUDE_RE = re.compile(r"^\s*-?\s*(?:local|project|remote|template):\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
9+
_GHA_RUN_RE = re.compile(r"^\s{0,20}-?\s{0,5}run:\s{0,5}[|>]?\s{0,5}([^\n]{1,500})", re.MULTILINE)
10+
11+
_GITLAB_SCRIPT_RE = re.compile(
12+
r"^\s{0,20}(?:script|before_script|after_script):\s?\n((?:\s{1,20}-\s{0,5}[^\n]{1,500}\n){1,100})", re.MULTILINE
13+
)
14+
_GITLAB_INCLUDE_RE = re.compile(
15+
r"^\s{0,20}-?\s{0,5}(?:local|project|remote|template):\s{0,5}['\"]?([^'\"#\n]{1,300})", re.MULTILINE
16+
)
13 17

14 18
_JENKINS_SH_RE = re.compile(r"sh\s*(?:\(['\"]|['\"])(.+?)['\"]\)?", re.MULTILINE | re.DOTALL)
15 19
_JENKINS_SCRIPT_RE = re.compile(r"script\s*\{([^}]+)\}", re.MULTILINE | re.DOTALL)

src/treemapper/diffctx/edges/config/kubernetes.py

Lines changed: 25 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -7,29 +7,31 @@
7 7
from ...types import Fragment, FragmentId
8 8
from ..base import EdgeBuilder, EdgeDict
9 9

10-
_K8S_API_VERSION_RE = re.compile(r"^apiVersion:\s*([^\s#]+)", re.MULTILINE)
11-
_K8S_KIND_RE = re.compile(r"^kind:\s*(\w+)", re.MULTILINE)
12-
_K8S_NAME_RE = re.compile(r"^\s+name:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
13-
_K8S_NAMESPACE_RE = re.compile(r"^\s+namespace:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
14-
15-
_CONFIGMAP_REF_RE = re.compile(r"configMapKeyRef:\s*\n\s+name:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
16-
_CONFIGMAP_NAME_RE = re.compile(r"configMapName:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
17-
_SECRET_REF_RE = re.compile(r"secretKeyRef:\s*\n\s+name:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
18-
_SECRET_NAME_RE = re.compile(r"secretName:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
19-
20-
_SERVICE_NAME_RE = re.compile(r"serviceName:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
21-
_BACKEND_SERVICE_RE = re.compile(r"service:\s*\n\s+name:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
22-
23-
_IMAGE_RE = re.compile(r"^\s+image:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
24-
25-
_SELECTOR_MATCH_LABELS_RE = re.compile(r"selector:\s*\n\s+matchLabels:\s*\n((?:\s+[a-zA-Z0-9_./-]+:\s*[^\n:]+\n)+)", re.MULTILINE)
26-
_LABELS_RE = re.compile(r"labels:\s*\n((?:\s+[a-zA-Z0-9_./-]+:\s*[^\n:]+\n)+)", re.MULTILINE)
27-
_LABEL_PAIR_RE = re.compile(r"^\s*([a-zA-Z0-9_./-]+):\s*['\"]?([a-zA-Z0-9_./-]+)['\"]?\s*$", re.MULTILINE)
28-
_SIMPLE_SELECTOR_RE = re.compile(r"selector:\s*\n((?:\s+[a-zA-Z0-9_./-]+:\s*[^\n:]+\n)+)", re.MULTILINE)
29-
30-
_VOLUME_CONFIGMAP_RE = re.compile(r"configMap:\s*\n\s+name:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
31-
_VOLUME_SECRET_RE = re.compile(r"secret:\s*\n\s+secretName:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
32-
_VOLUME_PVC_RE = re.compile(r"persistentVolumeClaim:\s*\n\s+claimName:\s*['\"]?([^'\"#\n]+)", re.MULTILINE)
10+
_K8S_API_VERSION_RE = re.compile(r"^apiVersion:\s?([^\s#]{1,100})", re.MULTILINE)
11+
_K8S_KIND_RE = re.compile(r"^kind:\s?(\w{1,100})", re.MULTILINE)
12+
_K8S_NAME_RE = re.compile(r"^\s{1,20}name:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
13+
_K8S_NAMESPACE_RE = re.compile(r"^\s{1,20}namespace:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
14+
15+
_CONFIGMAP_REF_RE = re.compile(r"configMapKeyRef:\s?\n\s{1,20}name:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
16+
_CONFIGMAP_NAME_RE = re.compile(r"configMapName:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
17+
_SECRET_REF_RE = re.compile(r"secretKeyRef:\s?\n\s{1,20}name:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
18+
_SECRET_NAME_RE = re.compile(r"secretName:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
19+
20+
_SERVICE_NAME_RE = re.compile(r"serviceName:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
21+
_BACKEND_SERVICE_RE = re.compile(r"service:\s?\n\s{1,20}name:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
22+
23+
_IMAGE_RE = re.compile(r"^\s{1,20}image:\s?['\"]?([^'\"#\n]{1,300})", re.MULTILINE)
24+
25+
_SELECTOR_MATCH_LABELS_RE = re.compile(
26+
r"selector:\s?\n\s{1,20}matchLabels:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]{1,200}\n){1,50})", re.MULTILINE
27+
)
28+
_LABELS_RE = re.compile(r"labels:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]{1,200}\n){1,50})", re.MULTILINE)
29+
_LABEL_PAIR_RE = re.compile(r"^\s{0,20}([a-zA-Z0-9_./-]{1,100}):\s?['\"]?([a-zA-Z0-9_./-]{1,100})['\"]?\s{0,10}$", re.MULTILINE)
30+
_SIMPLE_SELECTOR_RE = re.compile(r"selector:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]{1,200}\n){1,50})", re.MULTILINE)
31+
32+
_VOLUME_CONFIGMAP_RE = re.compile(r"configMap:\s?\n\s{1,20}name:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
33+
_VOLUME_SECRET_RE = re.compile(r"secret:\s?\n\s{1,20}secretName:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
34+
_VOLUME_PVC_RE = re.compile(r"persistentVolumeClaim:\s?\n\s{1,20}claimName:\s?['\"]?([^'\"#\n]{1,200})", re.MULTILINE)
33 35

34 36
_YAML_EXTS = {".yaml", ".yml"}
35 37

0 commit comments

Comments
 (0)