Skip to content

Commit 4d0baff

Browse files
committed
fix(edges): unbound k8s label/selector regex repetition (CompiledTooBig)
Nested bounded repetition of a Unicode character class compiled past the regex crate's default 10 MiB size limit, so the first workload manifest that forced the Lazy static aborted the whole --diff run on any k8s repo. Removed the upper bound on the value class ([^\n:]{1,200} -> [^\n:]+) in the 3 label/selector patterns; matching is unchanged for values of up to 200 characters (longer values, previously rejected, now match as well), and because regex matching is linear-time the unbounded form introduces no ReDoS risk. Add a regression test that forces every k8s regex to compile, plus a selector-edge YAML case. Also: drop the unused/contradictory TREE_SITTER_LANGUAGES map; drop .sass (recognized but had no parser -> zero fragments); align the MCP get_diff_context tau default with the documented CLI default (0.08, not the engine default of 0.12). Fixes #58
1 parent aa4c112 commit 4d0baff

4 files changed

Lines changed: 152 additions & 96 deletions

File tree

diffctx/src/edges/config_edges/kubernetes.rs

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,11 @@ static IMAGE_RE: Lazy<Regex> =
4141
Lazy::new(|| Regex::new(r##"(?m)^\s{1,20}image:\s?['"]?([^'"#\n]{1,300})"##).unwrap());
4242

4343
static SELECTOR_MATCH_LABELS_RE: Lazy<Regex> = Lazy::new(|| {
44-
Regex::new(r"(?m)selector:\s?\n\s{1,20}matchLabels:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]{1,200}\n){1,50})")
44+
Regex::new(r"(?m)selector:\s?\n\s{1,20}matchLabels:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]+\n){1,50})")
4545
.unwrap()
4646
});
4747
static LABELS_RE: Lazy<Regex> = Lazy::new(|| {
48-
Regex::new(r"(?m)labels:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]{1,200}\n){1,50})")
49-
.unwrap()
48+
Regex::new(r"(?m)labels:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]+\n){1,50})").unwrap()
5049
});
5150
static LABEL_PAIR_RE: Lazy<Regex> = Lazy::new(|| {
5251
Regex::new(
@@ -55,7 +54,7 @@ static LABEL_PAIR_RE: Lazy<Regex> = Lazy::new(|| {
5554
.unwrap()
5655
});
5756
static SIMPLE_SELECTOR_RE: Lazy<Regex> = Lazy::new(|| {
58-
Regex::new(r"(?m)selector:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]{1,200}\n){1,50})")
57+
Regex::new(r"(?m)selector:\s?\n((?:\s{1,20}[a-zA-Z0-9_./-]{1,100}:\s?[^\n:]+\n){1,50})")
5958
.unwrap()
6059
});
6160

@@ -515,3 +514,39 @@ impl EdgeBuilder for KubernetesEdgeBuilder {
515514
discovered
516515
}
517516
}
517+
518+
#[cfg(test)]
519+
mod tests {
520+
use super::*;
521+
522+
// Regression for issue #58: these label/selector patterns nest a Unicode
523+
// negated class under bounded repetition, which compiled past regex's
524+
// default 10 MiB size limit and made `.unwrap()` abort the whole process
525+
// the first time a workload manifest forced the Lazy static. Forcing every
526+
// k8s regex here makes CI catch any future reintroduction before a user does.
527+
#[test]
528+
fn all_kubernetes_regexes_compile() {
529+
for re in [
530+
&*K8S_API_VERSION_RE,
531+
&*K8S_KIND_RE,
532+
&*K8S_METADATA_NAME_RE,
533+
&*K8S_NAME_RE,
534+
&*CONFIGMAP_REF_RE,
535+
&*CONFIGMAP_NAME_RE,
536+
&*SECRET_REF_RE,
537+
&*SECRET_NAME_RE,
538+
&*SERVICE_NAME_RE,
539+
&*BACKEND_SERVICE_RE,
540+
&*IMAGE_RE,
541+
&*SELECTOR_MATCH_LABELS_RE,
542+
&*LABELS_RE,
543+
&*LABEL_PAIR_RE,
544+
&*SIMPLE_SELECTOR_RE,
545+
&*VOLUME_CONFIGMAP_RE,
546+
&*VOLUME_SECRET_RE,
547+
&*VOLUME_PVC_RE,
548+
] {
549+
let _ = re.is_match("x");
550+
}
551+
}
552+
}

diffctx/src/languages.rs

Lines changed: 0 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ pub static EXTENSION_TO_LANGUAGE: Lazy<FxHashMap<&'static str, &'static str>> =
2525
(".htm", "html"),
2626
(".css", "css"),
2727
(".scss", "scss"),
28-
(".sass", "sass"),
2928
(".less", "less"),
3029
(".xml", "xml"),
3130
(".svg", "xml"),
@@ -211,97 +210,6 @@ pub static FILENAME_TO_LANGUAGE: Lazy<FxHashMap<&'static str, &'static str>> = L
211210
map
212211
});
213212

214-
pub static TREE_SITTER_LANGUAGES: Lazy<FxHashMap<&'static str, &'static str>> = Lazy::new(|| {
215-
let entries: &[(&str, &str)] = &[
216-
(".py", "python"),
217-
(".pyw", "python"),
218-
(".pyi", "python"),
219-
(".js", "javascript"),
220-
(".jsx", "jsx"),
221-
(".mjs", "javascript"),
222-
(".cjs", "javascript"),
223-
(".ts", "typescript"),
224-
(".tsx", "tsx"),
225-
(".mts", "typescript"),
226-
(".cts", "typescript"),
227-
(".go", "go"),
228-
(".rs", "rust"),
229-
(".java", "java"),
230-
(".c", "c"),
231-
(".h", "c"),
232-
(".cpp", "cpp"),
233-
(".hpp", "cpp"),
234-
(".cc", "cpp"),
235-
(".cxx", "cpp"),
236-
(".hh", "cpp"),
237-
(".hxx", "cpp"),
238-
(".c++", "cpp"),
239-
(".h++", "cpp"),
240-
(".ipp", "cpp"),
241-
(".tpp", "cpp"),
242-
(".rb", "ruby"),
243-
(".rake", "ruby"),
244-
(".gemspec", "ruby"),
245-
(".cs", "c_sharp"),
246-
(".php", "php"),
247-
(".phtml", "php"),
248-
(".scala", "scala"),
249-
(".sc", "scala"),
250-
(".swift", "swift"),
251-
(".html", "html"),
252-
(".htm", "html"),
253-
(".sh", "bash"),
254-
(".bash", "bash"),
255-
(".zsh", "bash"),
256-
(".ksh", "bash"),
257-
(".css", "css"),
258-
(".scss", "css"),
259-
(".less", "css"),
260-
(".hs", "haskell"),
261-
(".lhs", "haskell"),
262-
(".ex", "elixir"),
263-
(".exs", "elixir"),
264-
(".lua", "lua"),
265-
(".r", "r"),
266-
(".ml", "ocaml"),
267-
(".mli", "ocaml"),
268-
(".erl", "erlang"),
269-
(".hrl", "erlang"),
270-
(".jl", "julia"),
271-
(".zig", "zig"),
272-
(".clj", "clojure"),
273-
(".cljs", "clojure"),
274-
(".cljc", "clojure"),
275-
(".nix", "nix"),
276-
(".groovy", "groovy"),
277-
(".gradle", "groovy"),
278-
(".m", "objc"),
279-
(".mm", "objc"),
280-
(".dart", "dart"),
281-
(".graphql", "graphql"),
282-
(".gql", "graphql"),
283-
(".tex", "latex"),
284-
(".latex", "latex"),
285-
(".sty", "latex"),
286-
(".cls", "latex"),
287-
(".prisma", "prisma"),
288-
(".svelte", "svelte"),
289-
(".tf", "hcl"),
290-
(".hcl", "hcl"),
291-
(".json", "json"),
292-
(".yaml", "yaml"),
293-
(".yml", "yaml"),
294-
(".cmake", "cmake"),
295-
(".mk", "make"),
296-
];
297-
298-
let mut map = FxHashMap::with_capacity_and_hasher(entries.len(), Default::default());
299-
for &(ext, lang) in entries {
300-
map.insert(ext, lang);
301-
}
302-
map
303-
});
304-
305213
pub fn get_language_for_file(path: &str) -> Option<&'static str> {
306214
let p = Path::new(path);
307215

src/diffctx/mcp/server.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@
1515

1616
logger = logging.getLogger(__name__)
1717

18+
# Keep in sync with diffctx.cli._DEFAULT_TAU. The layering contract forbids
19+
# mcp -> cli, so this user-facing default is duplicated rather than imported.
20+
# Without it, build_diff_context falls back to the engine default (0.12),
21+
# silently selecting less context than the documented CLI default.
22+
_DEFAULT_TAU = 0.08
23+
1824
mcp = FastMCP("diffctx")
1925

2026
_DIFF_DESCRIPTION = (
@@ -47,6 +53,7 @@ async def get_diff_context(
4753
root_dir=validated_path,
4854
diff_range=diff_range,
4955
budget_tokens=budget_tokens,
56+
tau=_DEFAULT_TAU,
5057
)
5158
)
5259
except GitError as e:
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
name: kubernetes_001_selector_edge_pulls_workload
2+
tags:
3+
- lang:yaml
4+
- domain:kubernetes
5+
- change:single_file
6+
- context:cross_file
7+
- distance:1hop
8+
- difficulty:medium
9+
- regression:issue_58_compiledtoobig
10+
fixtures:
11+
auto_garbage: true
12+
repo:
13+
initial_files:
14+
k8s/deployment.yaml: |
15+
apiVersion: apps/v1
16+
kind: Deployment
17+
metadata:
18+
name: web
19+
spec:
20+
selector:
21+
matchLabels:
22+
app: web
23+
template:
24+
metadata:
25+
labels:
26+
app: web
27+
tier: frontend
28+
spec:
29+
containers:
30+
- name: web
31+
image: nginx:1.25
32+
envFrom:
33+
- configMapRef:
34+
name: web-config
35+
k8s/service.yaml: |
36+
apiVersion: v1
37+
kind: Service
38+
metadata:
39+
name: web
40+
spec:
41+
selector:
42+
app: web
43+
ports:
44+
- port: 80
45+
targetPort: 8080
46+
k8s/configmap.yaml: |
47+
apiVersion: v1
48+
kind: ConfigMap
49+
metadata:
50+
name: web-config
51+
data:
52+
LOG_LEVEL: info
53+
changed_files:
54+
k8s/deployment.yaml: |
55+
apiVersion: apps/v1
56+
kind: Deployment
57+
metadata:
58+
name: web
59+
spec:
60+
selector:
61+
matchLabels:
62+
app: web
63+
template:
64+
metadata:
65+
labels:
66+
app: web
67+
tier: frontend
68+
spec:
69+
containers:
70+
- name: web
71+
image: nginx:1.25
72+
envFrom:
73+
- configMapRef:
74+
name: web-config
75+
k8s/service.yaml: |
76+
apiVersion: v1
77+
kind: Service
78+
metadata:
79+
name: web
80+
spec:
81+
selector:
82+
app: web
83+
ports:
84+
- port: 8080
85+
targetPort: 8080
86+
k8s/configmap.yaml: |
87+
apiVersion: v1
88+
kind: ConfigMap
89+
metadata:
90+
name: web-config
91+
data:
92+
LOG_LEVEL: info
93+
commit_message: change service port
94+
fragments:
95+
- id: deployment_via_selector
96+
selector:
97+
path: k8s/deployment.yaml
98+
anchor: 'tier: frontend'
99+
- id: service_seed
100+
selector:
101+
path: k8s/service.yaml
102+
oracle:
103+
required:
104+
- deployment_via_selector
105+
allowed: []
106+
forbidden: []

0 commit comments

Comments
 (0)