Skip to content

Commit 2f39cd9

Browse files
committed
feat(connector-linter): add VC1xx config and VC2xx metadata checks
1 parent 1d56c72 commit 2f39cd9

11 files changed

Lines changed: 1492 additions & 0 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
"""VC1xx — Configuration checks.
2+
3+
Validates connector configuration files (docker-compose.yml, .env.sample,
4+
config.yml.sample) for compliance with OpenCTI conventions.
5+
6+
VC101 config-token-default OPENCTI_TOKEN must default to ChangeMe
7+
VC102 config-url-default OPENCTI_URL must default to http://localhost
8+
VC103 config-variable-prefix Env vars must use OPENCTI_, CONNECTOR_, or <NAME>_ prefix
9+
VC104 config-file-samples config.yml.sample + docker-compose/env must exist
10+
VC105 no-absolute-import-date Import dates must use ISO duration, not absolute dates
11+
"""
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
"""Helpers for configuration-file parsing.
2+
3+
Extracts environment variables from ``docker-compose.yml`` and
4+
``.env.sample`` files, including commented-out lines. Also locates
5+
``config.yml.sample`` and scans for ``ChangeMe`` placeholder values.
6+
"""
7+
8+
import re
9+
from dataclasses import dataclass
10+
from pathlib import Path
11+
12+
from connector_linter.models import ConnectorContext
13+
14+
15+
@dataclass
class EnvVar:
    """A parsed environment variable.

    One instance is produced for every line of a configuration file that
    matches one of the env-var regexes in this module, whether or not the
    line is commented out.
    """

    # Variable name as captured by the regex ``name`` group.
    name: str
    # Captured value with surrounding whitespace removed (may be empty).
    value: str
    # 1-based line number of the entry inside ``file_path``.
    line: int
    # File the entry was read from.
    file_path: Path
    # True when the line was matched with a leading "#" (commented out).
    is_commented: bool
24+
25+
26+
# ---------------------------------------------------------------------------
# Regex: docker-compose.yml environment lines
#
# Matches lines like:
#     - OPENCTI_URL=http://localhost          (uncommented)
#   # - OPENCTI_URL=http://localhost          (commented out)
#
# Capture groups:
#   commented  — leading "#" (present when the line is commented out)
#   name       — uppercase env var name (e.g. OPENCTI_TOKEN)
#   value      — everything after "=" up to an optional inline comment
#
# Trailing inline comments (# …) are stripped from the value.  As in YAML,
# a "#" only opens a comment when it is preceded by whitespace; a "#" that
# directly follows "=" is also treated as a comment (empty value).  A "#"
# embedded in the value itself (e.g. "http://host/#/path") is kept.
# ---------------------------------------------------------------------------
_COMPOSE_ENV_RE = re.compile(
    r"^(?P<commented>\s*#)?\s*-\s*(?P<name>[A-Z][A-Z0-9_]*)="
    r"(?P<value>.*?)(?:(?:(?<==)|\s+)#.*)?\s*$",
)
43+
44+
# ---------------------------------------------------------------------------
45+
# Regex: .env.sample (dotenv-style) lines
46+
#
47+
# Matches lines like:
48+
# OPENCTI_TOKEN=ChangeMe (uncommented)
49+
# # OPENCTI_TOKEN=ChangeMe (commented out)
50+
#
51+
# Same capture groups as _COMPOSE_ENV_RE (commented, name, value).
52+
# The difference is the absence of the YAML list marker "- ".
53+
# ---------------------------------------------------------------------------
54+
_DOTENV_RE = re.compile(
55+
r"^(?P<commented>\s*#)?\s*(?P<name>[A-Z][A-Z0-9_]*)=(?P<value>[^#\n]*?)(?:\s*#.*)?\s*$",
56+
)
57+
58+
59+
def _parse_lines(
    file_path: Path,
    lines: list[str],
    pattern: re.Pattern[str],
) -> list[EnvVar]:
    """Build an ``EnvVar`` for every line that ``pattern`` matches.

    Args:
        file_path: File the lines came from; stored on each result.
        lines: Raw lines of the file, in order.
        pattern: Compiled regex exposing the named groups ``commented``,
            ``name`` and ``value``.

    Returns:
        One ``EnvVar`` per matching line, in file order.  Line numbers are
        1-based.  Commented and uncommented entries are both returned;
        ``is_commented`` tells them apart so callers can filter.
    """
    return [
        EnvVar(
            name=hit.group("name"),
            value=hit.group("value").strip(),
            line=number,
            file_path=file_path,
            is_commented=bool(hit.group("commented")),
        )
        for number, text in enumerate(lines, start=1)
        if (hit := pattern.match(text))
    ]
84+
85+
86+
def extract_env_vars_from_docker_compose(ctx: ConnectorContext) -> list[EnvVar]:
    """Parse the env entries of ``docker-compose.yml`` in the connector directory.

    Returns an empty list when the file does not exist; otherwise the file
    is read as UTF-8 and every line is run through ``_COMPOSE_ENV_RE``.
    """
    target = ctx.path / "docker-compose.yml"
    if target.is_file():
        with target.open(encoding="utf-8") as handle:
            return _parse_lines(target, list(handle), _COMPOSE_ENV_RE)
    return []
93+
94+
95+
def extract_env_vars_from_env_sample(ctx: ConnectorContext) -> list[EnvVar]:
    """Parse the entries of ``.env.sample`` in the connector directory.

    Returns an empty list when the file does not exist; otherwise the file
    is read as UTF-8 and every line is run through ``_DOTENV_RE``.
    """
    target = ctx.path / ".env.sample"
    if target.is_file():
        with target.open(encoding="utf-8") as handle:
            return _parse_lines(target, list(handle), _DOTENV_RE)
    return []
102+
103+
104+
def extract_all_env_vars(ctx: ConnectorContext) -> list[EnvVar]:
    """Collect env vars from both supported configuration files.

    The result lists the ``docker-compose.yml`` entries first, followed by
    the ``.env.sample`` entries.
    """
    from_compose = extract_env_vars_from_docker_compose(ctx)
    from_dotenv = extract_env_vars_from_env_sample(ctx)
    return [*from_compose, *from_dotenv]
109+
110+
111+
def derive_connector_prefixes(ctx: ConnectorContext) -> list[str]:
    """Derive valid connector-specific prefixes from the directory name.

    The upper-cased directory name yields two candidates: one with hyphens
    turned into underscores and one with hyphens dropped (a convention some
    legacy connectors use).  Duplicates collapse and the result is sorted.

    Examples:
        ``mandiant``        → ``["MANDIANT"]``
        ``abuse-ssl``       → ``["ABUSESSL", "ABUSE_SSL"]``
        ``recorded-future`` → ``["RECORDEDFUTURE", "RECORDED_FUTURE"]``
    """
    upper_name = ctx.path.name.upper()
    candidates = {
        upper_name.replace("-", "_"),  # "abuse-ssl" → "ABUSE_SSL"
        upper_name.replace("-", ""),  # "abuse-ssl" → "ABUSESSL"
    }
    return sorted(candidates)
128+
129+
130+
def find_config_yml_sample(ctx: ConnectorContext) -> Path | None:
    """Return the path of ``config.yml.sample``, or ``None`` if absent.

    The connector root is checked first, then its ``src/`` sub-directory;
    the first location that is an existing file wins.
    """
    locations = (
        ctx.path / "config.yml.sample",
        ctx.path / "src" / "config.yml.sample",
    )
    return next((loc for loc in locations if loc.is_file()), None)
140+
141+
142+
def has_docker_compose_env(ctx: ConnectorContext) -> bool:
    """Tell whether ``docker-compose.yml`` yields at least one env entry.

    False when the file is missing or no line matches the compose env
    pattern.  Commented-out entries are reported by the extractor as well,
    so they count towards a True result.
    """
    return len(extract_env_vars_from_docker_compose(ctx)) > 0
145+
146+
147+
def has_env_sample(ctx: ConnectorContext) -> bool:
    """Tell whether a ``.env.sample`` file sits in the connector directory."""
    sample = ctx.path.joinpath(".env.sample")
    return sample.is_file()
150+
151+
152+
@dataclass
class ChangeMeHit:
    """A ChangeMe value found in a config file with wrong case.

    Produced by ``find_bad_changeme_values`` for each active (uncommented)
    line whose placeholder differs from the canonical ``ChangeMe`` spelling.
    """

    # File in which the placeholder was found.
    file_path: Path
    # 1-based line number of the offending line.
    line: int
    # The placeholder text exactly as matched in the file (e.g. "CHANGEME").
    raw_value: str
159+
160+
161+
# ---------------------------------------------------------------------------
# Regex: case-insensitive "ChangeMe" placeholder detector
#
# Matches the word "ChangeMe" regardless of case (CHANGEME, changeme, etc.)
# appearing as a YAML value (after ":") or env value (after "="):
#   OPENCTI_TOKEN=changeme      → matches "changeme"
#   token: 'CHANGEME'           → matches "CHANGEME"
#
# The placeholder is also matched when it starts at the very beginning of a
# line, and optional whitespace between "change" and "me" is accepted
# (so "change me" matches too).  The placeholder must be the last thing on
# the line apart from a closing quote, whitespace, or an inline comment.
#
# Optional surrounding quotes (' or ") and trailing inline comments are
# tolerated but not captured; only the placeholder itself lands in the
# ``value`` group.
# ---------------------------------------------------------------------------
_CHANGEME_LINE_RE = re.compile(
    r"(?:^|[=:]\s*['\"]?)(?P<value>change\s*me)['\"]?\s*(?:#.*)?$",
    re.MULTILINE | re.IGNORECASE,
)
176+
177+
178+
def find_bad_changeme_values(file_path: Path) -> list[ChangeMeHit]:
    """Report ChangeMe placeholders that are not spelled exactly ``ChangeMe``.

    Every active line of ``file_path`` is searched with
    ``_CHANGEME_LINE_RE``; a match whose text differs from the canonical
    ``ChangeMe`` (e.g. ``CHANGEME``, ``changeme``) becomes a hit.

    Lines whose first non-blank character is ``#`` are ignored: they are
    inactive, so correcting their case would only add noise, and they may
    deliberately show another spelling as documentation.

    Returns an empty list when ``file_path`` is not an existing file.
    """
    if not file_path.is_file():
        return []

    found: list[ChangeMeHit] = []
    with file_path.open(encoding="utf-8") as handle:
        for number, text in enumerate(handle, start=1):
            # Fully commented line — inactive, nothing to check.
            if text.lstrip().startswith("#"):
                continue
            hit = _CHANGEME_LINE_RE.search(text)
            if hit is None:
                continue
            spelling = hit.group("value").strip()
            # Exact canonical spelling is fine; anything else is reported.
            if spelling != "ChangeMe":
                found.append(ChangeMeHit(file_path, number, spelling))
    return found
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
"""VC101 — OPENCTI_TOKEN must default to ``ChangeMe``.
2+
3+
Following the January 2026 alignment commit, all configuration files
4+
must use exactly ``ChangeMe`` as the placeholder value for
5+
``OPENCTI_TOKEN`` (not ``CHANGEME``, ``changeme``, or a real token).
6+
7+
Environment-variable references like ``${OPENCTI_TOKEN}`` are acceptable.
8+
9+
Scope: Common (all connector types).
10+
"""
11+
12+
from connector_linter.checks.vc1xx_config._helpers import extract_all_env_vars
13+
from connector_linter.models import CheckFinding, ConnectorContext, Severity
14+
from connector_linter.registry import CheckRegistry
15+
16+
# The only accepted literal placeholder for OPENCTI_TOKEN (case-sensitive).
_VALID_PLACEHOLDER = "ChangeMe"
17+
18+
19+
def _redact_token_value(value: str) -> str:
    """Return a preview of ``value`` that is safe to print in lint output.

    At most the first four characters are revealed, and never more than a
    quarter of the value, so that short secrets are not disclosed in full.
    The total length is appended to help the author recognise the entry.
    """
    visible = value[: min(4, len(value) // 4)]
    return f"{visible}*** ({len(value)} chars)"


@CheckRegistry.register(
    code="VC101",
    name="config-token-default",
    description="OPENCTI_TOKEN must default to ChangeMe",
    severity=Severity.ERROR,
)
def check_config_token(ctx: ConnectorContext) -> list[CheckFinding]:
    """Check OPENCTI_TOKEN placeholder value in configuration files.

    Args:
        ctx: Connector being linted; its directory is scanned for
            ``docker-compose.yml`` and ``.env.sample``.

    Returns:
        A single failing finding when no env vars at all, or no uncommented
        ``OPENCTI_TOKEN`` entry, could be extracted.  Otherwise one finding
        per uncommented ``OPENCTI_TOKEN`` entry, passing only for an
        ``${...}`` reference or the exact placeholder ``ChangeMe``.
    """
    # Step 1: Gather all env vars from docker-compose.yml and .env.sample
    all_vars = extract_all_env_vars(ctx)

    if not all_vars:
        return [
            CheckFinding(
                message="No configuration file found (docker-compose.yml or .env.sample)",
                passed=False,
                suggestion="Add a docker-compose.yml with environment variables.",
            ),
        ]

    # Step 2: Keep only uncommented OPENCTI_TOKEN entries.
    # Commented-out lines are informational — only active values matter.
    token_vars = [
        v for v in all_vars if v.name == "OPENCTI_TOKEN" and not v.is_commented
    ]

    if not token_vars:
        return [
            CheckFinding(
                message="OPENCTI_TOKEN not found in configuration files",
                passed=False,
                suggestion="Add OPENCTI_TOKEN=ChangeMe to docker-compose.yml.",
            ),
        ]

    # Step 3: Validate each OPENCTI_TOKEN occurrence.
    #
    # Decision tree for each value:
    #   ${...}      → PASS  (env reference — delegated to runtime)
    #   ChangeMe    → PASS  (canonical placeholder, exact case)
    #   changeme/*  → FAIL  (wrong case — must match Jan 2026 alignment)
    #   (empty)     → FAIL  (must have a placeholder)
    #   anything    → FAIL  (likely a real token committed by mistake)
    results: list[CheckFinding] = []
    for var in token_vars:
        value = var.value

        # Environment variable reference — e.g. ${OPENCTI_TOKEN}.
        # docker-compose files often delegate to the host environment via
        # ${VAR} syntax, so the actual secret is never stored in the repo.
        if value.startswith("${") and value.endswith("}"):
            results.append(
                CheckFinding(
                    message=f"OPENCTI_TOKEN uses env reference ({value})",
                    passed=True,
                    file_path=var.file_path,
                    line=var.line,
                ),
            )
            continue

        if value == _VALID_PLACEHOLDER:
            results.append(
                CheckFinding(
                    message="OPENCTI_TOKEN=ChangeMe ✓",
                    passed=True,
                    file_path=var.file_path,
                    line=var.line,
                ),
            )
        elif value.lower() == "changeme":
            # Case mismatch — the Jan 2026 alignment mandates exact "ChangeMe"
            results.append(
                CheckFinding(
                    message=f"OPENCTI_TOKEN={value} — wrong case",
                    passed=False,
                    file_path=var.file_path,
                    line=var.line,
                    suggestion=f"Change from '{value}' to 'ChangeMe' (exact case).",
                ),
            )
        elif not value:
            results.append(
                CheckFinding(
                    message="OPENCTI_TOKEN has empty value",
                    passed=False,
                    file_path=var.file_path,
                    line=var.line,
                    suggestion="Set OPENCTI_TOKEN=ChangeMe as the placeholder value.",
                ),
            )
        else:
            # Non-standard value — could be a real token accidentally committed.
            # Lint output often ends up in CI logs, so only a redacted preview
            # is shown; file_path and line already pinpoint the entry.
            results.append(
                CheckFinding(
                    message=(
                        "OPENCTI_TOKEN has non-standard value: "
                        f"{_redact_token_value(value)}"
                    ),
                    passed=False,
                    file_path=var.file_path,
                    line=var.line,
                    suggestion=(
                        "Use 'ChangeMe' as the placeholder value. "
                        "Never commit real tokens."
                    ),
                ),
            )

    return results

0 commit comments

Comments
 (0)