Skip to content

Commit 84090f5

Browse files
committed
feat(connector-linter): add VC3xx code checks
1 parent fe7f863 commit 84090f5

27 files changed

Lines changed: 4040 additions & 1 deletion
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import ast
2+
from pathlib import Path
3+
4+
from connector_linter.models import ConnectorContext
5+
6+
# ---------------------------------------------------------------------------
7+
# Source reading
8+
# ---------------------------------------------------------------------------
9+
10+
11+
def read_all_python_sources(ctx: ConnectorContext) -> dict[Path, str]:
    """Read every ``*.py`` file found under the connector's ``src/`` directory.

    Args:
        ctx: Connector context; only ``ctx.path`` (the connector root) is used.

    Returns:
        Mapping of file path (relative to ``ctx.path``) to decoded file
        content, inserted in sorted path order. The mapping is empty when
        ``src/`` is missing or is not a directory. Files that cannot be read
        are left out.
    """
    sources: dict[Path, str] = {}
    # Convention: all connector Python code lives under <connector>/src/
    src_dir = ctx.path / "src"
    if not src_dir.is_dir():
        return sources
    # Sorted so the mapping order does not depend on the order in which the
    # filesystem happens to enumerate directory entries.
    for py_file in sorted(src_dir.rglob("*.py")):
        # Key by relative path (from connector root) for portable reporting
        rel_path = py_file.relative_to(ctx.path)
        try:
            # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading
            # byte-order mark; left in place, the BOM reaches ast.parse() as
            # U+FEFF and the file is rejected with a SyntaxError.
            # errors="replace" avoids UnicodeDecodeError on malformed files.
            sources[rel_path] = py_file.read_text(
                encoding="utf-8-sig", errors="replace"
            )
        except OSError:
            # Skip unreadable files (permissions, broken symlinks, etc.)
            continue
    return sources
28+
29+
30+
# ---------------------------------------------------------------------------
31+
# AST helpers — structural analysis of Python source
32+
# ---------------------------------------------------------------------------
33+
34+
35+
def parse_sources(sources: dict[Path, str]) -> dict[Path, ast.Module]:
    """Parse all source files into AST modules.

    Args:
        sources: Source text keyed by file path.

    Returns:
        Parsed module per file path, in the iteration order of ``sources``.
        Files that cannot be parsed are silently skipped, so the result may
        have fewer entries than ``sources``.
    """
    trees: dict[Path, ast.Module] = {}
    for file_path, content in sources.items():
        try:
            trees[file_path] = ast.parse(content, filename=str(file_path))
        except (SyntaxError, ValueError, RecursionError):
            # SyntaxError: invalid Python.
            # ValueError: source text containing NUL bytes (Python < 3.12
            #   reports this as ValueError rather than SyntaxError).
            # RecursionError: pathologically nested expressions.
            # Such files can't be analyzed structurally, but other checks
            # (regex-based) may still find issues — one bad file must not
            # abort the whole run.
            continue
    return trees
Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,27 @@
1-
"""VC3xx — Code quality checks."""
1+
"""VC3xx — Code quality checks.
2+
3+
VC301: Connector must define an author identity.
4+
VC302: Author must be referenced on STIX entities (created_by_ref).
5+
VC303: CONNECTOR_TYPE must be defined in application code, not read from env.
6+
VC304: Ensure TLP markings are checked (check_max_tlp).
7+
VC305: Connector must implement Base Settings from connectors-sdk.
8+
VC306: Connector log level should default to 'error'.
9+
VC307: Except blocks should use error/warning logging, not debug/info.
10+
VC308: Main entry point must use traceback for error handling.
11+
VC309: Connector must use only absolute imports, no relative imports.
12+
VC310: External references must not be added by default to non-Identity objects.
13+
VC311: Connector should use TLP markings on entities with appropriate level.
14+
VC312: send_stix2_bundle must use cleanup_inconsistent_bundle=True.
15+
VC313: STIX SDO/SRO objects must use pycti.XXX.generate_id() for deterministic IDs.
16+
VC314: External-import connectors must use schedule_process or schedule_iso.
17+
VC315: Connector must call initiate_work before processing.
18+
VC316: Connector must close work with to_processed after processing.
19+
VC317: initiate_work should only be called when data is available.
20+
VC318: Internal-enrichment connectors must use helper.listen().
21+
VC319: Enrichment connector must return original bundle when not in scope.
22+
VC320: Enrichment connector must enforce TLP access control.
23+
VC321: Enrichment connector must be playbook-compatible.
24+
VC322: Enrichment connector must read data['stix_objects'] (former bundle).
25+
VC323: Stream connectors must use helper.listen_stream().
26+
VC324: Relationship should not set both start_time and stop_time.
27+
"""
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""VC301 — Connector must define an author identity.
2+
3+
Uses AST to detect author identity definitions by looking for constructor
4+
calls (``Identity(...)``, ``OrganizationAuthor(...)``, ``stix2.Identity(...)``)
5+
and API calls (``helper.api.identity.create(...)``).
6+
7+
Import validation for bare ``Identity(...)`` calls is also AST-based:
8+
the call is only counted when ``Identity`` is imported from ``stix2``
9+
or ``pycti`` (not some unrelated class).
10+
"""
11+
12+
import ast
13+
from pathlib import Path
14+
15+
from connector_linter.models import (
16+
CheckFinding,
17+
ConnectorContext,
18+
Severity,
19+
no_python_sources_finding,
20+
)
21+
from connector_linter.registry import CheckRegistry
22+
23+
# ---------------------------------------------------------------------------
24+
# Author-definition patterns (AST-based)
25+
# ---------------------------------------------------------------------------
26+
27+
# Function/constructor names that unambiguously define an author
28+
_UNAMBIGUOUS_AUTHOR_CALLS = {"OrganizationAuthor"}
29+
30+
# Import modules that confirm bare Identity() comes from stix2/pycti
31+
_IDENTITY_MODULES = {"stix2", "pycti"}
32+
33+
34+
def _has_identity_import(trees: dict[Path, ast.Module]) -> bool:
35+
"""Check if any source file imports Identity from stix2 or pycti."""
36+
for tree in trees.values():
37+
for node in ast.walk(tree):
38+
if isinstance(node, ast.ImportFrom) and node.module:
39+
# from stix2[.xxx] import Identity / from pycti[.xxx] import Identity
40+
mod_root = node.module.split(".")[0]
41+
if mod_root in _IDENTITY_MODULES:
42+
for alias in node.names:
43+
if alias.name == "Identity":
44+
return True
45+
elif isinstance(node, ast.Import):
46+
# import stix2 (Identity accessed as stix2.Identity)
47+
for alias in node.names:
48+
if alias.name.split(".")[0] in _IDENTITY_MODULES:
49+
return True
50+
return False
51+
52+
53+
def _is_author_call(node: ast.Call, identity_imported: bool) -> bool:
54+
"""Determine if an ast.Call node represents an author definition.
55+
56+
Recognized patterns:
57+
1. OrganizationAuthor(...) — connectors-sdk
58+
2. stix2.Identity(...) — qualified stix2 constructor
59+
3. Identity(...) — bare, only if imported from stix2/pycti
60+
4. *.api.identity.create(...) — legacy pycti API call
61+
"""
62+
func = node.func
63+
64+
# Pattern 1 & 3: bare function call — OrganizationAuthor(...) or Identity(...)
65+
if isinstance(func, ast.Name):
66+
if func.id in _UNAMBIGUOUS_AUTHOR_CALLS:
67+
return True
68+
if func.id == "Identity" and identity_imported:
69+
return True
70+
71+
# Patterns 2 & 4: attribute-based calls
72+
if isinstance(func, ast.Attribute):
73+
# Pattern 2: stix2.Identity(...)
74+
if func.attr == "Identity" and isinstance(func.value, ast.Name):
75+
if func.value.id == "stix2":
76+
return True
77+
78+
# Pattern 4: *.api.identity.create(...)
79+
if (
80+
func.attr == "create"
81+
and isinstance(func.value, ast.Attribute)
82+
and func.value.attr == "identity"
83+
and isinstance(func.value.value, ast.Attribute)
84+
and func.value.value.attr == "api"
85+
):
86+
return True
87+
88+
return False
89+
90+
91+
def find_author_definitions(
    sources: dict[Path, str],
    trees: dict[Path, ast.Module],
) -> list[tuple[Path, int, str]]:
    """Find author definition locations using AST analysis.

    Args:
        sources: Raw Python source content keyed by relative path (used for
            line-text extraction in findings).
        trees: Pre-parsed AST modules (e.g. ``ctx.python_trees``). Passing
            the cached property avoids redundant parsing across checks.

    Returns:
        List of (file_path, line_number, matched_line_text). The text is the
        stripped source line the call starts on, or ``""`` when that line
        cannot be looked up (no source entry for the file, or a line number
        outside the text).
    """
    identity_imported = _has_identity_import(trees)

    hits: list[tuple[Path, int, str]] = []
    for file_path, tree in trees.items():
        # Split exactly where the parser counts a new line (\n, \r\n, \r).
        # str.splitlines() additionally breaks on form feed, \x1c-\x1e, \x85,
        # \u2028 and \u2029, which would shift every later index away from
        # the AST's ``lineno``.
        text = sources.get(file_path, "")
        content_lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
        for node in ast.walk(tree):
            if isinstance(node, ast.Call) and _is_author_call(node, identity_imported):
                line_text = (
                    content_lines[node.lineno - 1].strip()
                    if 1 <= node.lineno <= len(content_lines)
                    else ""
                )
                hits.append((file_path, node.lineno, line_text))

    return hits
121+
122+
123+
@CheckRegistry.register(
    code="VC301",
    name="author-defined",
    description="Connector must define an author identity",
    severity=Severity.ERROR,
)
def check_author_defined(ctx: ConnectorContext) -> list[CheckFinding]:
    """Report whether the connector source contains an author identity definition.

    Returns a single-element list in every case:

    * the shared "no Python sources" finding when ``ctx.python_sources`` is empty;
    * an ERROR finding with a suggestion when no definition is found;
    * an INFO finding pointing at the first definition otherwise.
    """
    python_sources = ctx.python_sources
    if not python_sources:
        return [no_python_sources_finding()]

    definitions = find_author_definitions(python_sources, ctx.python_trees)

    # Failure path first: nothing in the source looks like an author definition
    if not definitions:
        missing_author = CheckFinding(
            message="No author identity definition found in connector source",
            severity=Severity.ERROR,
            suggestion=(
                "Define an author using one of: "
                "stix2.Identity(name=..., identity_class='organization'), "
                "OrganizationAuthor(name=...), "
                "or self.helper.api.identity.create(type='Organization', name=...)"
            ),
        )
        return [missing_author]

    # Success: only the first hit's location is reported
    first_file, first_line, _line_text = definitions[0]
    author_found = CheckFinding(
        message="Author identity defined",
        severity=Severity.INFO,
        file_path=first_file,
        line=first_line,
    )
    return [author_found]
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""VC302 — Author must be referenced on STIX entities (created_by_ref).
2+
3+
Uses AST to detect ``author=`` as a keyword argument in function calls
4+
(avoids false positives from variable assignments like ``author = "John"``).
5+
Also detects ``created_by_ref=`` and ``x_opencti_created_by_ref`` via AST.
6+
"""
7+
8+
import ast
9+
from pathlib import Path
10+
11+
from connector_linter.checks.vc3xx_code.vc301_author_defined import (
12+
find_author_definitions,
13+
)
14+
from connector_linter.models import (
15+
CheckFinding,
16+
ConnectorContext,
17+
Severity,
18+
no_python_sources_finding,
19+
)
20+
from connector_linter.registry import CheckRegistry
21+
22+
# ---------------------------------------------------------------------------
23+
# Keyword argument names that attach an author identity to STIX entities:
24+
#
25+
# created_by_ref — standard STIX 2.1 field (SDOs)
26+
# author — connectors-sdk model parameter
27+
# x_opencti_created_by_ref — OpenCTI custom property (used on observables/SCOs)
28+
# ---------------------------------------------------------------------------
29+
_AUTHOR_KWARGS = {"created_by_ref", "author", "x_opencti_created_by_ref"}
30+
31+
32+
def _find_author_references(
33+
trees: dict[Path, ast.Module],
34+
) -> list[tuple[Path, int, str]]:
35+
"""Find keyword arguments that reference an author on STIX entities.
36+
37+
Detects ``created_by_ref=``, ``author=``, and ``x_opencti_created_by_ref``
38+
as keyword arguments in function/constructor calls — not plain assignments.
39+
"""
40+
hits: list[tuple[Path, int, str]] = []
41+
for file_path, tree in trees.items():
42+
for node in ast.walk(tree):
43+
# Only check Call nodes — we want keyword arguments in function/
44+
# constructor calls, not standalone assignments like `author = "John"`
45+
if not isinstance(node, ast.Call):
46+
continue
47+
for kw in node.keywords:
48+
if kw.arg in _AUTHOR_KWARGS:
49+
# Use the keyword node's own lineno when available;
50+
# fall back to the call node's lineno otherwise
51+
hits.append(
52+
(
53+
file_path,
54+
getattr(kw, "lineno", node.lineno) or node.lineno,
55+
kw.arg,
56+
),
57+
)
58+
return hits
59+
60+
61+
@CheckRegistry.register(
    code="VC302",
    name="author-referenced-on-entities",
    description="Author must be referenced on STIX entities (created_by_ref)",
    severity=Severity.ERROR,
)
def check_author_referenced(ctx: ConnectorContext) -> list[CheckFinding]:
    """Verify that a defined author is actually attached to STIX entities.

    Returns a single-element list in every case:

    * the shared "no Python sources" finding when ``ctx.python_sources`` is empty;
    * an ERROR finding pointing to VC301 when no author definition exists;
    * an ERROR finding with a suggestion when an author exists but no
      reference to it is found;
    * an INFO finding (location of the first reference, total count in the
      message) otherwise.
    """
    python_sources = ctx.python_sources
    if not python_sources:
        return [no_python_sources_finding()]

    # Prerequisite (VC301): without an author definition there is nothing to
    # reference, so fail early with a message that points there.
    if not find_author_definitions(python_sources, ctx.python_trees):
        return [
            CheckFinding(
                message="No author defined — cannot reference author on entities",
                severity=Severity.ERROR,
                suggestion="Define an author first (see VC301), then use created_by_ref= on STIX objects",
            ),
        ]

    # An author exists — look for the places where it is attached (AST-based)
    references = _find_author_references(ctx.python_trees)

    if not references:
        return [
            CheckFinding(
                message="Author is defined but never referenced on STIX entities",
                severity=Severity.ERROR,
                suggestion=(
                    "Use created_by_ref=self.author.id on SDOs, "
                    "or x_opencti_created_by_ref in custom_properties for observables, "
                    "or author= parameter when using connectors-sdk models"
                ),
            ),
        ]

    first_file, first_line, _matched_name = references[0]
    return [
        CheckFinding(
            message=f"Author referenced on entities ({len(references)} occurrence(s) found)",
            severity=Severity.INFO,
            file_path=first_file,
            line=first_line,
        ),
    ]

0 commit comments

Comments
 (0)