Skip to content

Commit 71ee150

Browse files
committed
Add lint command for structure definitions
1 parent 022b545 commit 71ee150

4 files changed

Lines changed: 504 additions & 0 deletions

File tree

docs/cli-reference.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,43 @@ structkit validate [-h] [-l LOG] [-c CONFIG_FILE] [-i LOG_FILE] yaml_file
8080

8181
- `yaml_file`: Path to the YAML configuration file.
8282

83+
### `lint`
84+
85+
Lint one or more StructKit YAML definitions for quality checks that are stricter than syntactic validation. The command reports warnings and errors with file paths and context. It exits with status code `1` when one or more lint errors are found, and exits `0` when only warnings or no issues are found.
86+
87+
**Usage:**
88+
89+
```sh
90+
structkit lint [-h] [-l LOG] [-c CONFIG_FILE] [-i LOG_FILE] [-s STRUCTURES_PATH] [--all] [--json] [targets ...]
91+
```
92+
93+
**Arguments:**
94+
95+
- `targets`: YAML file paths, `file://` YAML URLs, or bundled/custom structure names. Multiple targets are supported.
96+
- `--all`: Lint all bundled contrib structures, plus custom structures when `--structures-path` is supplied. Positional `targets` are ignored when `--all` is given.
97+
- `-s STRUCTURES_PATH, --structures-path STRUCTURES_PATH`: Path to custom structure definitions. Defaults to the value of the `STRUCTKIT_STRUCTURES_PATH` environment variable when it is set.
98+
- `--json`: Print machine-readable JSON with a summary and issue list.
99+
100+
**Lint rules:**
101+
102+
- Missing top-level `description` (warning).
103+
- Variables referenced in StructKit templates but not declared (error).
104+
- Declared variables that are never referenced (warning).
105+
- Duplicate file or folder entries (error).
106+
- Unsafe hooks, such as destructive shell patterns (error), and suspicious hooks, such as `curl | bash`, `sudo`, `eval`, or `chmod 777` (warning).
107+
- GitHub remote URLs that do not appear pinned to a stable tag, release, or commit SHA (warning).
108+
- Invalid entry names that are absolute paths or escape with `..` (error), and unusual name characters (warning).
109+
110+
**Examples:**
111+
112+
```sh
113+
structkit lint .struct.yaml
114+
structkit lint structkit/contribs/project/python.yaml
115+
structkit lint project/python
116+
structkit lint --all
117+
structkit lint .struct.yaml --json
118+
```
119+
83120
### `generate`
84121

85122
Generate the project structure.

structkit/commands/lint.py

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
import json
2+
import os
3+
import re
4+
from dataclasses import asdict, dataclass
5+
6+
import yaml
7+
from jinja2 import Environment, TemplateSyntaxError, meta
8+
9+
from structkit.commands import Command
10+
from structkit.commands.validate import ValidateCommand
11+
12+
13+
class _NoopLogger:
14+
def info(self, *_args, **_kwargs):
15+
pass
16+
17+
18+
@dataclass
class LintIssue:
    """A single finding produced by the lint command."""

    # Either 'error' or 'warning'; errors make the command exit with status 1.
    severity: str
    # Short rule identifier such as 'missing-description' or 'duplicate-file'.
    rule: str
    # Human-readable explanation of the finding.
    message: str
    # File the finding refers to (or a pseudo-path such as '<args>').
    path: str
    # Optional location inside the file, e.g. 'files[2]' or 'pre_hooks[0]'.
    context: str = ""
25+
26+
27+
class LintCommand(Command):
28+
"""Lint StructKit YAML files for quality issues beyond schema validity."""
29+
30+
# A ref is considered stable when it is a full 40-character lowercase hex
# commit SHA or a version-like tag such as 1.2, v1.2.3 or v1.2.3-rc.1.
STABLE_GIT_REF_RE = re.compile(
    r"^[0-9a-f]{40}$|^v?\d+\.\d+(?:\.\d+)?(?:[-+][0-9A-Za-z.-]+)?$"
)

# Hook snippets worth a manual review; reported with severity 'warning'.
SUSPICIOUS_HOOK_PATTERNS = [
    re.compile(expression)
    for expression in (
        r"\bcurl\b.*\|\s*(?:ba)?sh\b",  # curl ... | sh / bash
        r"\bwget\b.*\|\s*(?:ba)?sh\b",  # wget ... | sh / bash
        r"\beval\b",
        r"\bchmod\s+777\b",
        r"\bsudo\b",
    )
]

# Hook snippets treated as destructive; reported with severity 'error'.
UNSAFE_HOOK_PATTERNS = [
    re.compile(expression)
    for expression in (
        r"\brm\s+-rf\s+/(?:\s|$)",  # rm -rf /
        r"\brm\s+-rf\s+\$\{?\w+\}?",  # rm -rf $VAR / ${VAR}
        r":\(\)\s*\{\s*:\|:",  # shell fork bomb
    )
]

# Finds http(s) URLs embedded in arbitrary strings.
REMOTE_URL_RE = re.compile(r"https?://[^\s'\"]+")

# Characters accepted in file/folder entry names without a naming warning.
NAME_RE = re.compile(r"^[A-Za-z0-9._@{}%/+=, -]+$")
45+
46+
def __init__(self, parser):
    """Register the ``lint`` arguments and build the template parser.

    Args:
        parser: The argparse (sub)parser that this command configures.
    """
    super().__init__(parser)
    parser.description = "Lint StructKit YAML definitions for quality issues"

    targets_action = parser.add_argument(
        'targets',
        nargs='*',
        help='YAML file paths, file:// URLs, or bundled/custom structure names to lint',
    )
    # Imported here rather than at module level, as in the original code.
    from structkit.completers import structures_completer
    targets_action.completer = structures_completer

    parser.add_argument(
        '--all',
        action='store_true',
        help='Lint all bundled contrib structures plus custom structures when --structures-path is set',
    )
    parser.add_argument(
        '-s', '--structures-path',
        type=str,
        help='Path to custom structure definitions (env: STRUCTKIT_STRUCTURES_PATH)',
        default=os.environ.get('STRUCTKIT_STRUCTURES_PATH'),
    )
    parser.add_argument('--json', action='store_true', help='Print machine-readable JSON output')
    parser.set_defaults(func=self.execute)

    # Templates are only parsed by the linter, so the environment gets the
    # custom delimiters plus inert stand-ins for the globals and filters.
    delimiters = {
        'block_start_string': '{%@',
        'block_end_string': '@%}',
        'variable_start_string': '{{@',
        'variable_end_string': '@}}',
        'comment_start_string': '{#@',
        'comment_end_string': '@#}',
    }
    self.template_env = Environment(trim_blocks=True, **delimiters)

    def _nothing():
        return None

    def _nothing_for_any_args(*_args, **_kwargs):
        return None

    def _passthrough(value):
        return value

    self.template_env.globals.update(
        current_repo=_nothing,
        uuid=_nothing,
        now=_nothing,
        env=_nothing_for_any_args,
        read_file=_nothing_for_any_args,
    )
    filter_names = (
        'latest_release',
        'slugify',
        'default_branch',
        'to_yaml',
        'from_yaml',
        'to_json',
        'from_json',
    )
    self.template_env.filters.update({name: _passthrough for name in filter_names})
95+
96+
def execute(self, args):
    """Run the lint command for the parsed command-line arguments.

    Resolves the requested targets, lints each one, prints the report as
    JSON or text depending on ``args.json``, and exits with status 1 when
    at least one error-level issue was produced.

    Args:
        args: Parsed argparse namespace for the ``lint`` sub-command.

    Raises:
        SystemExit: With code 1 if any issue has severity ``'error'``.
    """
    targets = self._resolve_targets(args)

    # Having nothing to lint is itself reported as an error.
    findings = []
    if not targets:
        findings.append(
            LintIssue('error', 'missing-target', 'Provide at least one target or use --all.', '<args>')
        )
    for target in targets:
        findings += self.lint_file(target)

    report = self._print_json if args.json else self._print_text
    report(findings)

    for finding in findings:
        if finding.severity == 'error':
            raise SystemExit(1)
112+
113+
def _contribs_path(self):
114+
this_file = os.path.dirname(os.path.realpath(__file__))
115+
return os.path.join(this_file, '..', 'contribs')
116+
117+
def _resolve_targets(self, args):
118+
if args.all:
119+
roots = [self._contribs_path()]
120+
if args.structures_path:
121+
roots.insert(0, args.structures_path)
122+
return self._find_yaml_files(roots)
123+
124+
targets = []
125+
for target in args.targets:
126+
targets.append(self._resolve_target(target, args.structures_path))
127+
return targets
128+
129+
def _find_yaml_files(self, roots):
130+
files = []
131+
seen = set()
132+
for root in roots:
133+
if not root or not os.path.exists(root):
134+
continue
135+
for dirpath, _, filenames in os.walk(root):
136+
for filename in filenames:
137+
if filename.endswith(('.yaml', '.yml')):
138+
path = os.path.join(dirpath, filename)
139+
if path not in seen:
140+
files.append(path)
141+
seen.add(path)
142+
return sorted(files)
143+
144+
def _resolve_target(self, target, structures_path=None):
145+
if target.startswith('file://'):
146+
return target[7:]
147+
if target.endswith(('.yaml', '.yml')) or os.path.exists(target):
148+
return target
149+
150+
candidates = []
151+
if structures_path:
152+
candidates.append(os.path.join(structures_path, f'{target}.yaml'))
153+
candidates.append(os.path.join(structures_path, f'{target}.yml'))
154+
candidates.append(os.path.join(self._contribs_path(), f'{target}.yaml'))
155+
candidates.append(os.path.join(self._contribs_path(), f'{target}.yml'))
156+
for candidate in candidates:
157+
if os.path.exists(candidate):
158+
return candidate
159+
return target
160+
161+
def lint_file(self, path):
    """Lint a single YAML file and return the issues found.

    Args:
        path: Path of the YAML file to lint.

    Returns:
        A list of ``LintIssue`` objects. A missing file, unreadable file,
        unparsable YAML or non-mapping root yields a single error issue
        ('not-found', 'read-error', 'invalid-yaml' or 'invalid-root') and
        no further checks are run.
    """
    if not os.path.exists(path):
        return [LintIssue('error', 'not-found', f'Could not find structure target: {path}', path)]

    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f) or {}
    except yaml.YAMLError as exc:
        return [LintIssue('error', 'invalid-yaml', f'YAML could not be parsed: {exc}', path)]
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to
        # be listed explicitly to be reported instead of crashing the run.
        return [LintIssue('error', 'read-error', f'File could not be read: {exc}', path)]

    if not isinstance(config, dict):
        return [LintIssue('error', 'invalid-root', 'Top-level YAML document must be a mapping.', path)]

    # Every check takes (config, path) and returns a list of issues; the
    # order here is the order in which issues are reported.
    checks = (
        self._validate_baseline,
        self._check_description,
        self._check_duplicates,
        self._check_templates,
        self._check_hooks,
        self._check_remote_urls,
        self._check_names,
    )
    issues = []
    for check in checks:
        issues.extend(check(config, path))
    return issues
185+
186+
def _validate_baseline(self, config, path):
    """Run the ``validate`` command's checks against *config*.

    A ``ValidateCommand`` is created without running its ``__init__`` and
    given a silent logger. Its structure, folders and variables validators
    are then called in turn. The first ``ValueError`` raised is reported as
    a single 'validate' error issue; otherwise no issues are returned.
    """
    validator = ValidateCommand.__new__(ValidateCommand)
    validator.logger = _NoopLogger()

    # 'structure' wins over 'files' when it is present and non-empty.
    structure_entries = config.get('structure') or config.get('files', [])
    steps = (
        (validator._validate_structure_config, structure_entries),
        (validator._validate_folders_config, config.get('folders', [])),
        (validator._validate_variables_config, config.get('variables', [])),
    )
    try:
        for validate, section in steps:
            validate(section)
    except ValueError as exc:
        return [LintIssue('error', 'validate', str(exc), path)]
    return []
196+
197+
def _check_description(self, config, path):
198+
description = config.get('description')
199+
if not isinstance(description, str) or not description.strip():
200+
return [LintIssue('warning', 'missing-description', 'Missing top-level description.', path)]
201+
return []
202+
203+
def _check_duplicates(self, config, path):
204+
issues = []
205+
for section, rule in (('files', 'duplicate-file'), ('structure', 'duplicate-file'), ('folders', 'duplicate-folder')):
206+
seen = {}
207+
for index, item in enumerate(config.get(section, []) or []):
208+
if not isinstance(item, dict):
209+
continue
210+
for name in item:
211+
if name in seen:
212+
issues.append(LintIssue('error', rule, f"Duplicate {section[:-1]} entry '{name}'.", path, f'{section}[{index}]'))
213+
else:
214+
seen[name] = index
215+
return issues
216+
217+
def _check_templates(self, config, path):
218+
issues = []
219+
declared = self._declared_variables(config)
220+
referenced = set()
221+
for context, value in self._walk_strings(config, skip_keys={'variables'}):
222+
try:
223+
parsed = self.template_env.parse(value)
224+
except TemplateSyntaxError as exc:
225+
issues.append(LintIssue('error', 'template-syntax', f'Template syntax error: {exc.message}', path, context))
226+
continue
227+
referenced.update(meta.find_undeclared_variables(parsed))
228+
229+
referenced -= {'mappings'}
230+
for name in sorted(referenced - declared):
231+
issues.append(LintIssue('error', 'undeclared-variable', f"Variable '{name}' is referenced but not declared.", path))
232+
for name in sorted(declared - referenced):
233+
issues.append(LintIssue('warning', 'unused-variable', f"Variable '{name}' is declared but never referenced.", path))
234+
return issues
235+
236+
def _declared_variables(self, config):
237+
declared = set()
238+
for item in config.get('variables', []) or []:
239+
if isinstance(item, dict):
240+
declared.update(str(name) for name in item.keys())
241+
return declared
242+
243+
def _walk_strings(self, value, context='', skip_keys=None):
244+
skip_keys = skip_keys or set()
245+
if isinstance(value, str):
246+
yield context, value
247+
elif isinstance(value, list):
248+
for index, item in enumerate(value):
249+
yield from self._walk_strings(item, f'{context}[{index}]', skip_keys)
250+
elif isinstance(value, dict):
251+
for key, item in value.items():
252+
key_context = f'{context}.{key}' if context else str(key)
253+
if key in skip_keys:
254+
continue
255+
if isinstance(key, str):
256+
yield key_context, key
257+
yield from self._walk_strings(item, key_context, skip_keys)
258+
259+
def _check_hooks(self, config, path):
260+
issues = []
261+
for hook_key in ('pre_hooks', 'post_hooks'):
262+
for index, hook in enumerate(config.get(hook_key, []) or []):
263+
if not isinstance(hook, str):
264+
continue
265+
context = f'{hook_key}[{index}]'
266+
if any(pattern.search(hook) for pattern in self.UNSAFE_HOOK_PATTERNS):
267+
issues.append(LintIssue('error', 'unsafe-hook', 'Hook contains an unsafe destructive command.', path, context))
268+
elif any(pattern.search(hook) for pattern in self.SUSPICIOUS_HOOK_PATTERNS):
269+
issues.append(LintIssue('warning', 'suspicious-hook', 'Hook contains a suspicious shell pattern; review before use.', path, context))
270+
return issues
271+
272+
def _check_remote_urls(self, config, path):
273+
issues = []
274+
for context, value in self._walk_strings(config, skip_keys={'variables'}):
275+
for url in self.REMOTE_URL_RE.findall(value):
276+
if self._is_unpinned_url(url):
277+
issues.append(LintIssue('warning', 'unpinned-remote-url', 'Remote URL does not appear pinned to a stable ref.', path, context))
278+
return issues
279+
280+
def _is_unpinned_url(self, url):
281+
if 'github.com' not in url and 'raw.githubusercontent.com' not in url:
282+
return False
283+
if '/releases/download/' in url:
284+
return False
285+
raw_match = re.search(r'raw\.githubusercontent\.com/[^/]+/[^/]+/([^/]+)/', url)
286+
if raw_match:
287+
return not bool(self.STABLE_GIT_REF_RE.match(raw_match.group(1)))
288+
ref_match = re.search(r'[?&]ref=([^&]+)', url)
289+
if ref_match:
290+
return not bool(self.STABLE_GIT_REF_RE.match(ref_match.group(1)))
291+
return any(branch in url for branch in ('/main/', '/master/', '/HEAD/', '/develop/')) or not re.search(r'/[0-9a-f]{40}/|/v?\d+\.\d+', url)
292+
293+
def _check_names(self, config, path):
294+
issues = []
295+
for section in ('files', 'structure', 'folders'):
296+
for index, item in enumerate(config.get(section, []) or []):
297+
if not isinstance(item, dict):
298+
continue
299+
for name in item:
300+
if name.startswith('/') or '..' in name.split('/'):
301+
issues.append(LintIssue('error', 'invalid-name', f"Entry name '{name}' must be relative and stay within the target directory.", path, f'{section}[{index}]'))
302+
elif '\\' in name or not self.NAME_RE.match(str(name)):
303+
issues.append(LintIssue('warning', 'naming-convention', f"Entry name '{name}' uses unusual characters.", path, f'{section}[{index}]'))
304+
return issues
305+
306+
def _print_json(self, issues):
307+
payload = {
308+
'summary': {
309+
'errors': sum(1 for issue in issues if issue.severity == 'error'),
310+
'warnings': sum(1 for issue in issues if issue.severity == 'warning'),
311+
},
312+
'issues': [asdict(issue) for issue in issues],
313+
}
314+
print(json.dumps(payload, indent=2, sort_keys=True))
315+
316+
def _print_text(self, issues):
317+
if not issues:
318+
print('✅ No lint issues found.')
319+
return
320+
321+
for issue in issues:
322+
label = 'ERROR' if issue.severity == 'error' else 'WARN'
323+
context = f' [{issue.context}]' if issue.context else ''
324+
print(f'{label} {issue.path}{context}: {issue.message} ({issue.rule})')
325+
errors = sum(1 for issue in issues if issue.severity == 'error')
326+
warnings = sum(1 for issue in issues if issue.severity == 'warning')
327+
print(f'\nLint summary: {errors} error(s), {warnings} warning(s).')

structkit/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from structkit.commands.generate import GenerateCommand
77
from structkit.commands.info import InfoCommand
88
from structkit.commands.validate import ValidateCommand
9+
from structkit.commands.lint import LintCommand
910
from structkit.commands.list import ListCommand
1011
from structkit.commands.search import SearchCommand
1112
from structkit.commands.generate_schema import GenerateSchemaCommand
@@ -33,6 +34,7 @@ def get_parser():
3334

3435
InfoCommand(subparsers.add_parser('info', help='Show information about the package'))
3536
ValidateCommand(subparsers.add_parser('validate', help='Validate the YAML configuration file'))
37+
LintCommand(subparsers.add_parser('lint', help='Lint YAML structure definitions for quality issues'))
3638
GenerateCommand(subparsers.add_parser('generate', help='Generate the project structure'))
3739
ListCommand(subparsers.add_parser('list', help='List available structures'))
3840
SearchCommand(subparsers.add_parser('search', help='Search available structures by keyword'))

0 commit comments

Comments
 (0)