|
| 1 | +import json |
| 2 | +import os |
| 3 | +import re |
| 4 | +from dataclasses import asdict, dataclass |
| 5 | + |
| 6 | +import yaml |
| 7 | +from jinja2 import Environment, TemplateSyntaxError, meta |
| 8 | + |
| 9 | +from structkit.commands import Command |
| 10 | +from structkit.commands.validate import ValidateCommand |
| 11 | + |
| 12 | + |
| 13 | +class _NoopLogger: |
| 14 | + def info(self, *_args, **_kwargs): |
| 15 | + pass |
| 16 | + |
| 17 | + |
@dataclass
class LintIssue:
    """One finding reported by the linter.

    Instances are turned into plain dicts with ``dataclasses.asdict`` for the
    JSON report and formatted field by field for the text report.
    """

    # Severity label; the linter emits 'error' and 'warning'.
    severity: str
    # Short rule identifier, e.g. 'invalid-yaml' or 'unused-variable'.
    rule: str
    # Human-readable explanation of the finding.
    message: str
    # File (or pseudo-location such as '<args>') the finding belongs to.
    path: str
    # Optional location inside the document, e.g. 'files[2]'; empty when not applicable.
    context: str = ""
| 25 | + |
| 26 | + |
class LintCommand(Command):
    """Lint StructKit YAML files for quality issues beyond schema validity.

    Each target is loaded with ``yaml.safe_load`` and passed through a set of
    independent checks (baseline validation, description, duplicate entries,
    template syntax and variable usage, hook safety, remote URL pinning and
    entry names). Findings are collected as ``LintIssue`` records and printed
    as text or JSON; ``execute`` exits with status 1 when any finding has
    severity ``error``.
    """

    # A ref counts as "stable" when it is a full 40-hex commit SHA or a
    # version-like tag (optional leading "v", optional pre-release/build suffix).
    STABLE_GIT_REF_RE = re.compile(r"^[0-9a-f]{40}$|^v?\d+\.\d+(?:\.\d+)?(?:[-+][0-9A-Za-z.-]+)?$")
    # Shell patterns that deserve a human review (reported as warnings).
    SUSPICIOUS_HOOK_PATTERNS = [
        re.compile(r"\bcurl\b.*\|\s*(?:ba)?sh\b"),
        re.compile(r"\bwget\b.*\|\s*(?:ba)?sh\b"),
        re.compile(r"\beval\b"),
        re.compile(r"\bchmod\s+777\b"),
        re.compile(r"\bsudo\b"),
    ]
    # Destructive shell patterns (reported as errors): "rm -rf /",
    # "rm -rf $VAR" and the classic ":(){ :|:" fork bomb.
    UNSAFE_HOOK_PATTERNS = [
        re.compile(r"\brm\s+-rf\s+/(?:\s|$)"),
        re.compile(r"\brm\s+-rf\s+\$\{?\w+\}?"),
        re.compile(r":\(\)\s*\{\s*:\|:"),
    ]
    REMOTE_URL_RE = re.compile(r"https?://[^\s'\"]+")
    # Characters considered unremarkable in file/folder entry names.
    NAME_RE = re.compile(r"^[A-Za-z0-9._@{}%/+=, -]+$")

    def __init__(self, parser):
        """Register the command's CLI arguments and build the lint-only Jinja environment.

        Args:
            parser: argparse (sub)parser this command attaches its arguments to.
        """
        super().__init__(parser)
        parser.description = "Lint StructKit YAML definitions for quality issues"
        target = parser.add_argument(
            'targets',
            nargs='*',
            help='YAML file paths, file:// URLs, or bundled/custom structure names to lint',
        )
        from structkit.completers import structures_completer
        target.completer = structures_completer
        parser.add_argument(
            '--all',
            action='store_true',
            help='Lint all bundled contrib structures plus custom structures when --structures-path is set',
        )
        parser.add_argument(
            '-s', '--structures-path',
            type=str,
            help='Path to custom structure definitions (env: STRUCTKIT_STRUCTURES_PATH)',
            default=os.getenv('STRUCTKIT_STRUCTURES_PATH', None),
        )
        parser.add_argument('--json', action='store_true', help='Print machine-readable JSON output')
        parser.set_defaults(func=self.execute)

        # Templates are only parsed and analysed here, never rendered.
        # NOTE(review): the custom delimiters presumably mirror the ones used by
        # StructKit's real renderer - confirm they stay in sync.
        self.template_env = Environment(
            trim_blocks=True,
            block_start_string='{%@',
            block_end_string='@%}',
            variable_start_string='{{@',
            variable_end_string='@}}',
            comment_start_string='{#@',
            comment_end_string='@#}',
        )
        # Stub globals/filters: registering the names lets templates that use
        # them pass analysis without being reported as undeclared or unknown.
        self.template_env.globals.update({
            'current_repo': lambda: None,
            'uuid': lambda: None,
            'now': lambda: None,
            'env': lambda *_args, **_kwargs: None,
            'read_file': lambda *_args, **_kwargs: None,
        })
        self.template_env.filters.update({
            'latest_release': lambda value: value,
            'slugify': lambda value: value,
            'default_branch': lambda value: value,
            'to_yaml': lambda value: value,
            'from_yaml': lambda value: value,
            'to_json': lambda value: value,
            'from_json': lambda value: value,
        })

    def execute(self, args):
        """Lint every resolved target, print the findings and signal errors.

        Args:
            args: parsed CLI namespace (``targets``, ``all``, ``structures_path``, ``json``).

        Raises:
            SystemExit: with code 1 when at least one issue has severity ``error``.
        """
        targets = self._resolve_targets(args)
        issues = []

        if not targets:
            issues.append(LintIssue('error', 'missing-target', 'Provide at least one target or use --all.', '<args>'))
        for target in targets:
            issues.extend(self.lint_file(target))

        if args.json:
            self._print_json(issues)
        else:
            self._print_text(issues)

        if any(issue.severity == 'error' for issue in issues):
            raise SystemExit(1)

    def _contribs_path(self):
        """Return the path of the ``contribs`` directory next to this module's package directory."""
        module_dir = os.path.dirname(os.path.realpath(__file__))
        return os.path.join(module_dir, '..', 'contribs')

    def _resolve_targets(self, args):
        """Turn CLI arguments into a list of file paths to lint.

        With ``--all`` every YAML file under the custom structures path (if
        given) and the bundled contribs directory is returned; otherwise each
        positional target is resolved individually.
        """
        if args.all:
            roots = [self._contribs_path()]
            if args.structures_path:
                roots.insert(0, args.structures_path)
            return self._find_yaml_files(roots)

        return [self._resolve_target(target, args.structures_path) for target in args.targets]

    def _find_yaml_files(self, roots):
        """Return the sorted, de-duplicated ``.yaml``/``.yml`` paths found under ``roots``.

        Roots that are empty or do not exist are skipped.
        """
        found = set()
        for root in roots:
            if not root or not os.path.exists(root):
                continue
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    if filename.endswith(('.yaml', '.yml')):
                        found.add(os.path.join(dirpath, filename))
        return sorted(found)

    def _resolve_target(self, target, structures_path=None):
        """Map one CLI target to a file path.

        ``file://`` URLs have the scheme stripped; anything that looks like a
        YAML path or already exists is returned unchanged; otherwise the target
        is treated as a structure name and looked up (``.yaml`` then ``.yml``)
        in ``structures_path`` first and the bundled contribs second. When
        nothing matches, the target is returned as given so that ``lint_file``
        can report it as not found.
        """
        if target.startswith('file://'):
            return target[len('file://'):]
        if target.endswith(('.yaml', '.yml')) or os.path.exists(target):
            return target

        search_dirs = []
        if structures_path:
            search_dirs.append(structures_path)
        search_dirs.append(self._contribs_path())

        for directory in search_dirs:
            for extension in ('.yaml', '.yml'):
                candidate = os.path.join(directory, f'{target}{extension}')
                if os.path.exists(candidate):
                    return candidate
        return target

    def lint_file(self, path):
        """Run every lint check against one YAML file.

        Args:
            path: filesystem path of the structure definition.

        Returns:
            A list of ``LintIssue``. Load failures (missing file, unreadable or
            undecodable file, invalid YAML, non-mapping root) produce a single
            error issue and skip the remaining checks.
        """
        if not os.path.exists(path):
            return [LintIssue('error', 'not-found', f'Could not find structure target: {path}', path)]

        try:
            # Decode as UTF-8 explicitly so results do not depend on the
            # platform's locale encoding.
            with open(path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
        except yaml.YAMLError as exc:
            return [LintIssue('error', 'invalid-yaml', f'YAML could not be parsed: {exc}', path)]
        except (OSError, UnicodeDecodeError) as exc:
            # UnicodeDecodeError is a ValueError, not an OSError, so it has to
            # be listed separately to avoid crashing on badly encoded files.
            return [LintIssue('error', 'read-error', f'File could not be read: {exc}', path)]

        # Only a genuinely empty document is treated as an empty mapping; other
        # falsy roots ([] / 0 / false / '') fall through to the mapping check.
        if config is None:
            config = {}

        if not isinstance(config, dict):
            return [LintIssue('error', 'invalid-root', 'Top-level YAML document must be a mapping.', path)]

        issues = []
        issues.extend(self._validate_baseline(config, path))
        issues.extend(self._check_description(config, path))
        issues.extend(self._check_duplicates(config, path))
        issues.extend(self._check_templates(config, path))
        issues.extend(self._check_hooks(config, path))
        issues.extend(self._check_remote_urls(config, path))
        issues.extend(self._check_names(config, path))
        return issues

    def _validate_baseline(self, config, path):
        """Reuse ``ValidateCommand``'s structure/folders/variables checks.

        The validator is created with ``__new__`` so its ``__init__`` (and any
        argument-parser setup it performs) is skipped, and it is given a silent
        logger. The first ``ValueError`` it raises becomes a single
        ``validate`` error issue.
        """
        validator = ValidateCommand.__new__(ValidateCommand)
        validator.logger = _NoopLogger()
        try:
            validator._validate_structure_config(config.get('structure') or config.get('files', []))
            validator._validate_folders_config(config.get('folders', []))
            validator._validate_variables_config(config.get('variables', []))
        except ValueError as exc:
            return [LintIssue('error', 'validate', str(exc), path)]
        return []

    def _check_description(self, config, path):
        """Warn when the top-level ``description`` is missing, blank or not a string."""
        description = config.get('description')
        if isinstance(description, str) and description.strip():
            return []
        return [LintIssue('warning', 'missing-description', 'Missing top-level description.', path)]

    def _check_duplicates(self, config, path):
        """Report entry names that appear more than once within a section.

        Each of ``files``, ``structure`` and ``folders`` is checked on its own;
        non-mapping list items are ignored.
        """
        issues = []
        # (section key, singular label used in the message, rule id). The label
        # is spelled out because naive de-pluralisation turns 'structure' into
        # 'structur'.
        sections = (
            ('files', 'file', 'duplicate-file'),
            ('structure', 'structure', 'duplicate-file'),
            ('folders', 'folder', 'duplicate-folder'),
        )
        for section, label, rule in sections:
            seen = set()
            for index, item in enumerate(config.get(section, []) or []):
                if not isinstance(item, dict):
                    continue
                for name in item:
                    if name in seen:
                        issues.append(LintIssue('error', rule, f"Duplicate {label} entry '{name}'.", path, f'{section}[{index}]'))
                    else:
                        seen.add(name)
        return issues

    def _check_templates(self, config, path):
        """Check template syntax and compare referenced variables with declared ones.

        Every string outside ``variables`` keys is analysed with the lint
        Jinja environment. Returns ``template-syntax`` errors for strings that
        fail analysis, ``undeclared-variable`` errors for names used but not
        declared, and ``unused-variable`` warnings for names declared but never
        used.
        """
        issues = []
        declared = self._declared_variables(config)
        referenced = set()
        for context, value in self._walk_strings(config, skip_keys={'variables'}):
            try:
                parsed = self.template_env.parse(value)
                # Kept inside the try: this step compiles the AST and raises
                # TemplateAssertionError (a TemplateSyntaxError subclass) for
                # e.g. unknown filters or tests, which must become a lint
                # issue rather than crash the linter.
                names = meta.find_undeclared_variables(parsed)
            except TemplateSyntaxError as exc:
                issues.append(LintIssue('error', 'template-syntax', f'Template syntax error: {exc.message}', path, context))
                continue
            referenced.update(names)

        # NOTE(review): 'mappings' is treated as always available - presumably
        # it is injected at render time rather than declared; confirm.
        referenced -= {'mappings'}
        for name in sorted(referenced - declared):
            issues.append(LintIssue('error', 'undeclared-variable', f"Variable '{name}' is referenced but not declared.", path))
        for name in sorted(declared - referenced):
            issues.append(LintIssue('warning', 'unused-variable', f"Variable '{name}' is declared but never referenced.", path))
        return issues

    def _declared_variables(self, config):
        """Return the set of variable names (as strings) declared under ``variables``."""
        declared = set()
        for item in config.get('variables', []) or []:
            if isinstance(item, dict):
                declared.update(str(name) for name in item)
        return declared

    def _walk_strings(self, value, context='', skip_keys=None):
        """Yield ``(context, string)`` for every string in a nested YAML value.

        Both string mapping keys and string values are yielded. ``context`` is
        a dotted/indexed path such as ``files[0].README.md``. Mapping keys
        listed in ``skip_keys`` are skipped together with everything below
        them, at any depth.
        """
        skip_keys = skip_keys or set()
        if isinstance(value, str):
            yield context, value
        elif isinstance(value, list):
            for index, item in enumerate(value):
                yield from self._walk_strings(item, f'{context}[{index}]', skip_keys)
        elif isinstance(value, dict):
            for key, item in value.items():
                if key in skip_keys:
                    continue
                key_context = f'{context}.{key}' if context else str(key)
                if isinstance(key, str):
                    yield key_context, key
                yield from self._walk_strings(item, key_context, skip_keys)

    def _check_hooks(self, config, path):
        """Flag ``pre_hooks``/``post_hooks`` commands that match risky shell patterns.

        A hook matching an unsafe pattern is an error; otherwise a hook
        matching a suspicious pattern is a warning. Non-string hooks are
        ignored, and each hook yields at most one issue.
        """
        issues = []
        for hook_key in ('pre_hooks', 'post_hooks'):
            for index, hook in enumerate(config.get(hook_key, []) or []):
                if not isinstance(hook, str):
                    continue
                context = f'{hook_key}[{index}]'
                if any(pattern.search(hook) for pattern in self.UNSAFE_HOOK_PATTERNS):
                    issues.append(LintIssue('error', 'unsafe-hook', 'Hook contains an unsafe destructive command.', path, context))
                elif any(pattern.search(hook) for pattern in self.SUSPICIOUS_HOOK_PATTERNS):
                    issues.append(LintIssue('warning', 'suspicious-hook', 'Hook contains a suspicious shell pattern; review before use.', path, context))
        return issues

    def _check_remote_urls(self, config, path):
        """Warn once for every http(s) URL that ``_is_unpinned_url`` considers unpinned."""
        issues = []
        for context, value in self._walk_strings(config, skip_keys={'variables'}):
            for url in self.REMOTE_URL_RE.findall(value):
                if self._is_unpinned_url(url):
                    issues.append(LintIssue('warning', 'unpinned-remote-url', 'Remote URL does not appear pinned to a stable ref.', path, context))
        return issues

    def _is_unpinned_url(self, url):
        """Heuristically decide whether a GitHub URL points at a moving ref.

        Returns ``False`` for non-GitHub URLs and for release-download URLs.
        For ``raw.githubusercontent.com`` URLs the third path segment is taken
        as the ref, and for URLs carrying a ``ref=`` query parameter its value
        is; either must match ``STABLE_GIT_REF_RE`` to count as pinned. Any
        other GitHub URL is unpinned when it contains a well-known branch
        segment or has neither a 40-hex SHA segment nor a version-like segment.
        """
        if 'github.com' not in url and 'raw.githubusercontent.com' not in url:
            return False
        if '/releases/download/' in url:
            return False
        raw_match = re.search(r'raw\.githubusercontent\.com/[^/]+/[^/]+/([^/]+)/', url)
        if raw_match:
            return not bool(self.STABLE_GIT_REF_RE.match(raw_match.group(1)))
        ref_match = re.search(r'[?&]ref=([^&]+)', url)
        if ref_match:
            return not bool(self.STABLE_GIT_REF_RE.match(ref_match.group(1)))
        return any(branch in url for branch in ('/main/', '/master/', '/HEAD/', '/develop/')) or not re.search(r'/[0-9a-f]{40}/|/v?\d+\.\d+', url)

    def _check_names(self, config, path):
        """Check file/folder entry names for path escapes and unusual characters.

        Absolute names and names with a ``..`` path segment are errors; names
        containing a backslash or characters outside ``NAME_RE`` are warnings.
        """
        issues = []
        for section in ('files', 'structure', 'folders'):
            for index, item in enumerate(config.get(section, []) or []):
                if not isinstance(item, dict):
                    continue
                for raw_name in item:
                    # YAML keys are not necessarily strings (e.g. ``- 123: ...``);
                    # normalise once so the string checks below cannot raise.
                    name = str(raw_name)
                    if name.startswith('/') or '..' in name.split('/'):
                        issues.append(LintIssue('error', 'invalid-name', f"Entry name '{name}' must be relative and stay within the target directory.", path, f'{section}[{index}]'))
                    elif '\\' in name or not self.NAME_RE.match(name):
                        issues.append(LintIssue('warning', 'naming-convention', f"Entry name '{name}' uses unusual characters.", path, f'{section}[{index}]'))
        return issues

    def _print_json(self, issues):
        """Print the issues and an error/warning summary as indented, key-sorted JSON."""
        payload = {
            'summary': {
                'errors': sum(1 for issue in issues if issue.severity == 'error'),
                'warnings': sum(1 for issue in issues if issue.severity == 'warning'),
            },
            'issues': [asdict(issue) for issue in issues],
        }
        print(json.dumps(payload, indent=2, sort_keys=True))

    def _print_text(self, issues):
        """Print one line per issue followed by a summary, or a success line when there are none."""
        if not issues:
            print('✅ No lint issues found.')
            return

        for issue in issues:
            label = 'ERROR' if issue.severity == 'error' else 'WARN'
            context = f' [{issue.context}]' if issue.context else ''
            print(f'{label} {issue.path}{context}: {issue.message} ({issue.rule})')
        errors = sum(1 for issue in issues if issue.severity == 'error')
        warnings = sum(1 for issue in issues if issue.severity == 'warning')
        print(f'\nLint summary: {errors} error(s), {warnings} warning(s).')
0 commit comments