Skip to content

Commit a10c15c

Browse files
committed
Load and wire validator plugins; identify them in reports; improve test file handling
Loading and wiring:
- validate.py: add load_validation_plugins(), thread plugin_validators into validate_test_resources() and validate_transform_output()
- postprocess.py: instantiate plugin validators at startup, pass them through the validate/transform calls; delete legacy .transforms-sandbox on startup instead of renaming it (venvs have hardcoded absolute shebangs that break on rename); fix missing _rel import; add shutil import
- validation/plugin.py: PluginValidator class — subprocess-based execution, per-class venv management, mime-type/extension matching

Plugin identification in reports:
- Every entry emitted by a plugin now carries payload['plugin'] = 'module.ClassName', making the source identifiable in all report formats
- validation/__init__.py: write_text() groups PLUGIN section entries with '--- Plugin: ... ---' sub-headers on class transitions
- report.html.mako: render a per-plugin heading in the Plugin section using a mutable state container to track transitions across loop iterations; fix subsection lookup (payload keys are top-level in the template dict, not nested under 'payload')

Test file handling improvements:
- validate.py: replace the hard-coded .json/.jsonld/.ttl extension filter for tests/ directory files with fn.is_file() — validators self-filter via _matches(); files no validator claims are silently dropped
- Remove the same extension filter from tests.yaml extra resources
- Add media-type field support for tests.yaml entries, allowing explicit MIME type declaration when the file extension is ambiguous or absent
- tests.schema.yaml: document the new media-type property
1 parent ffe16b0 commit a10c15c

6 files changed

Lines changed: 411 additions & 14 deletions

File tree

ogc/bblocks/postprocess.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
from __future__ import annotations
22

3+
import copy
34
import itertools
45
import json
56
import logging
67
import os.path
78
import re
9+
import shutil
810
import subprocess
911
import sys
10-
from ogc.bblocks.sandbox import SANDBOX_DIR_NAME
12+
from ogc.bblocks.sandbox import SANDBOX_DIR_NAME, _OLD_SANDBOX_DIR_NAME
1113
from argparse import ArgumentParser
1214
import datetime
1315
from pathlib import Path
@@ -28,8 +30,8 @@
2830
PathOrUrl, get_git_repo_url, load_yaml
2931
from ogc.bblocks.schema import annotate_schema, resolve_all_schema_references, write_annotated_schema
3032
from ogc.bblocks.models import BuildingBlock, BuildingBlockRegister, ImportedBuildingBlocks, BuildingBlockError
31-
from ogc.bblocks.validate import validate_test_resources, write_report
32-
from ogc.bblocks.transform import apply_transforms, load_transform_plugins, transformers, cleanup_sandbox
33+
from ogc.bblocks.validate import validate_test_resources, write_report, load_validation_plugins
34+
from ogc.bblocks.transform import _rel, apply_transforms, load_transform_plugins, transformers, cleanup_sandbox
3335
from ogc.bblocks.permissions import check_permissions
3436

3537

@@ -58,7 +60,11 @@ def postprocess(registered_items_path: str | Path = 'registereditems',
5860

5961
cwd = Path().resolve()
6062

63+
old_sandbox = Path(_OLD_SANDBOX_DIR_NAME)
6164
sandbox_dir = Path(SANDBOX_DIR_NAME)
65+
if old_sandbox.is_dir():
66+
logger.info("Removing legacy sandbox %s", _OLD_SANDBOX_DIR_NAME)
67+
shutil.rmtree(old_sandbox, ignore_errors=True)
6268
sandbox_dir.mkdir(exist_ok=True)
6369
gitignore = sandbox_dir / '.gitignore'
6470
if not gitignore.exists():
@@ -67,13 +73,15 @@ def postprocess(registered_items_path: str | Path = 'registereditems',
6773
if skip_permissions:
6874
allowed_transform_types = None
6975
allowed_plugin_modules = None
76+
allowed_validator_modules = None
7077
else:
71-
allowed_transform_types, allowed_plugin_modules = check_permissions(
78+
allowed_transform_types, allowed_plugin_modules, allowed_validator_modules = check_permissions(
7279
sandbox_dir, registered_items_path if isinstance(registered_items_path, Path)
7380
else Path(registered_items_path),
7481
)
7582

7683
transform_plugins = load_transform_plugins(sandbox_dir, allowed_modules=allowed_plugin_modules)
84+
plugin_validators = load_validation_plugins(sandbox_dir, allowed_modules=allowed_validator_modules)
7785

7886
if not isinstance(test_outputs_path, Path):
7987
test_outputs_path = Path(test_outputs_path)
@@ -151,6 +159,27 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool:
151159
bblock.metadata.pop('dateOfLastChange', None)
152160

153161
output_file_root = Path(output_file).resolve().parent
162+
163+
# Snapshot metadata with local paths re-anchored to cwd before the
164+
# with_base_url block rewrites everything to published URLs.
165+
_snap = copy.deepcopy(bblock.metadata)
166+
for _field in ('ldContext', 'schema', 'openAPIDocument'):
167+
_v = _snap.get(_field)
168+
if _v and isinstance(_v, str) and not is_url(_v):
169+
_snap[_field] = _rel(bblock.files_path / _v, cwd)
170+
_shapes = _snap.get('shaclRules') or _snap.get('shaclShapes')
171+
if isinstance(_shapes, list):
172+
_key = 'shaclRules' if 'shaclRules' in _snap else 'shaclShapes'
173+
_snap[_key] = [
174+
_rel(bblock.files_path / s, cwd) if isinstance(s, str) and not is_url(s) else s
175+
for s in _shapes
176+
]
177+
for _r in _snap.get('resources', []):
178+
_ref = _r.get('ref')
179+
if _ref and not is_url(_ref):
180+
_r['ref'] = _rel(bblock.files_path / _ref, cwd)
181+
bblock.pre_baseurl_metadata = _snap
182+
154183
if bblock.annotated_schema.is_file():
155184
schema_url_yaml = PathOrUrl(bblock.annotated_schema).with_base_url(
156185
base_url, cwd if base_url else output_file_root
@@ -234,7 +263,8 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool:
234263
bblock,
235264
bblocks_register=bbr,
236265
outputs_path=test_outputs_path,
237-
base_url=base_url)
266+
base_url=base_url,
267+
plugin_validators=plugin_validators)
238268
validation_reports.append(json_report)
239269

240270
bblock.metadata['validationPassed'] = validation_passed
@@ -480,7 +510,8 @@ def do_postprocess(bblock: BuildingBlock, light: bool = False) -> bool:
480510
id_prefix=id_prefix,
481511
imported_register_urls=imported_registers,
482512
transform_plugins=transform_plugins,
483-
allowed_transform_types=allowed_transform_types)
513+
allowed_transform_types=allowed_transform_types,
514+
plugin_validators=plugin_validators)
484515

485516
if filter_id is None:
486517
cleanup_sandbox(sandbox_dir, child_bblocks)

ogc/bblocks/schemas/tests.schema.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,9 @@ items:
3131
Explicit ATS test method description. When absent, the standard generator infers the method
3232
from the file extension. When present, this text is used verbatim in the generated ATS entry.
3333
type: string
34+
media-type:
35+
description: |
36+
Explicit MIME type for this test resource. When absent, the media type is inferred from the
37+
file extension. Useful for files whose extension does not map to a standard MIME type, or
38+
when a validator plugin requires a specific type that differs from the default mapping.
39+
type: string

ogc/bblocks/validate.py

Lines changed: 98 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,13 @@
1919
from ogc.bblocks.validation import Validator, ValidationItemSourceType, ValidationReportSection, ValidationItemSource, \
2020
ValidationReportEntry, ValidationReportItem
2121
from ogc.bblocks.validation.json_ import JsonValidator
22+
from ogc.bblocks.validation.plugin import PluginValidator
2223
from ogc.bblocks.validation.rdf import RdfValidator
2324

25+
import logging
26+
27+
logger = logging.getLogger(__name__)
28+
2429
OUTPUT_SUBDIR = 'output'
2530
FORMAT_ALIASES = {
2631
'turtle': 'ttl',
@@ -31,6 +36,72 @@
3136
DEFAULT_UPLIFT_FORMATS = ['jsonld', 'ttl']
3237

3338

39+
def load_validation_plugins(sandbox_dir: Path,
                            allowed_modules: set[str] | None = None) -> list[PluginValidator]:
    """Read validator plugin config, create per-plugin venvs, and return PluginValidator instances.

    Reads from ``plugins.validators`` in bblocks-config.yaml.

    sandbox_dir: directory passed to ``PluginValidator`` for venv management.
    allowed_modules: if provided, only install/register modules in this set. Pass None to allow all.

    Returns one ``PluginValidator`` per discovered validator class that declares at least
    one MIME type or file extension; an empty list when no validator plugins are configured.

    Raises RuntimeError when ``PluginValidator.discover`` returns None for a permitted module.
    """
    # Imported locally, as in the original code (presumably to avoid a circular
    # import between the validate and transform modules - confirm before moving).
    from ogc.bblocks.transform import read_plugin_entries

    plugin_entries = read_plugin_entries('validators')
    if not plugin_entries:
        return []

    result: list[PluginValidator] = []

    for plugin in plugin_entries:
        # ``dict.get(key, [])`` still yields None when the YAML key is present but
        # empty (e.g. ``pip:``), so normalise falsy values to an empty list first.
        pip_deps = plugin.get('pip') or []
        if isinstance(pip_deps, str):
            pip_deps = [pip_deps]

        modules = plugin.get('modules') or []
        if isinstance(modules, str):
            modules = [modules]

        for module_path in modules:
            if allowed_modules is not None and module_path not in allowed_modules:
                logger.info("Skipping validator plugin '%s': not permitted by user", module_path)
                continue

            if pip_deps:
                logger.info("Installing validator plugin pip dependencies for '%s': %s",
                            module_path, pip_deps)
            else:
                logger.info("Setting up validator plugin venv for '%s'", module_path)
            venv_dir = PluginValidator.ensure_venv_for(pip_deps, sandbox_dir)

            discovered = PluginValidator.discover(venv_dir, module_path)
            if discovered is None:
                # None is treated as "module could not be loaded", as opposed to an
                # empty result, which means "loaded, but no validator classes".
                raise RuntimeError(
                    f"Validator plugin '{module_path}' could not be loaded — "
                    "check that the module path is correct and all pip dependencies are declared"
                )
            if not discovered:
                logger.warning("No validator classes found in plugin '%s'", module_path)
                continue

            for entry in discovered:
                mime_types = entry.get('mime_types', [])
                file_extensions = entry.get('file_extensions', [])
                if not mime_types and not file_extensions:
                    # Nothing to match files against, so the class is not registered;
                    # leave a trace so the omission can be diagnosed.
                    logger.debug("Ignoring validator class '%s' in plugin '%s': "
                                 "no mime_types or file_extensions declared",
                                 entry.get('class'), module_path)
                    continue
                pv = PluginValidator(
                    module_path=module_path,
                    class_name=entry['class'],
                    pip_deps=pip_deps,
                    sandbox_dir=sandbox_dir,
                    mime_types=mime_types,
                    file_extensions=file_extensions,
                )
                logger.info("Registered validator plugin '%s' (%s) for mime_types=%s extensions=%s",
                            module_path, entry['class'], mime_types, file_extensions)
                result.append(pv)

    return result
103+
104+
34105
def report_to_dict(bblock: BuildingBlock,
35106
items: Sequence[ValidationReportItem] | None,
36107
base_url: str | None = None) -> dict:
@@ -158,7 +229,7 @@ def write_report(json_reports: list[dict],
158229
def _validate_resource(bblock: BuildingBlock,
159230
filename: Path,
160231
output_filename: Path,
161-
validators: list[Validator],
232+
validators: list,
162233
resource_contents: str | None = None,
163234
additional_shacl_closures: list[str | Path] | None = None,
164235
base_uri: str | None = None,
@@ -167,7 +238,9 @@ def _validate_resource(bblock: BuildingBlock,
167238
resource_url: str | None = None,
168239
example_index: tuple[int, int] | None = None,
169240
prefixes: dict[str, str] | None = None,
170-
file_format: str | None = None) -> ValidationReportItem | None:
241+
file_format: str | None = None,
242+
bblocks_register: BuildingBlockRegister | None = None,
243+
validation_resources: list[dict] | None = None) -> ValidationReportItem | None:
171244
if require_fail is None:
172245
require_fail = filename.stem.endswith('-fail') and not example_index
173246

@@ -203,7 +276,10 @@ def _validate_resource(bblock: BuildingBlock,
203276
schema_ref=schema_ref,
204277
prefixes=prefixes,
205278
file_format=file_format,
206-
resource_url=resource_url)
279+
resource_url=resource_url,
280+
bblock=bblock,
281+
bblocks_register=bblocks_register,
282+
validation_resources=validation_resources)
207283
any_validator_run = any_validator_run or (result is not False)
208284

209285
except Exception as unknown_exc:
@@ -245,6 +321,7 @@ def validate_transform_output(
245321
transform_id: str,
246322
output_file: Path,
247323
profile_output_base: Path,
324+
plugin_validators: list[PluginValidator] = (),
248325
) -> ValidationReportItem:
249326
"""Validate a transform output file against a profile building block.
250327
@@ -259,6 +336,7 @@ def validate_transform_output(
259336
validators = [
260337
JsonValidator(profile_bblock, bblocks_register),
261338
RdfValidator(profile_bblock, bblocks_register),
339+
*plugin_validators,
262340
]
263341

264342
mime_type = mimetypes.from_extension(output_file.suffix[1:]) if output_file.suffix else None
@@ -276,6 +354,9 @@ def validate_transform_output(
276354
validator.validate(
277355
output_file, profile_output_base, report,
278356
file_format=mime_type,
357+
bblock=profile_bblock,
358+
bblocks_register=bblocks_register,
359+
validation_resources=profile_bblock.validation_resources,
279360
)
280361
except Exception as unknown_exc:
281362
report.add_entry(ValidationReportEntry(
@@ -295,7 +376,8 @@ def validate_transform_output(
295376
def validate_test_resources(bblock: BuildingBlock,
296377
bblocks_register: BuildingBlockRegister,
297378
outputs_path: Path | None = None,
298-
base_url: str | None = None) -> tuple[bool, int, dict]:
379+
base_url: str | None = None,
380+
plugin_validators: list[PluginValidator] = ()) -> tuple[bool, int, dict]:
299381
final_result = True
300382
test_count = 0
301383

@@ -313,12 +395,13 @@ def validate_test_resources(bblock: BuildingBlock,
313395
validators = [
314396
JsonValidator(bblock, bblocks_register),
315397
RdfValidator(bblock, bblocks_register),
398+
*plugin_validators,
316399
]
317400

318401
# Test resources
319402
if bblock.tests_dir.is_dir():
320403
for fn in sorted(bblock.tests_dir.resolve().iterdir()):
321-
if fn.suffix not in ('.json', '.jsonld', '.ttl'):
404+
if not fn.is_file():
322405
continue
323406
output_fn = output_dir / fn.name
324407
output_base_filenames.add(fn.stem)
@@ -328,19 +411,22 @@ def validate_test_resources(bblock: BuildingBlock,
328411
filename=fn,
329412
output_filename=output_fn,
330413
validators=validators,
414+
bblocks_register=bblocks_register,
415+
validation_resources=bblock.validation_resources,
331416
)
332417
if test_result:
333418
all_results.append(test_result)
334419
final_result = not test_result.failed and final_result
335420
test_count += 1
336421

337422
for extra_test_resource in bblock.get_extra_test_resources():
338-
if not re.search(r'\.(json(ld)?|ttl)$', extra_test_resource['output-filename']):
339-
continue
340423
fn = bblock.files_path / 'tests' / extra_test_resource['output-filename']
341424
output_fn = output_dir / fn.name
342425
output_base_filenames.add(fn.stem)
343426

427+
declared_media_type = extra_test_resource.get('media-type')
428+
file_format = declared_media_type or (mimetypes.from_extension(fn.suffix[1:]) if fn.suffix else None)
429+
344430
test_result = _validate_resource(
345431
bblock=bblock,
346432
filename=fn,
@@ -349,7 +435,9 @@ def validate_test_resources(bblock: BuildingBlock,
349435
resource_contents=extra_test_resource['contents'],
350436
require_fail=extra_test_resource.get('require-fail', False),
351437
resource_url=extra_test_resource['ref'] if isinstance(extra_test_resource['ref'], str) else None,
352-
file_format=mimetypes.from_extension(fn.suffix[1:]),
438+
file_format=file_format,
439+
bblocks_register=bblocks_register,
440+
validation_resources=bblock.validation_resources,
353441
)
354442
if test_result:
355443
all_results.append(test_result)
@@ -406,6 +494,8 @@ def validate_test_resources(bblock: BuildingBlock,
406494
prefixes=example.get('prefixes'),
407495
file_format=snippet_language,
408496
additional_shacl_closures=snippet.get('shacl-closure'),
497+
bblocks_register=bblocks_register,
498+
validation_resources=bblock.validation_resources,
409499
)
410500
if example_result:
411501
all_results.append(example_result)

ogc/bblocks/validation/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class ValidationReportSection(Enum):
4545
TURTLE = 'Turtle'
4646
SHACL = 'SHACL'
4747
SEMANTIC_UPLIFT = 'Semantic Uplift'
48+
PLUGIN = 'Plugin'
4849
UNKNOWN = 'Unknown errors'
4950

5051

@@ -99,7 +100,13 @@ def write_text(self, bblock: BuildingBlock, report_fn: Path):
99100
if not entries:
100101
continue
101102
f.write(f"=== {section.value} ===\n")
103+
current_plugin = None
102104
for entry in entries:
105+
if section == ValidationReportSection.PLUGIN:
106+
plugin = (entry.payload or {}).get('plugin')
107+
if plugin != current_plugin:
108+
current_plugin = plugin
109+
f.write(f"\n--- Plugin: {plugin or '(unknown)'} ---\n")
103110
if entry.is_error:
104111
f.write("\n** Validation error **\n")
105112
f.write(f"{entry.message}\n")

0 commit comments

Comments
 (0)