Skip to content

Commit dff5eb6

Browse files
committed
Add validator plugin infrastructure
Introduces the validator plugin system, parallel to the existing transform plugin system:
- _plugin_harness.py: subprocess harness for validator plugin discovery and execution (mirrors the transform plugin harness pattern)
- permissions.py: prompt and allowlist support for validator plugin modules
- models.py: expose validation_resources and pre_baseurl_metadata on BuildingBlock for use by plugin validators
- transform.py / transformers/plugin.py: extract read_plugin_entries() helper shared by both transform and validator plugin loading
- entrypoint.py: thread allowed_validator_modules through from CLI
- bblocks-config.schema.yaml: document the plugins.transforms / plugins.validators structure (replaces standalone transform-plugins.yml)
- transform-plugins.schema.yaml: update to reflect shared plugin-entry schema used by both transform and validator sections
- validate-and-process.yml: wire skip_permissions for validator plugins
1 parent 46539f0 commit dff5eb6

9 files changed

Lines changed: 463 additions & 89 deletions

File tree

.github/workflows/validate-and-process.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -207,13 +207,13 @@ jobs:
207207
208208
echo "Repositories cloned and mapped in $RANDOM_DIR — output saved to $MAPPINGS_YML"
209209
210-
- name: Restore transforms sandbox cache
210+
- name: Restore bblocks sandbox cache
211211
uses: actions/cache/restore@v4
212212
with:
213-
path: .transforms-sandbox
214-
key: transforms-sandbox-${{ runner.os }}-${{ github.run_id }}
213+
path: .bblocks-sandbox
214+
key: bblocks-sandbox-${{ runner.os }}-${{ github.run_id }}
215215
restore-keys: |
216-
transforms-sandbox-${{ runner.os }}-
216+
bblocks-sandbox-${{ runner.os }}-
217217
- name: Before postprocess
218218
if: ${{inputs.before_postprocess}}
219219
run: ${{inputs.before_postprocess}}
@@ -247,17 +247,17 @@ jobs:
247247
uses: EndBug/add-and-commit@v9
248248
with:
249249
message: Building blocks postprocessing
250-
- name: Save transforms sandbox cache
250+
- name: Save bblocks sandbox cache
251251
if: ${{ !inputs.skip-build }}
252252
uses: actions/cache/save@v4
253253
with:
254-
path: .transforms-sandbox
255-
key: transforms-sandbox-${{ runner.os }}-${{ github.run_id }}
254+
path: .bblocks-sandbox
255+
key: bblocks-sandbox-${{ runner.os }}-${{ github.run_id }}
256256
- name: Remove build artifacts from pages
257257
run: |
258258
sudo find "${{ inputs.annotated_path }}" -name _visited_properties.tsv -delete || true
259259
sudo find "${{ inputs.annotated_path }}" -name _visited_properties.tsv.gz -delete || true
260-
rm -rf .transforms-sandbox
260+
rm -rf .bblocks-sandbox
261261
# Remove LLM tool configuration files
262262
rm -rf .claude CLAUDE.md AGENTS.md GEMINI.md .cursor .cursorrules .windsurfrules .ruler .continue
263263
find . -maxdepth 1 -name '.aider*' -exec rm -rf {} + || true

ogc/bblocks/entrypoint.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
from ogc.bblocks.postprocess import postprocess
1717
from ogc.na import ingest_json, update_vocabs
1818

19-
from ogc.bblocks.util import get_github_repo, load_yaml
19+
import jsonschema
20+
21+
from ogc.bblocks.util import get_github_repo, load_yaml, get_schema
2022

2123
MAIN_BBR = 'https://opengeospatial.github.io/bblocks/register.json'
2224
DEFAULT_IMPORT_MARKER = 'default'
@@ -219,6 +221,11 @@
219221
bb_config.update(load_yaml(filename=bb_override_config_file) or {})
220222
break
221223
if bb_config:
224+
try:
225+
jsonschema.validate(bb_config, get_schema('bblocks-config'))
226+
except jsonschema.ValidationError as e:
227+
raise ValueError(f"Invalid bblocks-config.yaml: {e.message} (at {' > '.join(str(p) for p in e.absolute_path)})") from e
228+
222229
id_prefix = bb_config.get('identifier-prefix', id_prefix)
223230
if id_prefix and id_prefix[-1] != '.':
224231
id_prefix += '.'

ogc/bblocks/models.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ def __init__(self, identifier: str, metadata_file: Path,
7575
}
7676

7777
self._lazy_properties = {}
78+
# Snapshot resource refs before postprocess.py rewrites them to published URLs.
79+
self._raw_resources: list[dict] = [dict(r) for r in self.metadata.get('resources', [])]
80+
# Set by postprocess.py just before with_base_url rewrites start.
81+
# Standard path fields are translated to cwd-relative; custom fields remain
82+
# as declared in bblock.json (bblock-source-relative).
83+
self.pre_baseurl_metadata: dict | None = None
7884

7985
self.subdirs = rel_path
8086
if '.' in self.identifier:
@@ -354,6 +360,32 @@ def transforms(self) -> list:
354360
self._lazy_properties['transforms'] = transforms
355361
return self._lazy_properties['transforms']
356362

363+
@property
def validation_resources(self) -> list[dict]:
    """Return the resources declared with role ``'validation'``.

    Entries are read from ``self._raw_resources`` (the copy of the metadata
    ``resources`` list taken at construction time), so the result does not
    change if the live metadata refs are rewritten later.

    Each returned dict has:
      * ``ref``: the original ref when ``is_url(ref)`` is true, otherwise
        ``str(self.files_path / ref)``;
      * ``format``: the declared format (``None`` when not declared);
      * ``conformsTo``: only present when the resource declares a truthy value.

    Resources without a ``ref`` are skipped. Callers that serialize these
    entries for a subprocess are expected to convert local paths to
    cwd-relative ones (the original note points at ``_to_wire_path`` in
    validation/plugin.py -- not visible here, confirm before relying on it).
    """
    resolved: list[dict] = []
    candidates = (
        res for res in self._raw_resources
        if res.get('role') == 'validation'
    )
    for res in candidates:
        target = res.get('ref')
        if target:
            # Remote refs pass through untouched; local ones are anchored
            # at this building block's files directory.
            location = target if is_url(target) else str(self.files_path / target)
            item = {'ref': location, 'format': res.get('format')}
            conforms_to = res.get('conformsTo')
            if conforms_to:
                item['conformsTo'] = conforms_to
            resolved.append(item)
    return resolved
388+
357389
def get_extra_test_resources(self) -> Generator[dict, None, None]:
358390
extra_tests_file = self.files_path / 'tests.yaml'
359391
try:

ogc/bblocks/permissions.py

Lines changed: 62 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import yaml
88

9-
from ogc.bblocks.transform import _PERMISSION_CHECKED_TYPES as _RISKY_TRANSFORM_TYPES
9+
from ogc.bblocks.transform import _PERMISSION_CHECKED_TYPES as _RISKY_TRANSFORM_TYPES, read_plugin_entries
1010
_PERMISSIONS_FILE = 'permissions.json'
1111

1212

@@ -47,19 +47,6 @@ def _ask_yes_no(prompt: str) -> bool:
4747
print(" Please answer y or n.")
4848

4949

50-
def _read_plugin_configs() -> list[dict]:
51-
"""Read transform-plugins.yml without installing anything."""
52-
plugins_path = Path('transform-plugins.yml')
53-
if not plugins_path.exists():
54-
return []
55-
try:
56-
with open(plugins_path) as f:
57-
config = yaml.safe_load(f)
58-
if not config or 'plugins' not in config:
59-
return []
60-
return config.get('plugins', []) or []
61-
except Exception:
62-
return []
6350

6451

6552
def _scan_risky_transforms(items_dir: Path) -> dict[str, list[tuple[str, str]]]:
@@ -98,23 +85,68 @@ def _plugin_version_key(plugin: dict) -> str:
9885
return ','.join(sorted(pip))
9986

10087

88+
def _check_plugin_permissions(
89+
plugin_entries: list[dict],
90+
cache_key: str,
91+
cache: dict,
92+
label: str,
93+
) -> tuple[set[str], bool]:
94+
"""Prompt for permissions for a list of plugin entries.
95+
96+
Returns (allowed_modules, cache_was_modified).
97+
"""
98+
cached: dict[str, str] = dict(cache.get(cache_key, {}))
99+
allowed: set[str] = set()
100+
dirty = False
101+
102+
for plugin in plugin_entries:
103+
modules = plugin.get('modules', [])
104+
if isinstance(modules, str):
105+
modules = [modules]
106+
version_key = _plugin_version_key(plugin)
107+
pip_deps = plugin.get('pip', [])
108+
if isinstance(pip_deps, str):
109+
pip_deps = [pip_deps]
110+
111+
for module in modules:
112+
if cached.get(module) == version_key:
113+
allowed.add(module)
114+
continue
115+
116+
_require_tty()
117+
print()
118+
print(f"╔══ {label} plugin permission required")
119+
print(f"║ Plugin: {module}")
120+
if pip_deps:
121+
print(f"║ Dependencies: {', '.join(pip_deps)}")
122+
print()
123+
if _ask_yes_no(f"Allow {label.lower()} plugin '{module}' to be installed and run?"):
124+
cached[module] = version_key
125+
allowed.add(module)
126+
cache[cache_key] = cached
127+
dirty = True
128+
129+
return allowed, dirty
130+
131+
101132
def check_permissions(
102133
sandbox_dir: Path,
103134
items_dir: Path,
104-
) -> tuple[set[str], set[str]]:
135+
) -> tuple[set[str], set[str], set[str]]:
105136
"""Check and prompt for permissions for risky transforms and plugins.
106137
107-
Must be called before load_transform_plugins and before apply_transforms.
138+
Must be called before load_transform_plugins, load_validation_plugins,
139+
and apply_transforms.
108140
Returns:
109-
allowed_transform_types: set of approved type strings
110-
allowed_plugin_modules: set of approved module paths
141+
allowed_transform_types: set of approved type strings
142+
allowed_plugin_modules: set of approved transform plugin module paths
143+
allowed_validator_modules: set of approved validator plugin module paths
111144
Raises RuntimeError if stdin is not a TTY and permissions are needed.
112145
"""
113146
cache = _load_cache(sandbox_dir)
114147
cache_dirty = False
115148

116149
cached_types: set[str] = set(cache.get('transform-types', []))
117-
cached_plugins: dict[str, str] = dict(cache.get('plugins', {}))
118150

119151
# --- Transform types ---
120152
needed_types = _scan_risky_transforms(items_dir)
@@ -139,38 +171,19 @@ def check_permissions(
139171

140172
allowed_transform_types = cached_types
141173

142-
# --- Plugins ---
143-
plugin_configs = _read_plugin_configs()
144-
allowed_plugin_modules: set[str] = set()
145-
146-
for plugin in plugin_configs:
147-
modules = plugin.get('modules', [])
148-
if isinstance(modules, str):
149-
modules = [modules]
150-
version_key = _plugin_version_key(plugin)
151-
pip_deps = plugin.get('pip', [])
152-
if isinstance(pip_deps, str):
153-
pip_deps = [pip_deps]
154-
155-
for module in modules:
156-
if cached_plugins.get(module) == version_key:
157-
allowed_plugin_modules.add(module)
158-
continue
174+
# --- Transform plugins ---
175+
allowed_plugin_modules, dirty = _check_plugin_permissions(
176+
read_plugin_entries('transforms'), 'plugins', cache, 'Transform',
177+
)
178+
cache_dirty = cache_dirty or dirty
159179

160-
_require_tty()
161-
print()
162-
print(f"╔══ Plugin permission required")
163-
print(f"║ Plugin: {module}")
164-
if pip_deps:
165-
print(f"║ Dependencies: {', '.join(pip_deps)}")
166-
print()
167-
if _ask_yes_no(f"Allow plugin '{module}' to be installed and run?"):
168-
cached_plugins[module] = version_key
169-
allowed_plugin_modules.add(module)
170-
cache['plugins'] = cached_plugins
171-
cache_dirty = True
180+
# --- Validator plugins ---
181+
allowed_validator_modules, dirty = _check_plugin_permissions(
182+
read_plugin_entries('validators'), 'validator-plugins', cache, 'Validator',
183+
)
184+
cache_dirty = cache_dirty or dirty
172185

173186
if cache_dirty:
174187
_save_cache(sandbox_dir, cache)
175188

176-
return allowed_transform_types, allowed_plugin_modules
189+
return allowed_transform_types, allowed_plugin_modules, allowed_validator_modules
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
$schema: https://json-schema.org/draft/2020-12/schema
2+
title: bblocks-config.yaml
3+
description: Per-repository configuration for the OGC Building Blocks postprocessor.
4+
type: object
5+
properties:
6+
7+
identifier-prefix:
8+
type: string
9+
description: >
10+
Dot-separated prefix prepended to all building block identifiers in this repository
11+
(e.g. "ogc.geo"). A trailing dot is added automatically if omitted.
12+
13+
imports:
14+
description: >
15+
List of external building block register URLs to import. Use the special value
16+
"default" to include the main OGC Building Blocks register. Omitting this key
17+
also defaults to the main register.
18+
oneOf:
19+
- type: array
20+
items:
21+
type: string
22+
- type: 'null'
23+
24+
name:
25+
type: string
26+
description: Human-readable name for this register.
27+
28+
abstract:
29+
type: string
30+
description: Short description of this register.
31+
32+
description:
33+
type: string
34+
description: Full description of this register.
35+
36+
schema-oas30-downcompile:
37+
type: boolean
38+
default: false
39+
description: >
40+
When true, JSON Schemas are downcompiled to OpenAPI 3.0 format during annotation.
41+
42+
sparql:
43+
type: object
44+
description: SPARQL endpoint configuration for semantic publishing.
45+
properties:
46+
query:
47+
type: string
48+
description: SPARQL query endpoint URL. Exposed as sparqlEndpoint in register.json.
49+
push:
50+
type: string
51+
description: >
52+
SPARQL Graph Store Protocol endpoint URL. When --enable-sparql is set, the
53+
register Turtle is pushed here.
54+
graph:
55+
type: string
56+
description: >
57+
Named graph URI to use when pushing to the GSP endpoint. Defaults to the
58+
register base URL if omitted.
59+
resources:
60+
type: object
61+
description: Additional resources to upload to the triplestore beyond building block metadata.
62+
properties:
63+
ontologies:
64+
type: boolean
65+
description: When true, ontology files from building blocks are uploaded to the triplestore.
66+
67+
viewer:
68+
type: object
69+
description: Configuration for the Building Blocks viewer.
70+
properties:
71+
show-imported-depth:
72+
description: >
73+
Controls which imported building blocks are shown in the viewer.
74+
0 (default): only local building blocks.
75+
N (positive integer): local + imported up to N levels deep.
76+
-1 (any negative integer): show all imported building blocks.
77+
null or false: disable.
78+
oneOf:
79+
- type: integer
80+
- type: 'null'
81+
- type: boolean
82+
enum: [false]
83+
84+
plugins:
85+
type: object
86+
description: >
87+
External plugin configuration. Replaces the standalone transform-plugins.yml file.
88+
properties:
89+
transforms:
90+
type: array
91+
description: External transformer plugins.
92+
items:
93+
$ref: '#/$defs/plugin-entry'
94+
validators:
95+
type: array
96+
description: External validator plugins.
97+
items:
98+
$ref: '#/$defs/plugin-entry'
99+
100+
$defs:
101+
plugin-entry:
102+
type: object
103+
required:
104+
- modules
105+
properties:
106+
pip:
107+
description: >
108+
One or more pip install specifiers for packages that provide the plugin modules.
109+
Accepts any specifier that `pip install` understands (package name, version
110+
constraint, GitHub URL, etc.).
111+
oneOf:
112+
- type: string
113+
- type: array
114+
items:
115+
type: string
116+
minItems: 1
117+
modules:
118+
description: >
119+
One or more dotted Python module paths to import. Each module is scanned for
120+
plugin classes (transformers or validators depending on context).
121+
oneOf:
122+
- type: string
123+
- type: array
124+
items:
125+
type: string
126+
minItems: 1
127+
url:
128+
type: string
129+
format: uri
130+
description: >
131+
URL for this plugin (e.g. its repository or PyPI page). If omitted, the
132+
postprocessor attempts to derive one automatically from the pip specifier.

0 commit comments

Comments
 (0)