Skip to content

Commit e60dc0e

Browse files
committed
Add getTransformer() to Node transforms with full cross-type chaining
Node transforms now have getTransformer(bblockId, transformId) injected, mirroring Python's get_transformer() builtin. Any combination of transform types (python, node, jq, xslt, jsonld-frame) can call each other arbitrarily deep, with cycle detection working across process and language boundaries. Bblock context is now consistently derived from a single _bblock_context_dict() function in transform.py and stored in the transforms registry, so direct calls and get_transformer/getTransformer calls see the same context fields. Also fixes two bugs: a Node v24 spawnSync encoding crash, and stdout output from dispatched Python transforms leaking into the JSON response.
1 parent a3f65b8 commit e60dc0e

3 files changed

Lines changed: 520 additions & 236 deletions

File tree

ogc/bblocks/transform.py

Lines changed: 66 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,63 @@ def cleanup_sandbox(sandbox_dir: Path, bblocks: list[BuildingBlock]) -> None:
299299
_GET_TRANSFORMER_TYPES = frozenset({'python', 'node', 'jq', 'xslt', 'jsonld-frame'})
300300

301301

302-
def _build_transforms_registry(bblocks_register: BuildingBlockRegister) -> dict:
302+
def _bblock_context_dict(bblock_id: str, bblock, cwd: Path) -> dict:
303+
"""Return a JSON-serializable dict of the bblock-level TransformContext fields.
304+
305+
Accepts either a local BuildingBlock or an imported bblock raw dict. The returned
306+
dict can be spread into TransformContext(...) alongside the run-specific fields
307+
(example_index, example, snippet_index, snippet, output_file, output_dir, working_dir,
308+
base_url, github_base_url, git_repository, id_prefix, imported_register_urls,
309+
transform_plugins).
310+
"""
311+
if isinstance(bblock, BuildingBlock):
312+
bblock_metadata = json.loads(json.dumps(bblock.metadata, default=str))
313+
source_schema_path = None
314+
if bblock.schema:
315+
source_schema_path = _rel(bblock.schema.path, cwd) if bblock.schema.is_path else bblock.schema.url
316+
return {
317+
'bblock_id': bblock_id,
318+
'bblock_name': bblock.name,
319+
'bblock_version': bblock.version,
320+
'bblock_tags': list(bblock.metadata.get('tags') or []),
321+
'bblock_files_path': _rel(bblock.files_path, cwd),
322+
'bblock_annotated_path': _rel(bblock.annotated_path, cwd),
323+
'bblock_metadata': bblock_metadata,
324+
'source_schema_path': source_schema_path,
325+
'annotated_schema_path': _rel(bblock.annotated_schema, cwd) if bblock.annotated_schema.is_file() else None,
326+
'jsonld_context_path': _rel(bblock.jsonld_context, cwd) if bblock.jsonld_context and bblock.jsonld_context.is_file() else None,
327+
'shacl_shapes_paths': [s if isinstance(s, str) else _rel(s, cwd) for s in (bblock.shacl_shapes or [])],
328+
}
329+
else:
330+
# Imported bblock raw dict
331+
raw_copy = {k: v for k, v in bblock.items() if k != 'register'}
332+
bblock_metadata = json.loads(json.dumps(raw_copy, default=str))
333+
schema = bblock.get('schema')
334+
schema_url = None
335+
if isinstance(schema, dict):
336+
schema_url = schema.get('application/json') or schema.get('application/yaml') or next(iter(schema.values()), None)
337+
elif isinstance(schema, str):
338+
schema_url = schema
339+
shacl_shapes = bblock.get('shaclShapes') or bblock.get('shaclRules') or []
340+
if isinstance(shacl_shapes, dict):
341+
shacl_shapes = [s for shapes in shacl_shapes.values()
342+
for s in (shapes if isinstance(shapes, list) else [shapes])]
343+
return {
344+
'bblock_id': bblock_id,
345+
'bblock_name': bblock.get('name', bblock_id),
346+
'bblock_version': bblock.get('version'),
347+
'bblock_tags': list(bblock.get('tags') or []),
348+
'bblock_files_path': None,
349+
'bblock_annotated_path': None,
350+
'bblock_metadata': bblock_metadata,
351+
'source_schema_path': schema_url,
352+
'annotated_schema_path': schema_url,
353+
'jsonld_context_path': bblock.get('ldContext'),
354+
'shacl_shapes_paths': [s for s in shacl_shapes if isinstance(s, str)],
355+
}
356+
357+
358+
def _build_transforms_registry(bblocks_register: BuildingBlockRegister, cwd: Path) -> dict:
303359
registry = {}
304360

305361
for bblock_id, bblock in bblocks_register.bblocks.items():
@@ -312,11 +368,8 @@ def _build_transforms_registry(bblocks_register: BuildingBlockRegister) -> dict:
312368
}
313369
if not supported_transforms:
314370
continue
315-
bblock_metadata = json.loads(json.dumps(bblock.metadata, default=str))
316371
registry[bblock_id] = {
317-
'name': bblock.metadata.get('name', bblock_id),
318-
'version': bblock.metadata.get('version'),
319-
'bblock_metadata': bblock_metadata,
372+
'context': _bblock_context_dict(bblock_id, bblock, cwd),
320373
'transforms': supported_transforms,
321374
}
322375

@@ -328,12 +381,8 @@ def _build_transforms_registry(bblocks_register: BuildingBlockRegister) -> dict:
328381
}
329382
if not supported_transforms:
330383
continue
331-
raw_copy = {k: v for k, v in raw.items() if k != 'register'}
332-
bblock_metadata = json.loads(json.dumps(raw_copy, default=str))
333384
registry[bblock_id] = {
334-
'name': raw.get('name', bblock_id),
335-
'version': raw.get('version'),
336-
'bblock_metadata': bblock_metadata,
385+
'context': _bblock_context_dict(bblock_id, raw, cwd),
337386
'transforms': supported_transforms,
338387
}
339388

@@ -360,7 +409,12 @@ def apply_transforms(bblock: BuildingBlock,
360409
shutil.rmtree(output_dir, ignore_errors=True)
361410
output_dir.mkdir(parents=True, exist_ok=True)
362411

363-
transforms_registry = _build_transforms_registry(bblocks_register) if bblocks_register else {}
412+
transforms_registry = _build_transforms_registry(bblocks_register, cwd) if bblocks_register else {}
413+
414+
# Bblock-level context fields — constant for all examples/snippets of this bblock.
415+
# Prefer the registry entry (already computed) to avoid redundant Path → str conversions.
416+
bblock_ctx = (transforms_registry.get(bblock.identifier) or {}).get('context') \
417+
or _bblock_context_dict(bblock.identifier, bblock, cwd)
364418

365419
# Collects ValidationReportItems per profile across all snippets/transforms,
366420
# so we can write one consolidated _report.json per profile at the end.
@@ -453,29 +507,14 @@ def apply_transforms(bblock: BuildingBlock,
453507
metadata['_prefixes'] = example_prefixes
454508

455509
ctx = TransformContext(
456-
bblock_id=bblock.identifier,
457-
bblock_name=bblock.name,
458-
bblock_version=bblock.version,
459-
bblock_tags=list(bblock.metadata.get('tags') or []),
460-
bblock_files_path=_rel(bblock.files_path, cwd),
461-
bblock_annotated_path=_rel(bblock.annotated_path, cwd),
462-
bblock_metadata=bblock.metadata,
510+
**bblock_ctx,
463511
example_index=example_id,
464512
example={k: v for k, v in example.items() if k != 'snippets'},
465513
snippet_index=snippet_id,
466514
snippet={k: v for k, v in snippet.items() if k != 'code'},
467515
output_file=_rel(output_fn, cwd),
468516
output_dir=_rel(output_dir, cwd),
469517
working_dir=str(cwd),
470-
source_schema_path=(
471-
_rel(bblock.schema.path, cwd) if bblock.schema.is_path else bblock.schema.url
472-
) if bblock.schema else None,
473-
annotated_schema_path=_rel(bblock.annotated_schema, cwd) if bblock.annotated_schema.is_file() else None,
474-
jsonld_context_path=_rel(bblock.jsonld_context, cwd) if bblock.jsonld_context.is_file() else None,
475-
shacl_shapes_paths=[
476-
s if isinstance(s, str) else _rel(s, cwd)
477-
for s in (bblock.shacl_shapes or [])
478-
],
479518
base_url=base_url,
480519
github_base_url=github_base_url,
481520
git_repository=git_repository,

ogc/bblocks/transformers/node.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
import os
77
import shutil
88
import subprocess
9+
import sys
910
import tempfile
1011
from pathlib import Path
1112

1213
from ogc.bblocks.log import log_indent
1314
from ogc.bblocks.models import TransformMetadata, TransformResult, Transformer
15+
from ogc.bblocks.transformers.python import _NON_PYTHON_HARNESS_CODE
1416

1517
logger = logging.getLogger(__name__)
1618

@@ -42,6 +44,12 @@ def transform(self, metadata: TransformMetadata) -> TransformResult:
4244
sandbox_dir = metadata.sandbox_dir
4345
node_path = str(sandbox_dir / 'node' / 'node_modules') if sandbox_dir else None
4446

47+
transforms_registry = metadata._transforms_registry or {}
48+
main_python = sys.executable
49+
sandbox_base = str(metadata._sandbox_base_dir) if metadata._sandbox_base_dir else None
50+
self_key = (f"{metadata.ctx.bblock_id}:{metadata.id}"
51+
if metadata.ctx and metadata.id else None)
52+
4553
transform_metadata_dict = {
4654
'sourceMimeType': metadata.source_mime_type,
4755
'targetMimeType': metadata.target_mime_type,
@@ -52,10 +60,124 @@ def transform(self, metadata: TransformMetadata) -> TransformResult:
5260

5361
harness = f"""\
5462
const fs = require('fs');
63+
const path = require('path');
64+
const {{spawnSync}} = require('child_process');
5565
const transformMetadata = {json.dumps(transform_metadata_dict)};
5666
const inputData = fs.readFileSync(0, 'utf8');
5767
let outputData = null;
5868
69+
const _TRANSFORMS_REGISTRY = {json.dumps(transforms_registry)};
70+
const _MAIN_PYTHON = {json.dumps(main_python)};
71+
const _SANDBOX_BASE = {json.dumps(sandbox_base)};
72+
const _DISPATCH_HARNESS = {json.dumps(_NON_PYTHON_HARNESS_CODE)};
73+
const _SELF_KEY = {json.dumps(self_key)};
74+
75+
// Call stack inherited from the process that invoked this Node transform (may be empty at root).
76+
const _INITIAL_CALL_STACK = process.env._BBLOCKS_CALL_STACK
77+
? JSON.parse(process.env._BBLOCKS_CALL_STACK) : [];
78+
79+
// Execution context of this process: ancestors + self. Used as the base for cycle detection
80+
// and as the child call stack prefix when dispatching sub-transforms.
81+
const _SELF_STACK = (_SELF_KEY && _INITIAL_CALL_STACK.indexOf(_SELF_KEY) === -1)
82+
? _INITIAL_CALL_STACK.concat([_SELF_KEY])
83+
: _INITIAL_CALL_STACK;
84+
85+
const _getTransformerCache = {{}};
86+
87+
function getTransformer(bblockId, transformId) {{
88+
const cacheKey = bblockId + ':' + transformId;
89+
if (_getTransformerCache[cacheKey]) return _getTransformerCache[cacheKey];
90+
91+
const bblockEntry = _TRANSFORMS_REGISTRY[bblockId];
92+
if (!bblockEntry) throw new Error('Building block ' + JSON.stringify(bblockId) + ' not found in transforms registry');
93+
const transformEntry = bblockEntry.transforms[transformId];
94+
if (!transformEntry) throw new Error('Transform ' + JSON.stringify(transformId) + ' not found for building block ' + JSON.stringify(bblockId));
95+
96+
if (transformEntry.type === 'node' && _SANDBOX_BASE) {{
97+
let npmDeps = ((transformEntry.metadata || {{}}).dependencies || {{}}).npm || [];
98+
if (!Array.isArray(npmDeps)) npmDeps = [npmDeps];
99+
if (npmDeps.length > 0) {{
100+
const nodeDir = path.join(_SANDBOX_BASE, 'get_transformer',
101+
bblockId.replace(/\\./g, '_'), transformId, 'node');
102+
fs.mkdirSync(nodeDir, {{recursive: true}});
103+
const r = spawnSync('npm', ['install', '--prefix', nodeDir].concat(npmDeps),
104+
{{encoding: 'utf8'}});
105+
if (r.status !== 0) throw new Error('npm install failed: ' + (r.stderr || ''));
106+
}}
107+
}}
108+
109+
const fn = function(content, opts) {{
110+
opts = opts || {{}};
111+
const sourceMimeType = opts.sourceMimeType || null;
112+
const extraMetadata = opts.extraMetadata || {{}};
113+
114+
// Cycle detection: check if the target transform is already in the execution context.
115+
// _SELF_STACK contains all ancestors (from the env var) plus this Node transform's own key,
116+
// so cross-type cycles (Python → Node → Python, Node → Python → Node) are caught here.
117+
if (_SELF_STACK.indexOf(cacheKey) !== -1) {{
118+
throw new Error('Cycle detected: transform ' + JSON.stringify(transformId) +
119+
' of ' + JSON.stringify(bblockId) + ' is already executing');
120+
}}
121+
// Child processes see _SELF_STACK plus the transform being dispatched.
122+
const _childStack = _SELF_STACK.concat([cacheKey]);
123+
const _childStackStr = JSON.stringify(_childStack);
124+
125+
const inputBytes = typeof content === 'string'
126+
? Buffer.from(content, 'utf8')
127+
: (Buffer.isBuffer(content) ? content : Buffer.from(String(content)));
128+
129+
const meta = Object.assign({{}}, transformEntry.metadata || {{}}, extraMetadata,
130+
{{_nested_transform: true}});
131+
132+
let sandboxDir = null;
133+
if (transformEntry.type === 'node' && _SANDBOX_BASE) {{
134+
let deps = ((transformEntry.metadata || {{}}).dependencies || {{}}).npm || [];
135+
if (!Array.isArray(deps)) deps = [deps];
136+
if (deps.length > 0) {{
137+
sandboxDir = path.join(_SANDBOX_BASE, 'get_transformer',
138+
bblockId.replace(/\\./g, '_'), transformId);
139+
}}
140+
}}
141+
142+
const req = {{
143+
type: transformEntry.type,
144+
transform_content: transformEntry.code,
145+
input: inputBytes.toString('base64'),
146+
source_mime_type: sourceMimeType,
147+
target_mime_type: null,
148+
metadata: meta,
149+
bblock_id: bblockId,
150+
transform_id: transformId,
151+
sandbox_dir: sandboxDir,
152+
transforms_registry: _TRANSFORMS_REGISTRY,
153+
main_python: _MAIN_PYTHON,
154+
sandbox_base: _SANDBOX_BASE,
155+
non_python_harness: _DISPATCH_HARNESS,
156+
call_stack: _childStack,
157+
}};
158+
159+
const spawnEnv = Object.assign({{}}, process.env,
160+
{{_BBLOCKS_CALL_STACK: _childStackStr}});
161+
const proc = spawnSync(_MAIN_PYTHON, ['-c', _DISPATCH_HARNESS],
162+
{{input: Buffer.from(JSON.stringify(req), 'utf8'), encoding: 'buffer', env: spawnEnv}});
163+
164+
const stdoutStr = (proc.stdout || Buffer.alloc(0)).toString('utf8').trim();
165+
if (!stdoutStr) {{
166+
const stderrStr = (proc.stderr || Buffer.alloc(0)).toString('utf8');
167+
throw new Error('getTransformer sub-process for ' + JSON.stringify(transformEntry.type) +
168+
' produced no output\\n' + stderrStr);
169+
}}
170+
const resp = JSON.parse(stdoutStr);
171+
if (!resp.success) throw new Error(resp.stderr || (JSON.stringify(transformEntry.type) + ' sub-transform failed'));
172+
if (resp.output == null) return null;
173+
const outBuf = Buffer.from(resp.output, 'base64');
174+
return resp.binary ? outBuf : outBuf.toString('utf8');
175+
}};
176+
177+
_getTransformerCache[cacheKey] = fn;
178+
return fn;
179+
}}
180+
59181
const _origStdoutWrite = process.stdout.write.bind(process.stdout);
60182
process.stdout.write = process.stderr.write.bind(process.stderr);
61183

0 commit comments

Comments
 (0)