Skip to content

Commit ccc2722

Browse files
committed
Improve subprocess transform robustness and add binary + metadata dict support
- Python transforms: fix JSONDecodeError escaping _send_raw uncaught, add capture_output=True to pip/npm calls inside get_transformer (stdout pipe was being polluted), include full traceback when exception escapes transformer.transform() - Node transforms: switch to structured JSON protocol (same as Python) so binary input/output is handled explicitly; fix binary input (was always read as utf-8); exceptions are now caught and surfaced with a stack trace; console.log no longer interferes with the protocol - Plugin transforms: fix exception traceback (was str(e) only) - All transforms: transform_metadata.metadata is now an _MNS instance — a SimpleNamespace-style object that also supports dict-style access (__getitem__, keys/values/items, 'in', iteration) so transform code can use either style interchangeably
1 parent 25abca2 commit ccc2722

4 files changed

Lines changed: 120 additions & 32 deletions

File tree

ogc/bblocks/transform.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import os.path
99
import subprocess
1010
import sys
11+
import traceback
1112

1213
logger = logging.getLogger(__name__)
1314
from packaging.version import Version
@@ -547,7 +548,7 @@ def apply_transforms(bblock: BuildingBlock,
547548
logger.debug("Transform %s result: %s", transform['id'], result)
548549
except Exception as e:
549550
result = TransformResult(output=None, success=False,
550-
stderr=f"{type(e).__name__}: {e}")
551+
stderr=f"{type(e).__name__}: {e}\n{traceback.format_exc()}")
551552

552553
entry = {'success': result.success}
553554
if not result.success:

ogc/bblocks/transformers/_plugin_harness.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,26 @@
3232
import io
3333
import json
3434
import sys
35+
import traceback
3536
import types
3637
from base64 import b64encode
3738

3839

40+
class _MNS:
41+
def __init__(self, **kw): self.__dict__.update(kw)
42+
def __getitem__(self, k): return self.__dict__[k]
43+
def __setitem__(self, k, v): self.__dict__[k] = v
44+
def __delitem__(self, k): del self.__dict__[k]
45+
def __contains__(self, k): return k in self.__dict__
46+
def __iter__(self): return iter(self.__dict__)
47+
def __len__(self): return len(self.__dict__)
48+
def keys(self): return self.__dict__.keys()
49+
def values(self): return self.__dict__.values()
50+
def items(self): return self.__dict__.items()
51+
def get(self, k, d=None): return self.__dict__.get(k, d)
52+
def __repr__(self): return 'namespace(' + ', '.join(f'{k}={v!r}' for k, v in self.__dict__.items()) + ')'
53+
54+
3955
# ---------------------------------------------------------------------------
4056
# Helpers
4157
# ---------------------------------------------------------------------------
@@ -87,7 +103,7 @@ def _transform(meta_json: str) -> None:
87103
m.transform_content = meta_dict['transform_content']
88104
m.source_mime_type = meta_dict['source_mime_type']
89105
m.target_mime_type = meta_dict['target_mime_type']
90-
m.metadata = meta_dict.get('metadata', {})
106+
m.metadata = _MNS(**meta_dict.get('metadata', {}))
91107
raw = sys.stdin.buffer.read()
92108
m.input_data = raw if meta_dict.get('input_binary') else raw.decode('utf-8')
93109
m.sandbox_dir = None
@@ -117,9 +133,9 @@ def _transform(meta_json: str) -> None:
117133
try:
118134
result = transformer.transform(m)
119135
error = None
120-
except Exception as e:
136+
except Exception:
121137
result = None
122-
error = str(e)
138+
error = traceback.format_exc()
123139
finally:
124140
sys.stdout, sys.stderr = old_stdout, old_stderr
125141

ogc/bblocks/transformers/node.py

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python3
22
from __future__ import annotations
33

4+
import base64
45
import json
56
import logging
67
import os
@@ -19,17 +20,6 @@
1920
transform_type = 'node'
2021

2122

22-
def _decode_output(raw: bytes) -> tuple[str | bytes | None, bool]:
23-
if not raw:
24-
return None, False
25-
try:
26-
text = raw.decode('utf-8')
27-
if '\x00' in text:
28-
return raw, True
29-
return text, False
30-
except UnicodeDecodeError:
31-
return raw, True
32-
3323

3424
class NodeTransformer(Transformer):
3525

@@ -63,7 +53,8 @@ def transform(self, metadata: TransformMetadata) -> TransformResult:
6353
const path = require('path');
6454
const {{spawnSync}} = require('child_process');
6555
const transformMetadata = {json.dumps(transform_metadata_dict)};
66-
const inputData = fs.readFileSync(0, 'utf8');
56+
const _inputBuf = fs.readFileSync(0);
57+
const inputData = {json.dumps(isinstance(metadata.input_data, bytes))} ? _inputBuf : _inputBuf.toString('utf8');
6758
let outputData = null;
6859
6960
const _TRANSFORMS_REGISTRY = {json.dumps(transforms_registry)};
@@ -179,14 +170,38 @@ def transform(self, metadata: TransformMetadata) -> TransformResult:
179170
}}
180171
181172
const _origStdoutWrite = process.stdout.write.bind(process.stdout);
182-
process.stdout.write = process.stderr.write.bind(process.stderr);
183-
184-
{metadata.transform_content}
173+
const _origStderrWrite = process.stderr.write.bind(process.stderr);
174+
const _captured = [];
175+
const _captureWrite = (chunk) => {{ _captured.push(Buffer.isBuffer(chunk) ? chunk.toString('utf8') : String(chunk)); return true; }};
176+
process.stdout.write = _captureWrite;
177+
process.stderr.write = _captureWrite;
178+
179+
let _success = true, _errMsg = null;
180+
try {{
181+
{metadata.transform_content}
182+
}} catch (_e) {{
183+
_success = false;
184+
_errMsg = (_e && _e.stack) ? _e.stack : String(_e);
185+
}}
185186
186187
process.stdout.write = _origStdoutWrite;
187-
if (outputData !== null) {{
188-
process.stdout.write(typeof outputData === 'string' ? outputData : Buffer.from(outputData));
188+
process.stderr.write = _origStderrWrite;
189+
190+
const _capturedStr = _captured.join('') || null;
191+
const _stderrStr = (_capturedStr !== null || _errMsg !== null)
192+
? (_capturedStr || '') + (_errMsg ? (_capturedStr ? '\\n' : '') + _errMsg : '')
193+
: null;
194+
195+
let _outB64 = null, _binary = false;
196+
if (_success && outputData !== null) {{
197+
const _outBuf = Buffer.isBuffer(outputData)
198+
? outputData
199+
: Buffer.from(typeof outputData === 'string' ? outputData : String(outputData), 'utf8');
200+
_outB64 = _outBuf.toString('base64');
201+
_binary = Buffer.isBuffer(outputData);
189202
}}
203+
204+
_origStdoutWrite(JSON.stringify({{output: _outB64, binary: _binary, success: _success, stderr: _stderrStr}}) + '\\n');
190205
"""
191206

192207
with tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False) as f:
@@ -208,15 +223,40 @@ def transform(self, metadata: TransformMetadata) -> TransformResult:
208223
finally:
209224
Path(harness_path).unlink(missing_ok=True)
210225

211-
stderr = result.stderr.decode('utf-8', errors='replace').replace(harness_path, '<transform>') or None
212-
if stderr:
213-
label = metadata.id or 'node'
226+
label = metadata.id or 'node'
227+
228+
if not result.stdout.strip():
229+
stderr = result.stderr.decode('utf-8', errors='replace').replace(harness_path, '<transform>') or 'Node transform produced no output'
214230
with log_indent():
215231
for line in stderr.splitlines():
216232
if line.strip():
217233
logger.info("[%s] %s", label, line)
218-
if result.returncode != 0:
219234
return TransformResult(output=None, success=False, stderr=stderr)
220235

221-
output, binary = _decode_output(result.stdout)
222-
return TransformResult(output=output, success=True, stderr=stderr, binary=binary)
236+
try:
237+
data = json.loads(result.stdout)
238+
except json.JSONDecodeError as e:
239+
stderr = result.stderr.decode('utf-8', errors='replace').replace(harness_path, '<transform>') or f'Invalid JSON from Node harness: {e}'
240+
return TransformResult(output=None, success=False, stderr=stderr)
241+
242+
stderr = data.get('stderr') or None
243+
if stderr:
244+
stderr = stderr.replace(harness_path, '<transform>')
245+
with log_indent():
246+
for line in stderr.splitlines():
247+
if line.strip():
248+
logger.info("[%s] %s", label, line)
249+
250+
output_b64 = data.get('output')
251+
if output_b64 is not None:
252+
output_bytes = base64.b64decode(output_b64)
253+
output: str | bytes | None = output_bytes if data.get('binary') else output_bytes.decode('utf-8')
254+
else:
255+
output = None
256+
257+
return TransformResult(
258+
output=output,
259+
success=data.get('success', False),
260+
stderr=stderr,
261+
binary=bool(data.get('binary')),
262+
)

ogc/bblocks/transformers/python.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,25 @@
3737
# When dispatching non-Python transforms, the child call stack is propagated both via
3838
# the _BBLOCKS_CALL_STACK env var (read by Node's getTransformer) and via the request's
3939
# 'call_stack' field (read by the one-shot Python dispatch harness).
40+
# Class injected into every harness context.
41+
# Behaves like SimpleNamespace (attribute access) and like a dict (item access, keys/values/items,
42+
# 'in', iteration) so transform code can use either style interchangeably.
43+
_MNS_CLASS_DEF = """\
44+
class _MNS:
45+
def __init__(self, **kw): self.__dict__.update(kw)
46+
def __getitem__(self, k): return self.__dict__[k]
47+
def __setitem__(self, k, v): self.__dict__[k] = v
48+
def __delitem__(self, k): del self.__dict__[k]
49+
def __contains__(self, k): return k in self.__dict__
50+
def __iter__(self): return iter(self.__dict__)
51+
def __len__(self): return len(self.__dict__)
52+
def keys(self): return self.__dict__.keys()
53+
def values(self): return self.__dict__.values()
54+
def items(self): return self.__dict__.items()
55+
def get(self, k, d=None): return self.__dict__.get(k, d)
56+
def __repr__(self): return 'namespace(' + ', '.join(f'{k}={v!r}' for k, v in self.__dict__.items()) + ')'
57+
"""
58+
4059
_GET_TRANSFORMER_IMPL = """\
4160
def _make_get_transformer(transforms_registry, main_python, sandbox_base,
4261
non_python_harness, parent_call_stack, self_key=None):
@@ -86,6 +105,7 @@ def get_transformer(bblock_id, transform_id):
86105
[_sys.executable, '-m', 'pip', 'install', '--disable-pip-version-check',
87106
*pip_deps],
88107
check=True,
108+
capture_output=True,
89109
)
90110
91111
def _make_python_callable(
@@ -113,7 +133,7 @@ def _callable(content, source_mime_type=None, extra_metadata=None):
113133
_tm = _types.SimpleNamespace(
114134
source_mime_type=None,
115135
target_mime_type=None,
116-
metadata=_types.SimpleNamespace(**_base_meta),
136+
metadata=_MNS(**_base_meta),
117137
context=_ctx,
118138
)
119139
if isinstance(content, bytes):
@@ -153,6 +173,7 @@ def _callable(content, source_mime_type=None, extra_metadata=None):
153173
_subprocess.run(
154174
['npm', 'install', '--prefix', node_dir, *npm_deps],
155175
check=True,
176+
capture_output=True,
156177
)
157178
_sandbox_dir_str = _os.path.dirname(node_dir)
158179
@@ -245,7 +266,7 @@ def _callable(content, source_mime_type=None, extra_metadata=None):
245266
"import sys as _sys, json as _json, base64 as _b64, traceback as _tb,"
246267
" types as _types, os as _os, subprocess as _subprocess, io as _io\n"
247268
"from pathlib import Path as _Path\n\n"
248-
) + _GET_TRANSFORMER_IMPL + """
269+
) + _MNS_CLASS_DEF + _GET_TRANSFORMER_IMPL + """
249270
250271
_req = _json.loads(_sys.stdin.buffer.read())
251272
_t_type = _req['type']
@@ -276,7 +297,7 @@ def _callable(content, source_mime_type=None, extra_metadata=None):
276297
_tm = _types.SimpleNamespace(
277298
source_mime_type=_req.get('source_mime_type'),
278299
target_mime_type=_req.get('target_mime_type'),
279-
metadata=_types.SimpleNamespace(**_meta_dict),
300+
metadata=_MNS(**_meta_dict),
280301
context=_ctx_ns,
281302
)
282303
_ns = {'transform_metadata': _tm, 'input_data': _input, 'output_data': None,
@@ -352,7 +373,7 @@ def _callable(content, source_mime_type=None, extra_metadata=None):
352373
_HARNESS_IMPORTS = (
353374
"import sys as _sys, json as _json, types as _types, base64 as _b64,"
354375
" io as _io, traceback as _tb, os as _os, subprocess as _subprocess\n"
355-
)
376+
) + _MNS_CLASS_DEF
356377

357378
# Static body of the persistent harness: get_transformer factory call + the stdin request loop.
358379
# Written as a plain string so braces don't need escaping; dynamic values (_TRANSFORM_CODE,
@@ -377,6 +398,8 @@ def _callable(content, source_mime_type=None, extra_metadata=None):
377398
_d = _req['metadata']
378399
if isinstance(_d.get('context'), dict):
379400
_d['context'] = _types.SimpleNamespace(**_d['context'])
401+
if isinstance(_d.get('metadata'), dict):
402+
_d['metadata'] = _MNS(**_d['metadata'])
380403
transform_metadata = _types.SimpleNamespace(**_d)
381404
_raw = _b64.b64decode(_req['input'])
382405
try:
@@ -466,7 +489,15 @@ def _send_raw(self, req_line: bytes) -> dict | None:
466489
self._proc.stdin.flush()
467490
resp_line = self._proc.stdout.readline()
468491
if resp_line:
469-
return json.loads(resp_line)
492+
try:
493+
return json.loads(resp_line)
494+
except json.JSONDecodeError as e:
495+
return {
496+
'output': None,
497+
'success': False,
498+
'stderr': f'Subprocess wrote invalid JSON ({e}): {resp_line!r}',
499+
'binary': False,
500+
}
470501
except (BrokenPipeError, OSError):
471502
pass
472503
return None

0 commit comments

Comments
 (0)