From 93ef157ea5adf1c91ef3c3ace4497fc2899671e4 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 3 Jun 2026 08:19:05 +0200 Subject: [PATCH 01/12] fix: marshal Undefined and SafeString DOM values in Lua `domValue_push` handled `Null`, `Boolean`, `Integer`, `String`, `Array`, and `Object`, and aborted via `MRDOCS_UNREACHABLE` for any other kind. Reading a field whose value is `Undefined` or `SafeString` therefore crashed a Lua script. This maps `Undefined` to `nil`, as `Null` already is, and a `SafeString` the way a `String` is, which matches what happens in JavaScript. --- src/lib/Support/Lua.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/lib/Support/Lua.cpp b/src/lib/Support/Lua.cpp index 81313bcd20..d65e55dd37 100644 --- a/src/lib/Support/Lua.cpp +++ b/src/lib/Support/Lua.cpp @@ -802,11 +802,20 @@ domValue_push( { case dom::Kind::Null: return lua_pushnil(A); + case dom::Kind::Undefined: + // Lua has a single nullary value, so a missing field maps to + // `nil` just as `Null` does. A read of an absent field (for + // example the global namespace's name) yields `Undefined` and + // must not abort. + return lua_pushnil(A); case dom::Kind::Boolean: return lua_pushboolean(A, value.getBool()); case dom::Kind::Integer: return lua_pushnumber(A, value.getInteger()); case dom::Kind::String: + case dom::Kind::SafeString: + // A `SafeString` is a string already marked safe for an output + // format; to a Lua script it is just its bytes. return luaM_pushstring(A, value.getString()); case dom::Kind::Array: return domArray_push(A, value.getArray()); From c7b4eed04fbf1379d7f6375a6706aca0cd5cff4b Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 3 Jun 2026 10:05:21 +0200 Subject: [PATCH 02/12] feat: support script-driven generators This adds a generator flavor backed by a user script. 
A directory under {addon-root}/generator/{name}/ whose mrdocs-generator.yml names a script installs a generator that hands the whole emit to a Lua or JavaScript `generate(corpus, output)` function: the script walks the corpus and writes files through the output object, so it can produce output shapes a per-page generator cannot, such as a single artifact aggregated across every symbol. The manifest parser moves into a shared `GeneratorManifest`, so the data-driven and script-driven discovery passes read the same file. A manifest that names a script is skipped by the data-driven pass and installed by the script pass. The output object exposes a single write method, resolved under the output directory and forbidden from escaping it. Both languages receive it as the second argument to generate; on the Lua side it is also bound as a global and passed from there, because the Lua bridge cannot carry a callable as a plain value. --- src/lib/Gen/GeneratorManifest.cpp | 291 ++++++++++++++++++ src/lib/Gen/GeneratorManifest.hpp | 129 ++++++++ src/lib/Gen/hbs/DataDrivenGenerators.cpp | 205 +++--------- src/lib/Gen/hbs/DataDrivenGenerators.hpp | 38 +-- src/lib/Gen/script/OutputSink.cpp | 85 +++++ src/lib/Gen/script/OutputSink.hpp | 63 ++++ src/lib/Gen/script/ScriptGenerator.cpp | 147 +++++++++ src/lib/Gen/script/ScriptGenerator.hpp | 101 ++++++ src/lib/Gen/script/ScriptGeneratorJs.cpp | 102 ++++++ src/lib/Gen/script/ScriptGeneratorLua.cpp | 146 +++++++++ src/lib/Gen/script/ScriptRunner.hpp | 68 ++++ src/test/TestRunner.cpp | 8 + src/test/lib/Gen/hbs/DataDrivenGenerators.cpp | 34 +- src/tool/GenerateAction.cpp | 13 +- 14 files changed, 1210 insertions(+), 220 deletions(-) create mode 100644 src/lib/Gen/GeneratorManifest.cpp create mode 100644 src/lib/Gen/GeneratorManifest.hpp create mode 100644 src/lib/Gen/script/OutputSink.cpp create mode 100644 src/lib/Gen/script/OutputSink.hpp create mode 100644 src/lib/Gen/script/ScriptGenerator.cpp create mode 100644 src/lib/Gen/script/ScriptGenerator.hpp 
create mode 100644 src/lib/Gen/script/ScriptGeneratorJs.cpp create mode 100644 src/lib/Gen/script/ScriptGeneratorLua.cpp create mode 100644 src/lib/Gen/script/ScriptRunner.hpp diff --git a/src/lib/Gen/GeneratorManifest.cpp b/src/lib/Gen/GeneratorManifest.cpp new file mode 100644 index 0000000000..2e624ebdad --- /dev/null +++ b/src/lib/Gen/GeneratorManifest.cpp @@ -0,0 +1,291 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include "GeneratorManifest.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs { + +namespace { + +// Read a scalar node into a `std::string`. +std::string +scalarText(llvm::yaml::ScalarNode& node) +{ + llvm::SmallString<32> buf; + llvm::StringRef const text = node.getValue(buf); + return std::string(text.data(), text.size()); +} + +// Forward declaration for the recursive conversion below. +dom::Value yamlToDom(llvm::yaml::Node* node); + +// Add one key/value pair of a YAML mapping to `obj`, recursing on the +// value. A non-scalar key is skipped. +void +addMappingEntry(llvm::yaml::KeyValueNode& entry, dom::Object& obj) +{ + llvm::yaml::ScalarNode* const keyNode = + llvm::dyn_cast_or_null(entry.getKey()); + if (keyNode) + { + obj.set(scalarText(*keyNode), yamlToDom(entry.getValue())); + } +} + +// Convert a YAML node to a DOM value. 
+dom::Value +yamlToDom(llvm::yaml::Node* node) +{ + dom::Value result(nullptr); + if (node && !llvm::isa(node)) + { + if (llvm::yaml::ScalarNode* const scalar = + llvm::dyn_cast(node)) + { + result = dom::Value(scalarText(*scalar)); + } + else if (llvm::yaml::SequenceNode* const sequence = + llvm::dyn_cast(node)) + { + dom::Array array; + for (llvm::yaml::Node& element : *sequence) + { + array.emplace_back(yamlToDom(&element)); + } + result = dom::Value(std::move(array)); + } + else if (llvm::yaml::MappingNode* const mapping = + llvm::dyn_cast(node)) + { + dom::Object object; + for (llvm::yaml::KeyValueNode& entry : *mapping) + { + addMappingEntry(entry, object); + } + result = dom::Value(std::move(object)); + } + } + return result; +} + +// Parse a YAML mapping whose entries are non-empty byte-sequence keys +// mapped to replacement strings. An empty key is a hard error. +Expected +parseEscape( + llvm::yaml::MappingNode& node, + GeneratorManifest& manifest, + std::string_view yamlPath) +{ + Expected result; + for (llvm::yaml::KeyValueNode& entry : node) + { + if (result.has_value()) + { + llvm::yaml::ScalarNode* const keyNode = + llvm::dyn_cast_or_null(entry.getKey()); + llvm::yaml::ScalarNode* const valNode = + llvm::dyn_cast_or_null(entry.getValue()); + if (!keyNode || !valNode) + { + result = Unexpected(formatError( + "{}: each 'escape' entry must be a scalar->scalar mapping", + yamlPath)); + } + else + { + std::string key = scalarText(*keyNode); + if (key.empty()) + { + result = Unexpected(formatError( + "{}: escape key must not be empty", yamlPath)); + } + else + { + manifest.escape.emplace_back( + std::move(key), scalarText(*valNode)); + } + } + } + } + return result; +} + +// Dispatch a single top-level manifest key to its handler. Unknown keys +// are ignored so future schema additions are non-breaking. 
+Expected +parseTopLevelEntry( + llvm::yaml::KeyValueNode& pair, + GeneratorManifest& manifest, + std::string_view yamlPath) +{ + Expected result; + llvm::yaml::ScalarNode* const keyNode = + llvm::dyn_cast_or_null(pair.getKey()); + if (keyNode) + { + llvm::SmallString<16> keyBuf; + llvm::StringRef const key = keyNode->getValue(keyBuf); + if (key == "escape") + { + llvm::yaml::MappingNode* const escNode = + llvm::dyn_cast_or_null(pair.getValue()); + if (!escNode) + { + result = Unexpected(formatError( + "{}: 'escape' must be a mapping", yamlPath)); + } + else + { + result = parseEscape(*escNode, manifest, yamlPath); + } + } + else if (key == "script") + { + llvm::yaml::ScalarNode* const valNode = + llvm::dyn_cast_or_null(pair.getValue()); + if (!valNode) + { + result = Unexpected(formatError( + "{}: 'script' must be a scalar", yamlPath)); + } + else + { + manifest.script = scalarText(*valNode); + } + } + else if (key == "params") + { + llvm::yaml::MappingNode* const paramsNode = + llvm::dyn_cast_or_null(pair.getValue()); + if (!paramsNode) + { + result = Unexpected(formatError( + "{}: 'params' must be a mapping", yamlPath)); + } + else + { + manifest.params = yamlToDom(paramsNode).getObject(); + } + } + else if (key == "extends") + { + llvm::yaml::ScalarNode* const valNode = + llvm::dyn_cast_or_null(pair.getValue()); + if (!valNode) + { + result = Unexpected(formatError( + "{}: 'extends' must be a scalar", yamlPath)); + } + else + { + manifest.extends = scalarText(*valNode); + } + } + } + return result; +} + +} // (anon) + +Expected +loadGeneratorManifest(std::string_view yamlPath) +{ + MRDOCS_TRY(std::string text, files::getFileText(yamlPath)); + llvm::SourceMgr sm; + llvm::yaml::Stream stream(text, sm); + + GeneratorManifest manifest; + llvm::yaml::document_iterator docIt = stream.begin(); + if (docIt == stream.end()) + { + return manifest; + } + llvm::yaml::Node* const rootNode = docIt->getRoot(); + if (rootNode == nullptr || + llvm::isa(rootNode)) + { + // Empty 
document: a file with no content, only comments, or a + // literal `null`. All of these mean "no rules". + return manifest; + } + llvm::yaml::MappingNode* const root = + llvm::dyn_cast(rootNode); + if (!root) + { + return Unexpected(formatError( + "{}: top-level YAML node must be a mapping", yamlPath)); + } + for (llvm::yaml::KeyValueNode& pair : *root) + { + MRDOCS_TRY(parseTopLevelEntry(pair, manifest, yamlPath)); + } + return manifest; +} + +namespace { + +constexpr std::string_view metadataFileName = "mrdocs-generator.yml"; + +// Append every manifested subdirectory of `generatorDir` to `out`. +Expected +scanGeneratorDir( + std::string_view generatorDir, + std::vector& out) +{ + namespace fs = std::filesystem; + std::error_code iterEc; + fs::directory_iterator const end{}; + for (fs::directory_iterator it(generatorDir, iterEc); + !iterEc && it != end; + it.increment(iterEc)) + { + std::error_code typeEc; + if (!it->is_directory(typeEc)) + { + continue; + } + std::string const dir = it->path().string(); + std::string const yamlPath = files::appendPath( + dir, std::string(metadataFileName)); + if (!files::exists(yamlPath)) + { + continue; + } + MRDOCS_TRY(GeneratorManifest manifest, loadGeneratorManifest(yamlPath)); + out.push_back(DiscoveredManifest{ dir, std::move(manifest) }); + } + return {}; +} + +} // (anon) + +Expected> +discoverGeneratorManifests(std::vector const& roots) +{ + std::vector out; + for (std::string const& root : roots) + { + std::string const dir = files::appendPath(root, "generator"); + if (files::exists(dir)) + { + MRDOCS_TRY(scanGeneratorDir(dir, out)); + } + } + return out; +} + +} // mrdocs diff --git a/src/lib/Gen/GeneratorManifest.hpp b/src/lib/Gen/GeneratorManifest.hpp new file mode 100644 index 0000000000..ac98f3da48 --- /dev/null +++ b/src/lib/Gen/GeneratorManifest.hpp @@ -0,0 +1,129 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#ifndef MRDOCS_LIB_GEN_GENERATORMANIFEST_HPP +#define MRDOCS_LIB_GEN_GENERATORMANIFEST_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs { + +/** The parsed contents of a generator manifest. + + A manifest is the mrdocs-generator.yml that an addon directory + under /generator// exposes to declare a generator. The + two generator flavors read disjoint fields of the same file: + + @li A data-driven (Handlebars) generator reads the escape rules + and the parent it `extends`. + + @li A script-driven generator reads the script-file path and its + parameters. + + The presence of the `script` entry is what distinguishes the two: a + manifest that names a `script` is a script-driven generator, + otherwise it is data-driven. +*/ +struct GeneratorManifest +{ + /** The entry file of a script-driven generator. + + Holds the value of the manifest's optional `script` key, a path + relative to the generator directory. Empty when the manifest + declares no `script`, in which case the directory is a + data-driven generator. + */ + std::optional script; + + /** The escape rules of a data-driven generator. + + Each pair maps a byte-sequence source to its replacement string, + in manifest order. Empty when no escape rules are declared. + */ + std::vector> escape; + + /** The parent a data-driven generator inherits from. + + Holds the manifest's optional `extends` key: the id of another + generator whose partials and helpers this one falls back to, + after its own directory but before `common/`. Empty when no + parent is declared. A script-driven generator ignores this field. + */ + std::string extends; + + /** The parameters of a script-driven generator. 
+ + Holds the manifest's optional `params` mapping, passed to the + entry script as its `params` argument. Mapping values may be + nested objects or arrays; a scalar value is a string. Empty when + the manifest declares no `params`. A data-driven generator + ignores this field. + */ + dom::Object params; +}; + +/** Parse a generator manifest into plain data. + + Read the file at `yamlPath` and return its contents. The file is + expected to contain a top-level mapping. The optional `escape` key + holds a sub-mapping from byte-sequence keys to replacement strings; + keys may be one or more bytes long, and an empty key is a hard error. + The optional `script` key holds the entry-file path as a scalar. The + optional `extends` key names a parent generator as a scalar. The + optional `params` key holds a mapping of generator-specific + parameters; its values may be nested, and a scalar value is read as a + string. Unknown top-level keys are ignored so future schema additions + are non-breaking. + + An empty document (an empty file, comments only, or a literal `null`) + yields an empty manifest. +*/ +Expected +loadGeneratorManifest(std::string_view yamlPath); + +/** A generator directory paired with its parsed manifest. +*/ +struct DiscoveredManifest +{ + /** The generator directory, of the form /generator/. + */ + std::string dir; + + /** The parsed contents of the directory's manifest. + */ + GeneratorManifest manifest; +}; + +/** Find every addon generator directory that ships a manifest. + + For each addon root, walk the immediate subdirectories of + /generator/. A subdirectory is reported when it ships a + mrdocs-generator.yml; the manifest is parsed and returned alongside + its directory. Directories without a manifest (like the built-in + common/) are skipped. + + The presence of a `script` entry distinguishes the two generator + flavors, so a caller installs the flavor it owns and ignores the + other. 
Roots are searched in order, so the result preserves addon + precedence. +*/ +Expected> +discoverGeneratorManifests(std::vector const& roots); + +} // mrdocs + +#endif diff --git a/src/lib/Gen/hbs/DataDrivenGenerators.cpp b/src/lib/Gen/hbs/DataDrivenGenerators.cpp index e6574abb53..13f0839f5d 100644 --- a/src/lib/Gen/hbs/DataDrivenGenerators.cpp +++ b/src/lib/Gen/hbs/DataDrivenGenerators.cpp @@ -11,203 +11,68 @@ #include "DataDrivenGenerators.hpp" #include "AddonPaths.hpp" #include "HandlebarsGenerator.hpp" +#include #include #include -#include -#include -#include -#include -#include #include #include #include +#include +#include namespace mrdocs::hbs { namespace { -constexpr std::string_view metadataFileName = "mrdocs-generator.yml"; - -// Populate `map` from a YAML mapping whose entries are non-empty -// byte-sequence keys mapped to replacement strings. An empty key -// is a hard error. -Expected -populateEscapeFromMapping( - llvm::yaml::MappingNode& node, - EscapeMap& map, - std::string_view yamlPath) +// Build an `EscapeMap` from the manifest's ordered `escape` rules. 
+EscapeMap +toEscapeMap( + std::vector> const& rules) { - for (llvm::yaml::KeyValueNode& entry : node) + EscapeMap map; + for (std::pair const& rule : rules) { - llvm::yaml::ScalarNode* keyNode = - llvm::dyn_cast_or_null(entry.getKey()); - llvm::yaml::ScalarNode* valNode = - llvm::dyn_cast_or_null(entry.getValue()); - if (!keyNode || !valNode) - { - return Unexpected(formatError( - "{}: each 'escape' entry must be a scalar->scalar mapping", - yamlPath)); - } - llvm::SmallString<8> keyBuf; - llvm::SmallString<32> valBuf; - llvm::StringRef const keyStr = keyNode->getValue(keyBuf); - llvm::StringRef const valStr = valNode->getValue(valBuf); - if (keyStr.empty()) - { - return Unexpected(formatError( - "{}: escape key must not be empty", - yamlPath)); - } - map.set( - std::string_view(keyStr.data(), keyStr.size()), - std::string_view(valStr.data(), valStr.size())); + map.set(rule.first, rule.second); } - return {}; -} - -// Install a HandlebarsGenerator for the data-driven format in `dir`, -// when `dir` opts in by shipping an `mrdocs-generator.yml`. -// -// The presence of the manifest is the explicit opt-in: a directory -// under /generator/ becomes a generator only when it ships -// this file. Directories that hold shared assets (the built-in -// `common/` is the canonical example) simply don't declare a manifest, -// and discovery skips them. -// -// The generator registry is process-global and is not cleared between -// runs in the same process. `installGenerator` fails when the id is -// already taken, whether by a built-in or by a generator an earlier -// addon root installed under the same name. That is the -// first-writer-wins layering we want, so a duplicate id is a silent -// skip rather than an error (a null generator is the only other -// failure it reports, and we never pass one). 
In the test executable -// this also means the first test to install an id wins for the rest -// of the process; two fixtures cannot ship competing generators of -// the same name. -Expected -maybeRegister(std::filesystem::path const& dir) -{ - std::string const yamlPath = files::appendPath( - dir.string(), std::string(metadataFileName)); - if (!files::exists(yamlPath)) - { - return {}; - } - std::string const name = dir.filename().string(); - MRDOCS_TRY(GeneratorManifest manifest, loadGeneratorMetadata(yamlPath)); - (void)installGenerator( - std::make_unique( - name, name, name, - std::move(manifest.escape), - std::move(manifest.extends))); - return {}; -} - -// Scan a single /generator/ directory. -Expected -scanGeneratorDir(std::string_view generatorDir) -{ - namespace fs = std::filesystem; - std::error_code iterEc; - fs::directory_iterator const end{}; - for (fs::directory_iterator it(generatorDir, iterEc); - !iterEc && it != end; - it.increment(iterEc)) - { - std::error_code typeEc; - if (!it->is_directory(typeEc)) - { - continue; - } - MRDOCS_TRY(maybeRegister(it->path())); - } - return {}; + return map; } } // (anon) -Expected +Expected loadGeneratorMetadata(std::string_view yamlPath) { - MRDOCS_TRY(std::string text, files::getFileText(yamlPath)); - llvm::SourceMgr sm; - llvm::yaml::Stream stream(text, sm); - - GeneratorManifest manifest; - llvm::yaml::document_iterator docIt = stream.begin(); - if (docIt == stream.end()) - { - return manifest; - } - llvm::yaml::Node* const rootNode = docIt->getRoot(); - if (rootNode == nullptr || - llvm::isa(rootNode)) - { - // Empty document: file with no content, only comments, or a - // literal `null`. All of these mean "no rules". 
- return manifest; - } - llvm::yaml::MappingNode* const root = - llvm::dyn_cast(rootNode); - if (!root) - { - return Unexpected(formatError( - "{}: top-level YAML node must be a mapping", yamlPath)); - } - - for (llvm::yaml::KeyValueNode& pair : *root) - { - llvm::yaml::ScalarNode* const keyNode = - llvm::dyn_cast_or_null(pair.getKey()); - if (!keyNode) - { - continue; - } - llvm::SmallString<16> keyBuf; - llvm::StringRef const keyStr = keyNode->getValue(keyBuf); - if (keyStr == "extends") - { - llvm::yaml::ScalarNode* const extNode = - llvm::dyn_cast_or_null(pair.getValue()); - if (!extNode) - { - return Unexpected(formatError( - "{}: 'extends' must be a scalar", yamlPath)); - } - llvm::SmallString<32> valBuf; - llvm::StringRef const valStr = extNode->getValue(valBuf); - manifest.extends.assign(valStr.data(), valStr.size()); - continue; - } - if (keyStr == "escape") - { - llvm::yaml::MappingNode* const escNode = - llvm::dyn_cast_or_null(pair.getValue()); - if (!escNode) - { - return Unexpected(formatError( - "{}: 'escape' must be a mapping", yamlPath)); - } - MRDOCS_TRY(populateEscapeFromMapping(*escNode, manifest.escape, yamlPath)); - continue; - } - } - return manifest; + MRDOCS_TRY(GeneratorManifest manifest, loadGeneratorManifest(yamlPath)); + return toEscapeMap(manifest.escape); } Expected discoverDataDrivenGenerators(Config::Settings const& settings) { - std::vector const roots = addon_paths::addonRoots(settings); - for (std::string const& root : roots) + MRDOCS_TRY( + std::vector found, + discoverGeneratorManifests(addon_paths::addonRoots(settings))); + for (DiscoveredManifest const& d : found) { - std::string const dir = files::appendPath(root, "generator"); - if (!files::exists(dir)) + // A manifest that names a `script` is a script-driven generator; + // that flavor is installed by its own discovery pass. + // + // The generator registry is process-global and is not cleared + // between runs in the same process. 
`installGenerator` fails when + // the id is already taken, whether by a built-in or by an + // earlier addon root's generator of the same name. That is the + // first-writer-wins layering we want, so a duplicate id is a + // silent skip rather than an error (a `null` generator is the only + // other failure it reports, and we never pass one). + if (!d.manifest.script) { - continue; + std::string const name(files::getFileName(d.dir)); + (void)installGenerator( + std::make_unique( + name, name, name, + toEscapeMap(d.manifest.escape), + d.manifest.extends)); } - MRDOCS_TRY(scanGeneratorDir(dir)); } return {}; } diff --git a/src/lib/Gen/hbs/DataDrivenGenerators.hpp b/src/lib/Gen/hbs/DataDrivenGenerators.hpp index 2077ec8c63..0158f18396 100644 --- a/src/lib/Gen/hbs/DataDrivenGenerators.hpp +++ b/src/lib/Gen/hbs/DataDrivenGenerators.hpp @@ -33,6 +33,10 @@ namespace mrdocs::hbs { (the built-in `common/` is the canonical example) don't declare a manifest and are skipped. + 3. Its manifest does not name a `script`. A manifest with a `script` + key declares a script-driven generator, which is installed by + `discoverScriptGenerators` instead, so it is skipped here. + For each accepted directory, a `HandlebarsGenerator` is constructed with id, file extension, and display name all set to ``, and installed into the global registry. Escape rules are read from @@ -44,36 +48,14 @@ namespace mrdocs::hbs { Expected discoverDataDrivenGenerators(Config::Settings const& settings); -/** Parsed contents of a single `mrdocs-generator.yml`. -*/ -struct GeneratorManifest -{ - /** Id of a generator whose partials and helpers this one falls back - to, after its own directory but before `common/`. Empty when - the generator stands alone. - */ - std::string extends; - - /** Per-pattern escape rules to apply to rendered output values. - */ - EscapeMap escape; -}; - -/** Load mrdocs-generator.yml and return its parsed contents. - - The file is expected to contain a top-level mapping. 
Recognized - top-level keys: - - - `extends:` (optional scalar) names another generator this one - inherits partials and helpers from. Layouts do not inherit. - - `escape:` (optional mapping) holds a sub-mapping from byte-sequence - keys to replacement strings. Keys may be one or more bytes long; - an empty key is a hard error. +/** Load mrdocs-generator.yml and return the resulting `EscapeMap`. - Unknown top-level keys are ignored so future schema additions are - non-breaking. + A thin convenience over `loadGeneratorManifest` (see + ) that keeps only the escape rules, + for callers that render output and don't need the other manifest + fields. Parsing rules and errors are as documented there. */ -Expected +Expected loadGeneratorMetadata(std::string_view yamlPath); } // namespace mrdocs::hbs diff --git a/src/lib/Gen/script/OutputSink.cpp b/src/lib/Gen/script/OutputSink.cpp new file mode 100644 index 0000000000..510e10ef8d --- /dev/null +++ b/src/lib/Gen/script/OutputSink.cpp @@ -0,0 +1,85 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include "OutputSink.hpp" +#include +#include +#include + +namespace mrdocs::script { + +OutputSink:: +OutputSink(std::string_view outputDir) + : root_(files::normalizePath(outputDir)) +{ +} + +Expected +OutputSink:: +resolveUnderRoot(std::string_view relPath) const +{ + Expected result; + if (relPath.empty()) + { + result = Unexpected(formatError( + "output.write: path must not be empty")); + } + else if (files::isAbsolute(relPath)) + { + result = Unexpected(formatError( + "output.write: path '{}' must be relative", relPath)); + } + else + { + std::string const full = files::normalizePath( + files::appendPath(root_, relPath)); + // Match on a separator boundary so a sibling such as "out-x" is + // not accepted for root "out" (assumes '/' separators; TODO confirm). + std::string const base = root_.ends_with('/') ? root_ : root_ + '/'; + result = full; + if (!(full + '/').starts_with(base)) + { + result = Unexpected(formatError( + "output.write: path '{}' escapes the output directory", + relPath)); + } + } + return result; +} + +Expected +OutputSink:: +write(std::string_view relPath, std::string_view contents) +{ + MRDOCS_TRY(std::string full, resolveUnderRoot(relPath)); + MRDOCS_TRY(files::createDirectory(files::getParentDir(full))); + + std::ofstream os(full, std::ios::binary | std::ios::trunc); + Expected result; + if (!os) + { + result = Unexpected(formatError( + "output.write: cannot open '{}' for writing", full)); + } + else + { + os.write( + contents.data(), + static_cast(contents.size())); + if (!os) + { + result = Unexpected(formatError( + "output.write: failed writing '{}'", full)); + } + } + return result; +} + +} // mrdocs::script diff --git a/src/lib/Gen/script/OutputSink.hpp b/src/lib/Gen/script/OutputSink.hpp new file mode 100644 index 0000000000..808c6b7c8b --- /dev/null +++ b/src/lib/Gen/script/OutputSink.hpp @@ -0,0 +1,63 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#ifndef MRDOCS_LIB_GEN_SCRIPT_OUTPUTSINK_HPP +#define MRDOCS_LIB_GEN_SCRIPT_OUTPUTSINK_HPP + +#include +#include +#include +#include + +namespace mrdocs::script { + +/** The file-writing API handed to a script-driven generator. + + A script-driven generator owns its output structure: it decides + which files to write and what to put in them. This class is the only + door it has to the filesystem, bound into the script as the `write` + method of the `output` object. Every path is resolved under a single + output directory and may not escape it; i.e., a script cannot write + "anywhere on disk". +*/ +class OutputSink +{ + // The output directory, normalized and absolute, without a trailing + // separator. + std::string root_; + + // Resolve `relPath` under the output directory. Reject an empty path, + // an absolute path, or a path that escapes the directory. + Expected + resolveUnderRoot(std::string_view relPath) const; + +public: + /** Construct a sink rooted at the given output directory. + */ + explicit + OutputSink(std::string_view outputDir); + + /** Write `contents` to `relPath`, resolved under the output directory. + + Create any missing parent directories. Reject an absolute path + or one that escapes the output directory. + + @param relPath The destination path, relative to the output + directory. + @param contents The bytes to write. + @return Success, or an error describing why the write failed. 
+ */ + Expected + write(std::string_view relPath, std::string_view contents); +}; + +} // mrdocs::script + +#endif diff --git a/src/lib/Gen/script/ScriptGenerator.cpp b/src/lib/Gen/script/ScriptGenerator.cpp new file mode 100644 index 0000000000..8f1649343d --- /dev/null +++ b/src/lib/Gen/script/ScriptGenerator.cpp @@ -0,0 +1,147 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include "ScriptGenerator.hpp" +#include "ScriptRunner.hpp" +#include "OutputSink.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs::script { + +namespace { + +// Build the read-only corpus DOM a `generate(corpus, output)` entry +// point receives. This mirrors what an extension script sees: a +// `symbols` array of lazy per-symbol objects, each tagged with its flat +// `_id` so a script can form stable per-symbol URLs. 
+dom::Value +buildScriptCorpus(Corpus const& corpus, DomCorpus const& domCorpus) +{ + dom::Array symbols; + for (Symbol const& sym : corpus) + { + dom::Value value = domCorpus.get(sym.id); + value.getObject().set("_id", toBase16Str(sym.id)); + symbols.emplace_back(std::move(value)); + } + dom::Object corpusObj; + corpusObj.set("symbols", std::move(symbols)); + return dom::Value(std::move(corpusObj)); +} + +} // (anon) + +ScriptGenerator:: +ScriptGenerator(std::string id, std::string scriptPath, dom::Object params) + : id_(std::move(id)) + , scriptPath_(std::move(scriptPath)) + , params_(std::move(params)) +{ +} + +std::string_view +ScriptGenerator:: +id() const noexcept +{ + return id_; +} + +std::string_view +ScriptGenerator:: +displayName() const noexcept +{ + return id_; +} + +std::string_view +ScriptGenerator:: +fileExtension() const noexcept +{ + // A script-driven generator names its own output files, so there's + // no single extension. Report the id for diagnostics. + return id_; +} + +Expected +ScriptGenerator:: +build(std::string_view outputPath, Corpus const& corpus) const +{ + OutputSink sink(outputPath); + DomCorpus domCorpus(corpus); + dom::Value corpusValue = buildScriptCorpus(corpus, domCorpus); + dom::Value const config(corpus.config.object()); + dom::Value const params(params_); + Expected result; + if (scriptPath_.ends_with(".lua")) + { + result = runLuaGenerator( + corpusValue, scriptPath_, sink, config, params); + } + else if (scriptPath_.ends_with(".js")) + { + result = runJsGenerator( + corpusValue, scriptPath_, sink, config, params); + } + else + { + result = Unexpected(formatError( + "generator '{}': script '{}' must be a .lua or .js file", + id_, scriptPath_)); + } + return result; +} + +Expected +ScriptGenerator:: +buildOne(std::ostream&, Corpus const&) const +{ + return Unexpected(formatError( + "generator '{}' is script-driven and does not support " + "single-page output", id_)); +} + +Expected +discoverScriptGenerators(Config::Settings 
const& settings) +{ + MRDOCS_TRY( + std::vector found, + discoverGeneratorManifests(hbs::addon_paths::addonRoots(settings))); + for (DiscoveredManifest const& d : found) + { + // Only manifests that name a `script` are script-driven + // generators; the data-driven pass installs the rest. + // First-writer-wins, exactly as the data-driven pass: a + // duplicate id is a silent skip, and we never pass a `null`. + if (d.manifest.script) + { + std::string const name(files::getFileName(d.dir)); + std::string scriptPath = + files::appendPath(d.dir, *d.manifest.script); + (void)installGenerator( + std::make_unique( + name, std::move(scriptPath), d.manifest.params)); + } + } + return {}; +} + +} // mrdocs::script diff --git a/src/lib/Gen/script/ScriptGenerator.hpp b/src/lib/Gen/script/ScriptGenerator.hpp new file mode 100644 index 0000000000..ff7a2aa44b --- /dev/null +++ b/src/lib/Gen/script/ScriptGenerator.hpp @@ -0,0 +1,101 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#ifndef MRDOCS_LIB_GEN_SCRIPT_SCRIPTGENERATOR_HPP +#define MRDOCS_LIB_GEN_SCRIPT_SCRIPTGENERATOR_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs::script { + +/** A generator whose output is produced by a user script. + + A script-driven generator hands the whole emit to a Lua or + JavaScript entry point of the form + `generate(corpus, output, config, params)`: the script traverses the + corpus and writes whatever files it wants through the `output` + object, optionally reading the resolved `config` and its own + `params`. 
Because the script owns the output structure, it can
+    produce shapes the per-page generators cannot, such as a single
+    artifact aggregated across all symbols (a search index, for example).
+*/
+class ScriptGenerator
+    : public Generator
+{
+    // The generator id; the display name and the reported file
+    // extension are this same string.
+    std::string id_;
+    // The absolute path to the Lua or JavaScript entry script.
+    std::string scriptPath_;
+    // The generator's own parameters, from the manifest's `params`
+    // mapping; passed to the entry script as its `params` argument.
+    dom::Object params_;
+
+public:
+    /** Construct a script-driven generator.
+
+        @param id The generator id, used to select it on the command
+        line.
+        @param scriptPath The absolute path to the entry script.
+        @param params The generator's own parameters, from its manifest.
+    */
+    ScriptGenerator(
+        std::string id,
+        std::string scriptPath,
+        dom::Object params);
+
+    /** Return the generator id.
+    */
+    std::string_view
+    id() const noexcept override;
+
+    /** Return the display name, which is the generator id.
+    */
+    std::string_view
+    displayName() const noexcept override;
+
+    /** Return the generator id in place of a file extension.
+
+        A script names its own output files, so there is no single
+        extension to report.
+    */
+    std::string_view
+    fileExtension() const noexcept override;
+
+    /** Run the entry script, which owns the whole emit.
+
+        @param outputPath The directory the script's output is written
+        under.
+        @param corpus The corpus handed to the script.
+        @return An error if the script is neither a `.lua` nor a `.js`
+        file, or if running it fails.
+    */
+    Expected
+    build(
+        std::string_view outputPath,
+        Corpus const& corpus) const override;
+
+    /** Reject single-page output.
+
+        A script-driven generator owns its output structure and writes
+        whatever files it wants, so there is no single-stream form.
+
+        @return Always an error.
+    */
+    Expected
+    buildOne(
+        std::ostream& os,
+        Corpus const& corpus) const override;
+};
+
+/** Discover script-driven generators and install them.
+
+    For each configured addon root, walk the immediate subdirectories of
+    the root's `generator/` directory. A subdirectory becomes a
+    script-driven generator when its `mrdocs-generator.yml` names an
+    entry script. The generator id, used to select it on the command
+    line, is the subdirectory name.
+
+    Should be called once after the configuration is resolved and before
+    a generator is looked up by id.
+
+    @param settings The settings whose addon roots are searched.
+    @return An error if manifest discovery fails.
+*/
+Expected
+discoverScriptGenerators(Config::Settings const& settings);
+
+} // mrdocs::script
+
+#endif
diff --git a/src/lib/Gen/script/ScriptGeneratorJs.cpp b/src/lib/Gen/script/ScriptGeneratorJs.cpp
new file mode 100644
index 0000000000..cd34361e29
--- /dev/null
+++ b/src/lib/Gen/script/ScriptGeneratorJs.cpp
@@ -0,0 +1,102 @@
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com)
+//
+// Official repository: https://github.com/cppalliance/mrdocs
+//
+
+#include "ScriptRunner.hpp"
+#include "OutputSink.hpp"
+
+#include
+#include
+#include
+
+#include
+#include
+
+namespace mrdocs::script {
+
+namespace {
+
+// Build the `output` object passed as the second argument to `generate`.
+// The JavaScript wrapper exposes a `dom::Function` as a callable proxy,
+// so unlike the Lua side this needs no escape hatch: `write` is a variadic
+// invocable that routes to the sink. The sink outlives the call (it is
+// owned by the caller of `runJsGenerator`, which receives it by
+// reference), so capturing it by pointer is safe.
+dom::Object
+buildJsOutputApi(OutputSink& sink)
+{
+    OutputSink* sinkPtr = &sink;
+    dom::Object api;
+    api.set("write", dom::Value(dom::makeVariadicInvocable(
+        [sinkPtr](dom::Array const& args) -> Expected
+        {
+            // Only the first two arguments are read; any further
+            // arguments are ignored.
+            if (args.size() < 2)
+            {
+                return Unexpected(Error(
+                    "output.write: expected (path, contents)"));
+            }
+            dom::Value const path = args.get(0);
+            dom::Value const body = args.get(1);
+            if (!path.isString() || !body.isString())
+            {
+                return Unexpected(Error(
+                    "output.write: path and contents must be strings"));
+            }
+            // A sink failure is handed back to the script's caller
+            // unchanged.
+            Expected result = sinkPtr->write(
+                path.getString().get(), body.getString().get());
+            if (!result)
+            {
+                return Unexpected(result.error());
+            }
+            return dom::Value();
+        })));
+    return api;
+}
+
+} // (anon)
+
+// Evaluate the script in a fresh JavaScript context and call its global
+// `generate(corpus, output, config, params)`.
+//
+// A failure to read the script file is returned as is; every later
+// failure is reported as "generator '<script path>': <cause>".
+Expected
+runJsGenerator(
+    dom::Value const& corpus,
+    std::string const& scriptPath,
+    OutputSink& sink,
+    dom::Value const& config,
+    dom::Value const& params)
+{
+    js::Context ctx;
+    js::Scope scope(ctx);
+
+    MRDOCS_TRY(std::string script, files::getFileText(scriptPath));
+    if (Expected exp = scope.script(script); !exp)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, exp.error().message()));
+    }
+
+    // Unlike an extension, a generator must define `generate`.
+    Expected fn = scope.getGlobal("generate");
+    if (!fn || !fn->isFunction())
+    {
+        return Unexpected(formatError(
+            "generator '{}': script must define a 'generate' function",
+            scriptPath));
+    }
+
+    // The value `generate` returns is not used; only failure matters.
+    Expected result =
+        fn->call(corpus, buildJsOutputApi(sink), config, params);
+    if (!result)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, result.error().message()));
+    }
+    return {};
+}
+
+} // mrdocs::script
diff --git a/src/lib/Gen/script/ScriptGeneratorLua.cpp b/src/lib/Gen/script/ScriptGeneratorLua.cpp
new file mode 100644
index 0000000000..306c2273f4
--- /dev/null
+++ b/src/lib/Gen/script/ScriptGeneratorLua.cpp
@@ -0,0 +1,146 @@
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com)
+//
+// Official repository: https://github.com/cppalliance/mrdocs
+//
+
+#include "ScriptRunner.hpp"
+#include "OutputSink.hpp"
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+extern "C" {
+#include
+#include
+}
+
+namespace mrdocs::script {
+
+namespace {
+
+// Lua adapter for `OutputSink::write`, bound as `output.write`.
+//
+// Expects exactly the Lua types `(string path, string contents)` as its
+// first two arguments and returns no values to the script. On failure
+// it raises a Lua error; the host turns that into an `Unexpected` when
+// `lua_pcall` returns non-OK. The sink pointer is carried as the
+// closure's single upvalue.
+//
+// Raising a Lua error unwinds the C stack and, when Lua is built as C,
+// does so with `longjmp`, which runs no C++ destructors. The result of
+// the write owns the error message, so it is confined to an inner scope
+// that has been left by the time `lua_error` is called.
+int
+luaWrite(lua_State* L)
+{
+    OutputSink* sink = static_cast<OutputSink*>(
+        lua_touserdata(L, lua_upvalueindex(1)));
+    // No C++ object with a destructor is alive here, so raising
+    // directly is fine.
+    if (lua_type(L, 1) != LUA_TSTRING ||
+        lua_type(L, 2) != LUA_TSTRING)
+    {
+        return luaL_error(L,
+            "output.write: expected (string path, string contents)");
+    }
+    std::size_t pathLen = 0;
+    char const* pathData = lua_tolstring(L, 1, &pathLen);
+    std::size_t bodyLen = 0;
+    char const* bodyData = lua_tolstring(L, 2, &bodyLen);
+
+    bool failed = false;
+    {
+        auto result = sink->write(
+            std::string_view(pathData, pathLen),
+            std::string_view(bodyData, bodyLen));
+        if (!result)
+        {
+            // Build the same message `luaL_error(L, "%s", ...)` would:
+            // the caller's position followed by the error text. The
+            // text is copied into a Lua-owned string, so nothing on the
+            // C++ side has to outlive this scope. (These calls can
+            // themselves raise on out-of-memory; only that path still
+            // unwinds past `result`.)
+            luaL_where(L, 1);
+            lua_pushstring(L, result.error().message().c_str());
+            lua_concat(L, 2);
+            failed = true;
+        }
+    }
+    if (failed)
+    {
+        // The message is on top of the stack; `lua_error` never returns.
+        return lua_error(L);
+    }
+    return 0;
+}
+
+// Build the `output` global table and bind its `write` method.
+//
+// We register the C closure directly on the raw `lua_State` (via the
+// `Context::nativeState()` escape hatch) because the wrapper cannot carry
+// a native callable through a DOM value: `domValue_push` has no function
+// case. The closure carries the sink pointer as its single upvalue.
+void
+registerLuaOutputApi(lua_State* L, OutputSink& sink)
+{
+    // The table that becomes the `output` global.
+    lua_newtable(L);
+
+    // `write` is a C closure whose single upvalue is the sink pointer.
+    lua_pushlightuserdata(L, &sink);
+    lua_pushcclosure(L, &luaWrite, 1);
+    lua_setfield(L, -2, "write");
+
+    // Pops the table, leaving the stack as it was on entry.
+    lua_setglobal(L, "output");
+}
+
+} // (anon)
+
+// Evaluate the script in a fresh Lua context and call its global
+// `generate(corpus, output, config, params)`.
+//
+// A failure to read the script file is returned as is; every later
+// failure is reported as "generator '<script path>': <cause>", the same
+// shape `runJsGenerator` uses.
+Expected
+runLuaGenerator(
+    dom::Value const& corpus,
+    std::string const& scriptPath,
+    OutputSink& sink,
+    dom::Value const& config,
+    dom::Value const& params)
+{
+    lua::Context ctx;
+
+    // Register the `output` global before loading the script so
+    // top-level code can reference it, and so we can pass it as the
+    // second argument below.
+    registerLuaOutputApi(
+        static_cast<lua_State*>(ctx.nativeState()), sink);
+
+    lua::Scope scope(ctx);
+    MRDOCS_TRY(std::string script, files::getFileText(scriptPath));
+
+    // Load the chunk, then run it so that its top-level code executes
+    // and can define `generate`.
+    auto chunk = scope.loadChunk(script, scriptPath);
+    if (!chunk)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, chunk.error().message()));
+    }
+    auto chunkResult = chunk->call();
+    if (!chunkResult)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, chunkResult.error().message()));
+    }
+
+    // Fetch the `output` global so it can be passed as the second
+    // argument. It must outlive the `generate` call below, so hold it
+    // here rather than moving it out.
+    auto output = scope.getGlobal("output");
+    if (!output)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, output.error().message()));
+    }
+
+    // A generator must define a global `generate` function, the same
+    // shape JavaScript requires. Accepting only the named global (rather
+    // than also a function the chunk returns) keeps one convention across
+    // both languages and leaves room for a script to expose more than one
+    // named entry point later.
+    auto generateFn = scope.getGlobal("generate");
+    if (!generateFn || !generateFn->isFunction())
+    {
+        return Unexpected(formatError(
+            "generator '{}': script must define a 'generate' function",
+            scriptPath));
+    }
+
+    // The value `generate` returns is not used; only failure matters.
+    lua::Function fn(std::move(*generateFn));
+    auto result = fn.call(corpus, *output, config, params);
+    if (!result)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, result.error().message()));
+    }
+    return {};
+}
+
+} // mrdocs::script
diff --git a/src/lib/Gen/script/ScriptRunner.hpp b/src/lib/Gen/script/ScriptRunner.hpp
new file mode 100644
index 0000000000..a1dae4ac4f
--- /dev/null
+++ b/src/lib/Gen/script/ScriptRunner.hpp
@@ -0,0 +1,68 @@
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com)
+//
+// Official repository: https://github.com/cppalliance/mrdocs
+//
+
+#ifndef MRDOCS_LIB_GEN_SCRIPT_SCRIPTRUNNER_HPP
+#define MRDOCS_LIB_GEN_SCRIPT_SCRIPTRUNNER_HPP
+
+#include
+#include
+#include
+#include
+
+namespace mrdocs::script {
+
+class OutputSink;
+
+/** Run a Lua entry script's `generate(corpus, output, config, params)`.
+
+    Build a Lua context, expose the output writer as the `output` global,
+    evaluate the script, and call its `generate` function with the
+    corpus, the writer, the resolved configuration, and the generator's
+    own parameters. A missing `generate` function is an error.
+
+    @param corpus The read-only corpus DOM passed as the first argument.
+    @param scriptPath The absolute path to the Lua entry script.
+    @param sink The file-writing API exposed to the script.
+    @param config The resolved configuration DOM, as templates see it.
+    @param params The generator's own parameters, from its manifest.
+    @return An error if the script cannot be read, loaded, or run, if it
+    does not define a global `generate` function, or if `generate` fails.
+*/
+Expected
+runLuaGenerator(
+    dom::Value const& corpus,
+    std::string const& scriptPath,
+    OutputSink& sink,
+    dom::Value const& config,
+    dom::Value const& params);
+
+/** Run a JS entry script's `generate(corpus, output, config, params)`.
+
+    Build a JavaScript context, evaluate the script, and call its
+    `generate` function with the corpus, an `output` object whose
+    `write` method routes to the writer, the resolved configuration, and
+    the generator's own parameters. A missing `generate` function is an
+    error.
+
+    @param corpus The read-only corpus DOM passed as the first argument.
+    @param scriptPath The absolute path to the JavaScript entry script.
+    @param sink The file-writing API exposed to the script.
+    @param config The resolved configuration DOM, as templates see it.
+    @param params The generator's own parameters, from its manifest.
+    @return An error if the script cannot be read or evaluated, if it
+    does not define a global `generate` function, or if `generate` fails.
+*/
+Expected
+runJsGenerator(
+    dom::Value const& corpus,
+    std::string const& scriptPath,
+    OutputSink& sink,
+    dom::Value const& config,
+    dom::Value const& params);
+
+} // mrdocs::script
+
+#endif
diff --git a/src/test/TestRunner.cpp b/src/test/TestRunner.cpp
index 4bf74862bb..724696b1a5 100644
--- a/src/test/TestRunner.cpp
+++ b/src/test/TestRunner.cpp
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -189,6 +190,13 @@ handleFile(
     {
         return report::error("{}: \"{}\"", discovered.error(), filePath);
     }
+    Expected scriptsDiscovered =
+        script::discoverScriptGenerators(loaded->settings);
+    if (!scriptsDiscovered)
+    {
+        return report::error(
+            "{}: \"{}\"", scriptsDiscovered.error(), filePath);
+    }
 
     // The generator(s) come from the test's merged configuration. Accept
     // the comma-separated form and fall back to a single xml run.
diff --git a/src/test/lib/Gen/hbs/DataDrivenGenerators.cpp b/src/test/lib/Gen/hbs/DataDrivenGenerators.cpp index 299a431ce5..f1edc1dae1 100644 --- a/src/test/lib/Gen/hbs/DataDrivenGenerators.cpp +++ b/src/test/lib/Gen/hbs/DataDrivenGenerators.cpp @@ -59,12 +59,12 @@ struct DataDrivenGeneratorsTest files::appendPath(td.path(), "g.yml"); writeFile(path, ""); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(result.has_value()); if (result) { // Empty map: every char passes through. - BOOST_TEST(applyEscape(result->escape, "abc*_") == "abc*_"); + BOOST_TEST(applyEscape(*result, "abc*_") == "abc*_"); } } @@ -79,11 +79,11 @@ struct DataDrivenGeneratorsTest // explicitly tolerates extra keys for forward compatibility. writeFile(path, "displayName: Markdown\n"); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(result.has_value()); if (result) { - BOOST_TEST(applyEscape(result->escape, "abc") == "abc"); + BOOST_TEST(applyEscape(*result, "abc") == "abc"); } } @@ -101,12 +101,12 @@ struct DataDrivenGeneratorsTest " '*': '\\*'\n" " '_': '\\_'\n"); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(result.has_value()); if (result) { - BOOST_TEST(applyEscape(result->escape, "*foo_bar*") == "\\*foo\\_bar\\*"); - BOOST_TEST(applyEscape(result->escape, "no specials") == "no specials"); + BOOST_TEST(applyEscape(*result, "*foo_bar*") == "\\*foo\\_bar\\*"); + BOOST_TEST(applyEscape(*result, "no specials") == "no specials"); } } @@ -120,7 +120,7 @@ struct DataDrivenGeneratorsTest // Top-level scalar is rejected. writeFile(path, "just a string\n"); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(!result.has_value()); } @@ -134,7 +134,7 @@ struct DataDrivenGeneratorsTest // 'escape:' must be a mapping, not a scalar. 
writeFile(path, "escape: nope\n"); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(!result.has_value()); } @@ -154,15 +154,15 @@ struct DataDrivenGeneratorsTest " '**': ''\n" " '*': ''\n"); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(result.has_value()); if (result) { - BOOST_TEST(applyEscape(result->escape, "**foo**") == "foo"); - BOOST_TEST(applyEscape(result->escape, "*bar*") == "bar"); + BOOST_TEST(applyEscape(*result, "**foo**") == "foo"); + BOOST_TEST(applyEscape(*result, "*bar*") == "bar"); // A leftover lone `*` after a `**` match falls back to the // single-byte rule. - BOOST_TEST(applyEscape(result->escape, "***") == ""); + BOOST_TEST(applyEscape(*result, "***") == ""); } } @@ -180,11 +180,11 @@ struct DataDrivenGeneratorsTest "escape:\n" " '\xC3\xA9': 'e'\n"); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(result.has_value()); if (result) { - BOOST_TEST(applyEscape(result->escape, "caf\xC3\xA9") == "cafe"); + BOOST_TEST(applyEscape(*result, "caf\xC3\xA9") == "cafe"); } } @@ -199,7 +199,7 @@ struct DataDrivenGeneratorsTest "escape:\n" " '': 'x'\n"); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(!result.has_value()); } @@ -211,7 +211,7 @@ struct DataDrivenGeneratorsTest std::string const path = files::appendPath(td.path(), "does-not-exist.yml"); - Expected result = loadGeneratorMetadata(path); + Expected result = loadGeneratorMetadata(path); BOOST_TEST(!result.has_value()); } diff --git a/src/tool/GenerateAction.cpp b/src/tool/GenerateAction.cpp index 7789f7385b..50e148b8dc 100644 --- a/src/tool/GenerateAction.cpp +++ b/src/tool/GenerateAction.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -49,14 +50,16 @@ DoGenerateAction( // 
-------------------------------------------------------------- // - // Discover data-driven generators + // Discover addon-defined generators // // -------------------------------------------------------------- - // Each /generator// directory that ships its own - // Handlebars layouts is registered as an additional generator - // (subject to id and layout-template checks) before the user- - // requested generator is looked up below. + // Each /generator// directory that ships a + // mrdocs-generator.yml is registered as an additional generator + // before the user-requested generator is looked up below. A manifest + // that declares `escape` rules is a data-driven Handlebars generator; + // a manifest that names a `script` is a script-driven generator. MRDOCS_TRY(hbs::discoverDataDrivenGenerators(config->settings())); + MRDOCS_TRY(script::discoverScriptGenerators(config->settings())); // -------------------------------------------------------------- // From ef56f950f79f1f07f795a32cf9e5d4d3b1b916a9 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 3 Jun 2026 11:59:34 +0200 Subject: [PATCH 03/12] test: add unit tests for script-driven generators This covers discovery (a script manifest installs a `ScriptGenerator`), the output writer (writes under the root, rejects absolute and escaping paths), and both runners against a synthetic corpus, asserting the file they emit. A regression test reads a symbol with no name field, exercising the `Undefined`-to-`nil` marshaling. --- src/test/lib/Gen/script/ScriptGenerator.cpp | 487 ++++++++++++++++++++ 1 file changed, 487 insertions(+) create mode 100644 src/test/lib/Gen/script/ScriptGenerator.cpp diff --git a/src/test/lib/Gen/script/ScriptGenerator.cpp b/src/test/lib/Gen/script/ScriptGenerator.cpp new file mode 100644 index 0000000000..f6d32f3911 --- /dev/null +++ b/src/test/lib/Gen/script/ScriptGenerator.cpp @@ -0,0 +1,487 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs::script { + +namespace { + +// Write `content` verbatim to `path`. Pre-existing files are truncated. +void +writeFile(std::string_view path, std::string_view content) +{ + std::ofstream os(std::string{path}, std::ios::binary | std::ios::trunc); + os.write(content.data(), + static_cast(content.size())); +} + +// The `config` and `params` arguments a generator receives. Tests that +// exercise only the `corpus` and `output` path pass empty objects. +dom::Value +emptyObject() +{ + return dom::Value(dom::Object()); +} + +// A minimal `Config` whose `object()` returns a canned DOM object. It +// lets a `build()`-level test assert what `generate` sees as `config` +// without creating a real `ConfigImpl`. +struct StubConfig + : Config +{ + Config::Settings settings_; + dom::Object configObject; + mutable ThreadPool pool; + + ThreadPool& + threadPool() const noexcept override + { + return pool; + } + Config::Settings const& + settings() const noexcept override + { + return settings_; + } + dom::Object const& + object() const override + { + return configObject; + } +}; + +// An empty corpus: `build()` iterates no symbols, so it never reflects +// a real `Symbol`; it carries only the `Config` that `build()` reads +// `config` from. 
+struct StubCorpus + : Corpus +{ + explicit + StubCorpus(Config const& config) + : Corpus(config) + { + } + + static Symbol const* + noNext(Corpus const*, Symbol const*) + { + return nullptr; + } + iterator + begin() const noexcept override + { + return iterator(this, nullptr, &noNext); + } + iterator + end() const noexcept override + { + return iterator(this, nullptr, &noNext); + } + std::size_t + size() const noexcept override + { + return 0; + } + Expected + lookup(SymbolID const&, std::string_view) const override + { + return Unexpected(Error("stub corpus has no symbols")); + } + Symbol const* + find(SymbolID const&) const noexcept override + { + return nullptr; + } + void + qualifiedName(Symbol const&, std::string&) const override + { + } + void + qualifiedName(Symbol const&, SymbolID const&, std::string&) const override + { + } +}; + +// A two-symbol corpus shaped like what `generate(corpus, output)` sees: +// a `symbols` array whose entries carry a `name` and a flat `_id`. +dom::Value +makeCorpus() +{ + dom::Object foo; + foo.set("name", std::string("foo")); + foo.set("_id", std::string("0001")); + dom::Object bar; + bar.set("name", std::string("bar")); + bar.set("_id", std::string("0002")); + dom::Array symbols; + symbols.emplace_back(dom::Value(std::move(foo))); + symbols.emplace_back(dom::Value(std::move(bar))); + dom::Object corpus; + corpus.set("symbols", std::move(symbols)); + return dom::Value(std::move(corpus)); +} + +// A Lua generator that emits one aggregated artifact across all symbols, +// the canonical thing a per-page generator cannot produce. +constexpr std::string_view luaIndex = R"LUA( +function generate(corpus, output) + local parts = {} + for _, sym in ipairs(corpus.symbols) do + parts[#parts + 1] = '{"name":"' .. sym.name .. '","id":"' .. sym._id .. '"}' + end + output.write("search-index.json", "[" .. table.concat(parts, ",") .. "]") +end +)LUA"; + +// The same generator in JavaScript, using the global-function shape. 
+constexpr std::string_view jsIndex = R"JS( +function generate(corpus, output) { + var parts = []; + for (var i = 0; i < corpus.symbols.length; i++) { + var s = corpus.symbols[i]; + parts.push('{"name":"' + s.name + '","id":"' + s._id + '"}'); + } + output.write("search-index.json", "[" + parts.join(",") + "]"); +} +)JS"; + +constexpr std::string_view expectedJson = + R"([{"name":"foo","id":"0001"},{"name":"bar","id":"0002"}])"; + +} // (anon) + +struct ScriptGeneratorTest +{ + // + // OutputSink + // + + void + testSinkWritesUnderRoot() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + OutputSink sink(td.path()); + // A nested relative path is created and written. + BOOST_TEST(sink.write("a/b/out.txt", "hello").has_value()); + Expected got = + files::getFileText(files::appendPath(td.path(), "a", "b", "out.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "hello"); + } + } + + void + testSinkRejectsAbsolutePath() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + OutputSink sink(td.path()); + // An absolute path is rejected even when it points inside root. + std::string const abs = files::appendPath(td.path(), "x.txt"); + BOOST_TEST(!sink.write(abs, "no").has_value()); + } + + void + testSinkRejectsEscape() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + OutputSink sink(td.path()); + // A path that climbs out of the output directory is rejected. 
+ BOOST_TEST(!sink.write("../escaped.txt", "no").has_value()); + } + + // + // runLuaGenerator / runJsGenerator + // + + void + testLuaGenerator() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.lua"); + writeFile(script, luaIndex); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + Expected result = runLuaGenerator( + makeCorpus(), script, sink, emptyObject(), emptyObject()); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "search-index.json")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == expectedJson); + } + } + + void + testJsGenerator() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.js"); + writeFile(script, jsIndex); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + Expected result = runJsGenerator( + makeCorpus(), script, sink, emptyObject(), emptyObject()); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "search-index.json")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == expectedJson); + } + } + + void + testLuaReadsMissingFieldAsNil() + { + // A symbol object without a `name` field: `get("name")` yields + // `Undefined`, which Lua must marshal as `nil` rather than abort. + // The global namespace has no name, so a real corpus hits this. 
+ dom::Object noName; + noName.set("_id", std::string("0009")); + dom::Array symbols; + symbols.emplace_back(dom::Value(std::move(noName))); + dom::Object corpusObj; + corpusObj.set("symbols", std::move(symbols)); + dom::Value const corpus(std::move(corpusObj)); + + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.lua"); + writeFile(script, R"LUA( +function generate(corpus, output) + local s = corpus.symbols[1] + output.write("out.txt", "name=" .. (s.name or "NONE")) +end +)LUA"); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + Expected result = runLuaGenerator( + corpus, script, sink, emptyObject(), emptyObject()); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "out.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "name=NONE"); + } + } + + void + testMissingGenerateIsError() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "empty.lua"); + writeFile(script, "-- this script defines no generate function\n"); + OutputSink sink(files::appendPath(td.path(), "out")); + // A generator must define `generate`; its absence is an error. + BOOST_TEST(!runLuaGenerator( + makeCorpus(), script, sink, emptyObject(), emptyObject()) + .has_value()); + } + + void + testLuaReceivesConfigAndParams() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.lua"); + writeFile(script, R"LUA( +function generate(corpus, output, config, params) + output.write("o.txt", tostring(config.multipage) .. "|" .. 
params.greeting) +end +)LUA"); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + dom::Object config; + config.set("multipage", true); + dom::Object params; + params.set("greeting", std::string("hi")); + Expected result = runLuaGenerator( + makeCorpus(), script, sink, + dom::Value(std::move(config)), dom::Value(std::move(params))); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "o.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "true|hi"); + } + } + + void + testJsReceivesConfigAndParams() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.js"); + writeFile(script, R"JS( +function generate(corpus, output, config, params) { + output.write("o.txt", String(config.multipage) + "|" + params.greeting); +} +)JS"); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + dom::Object config; + config.set("multipage", true); + dom::Object params; + params.set("greeting", std::string("hi")); + Expected result = runJsGenerator( + makeCorpus(), script, sink, + dom::Value(std::move(config)), dom::Value(std::move(params))); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "o.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "true|hi"); + } + } + + // The full `build()` path: `config` comes from + // `corpus.config.object()` and `params` from the generator's + // manifest, both reaching the script. + void + testBuildPassesConfigAndParams() + { + ScopedTempDirectory td("mrdocs-scriptgen-build"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.lua"); + writeFile(script, R"LUA( +function generate(corpus, output, config, params) + output.write("o.txt", tostring(config.multipage) .. "|" .. 
params.greeting) +end +)LUA"); + + StubConfig config; + config.configObject.set("multipage", true); + StubCorpus corpus(config); + + dom::Object params; + params.set("greeting", std::string("hi")); + ScriptGenerator gen("build-selftest", script, std::move(params)); + + std::string const outDir = files::appendPath(td.path(), "out"); + Expected result = gen.build(outDir, corpus); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "o.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "true|hi"); + } + } + + // + // discoverScriptGenerators + // + + void + testDiscoveryRegistersScriptGenerator() + { + ScopedTempDirectory td("mrdocs-scriptgen-disc"); + BOOST_TEST(td); + // Lay out /generator// with a script manifest. The id + // is unusual so it does not collide with the process-global + // registry shared across the test binary. + std::string const id = "mrdocs-script-generator-selftest"; + std::string const genDir = + files::appendPath(td.path(), "generator", id); + BOOST_TEST(files::createDirectory(genDir).has_value()); + writeFile( + files::appendPath(genDir, "mrdocs-generator.yml"), + "script: g.lua\n"); + writeFile(files::appendPath(genDir, "g.lua"), luaIndex); + + Config::Settings settings; + settings.addons = std::string(td.path()); + BOOST_TEST(discoverScriptGenerators(settings).has_value()); + BOOST_TEST(findGenerator(id) != nullptr); + } + + void + testManifestParamsParsed() + { + ScopedTempDirectory td("mrdocs-scriptgen-manifest"); + BOOST_TEST(td); + std::string const yml = + files::appendPath(td.path(), "mrdocs-generator.yml"); + writeFile(yml, "script: g.lua\nparams:\n greeting: hi\n"); + Expected manifest = loadGeneratorManifest(yml); + BOOST_TEST(manifest.has_value()); + if (manifest) + { + dom::Value const greeting = manifest->params.get("greeting"); + BOOST_TEST(greeting.isString()); + if (greeting.isString()) + { + BOOST_TEST(greeting.getString().get() == "hi"); + } + } + } + + 
void + run() + { + testSinkWritesUnderRoot(); + testSinkRejectsAbsolutePath(); + testSinkRejectsEscape(); + testLuaGenerator(); + testJsGenerator(); + testLuaReadsMissingFieldAsNil(); + testMissingGenerateIsError(); + testLuaReceivesConfigAndParams(); + testJsReceivesConfigAndParams(); + testBuildPassesConfigAndParams(); + testDiscoveryRegistersScriptGenerator(); + testManifestParamsParsed(); + } +}; + +TEST_SUITE( + ScriptGeneratorTest, + "clang.mrdocs.script.ScriptGenerator"); + +} // namespace mrdocs::script From 73ba40110c58827307688c67aa80c315a3898b4f Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 3 Jun 2026 13:01:07 +0200 Subject: [PATCH 04/12] docs: document script-driven generators --- docs/modules/ROOT/nav.adoc | 1 + .../extensions/data-driven-generators.adoc | 2 + .../extensions/script-driven-generators.adoc | 76 +++++++++++++++++++ docs/mrdocs.schema.json | 2 +- src/lib/ConfigOptions.json | 2 +- 5 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 docs/modules/ROOT/pages/extensions/script-driven-generators.adoc diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index a7f35d774c..109e8db934 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -26,6 +26,7 @@ ** xref:extensions/corpus-extensions.adoc[Corpus Extensions] ** xref:extensions/handlebars-extensions.adoc[Handlebars Extensions] ** xref:extensions/data-driven-generators.adoc[Data-Driven Generators] +** xref:extensions/script-driven-generators.adoc[Script-Driven Generators] ** xref:extensions/antora.adoc[Antora Extensions] ** xref:extensions/as-library.adoc[Mr.Docs as a Library] ** xref:extensions/dom-reference.adoc[DOM Reference] diff --git a/docs/modules/ROOT/pages/extensions/data-driven-generators.adoc b/docs/modules/ROOT/pages/extensions/data-driven-generators.adoc index f9bf461fc1..479e5c1cf9 100644 --- a/docs/modules/ROOT/pages/extensions/data-driven-generators.adoc +++ 
b/docs/modules/ROOT/pages/extensions/data-driven-generators.adoc @@ -246,4 +246,6 @@ include::example$data-driven-generators/tex/simple.tex[] ---- ====== +To build the output structure yourself, e.g. one file per namespace or a single aggregated artifact like a search index, hand the whole emit to a script instead of rendering one page per symbol. See xref:extensions/script-driven-generators.adoc[Script-driven generators]. + diff --git a/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc b/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc new file mode 100644 index 0000000000..042bf1acb9 --- /dev/null +++ b/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc @@ -0,0 +1,76 @@ += Script-driven generators + +A data-driven generator renders one page per symbol from templates. When you need a different output structure, e.g. one file per namespace, or a single artifact aggregated across every symbol, such as a search index, a template generator cannot express it, because the page-per-symbol shape is fixed by the host. A script-driven generator hands the whole emit to a Lua or JavaScript script, which traverses the corpus and writes whatever files it wants. No C++ and no templates are involved. + +A generator directory is script-driven when its mrdocs-generator.yml names an entry script: + +[source,yaml] +---- +script: generator.lua +---- + +The `script` key holds a path to a Lua (.lua) or JavaScript (.js) file, relative to the generator directory. Naming a script is what distinguishes the two flavors: a manifest with a `script` key is script-driven, otherwise the directory is a data-driven (template) generator. As with template generators, the directory name is the generator id you select with `--generator`. + +== The `generate` entry point + +The script defines a single entry point, a function named `generate`: + +[source,lua] +---- +function generate(corpus, output, config, params) + -- ... 
+end +---- + +`corpus.symbols` is the array of every symbol. Each symbol carries the same fields the template and helper layers see, plus a flat `_id` string suitable as a stable per-symbol URL fragment. + +`output.write(relativePath, contents)` writes one file under the configured output directory, which is the path specified with `--output` on the command line, or with the `output` key in the config file; that's the same location the built-in generators write to. The path is resolved relative to that directory and may not escape it; an absolute path or one that climbs above the output directory is rejected. Parent directories are created as needed. + +Because the script owns the output, it also owns what a per-page generator would otherwise do for it: the URLs it emits, and any escaping of the content it writes. The host does not apply an escape map to a script-driven generator's output. + +`config` is the resolved configuration: the same object the templates receive, holding every value from the config file and the command line. See xref:configuration/reference.adoc[the configuration reference] for the available keys. + +`params` is this generator's own parameters, read from the optional `params:` mapping in its mrdocs-generator.yml. A scalar value is a string (a script coerces numbers or booleans itself); nested mappings and sequences become objects and arrays. It is an empty object when the manifest declares no parameters. For example: + +[source,yaml] +---- +script: generator.lua +params: + title: API Reference +---- + +makes `params.title` available to the script. + +`config` and `params` are trailing arguments, so a generator that needs neither can omit them, and use `function generate(corpus, output)`. + +Both Lua and JavaScript look up `generate` as a global function, so a generator must define one; a value the script returns is not used. 
Requiring the named global keeps one convention across the two languages and leaves room for a script to expose more than one named entry point later. + +Unlike a corpus-transform extension, whose hook is optional, a generator must define a `generate` function: selecting the generator is a request for output, so a missing entry point is an error. + +== Example: a search index + +This generator emits a single search-index.json aggregating every symbol, an artifact no per-page generator can produce: + +[source,lua] +---- +-- Quote a string as a JSON value. +local function json_string(s) + s = s:gsub('\\', '\\\\'):gsub('"', '\\"') + return '"' .. s .. '"' +end + +function generate(corpus, output) + local entries = {} + for _, sym in ipairs(corpus.symbols) do + local name = sym.name or "" + if name ~= "" then + entries[#entries + 1] = + '{"name":' .. json_string(name) .. + ',"url":' .. json_string(sym._id .. ".html") .. "}" + end + end + output.write( + "search-index.json", + "[" .. table.concat(entries, ",") .. "]") +end +---- diff --git a/docs/mrdocs.schema.json b/docs/mrdocs.schema.json index ce32a2a957..d95bf2a4e6 100644 --- a/docs/mrdocs.schema.json +++ b/docs/mrdocs.schema.json @@ -282,7 +282,7 @@ "default": [ "html" ], - "description": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//. 
This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", + "description": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; script-driven generators instead ship a Lua or JavaScript script that produces the output. This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", "title": "Generator(s) used to create the documentation" }, "global-namespace-index": { diff --git a/src/lib/ConfigOptions.json b/src/lib/ConfigOptions.json index 2d78701c5b..96216f6a91 100644 --- a/src/lib/ConfigOptions.json +++ b/src/lib/ConfigOptions.json @@ -443,7 +443,7 @@ { "name": "generator", "brief": "Generator(s) used to create the documentation", - "details": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//. 
This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", + "details": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; script-driven generators instead ship a Lua or JavaScript script that produces the output. This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", "type": "string-list", "default": ["html"] }, From 82de2c0ef50c9b2f67d1534ab48a49a3b060143d Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Tue, 9 Jun 2026 15:13:35 +0200 Subject: [PATCH 05/12] docs: add a runnable script-driven generator example This adds a self-contained search-index generator that the docs page includes and CI runs. The extensions/script-driven-generators.adoc example section now includes the manifest and the generate.lua from this fixture, so the documented example is exactly the one the test runs. 
--- CMakeLists.txt | 13 +++++++ docs/antora-playbook.yml | 2 ++ .../extensions/script-driven-generators.adoc | 36 ++++++++----------- .../generator/search-index/generate.lua | 20 +++++++++++ .../search-index/mrdocs-generator.yml | 1 + .../script-driven/search-index/mrdocs.yml | 9 +++++ .../script-driven/search-index/run.sh | 2 ++ .../script-driven/search-index/simple.cpp | 16 +++++++++ 8 files changed, 78 insertions(+), 21 deletions(-) create mode 100644 examples/generators/script-driven/search-index/addons/generator/search-index/generate.lua create mode 100644 examples/generators/script-driven/search-index/addons/generator/search-index/mrdocs-generator.yml create mode 100644 examples/generators/script-driven/search-index/mrdocs.yml create mode 100644 examples/generators/script-driven/search-index/run.sh create mode 100644 examples/generators/script-driven/search-index/simple.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f14c094af5..a688422a3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -563,6 +563,19 @@ if (MRDOCS_BUILD_TESTS) endforeach () endforeach () + #------------------------------------------------- + # Script-driven generator example + #------------------------------------------------- + add_test( + NAME mrdocs-generator-script-driven-search-index + COMMAND bash run.sh + --addons=${CMAKE_CURRENT_SOURCE_DIR}/share/mrdocs/addons + --output=${CMAKE_CURRENT_BINARY_DIR}/generator-examples/search-index/reference-output + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/examples/generators/script-driven/search-index + ) + set_property(TEST mrdocs-generator-script-driven-search-index PROPERTY + ENVIRONMENT "MRDOCS=$<TARGET_FILE:mrdocs>") + #------------------------------------------------- # Library-usage examples # diff --git a/docs/antora-playbook.yml b/docs/antora-playbook.yml index 76a346cf4a..a67066dad3 100644 --- a/docs/antora-playbook.yml +++ b/docs/antora-playbook.yml @@ -71,6 +71,8 @@ antora: strip_page_wrapper: true - source: examples/generators/data-driven
target: data-driven-generators + - source: examples/generators/script-driven + target: script-driven-generators - source: examples target: examples - require: ./extensions/commands-registry-extension.js diff --git a/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc b/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc index 042bf1acb9..90b14d633f 100644 --- a/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc +++ b/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc @@ -49,28 +49,22 @@ Unlike a corpus-transform extension, whose hook is optional, a generator must de == Example: a search index -This generator emits a single search-index.json aggregating every symbol, an artifact no per-page generator can produce: +A complete, runnable example lives at `examples/generators/script-driven/search-index/`. It emits a single search-index.json aggregating every symbol, an artifact no per-page generator can produce. -[source,lua] +The manifest names the script: + +.`addons/generator/search-index/mrdocs-generator.yml` +[source,yaml] +---- +include::example$script-driven-generators/search-index/addons/generator/search-index/mrdocs-generator.yml[] ---- --- Quote a string as a JSON value. -local function json_string(s) - s = s:gsub('\\', '\\\\'):gsub('"', '\\"') - return '"' .. s .. '"' -end -function generate(corpus, output) - local entries = {} - for _, sym in ipairs(corpus.symbols) do - local name = sym.name or "" - if name ~= "" then - entries[#entries + 1] = - '{"name":' .. json_string(name) .. - ',"url":' .. json_string(sym._id .. ".html") .. "}" - end - end - output.write( - "search-index.json", - "[" .. table.concat(entries, ",") .. 
"]") -end +The script itself: + +.`addons/generator/search-index/generate.lua` +[source,lua] ---- +include::example$script-driven-generators/search-index/addons/generator/search-index/generate.lua[] +---- + +Select it with `--generator=search-index`; it writes search-index.json into the output directory. diff --git a/examples/generators/script-driven/search-index/addons/generator/search-index/generate.lua b/examples/generators/script-driven/search-index/addons/generator/search-index/generate.lua new file mode 100644 index 0000000000..d6a468ed4f --- /dev/null +++ b/examples/generators/script-driven/search-index/addons/generator/search-index/generate.lua @@ -0,0 +1,20 @@ +-- Quote a string as a JSON value. +local function json_string(s) + s = s:gsub('\\', '\\\\'):gsub('"', '\\"') + return '"' .. s .. '"' +end + +function generate(corpus, output) + local entries = {} + for _, sym in ipairs(corpus.symbols) do + local name = sym.name or "" + if name ~= "" then + entries[#entries + 1] = + '{"name":' .. json_string(name) .. + ',"url":' .. json_string(sym._id .. ".html") .. "}" + end + end + output.write( + "search-index.json", + "[" .. table.concat(entries, ",") .. 
"]") +end diff --git a/examples/generators/script-driven/search-index/addons/generator/search-index/mrdocs-generator.yml b/examples/generators/script-driven/search-index/addons/generator/search-index/mrdocs-generator.yml new file mode 100644 index 0000000000..79a2356a56 --- /dev/null +++ b/examples/generators/script-driven/search-index/addons/generator/search-index/mrdocs-generator.yml @@ -0,0 +1 @@ +script: generate.lua diff --git a/examples/generators/script-driven/search-index/mrdocs.yml b/examples/generators/script-driven/search-index/mrdocs.yml new file mode 100644 index 0000000000..5086ba1fb3 --- /dev/null +++ b/examples/generators/script-driven/search-index/mrdocs.yml @@ -0,0 +1,9 @@ +addons-supplemental: + - addons +generator: search-index +multipage: false +show-namespaces: false +warn-if-undocumented: false +source-root: . +input: + - . diff --git a/examples/generators/script-driven/search-index/run.sh b/examples/generators/script-driven/search-index/run.sh new file mode 100644 index 0000000000..fbe147c2c9 --- /dev/null +++ b/examples/generators/script-driven/search-index/run.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +exec "${MRDOCS:-mrdocs}" --config=mrdocs.yml "$@" diff --git a/examples/generators/script-driven/search-index/simple.cpp b/examples/generators/script-driven/search-index/simple.cpp new file mode 100644 index 0000000000..46d7894f55 --- /dev/null +++ b/examples/generators/script-driven/search-index/simple.cpp @@ -0,0 +1,16 @@ +/// A vector in the Euclidean plane. +struct Vector +{ + /** The length (magnitude) of the vector. + + @return The Euclidean length. + */ + double length() const; + + /** Scale the vector componentwise. + + @param sx Factor applied to the x component. + @param sy Factor applied to the y component. 
+ */ + void scale(double sx, double sy); +}; From bd19a89cb59798b6571066d97257096194b0e232 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Thu, 11 Jun 2026 12:19:40 +0200 Subject: [PATCH 06/12] feat(extensions): declare corpus transforms with register_transform This replaces the reserved-name `transform_corpus(corpus)` entry point with an explicit `register_transform(fn)` call. A script may register any number of transforms; each runs once, in registration order, against a navigable DOM view of the corpus it can read and mutate in place. The function is captured as a `dom::Function` on both languages. Lua anchors it in `LUA_REGISTRYINDEX` via the new `lua::makeCallable`, not a storage global, so no new ownerless global state is introduced. A script that registers nothing warns and is otherwise a no-op, so an empty script is tolerated. --- .../pages/extensions/corpus-extensions.adoc | 10 +- include/mrdocs/Support/Lua.hpp | 21 +++ src/lib/Extensions/AddonDiscovery.cpp | 82 +++++++++--- src/lib/Extensions/AddonDiscovery.hpp | 10 +- src/lib/Extensions/CorpusDom.hpp | 4 +- src/lib/Extensions/JsBinding.cpp | 91 +++++++++++-- src/lib/Extensions/JsBinding.hpp | 10 +- src/lib/Extensions/LuaBinding.cpp | 126 ++++++++++++++---- src/lib/Extensions/LuaBinding.hpp | 10 +- src/lib/Extensions/RunExtensions.hpp | 32 ++--- src/lib/Support/Lua.cpp | 48 +++++++ .../addons/extensions/transforms.js | 35 +++++ .../js-register-transform/mrdocs.yml | 6 + .../register_transform.cpp | 3 + .../register_transform.xml | 46 +++++++ .../js-set-name/addons/extensions/rename.js | 4 +- .../lua-clear-doc/addons/extensions/clear.lua | 4 +- .../addons/extensions/empty.lua | 6 +- .../addons/extensions/non_transform.lua | 6 +- .../lua-empty-script/empty_script.cpp | 4 +- .../lua-empty-script/empty_script.xml | 13 +- .../addons/primary/extensions/zzz-primary.lua | 4 +- .../extensions/aaa-supplemental.lua | 4 +- .../addons/extensions/transforms.lua | 27 ++++ .../lua-register-transform/mrdocs.yml | 6 + 
.../register_transform.cpp | 3 + .../register_transform.xml | 46 +++++++ .../lua-set-name/addons/extensions/rename.lua | 4 +- .../addons/extensions/replace_return.lua | 4 +- .../addons/extensions/brief_from_name.js | 31 +++-- .../addons/extensions/brief_from_name.lua | 4 +- .../entry-point/addons/extensions/noop.js | 4 +- .../entry-point/addons/extensions/noop.lua | 4 +- .../addons/extensions/parse_format_relates.js | 43 +++--- .../extensions/parse_format_relates.lua | 14 +- .../addons/extensions/subclass_tree.js | 18 +-- .../addons/extensions/subclass_tree.lua | 11 +- 37 files changed, 607 insertions(+), 191 deletions(-) create mode 100644 test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js create mode 100644 test-files/golden-tests/extensions/js-register-transform/mrdocs.yml create mode 100644 test-files/golden-tests/extensions/js-register-transform/register_transform.cpp create mode 100644 test-files/golden-tests/extensions/js-register-transform/register_transform.xml create mode 100644 test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua create mode 100644 test-files/golden-tests/extensions/lua-register-transform/mrdocs.yml create mode 100644 test-files/golden-tests/extensions/lua-register-transform/register_transform.cpp create mode 100644 test-files/golden-tests/extensions/lua-register-transform/register_transform.xml diff --git a/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc b/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc index 7421cb442d..625fd4ae86 100644 --- a/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc +++ b/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc @@ -16,7 +16,7 @@ Both scripting languages reach the same `mrdocs` API. The choice is a trade-off, == Accessing the corpus -A script extends Mr.Docs by defining a function named `transform_corpus(corpus)`. 
Mr.Docs calls it once per loaded script with a flat read-only view of the corpus. A script that doesn't define `transform_corpus` is silently ignored at this step. +A script extends Mr.Docs by calling `register_transform(fn)` with a function that takes the corpus. Mr.Docs invokes each registered function once, in registration order, with a flat view of the corpus. A script can register several transforms, or none at all; if it registers nothing, Mr.Docs warns that the script had no effect and moves on. [tabs] ====== @@ -48,7 +48,7 @@ JavaScript:: .`addons/extensions/count_by_kind.js` [source,javascript] ---- -function transform_corpus(corpus) { +register_transform(function(corpus) { var counts = {}; for (var i = 0; i < corpus.symbols.length; ++i) { var k = corpus.symbols[i].kind; @@ -57,7 +57,7 @@ function transform_corpus(corpus) { for (var k in counts) { console.log(k + ": " + counts[k]); } -} +}); ---- Lua:: @@ -65,7 +65,7 @@ Lua:: .`addons/extensions/count_by_kind.lua` [source,lua] ---- -function transform_corpus(corpus) +register_transform(function(corpus) local counts = {} for _, sym in ipairs(corpus.symbols) do counts[sym.kind] = (counts[sym.kind] or 0) + 1 @@ -73,7 +73,7 @@ function transform_corpus(corpus) for k, v in pairs(counts) do print(k .. ": " .. v) end -end +end) ---- ====== diff --git a/include/mrdocs/Support/Lua.hpp b/include/mrdocs/Support/Lua.hpp index ee5a8f4f00..49ee4a1fa1 100644 --- a/include/mrdocs/Support/Lua.hpp +++ b/include/mrdocs/Support/Lua.hpp @@ -674,6 +674,27 @@ registerHelper( Context& ctx, std::string_view script); +/** Expose a registry-anchored Lua function as a dom::Function. + + `ref` must be a reference obtained from + `luaL_ref(L, LUA_REGISTRYINDEX)` for a function value, where `L` is + `ctx`'s native state. The returned function invokes that Lua function, + marshalling its arguments and result as DOM values, so a Lua callable + can be held and called like any other @ref dom::Function. 
+ + The returned function takes ownership of the reference and keeps the + context alive: the Lua function is released (`luaL_unref`) when the + last copy of the returned function is destroyed. The anchor lives in + the registry, so no Lua global is introduced. + + @param ctx The context whose registry holds the function. + @param ref The `luaL_ref` reference to the function. + @return A dom::Function that calls the anchored Lua function. +*/ +[[nodiscard]] MRDOCS_DECL +dom::Function +makeCallable(Context ctx, int ref); + } // lua } // mrdocs diff --git a/src/lib/Extensions/AddonDiscovery.cpp b/src/lib/Extensions/AddonDiscovery.cpp index de50f45db0..b69d6e57a6 100644 --- a/src/lib/Extensions/AddonDiscovery.cpp +++ b/src/lib/Extensions/AddonDiscovery.cpp @@ -12,40 +12,88 @@ #include +#include #include #include +#include +#include namespace mrdocs { +namespace { + +// Append the entry script for one immediate child of an extensions/ +// directory: a .lua or .js file is an extension. Anything else is +// ignored. +void +collectEntry( + std::filesystem::directory_entry const& entry, + std::vector& scripts) +{ + std::error_code ec; + if (entry.is_regular_file(ec)) + { + std::string path = entry.path().string(); + if (path.ends_with(".lua") || path.ends_with(".js")) + { + scripts.push_back(std::move(path)); + } + } +} + +// Append every entry script found in one extensions/ directory to +// `scripts`, returning any filesystem error hit while iterating. 
+Expected +collectFromDirectory( + std::string const& dir, + std::vector& scripts) +{ + namespace fs = std::filesystem; + std::error_code ec; + fs::directory_iterator const end{}; + for (fs::directory_iterator it(dir, ec); + !ec && it != end; + it.increment(ec)) + { + collectEntry(*it, scripts); + } + Expected result; + if (ec) + { + result = Unexpected(formatError("{}: {}", dir, ec.message())); + } + return result; +} + +} // (anon) + Expected> collectExtensionScripts(Config const& config) { std::vector scripts; + Expected status; std::vector const roots = addonRoots(config); for (std::string const& root : roots) { std::string const dir = files::appendPath(root, "extensions"); - if (files::exists(dir)) + if (status.has_value() && files::exists(dir)) { - Expected exp = forEachFile(dir, true, - [&](std::string_view pathName) -> Expected - { - if (pathName.ends_with(".lua") || - pathName.ends_with(".js")) - { - scripts.emplace_back(pathName); - } - return {}; - }); - if (!exp) - { - return Unexpected(exp.error()); - } + status = collectFromDirectory(dir, scripts); } } - std::sort(scripts.begin(), scripts.end()); - return scripts; + + Expected> result; + if (status.has_value()) + { + std::sort(scripts.begin(), scripts.end()); + result = std::move(scripts); + } + else + { + result = Unexpected(status.error()); + } + return result; } } // mrdocs diff --git a/src/lib/Extensions/AddonDiscovery.hpp b/src/lib/Extensions/AddonDiscovery.hpp index 897dafa454..17ed767207 100644 --- a/src/lib/Extensions/AddonDiscovery.hpp +++ b/src/lib/Extensions/AddonDiscovery.hpp @@ -18,12 +18,12 @@ namespace mrdocs { -/** Return the extension scripts across every addon root. +/** Return the extension entry scripts across every addon root. - Walks `/extensions/` under each addon root and gathers every - `.lua` and `.js` file, sorted alphabetically by full path. The two - languages are interleaved so behavior doesn't depend on which - language a user happens to write in - only on file names. 
+ Walks the immediate children of the extensions/ directory under each + addon root: a .lua or .js file is an extension. The result is sorted + alphabetically by full path, interleaving the two languages so + behavior depends only on file names. */ Expected> collectExtensionScripts(Config const& config); diff --git a/src/lib/Extensions/CorpusDom.hpp b/src/lib/Extensions/CorpusDom.hpp index 8620e44d03..fd61f5b6e0 100644 --- a/src/lib/Extensions/CorpusDom.hpp +++ b/src/lib/Extensions/CorpusDom.hpp @@ -15,8 +15,8 @@ namespace mrdocs { class CorpusImpl; -/** Build the `corpus` argument passed to a script's - `transform_corpus(corpus)` entry point. +/** Build the `corpus` argument passed to each registered corpus + transform. The returned value is a small object: diff --git a/src/lib/Extensions/JsBinding.cpp b/src/lib/Extensions/JsBinding.cpp index 5ca79d46d9..f4bac662d2 100644 --- a/src/lib/Extensions/JsBinding.cpp +++ b/src/lib/Extensions/JsBinding.cpp @@ -17,9 +17,64 @@ #include #include #include +#include + +#include +#include namespace mrdocs { +namespace { + +// Bind `register_transform` as the script-facing entry point before the +// script runs. A JavaScript function bridges to a `dom::Function`, so +// each registered callable is captured as a DOM value in `transforms`, +// in registration order. The collector outlives every invocation because +// the script (which does the registering) runs while it is in scope. 
+void +registerJsExtensionApi(js::Scope& scope, dom::Array& transforms) +{ + scope.setGlobal( + "register_transform", + dom::Value(dom::makeVariadicInvocable( + [&transforms](dom::Array const& args) + -> Expected + { + Expected result; + if (args.empty() || !args.get(0).isFunction()) + { + result = Unexpected(Error( + "register_transform: expected a function argument")); + } + else + { + transforms.push_back(args.get(0)); + } + return result; + }))); +} + +// Invoke one registered transform with the corpus, tagging any failure +// with the script path for context. +Expected +invokeTransform( + dom::Value const& transform, + dom::Value const& corpus, + std::string const& scriptPath) +{ + Expected invoked = transform.getFunction().try_invoke(corpus); + Expected result; + if (!invoked.has_value()) + { + result = Unexpected(formatError( + "extension '{}': {}", + scriptPath, invoked.error().message())); + } + return result; +} + +} // (anon) + Expected runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath) { @@ -28,6 +83,9 @@ runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath) js::registerStdGlobals(scope); + dom::Array transforms; + registerJsExtensionApi(scope, transforms); + // The corpus argument is a small navigable object: an array of // per-symbol proxies plus `get(id)` / `lookup(name)` functions. // Everything else a script does runs through that proxy: direct @@ -35,27 +93,34 @@ runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath) dom::Value corpusValue = buildCorpusDom(corpus); MRDOCS_TRY(std::string script, files::getFileText(scriptPath)); - if (Expected exp = scope.script(script); !exp) + + // Running the script is what calls `register_transform`. 
+ Expected result = scope.script(script); + if (!result.has_value()) { - return Unexpected(formatError( + result = Unexpected(formatError( "extension '{}': {}", - scriptPath, exp.error().message())); + scriptPath, result.error().message())); } - - Expected fn = scope.getGlobal("transform_corpus"); - if (!fn || !fn->isFunction()) + else if (transforms.empty()) { - return {}; + // A discovered script that registers nothing is almost always a + // mistake (a misspelled `register_transform`, or a guard that + // skipped it), so flag it rather than silently doing nothing. + report::warn("extension '{}' registered nothing", scriptPath); } - Expected result = fn->call(corpusValue); - if (!result) + // Invoke each declared transform with the corpus, in registration + // order, stopping at the first failure. + for (std::size_t i = 0; i < transforms.size(); ++i) { - return Unexpected(formatError( - "extension '{}': {}", - scriptPath, result.error().message())); + if (result.has_value()) + { + result = invokeTransform( + transforms.get(i), corpusValue, scriptPath); + } } - return {}; + return result; } } // mrdocs diff --git a/src/lib/Extensions/JsBinding.hpp b/src/lib/Extensions/JsBinding.hpp index 07d18fdb5b..e761a96356 100644 --- a/src/lib/Extensions/JsBinding.hpp +++ b/src/lib/Extensions/JsBinding.hpp @@ -21,10 +21,12 @@ class CorpusImpl; /** Run one JavaScript extension script against the corpus. - Builds a fresh JS context, exposes the `mrdocs` global object, - evaluates the script, and invokes `transform_corpus(corpus)` if - defined. A script that defines no such function is silently - skipped, so an empty `.js` file is valid. + Build a fresh JS context, evaluate the script, and run every corpus + transform it declares by calling `register_transform(fn)`. Each + registered function is invoked once, in registration order, with a + navigable DOM view of the corpus that it can read and mutate in place. 
+ A script that registers nothing causes a warning and otherwise has no + effect, so an empty .js file is tolerated. */ Expected runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath); diff --git a/src/lib/Extensions/LuaBinding.cpp b/src/lib/Extensions/LuaBinding.cpp index 78261f94c9..17d0eb0e49 100644 --- a/src/lib/Extensions/LuaBinding.cpp +++ b/src/lib/Extensions/LuaBinding.cpp @@ -16,15 +16,96 @@ #include #include #include +#include +#include #include +#include + +extern "C" { +#include +#include +} namespace mrdocs { +namespace { + +// Collects the corpus transforms an extension declares through +// `register_transform`. Each registered Lua function is anchored in the +// Lua registry (never a global) and exposed as a `dom::Function` over the +// live corpus, so both scripting languages funnel through one call path. +// The collector rides as the `register_transform` closure's single +// upvalue. +struct LuaRegistrations +{ + lua::Context* ctx = nullptr; + std::vector transforms; +}; + +// `register_transform(fn)`: anchor `fn` in the registry and record it as +// a callable, so the host can invoke it once the chunk has run. +int +luaRegisterTransform(lua_State* L) +{ + LuaRegistrations* regs = static_cast( + lua_touserdata(L, lua_upvalueindex(1))); + int result = 0; + if (lua_type(L, 1) != LUA_TFUNCTION) + { + result = luaL_error(L, + "register_transform: expected a function argument"); + } + else + { + lua_pushvalue(L, 1); + int const ref = luaL_ref(L, LUA_REGISTRYINDEX); + regs->transforms.push_back(lua::makeCallable(*regs->ctx, ref)); + } + return result; +} + +// Bind `register_transform` as the script-facing entry point before the +// chunk runs, the same way a script generator's `output` object is bound. +// The collector pointer is carried as the closure's single upvalue. 
+void +registerLuaExtensionApi(lua::Context& ctx, LuaRegistrations& regs) +{ + regs.ctx = &ctx; + lua_State* L = static_cast(ctx.nativeState()); + lua_pushlightuserdata(L, &regs); + lua_pushcclosure(L, &luaRegisterTransform, 1); + lua_setglobal(L, "register_transform"); +} + +// Invoke one registered transform with the corpus, tagging any failure +// with the script path for context. +Expected +invokeTransform( + dom::Function const& transform, + dom::Value const& corpus, + std::string const& scriptPath) +{ + Expected invoked = transform.try_invoke(corpus); + Expected result; + if (!invoked.has_value()) + { + result = Unexpected(formatError( + "extension '{}': {}", + scriptPath, invoked.error().message())); + } + return result; +} + +} // (anon) + Expected runOneLuaExtension(CorpusImpl& corpus, std::string const& scriptPath) { lua::Context ctx; + LuaRegistrations regs; + registerLuaExtensionApi(ctx, regs); + lua::Scope scope(ctx); // The corpus argument is a small navigable object: an array of @@ -37,40 +118,29 @@ runOneLuaExtension(CorpusImpl& corpus, std::string const& scriptPath) MRDOCS_TRY(std::string script, files::getFileText(scriptPath)); MRDOCS_TRY(lua::Function chunk, scope.loadChunk(script, scriptPath)); - Expected chunkResult = chunk.call(); - if (!chunkResult) - { - return Unexpected(chunkResult.error()); - } + // Running the chunk's top-level code is what calls + // `register_transform`; the chunk's own return value is unused. + MRDOCS_TRY(chunk.call()); - // Resolve `transform_corpus`. Prefer the chunk's return value - // (the `return function(...) ... end` idiom); fall back to a - // same-named global (the `function name(...)` idiom). If neither - // yields a function, the extension has nothing to do.
- auto callTransform = - [&](lua::Function&& fn) -> Expected - { - Expected result = fn.call(corpusValue); - if (!result) - { - return Unexpected(formatError( - "extension '{}': {}", - scriptPath, result.error().message())); - } - return {}; - }; - - if (chunkResult->isFunction()) + // A discovered script that registers nothing is almost always a + // mistake (a misspelled `register_transform`, or a guard that skipped + // it), so flag it rather than silently doing nothing. + if (regs.transforms.empty()) { - return callTransform(lua::Function(std::move(*chunkResult))); + report::warn("extension '{}' registered nothing", scriptPath); } - Expected global = scope.getGlobal("transform_corpus"); - if (!global || !global->isFunction()) + // Invoke each declared transform with the corpus, in registration + // order, stopping at the first failure. + Expected result; + for (dom::Function const& transform : regs.transforms) { - return {}; + if (result.has_value()) + { + result = invokeTransform(transform, corpusValue, scriptPath); + } } - return callTransform(lua::Function(std::move(*global))); + return result; } } // mrdocs diff --git a/src/lib/Extensions/LuaBinding.hpp b/src/lib/Extensions/LuaBinding.hpp index 2059dae6fa..96c0841183 100644 --- a/src/lib/Extensions/LuaBinding.hpp +++ b/src/lib/Extensions/LuaBinding.hpp @@ -21,10 +21,12 @@ class CorpusImpl; /** Run one Lua extension script against the corpus. - Builds a fresh Lua context, exposes the `mrdocs` global, evaluates - the script, and invokes `transform_corpus(corpus)` if defined. - A script that defines no such function is silently skipped, so an - empty `.lua` file is valid. + Build a fresh Lua context, evaluate the script, and run every corpus + transform it declares by calling `register_transform(fn)`. Each + registered function is invoked once, in registration order, with a + navigable DOM view of the corpus that it can read and mutate in place. 
+ A script that registers nothing causes a warning and otherwise has no + effect, so an empty .lua file is tolerated. */ Expected runOneLuaExtension(CorpusImpl& corpus, std::string const& scriptPath); diff --git a/src/lib/Extensions/RunExtensions.hpp b/src/lib/Extensions/RunExtensions.hpp index 470b3f21c9..b63506b36e 100644 --- a/src/lib/Extensions/RunExtensions.hpp +++ b/src/lib/Extensions/RunExtensions.hpp @@ -20,24 +20,20 @@ class CorpusImpl; /** Run user-provided extension scripts against the corpus. - Extensions live in /extensions/.{lua,js} for each - addon root declared in the configuration (primary `addons` plus - `addons-supplemental`). Each script may export a function named - `transform_corpus(corpus)`; the function is invoked once with a flat - DOM view of the corpus that the script can read, and may mutate the - corpus by calling functions on the pre-registered `mrdocs` global - table or object: - - - `mrdocs.set(symbol_id, field, value)` - assign a new value to - one of the allowlisted fields of a symbol. The setter validates - its arguments and raises an error on misuse. - - Any uncaught error inside a script aborts the build. Scripts are run - in alphabetical order by file path, with the two languages - interleaved so behavior doesn't depend on which language a user - chose. Extensions intentionally fire after all finalizers and - before any generator runs, so mutations are visible to every - output format. + Extensions are discovered under each addon root's extensions/ + directory (the primary addons plus addons-supplemental): a .lua or + .js file is an extension. Each script declares corpus transforms by + calling `register_transform(fn)`; every registered function is + invoked once, in registration order, with a navigable DOM view of the + corpus. A transform reads the corpus through that view and mutates it + by assigning to symbol fields (for example `sym.name = "..."`), which + writes through to the live symbol. 
+ + Any uncaught error inside a script aborts the build. Scripts run in + alphabetical order by full path, with the two languages interleaved so + behavior doesn't depend on which language a user chose. Extensions + fire after all finalizers and before any generator runs, so mutations + are visible to every output format. */ Expected runExtensions(CorpusImpl& corpus); diff --git a/src/lib/Support/Lua.cpp b/src/lib/Support/Lua.cpp index d65e55dd37..4c7b45c2b7 100644 --- a/src/lib/Support/Lua.cpp +++ b/src/lib/Support/Lua.cpp @@ -1555,6 +1555,42 @@ invokeHelperRef( return result; } +// Invoke the registry-anchored function with every argument (there is no +// Handlebars options object to strip), converting the Lua result back to +// a `dom::Value`. Errors from `lua_pcall` surface as `Unexpected`. +static +Expected +invokeRef( + std::shared_ptr const& handle, + dom::Array const& args) +{ + Scope scope(handle->ctx); + Access A(scope); + + lua_rawgeti(A, LUA_REGISTRYINDEX, handle->ref); + + std::size_t const narg = args.size(); + for (std::size_t i = 0; i < narg; ++i) + { + Param p(args.get(i)); + Access::push(p, scope); + } + + // A default-constructed `Expected` holds a value; both branches below + // overwrite it, so the default is never observed. 
+ Expected result; + if (lua_pcall(A, static_cast(narg), 1, 0) == LUA_OK) + { + result = luaToDom(A, lua_gettop(A)); + lua_pop(A, 1); + } + else + { + result = Unexpected(luaM_popError(A)); + } + return result; +} + } // detail Expected @@ -1617,6 +1653,18 @@ registerHelper( return {}; } +dom::Function +makeCallable(Context ctx, int ref) +{ + auto handle = std::make_shared( + std::move(ctx), ref); + return dom::makeVariadicInvocable( + [handle](dom::Array const& args) -> Expected + { + return detail::invokeRef(handle, args); + }); +} + //------------------------------------------------ void diff --git a/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js b/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js new file mode 100644 index 0000000000..7de823947d --- /dev/null +++ b/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js @@ -0,0 +1,35 @@ +// Declare two corpus transforms with `register_transform`. Both run, in +// registration order, so one extension can contribute several +// transforms. The first renames every function; the second rewrites +// its brief. Mirrors the lua-register-transform fixture on the JS path. 
+ +register_transform(function(corpus) +{ + for (var i = 0; i < corpus.symbols.length; ++i) + { + var sym = corpus.symbols[i]; + if (sym.kind === "function") + { + sym.name = "renamed_" + sym.name; + } + } +}); + +register_transform(function(corpus) +{ + for (var i = 0; i < corpus.symbols.length; ++i) + { + var sym = corpus.symbols[i]; + if (sym.kind === "function") + { + sym.doc = { + brief: { + children: [ + { kind: "text", + literal: "Brief from the second transform" } + ] + } + }; + } + } +}); diff --git a/test-files/golden-tests/extensions/js-register-transform/mrdocs.yml b/test-files/golden-tests/extensions/js-register-transform/mrdocs.yml new file mode 100644 index 0000000000..cde6450530 --- /dev/null +++ b/test-files/golden-tests/extensions/js-register-transform/mrdocs.yml @@ -0,0 +1,6 @@ +addons-supplemental: + - addons +generator: xml +multipage: false +warn-if-undocumented: false +source-root: . diff --git a/test-files/golden-tests/extensions/js-register-transform/register_transform.cpp b/test-files/golden-tests/extensions/js-register-transform/register_transform.cpp new file mode 100644 index 0000000000..9d0a6b2c22 --- /dev/null +++ b/test-files/golden-tests/extensions/js-register-transform/register_transform.cpp @@ -0,0 +1,3 @@ +/// A function whose name and brief are rewritten by two registered +/// transforms. 
+void target_function(); diff --git a/test-files/golden-tests/extensions/js-register-transform/register_transform.xml b/test-files/golden-tests/extensions/js-register-transform/register_transform.xml new file mode 100644 index 0000000000..18f3bc303b --- /dev/null +++ b/test-files/golden-tests/extensions/js-register-transform/register_transform.xml @@ -0,0 +1,46 @@ + + + + + + namespace + //////////////////////////8= + regular + + HuxRZuBJaL6YnoJa2a7IFMcNqvo= + + + + renamed_target_function + + + register_transform.cpp + register_transform.cpp + 3 + 1 + 1 + + + function + HuxRZuBJaL6YnoJa2a7IFMcNqvo= + regular + //////////////////////////8= + + + brief + + text + Brief from the second transform + + + + + + identifier + void + + + normal + + diff --git a/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js b/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js index 11c6a7bc48..487a2e2c60 100644 --- a/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js +++ b/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js @@ -7,7 +7,7 @@ // proxy's `set` trap forwards each assignment into the live C++ // Symbol via reflection. -function transform_corpus(corpus) +register_transform(function(corpus) { for (var i = 0; i < corpus.symbols.length; ++i) { @@ -25,4 +25,4 @@ function transform_corpus(corpus) }; } } -} +}); diff --git a/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua b/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua index 2ae0753dd0..7b97f0dc58 100644 --- a/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua +++ b/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua @@ -3,10 +3,10 @@ -- doc-comment so the rendered output contains no doc-comment block -- for it. 
-function transform_corpus(corpus) +register_transform(function(corpus) for _, sym in ipairs(corpus.symbols) do if sym.kind == "function" then sym.doc = nil end end -end +end) diff --git a/test-files/golden-tests/extensions/lua-empty-script/addons/extensions/empty.lua b/test-files/golden-tests/extensions/lua-empty-script/addons/extensions/empty.lua index 7509709033..95424ca1a1 100644 --- a/test-files/golden-tests/extensions/lua-empty-script/addons/extensions/empty.lua +++ b/test-files/golden-tests/extensions/lua-empty-script/addons/extensions/empty.lua @@ -1,3 +1,3 @@ --- Intentionally empty: no `transform_corpus` defined. The docs --- promise that MrDocs silently skips such scripts so a file can be --- empty during development without breaking the build. +-- Intentionally empty: registers no transform. MrDocs warns that the +-- script had no effect but still completes the build, so a file can be +-- empty during development. diff --git a/test-files/golden-tests/extensions/lua-empty-script/addons/extensions/non_transform.lua b/test-files/golden-tests/extensions/lua-empty-script/addons/extensions/non_transform.lua index 9eb4744ce1..4dfb6f8f13 100644 --- a/test-files/golden-tests/extensions/lua-empty-script/addons/extensions/non_transform.lua +++ b/test-files/golden-tests/extensions/lua-empty-script/addons/extensions/non_transform.lua @@ -1,5 +1,5 @@ --- A file that defines globals but no `transform_corpus`. The docs --- say MrDocs silently skips such scripts: the global below should --- have no effect on the rendered output. +-- A file that defines globals but registers no transform. MrDocs warns +-- that it had no effect but still completes the build: the global below +-- does not change the rendered output. 
unrelated_helper = function(x) return x + 1 end diff --git a/test-files/golden-tests/extensions/lua-empty-script/empty_script.cpp b/test-files/golden-tests/extensions/lua-empty-script/empty_script.cpp index 61458e30de..0be1a6e470 100644 --- a/test-files/golden-tests/extensions/lua-empty-script/empty_script.cpp +++ b/test-files/golden-tests/extensions/lua-empty-script/empty_script.cpp @@ -1,4 +1,4 @@ /// A function untouched by any extension - the test verifies that -/// loading an extension file that does NOT define `transform_corpus` -/// completes the build cleanly and leaves the corpus alone. +/// loading an extension that registers no transform completes the +/// build cleanly and leaves the corpus alone. void unchanged_function(); diff --git a/test-files/golden-tests/extensions/lua-empty-script/empty_script.xml b/test-files/golden-tests/extensions/lua-empty-script/empty_script.xml index 2dff972b68..66b9ee05cc 100644 --- a/test-files/golden-tests/extensions/lua-empty-script/empty_script.xml +++ b/test-files/golden-tests/extensions/lua-empty-script/empty_script.xml @@ -31,18 +31,7 @@ brief text - A function untouched by any extension - the test verifies that loading an extension file that does NOT define - - - code - - text - transform_corpus - - - - text - completes the build cleanly and leaves the corpus alone. + A function untouched by any extension - the test verifies that loading an extension that registers no transform completes the build cleanly and leaves the corpus alone. 
diff --git a/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua b/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua index 9f76e55f06..e97d1d1a66 100644 --- a/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua +++ b/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua @@ -5,10 +5,10 @@ -- so this script runs FIRST; its rename is overwritten by the -- supplemental's. -function transform_corpus(corpus) +register_transform(function(corpus) for _, sym in ipairs(corpus.symbols) do if sym.kind == "function" then sym.name = "from_primary" end end -end +end) diff --git a/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua b/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua index 0dbd520fd2..b093586d87 100644 --- a/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua +++ b/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua @@ -3,10 +3,10 @@ -- overwrites the primary root's, so this is the name that must -- appear in the rendered output. -function transform_corpus(corpus) +register_transform(function(corpus) for _, sym in ipairs(corpus.symbols) do if sym.kind == "function" then sym.name = "from_supplemental" end end -end +end) diff --git a/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua b/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua new file mode 100644 index 0000000000..04c28f2540 --- /dev/null +++ b/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua @@ -0,0 +1,27 @@ +-- Declare two corpus transforms with `register_transform`. 
Both run, in +-- registration order, so one extension can contribute several +-- transforms. The first renames every function; the second rewrites +-- its brief. + +register_transform(function(corpus) + for _, sym in ipairs(corpus.symbols) do + if sym.kind == "function" then + sym.name = "renamed_" .. sym.name + end + end +end) + +register_transform(function(corpus) + for _, sym in ipairs(corpus.symbols) do + if sym.kind == "function" then + sym.doc = { + brief = { + children = { + { kind = "text", + literal = "Brief from the second transform" } + } + } + } + end + end +end) diff --git a/test-files/golden-tests/extensions/lua-register-transform/mrdocs.yml b/test-files/golden-tests/extensions/lua-register-transform/mrdocs.yml new file mode 100644 index 0000000000..cde6450530 --- /dev/null +++ b/test-files/golden-tests/extensions/lua-register-transform/mrdocs.yml @@ -0,0 +1,6 @@ +addons-supplemental: + - addons +generator: xml +multipage: false +warn-if-undocumented: false +source-root: . diff --git a/test-files/golden-tests/extensions/lua-register-transform/register_transform.cpp b/test-files/golden-tests/extensions/lua-register-transform/register_transform.cpp new file mode 100644 index 0000000000..9d0a6b2c22 --- /dev/null +++ b/test-files/golden-tests/extensions/lua-register-transform/register_transform.cpp @@ -0,0 +1,3 @@ +/// A function whose name and brief are rewritten by two registered +/// transforms. 
+void target_function(); diff --git a/test-files/golden-tests/extensions/lua-register-transform/register_transform.xml b/test-files/golden-tests/extensions/lua-register-transform/register_transform.xml new file mode 100644 index 0000000000..18f3bc303b --- /dev/null +++ b/test-files/golden-tests/extensions/lua-register-transform/register_transform.xml @@ -0,0 +1,46 @@ + + + + + + namespace + //////////////////////////8= + regular + + HuxRZuBJaL6YnoJa2a7IFMcNqvo= + + + + renamed_target_function + + + register_transform.cpp + register_transform.cpp + 3 + 1 + 1 + + + function + HuxRZuBJaL6YnoJa2a7IFMcNqvo= + regular + //////////////////////////8= + + + brief + + text + Brief from the second transform + + + + + + identifier + void + + + normal + + diff --git a/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua b/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua index 177fee85d3..eb9c511133 100644 --- a/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua +++ b/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua @@ -6,7 +6,7 @@ -- corpus.symbols is a regular Lua sequence: 1-indexed, with `#` and -- `ipairs`/`pairs` support. -function transform_corpus(corpus) +register_transform(function(corpus) for _, sym in ipairs(corpus.symbols) do if sym.kind == "function" then sym.name = "renamed_" .. 
sym.name @@ -20,4 +20,4 @@ function transform_corpus(corpus) } end end -end +end) diff --git a/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua b/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua index 7f19b7347b..d1b84138f4 100644 --- a/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua +++ b/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua @@ -26,7 +26,7 @@ -- This fixture omits the lookup and uses a bare identifier so the -- test is self-contained. -function transform_corpus(corpus) +register_transform(function(corpus) for _, sym in ipairs(corpus.symbols) do if sym.kind == "function" and sym.name == "target_function" then sym.returnType = { @@ -38,4 +38,4 @@ function transform_corpus(corpus) } end end -end +end) diff --git a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js index e53883820c..133afd46c2 100644 --- a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js +++ b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js @@ -7,26 +7,25 @@ // sentence on every declaration. Anything an author already wrote is // preserved: only missing fields are filled in. 
-function transform_corpus(corpus) { +register_transform(function(corpus) { for (var i = 0; i < corpus.symbols.length; ++i) { var sym = corpus.symbols[i]; - if (sym.kind !== "function") { continue; } - if (sym.name.indexOf("is_") !== 0) { continue; } + if (sym.kind === "function" && sym.name.indexOf("is_") === 0) { + if (!sym.doc) { sym.doc = {}; } - if (!sym.doc) { sym.doc = {}; } + var subject = sym.name.slice(3).replace(/_/g, " "); - var subject = sym.name.slice(3).replace(/_/g, " "); + if (!sym.doc.brief) { + sym.doc.brief = "Returns true if " + subject + "."; + } - if (!sym.doc.brief) { - sym.doc.brief = "Returns true if " + subject + "."; - } - - if (sym.params.length === 1 - && (!sym.doc.params || sym.doc.params.length === 0)) { - sym.doc.params = [{ - name: sym.params[0].name, - children: "The input examined for the " + subject + " property." - }]; + if (sym.params.length === 1 + && (!sym.doc.params || sym.doc.params.length === 0)) { + sym.doc.params = [{ + name: sym.params[0].name, + children: "The input examined for the " + subject + " property." + }]; + } } } -} +}); diff --git a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua index 406f1ae333..6834b3d2cb 100644 --- a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua +++ b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua @@ -7,7 +7,7 @@ -- sentence on every declaration. Anything an author already wrote is -- preserved: only missing fields are filled in. 
-function transform_corpus(corpus) +register_transform(function(corpus) for _, sym in ipairs(corpus.symbols) do if sym.kind == "function" and sym.name:sub(1, 3) == "is_" then @@ -31,4 +31,4 @@ function transform_corpus(corpus) end end end -end +end) diff --git a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js index ecbeaf29a3..1e57a7b4d3 100644 --- a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js +++ b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js @@ -1,3 +1,3 @@ -function transform_corpus(corpus) { +register_transform(function(corpus) { // walk corpus.symbols, assign to the fields you want to change -} +}); diff --git a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua index c6e63973ec..6869ab4abb 100644 --- a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua +++ b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua @@ -1,3 +1,3 @@ -function transform_corpus(corpus) +register_transform(function(corpus) -- walk corpus.symbols, assign to the fields you want to change -end +end) diff --git a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js index 328168b81c..b298e8aa22 100644 --- a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js +++ b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js @@ -6,31 +6,32 @@ // anyone writing `@see` by hand. 
function partnerName(name) { + var partner = null; if (name.indexOf("parse_") === 0) { - return "format_" + name.slice(6); + partner = "format_" + name.slice(6); + } else if (name.indexOf("format_") === 0) { + partner = "parse_" + name.slice(7); } - if (name.indexOf("format_") === 0) { - return "parse_" + name.slice(7); - } - return null; + return partner; } -function transform_corpus(corpus) { +register_transform(function(corpus) { for (var i = 0; i < corpus.symbols.length; ++i) { var s = corpus.symbols[i]; - if (s.kind !== "function") { continue; } - var pname = partnerName(s.name); - if (!pname) { continue; } - var partner = corpus.lookup(pname); - if (!partner) { continue; } - s.doc = { - sees: [{ - children: [{ - kind: "reference", - literal: pname, - id: partner.id - }] - }] - }; + if (s.kind === "function") { + var pname = partnerName(s.name); + var partner = pname ? corpus.lookup(pname) : null; + if (partner) { + s.doc = { + sees: [{ + children: [{ + kind: "reference", + literal: pname, + id: partner.id + }] + }] + }; + } + } } -} +}); diff --git a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua index eaa7c3504b..d4a4efdabf 100644 --- a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua +++ b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua @@ -6,16 +6,16 @@ -- anyone writing `@see` by hand. local function partnerName(name) + local partner = nil if name:sub(1, 6) == "parse_" then - return "format_" .. name:sub(7) + partner = "format_" .. name:sub(7) + elseif name:sub(1, 7) == "format_" then + partner = "parse_" .. name:sub(8) end - if name:sub(1, 7) == "format_" then - return "parse_" .. 
name:sub(8) - end - return nil + return partner end -function transform_corpus(corpus) +register_transform(function(corpus) for _, s in ipairs(corpus.symbols) do if s.kind == "function" then local pname = partnerName(s.name) @@ -37,4 +37,4 @@ function transform_corpus(corpus) end end end -end +end) diff --git a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js index fb412b6587..990a1d2c8d 100644 --- a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js +++ b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js @@ -9,15 +9,17 @@ function listSubclasses(corpus, sym, indent) { for (var i = 0; i < sym.derived.length; ++i) { var child = corpus.get(sym.derived[i]); - if (!child) { continue; } - console.log(indent + child.name); - listSubclasses(corpus, child, indent + " "); + if (child) { + console.log(indent + child.name); + listSubclasses(corpus, child, indent + " "); + } } } -function transform_corpus(corpus) { +register_transform(function(corpus) { var base = corpus.lookup("Shape"); - if (!base) { return; } - console.log(base.name); - listSubclasses(corpus, base, " "); -} + if (base) { + console.log(base.name); + listSubclasses(corpus, base, " "); + } +}); diff --git a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua index e58940c905..b2b53465a7 100644 --- a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua +++ b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua @@ -16,9 +16,10 @@ local function listSubclasses(corpus, sym, indent) end end -function transform_corpus(corpus) +register_transform(function(corpus) local base = 
corpus.lookup("Shape") - if not base then return end - print(base.name) - listSubclasses(corpus, base, " ") -end + if base then + print(base.name) + listSubclasses(corpus, base, " ") + end +end) From 1f02213209c4308b604c71354bd6cea81e1120c7 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Fri, 12 Jun 2026 09:34:26 +0200 Subject: [PATCH 07/12] fix: a JavaScript function value weakly references its interpreter `js::Context::~Context` tore the JerryScript interpreter down on the first `Context` destruction, breaking the reference cycle between the `Impl` and the native holders that each keep a `shared_ptr` to it. A `dom::Function` obtained from a JavaScript value holds only that `shared_ptr`, so once the originating `Context` was gone the function referred to a freed interpreter and calling it crashed. So, count only the live `js::Context` instances that share an interpreter and tear it down when the last one goes away. A function value converted to a `dom::Function` holds a weak reference and locks it on each call: once the interpreter is gone the call returns an error instead of touching freed memory. Code that needs such a function to outlive the `Context` that produced it keeps a `Context` alive separately. Single-context use is unchanged: the count drops to zero on that `Context`'s destruction and cleanup runs as before. Handlebars helpers written in JavaScript are unaffected; they hold the value directly rather than through the function-value conversion. 
--- src/lib/Support/JavaScript.cpp | 55 +++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/src/lib/Support/JavaScript.cpp b/src/lib/Support/JavaScript.cpp index 442bf72cab..e7ff819600 100644 --- a/src/lib/Support/JavaScript.cpp +++ b/src/lib/Support/JavaScript.cpp @@ -4,6 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // Copyright (c) 2025 Alan de Freitas (alandefreitas@gmail.com) +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) // // Official repository: https://github.com/cppalliance/mrdocs // @@ -462,6 +463,15 @@ struct Context::Impl { // Flag set while cleanup/jerry_cleanup is running to suppress deleters. bool cleaning_up = false; + // Number of live `js::Context` instances (the original plus any copies) + // sharing this interpreter. The interpreter is torn down when this + // reaches zero. JS function values converted to a `dom::Function` only + // weakly reference the interpreter, so keeping one runnable past its + // originating `Context` means holding a `Context` copy elsewhere (the + // corpus does this for a registered generator). Atomic because the last + // reference may be released from any thread. + std::atomic context_refs{0}; + // Serialize access to this JerryScript context (single-threaded engine). mutable std::recursive_mutex mtx; @@ -675,15 +685,34 @@ lockContext(std::shared_ptr const& impl) return ContextActivation(impl); } -Context::Context() : impl_(std::make_shared()) {} +Context::Context() : impl_(std::make_shared()) +{ + impl_->context_refs.store(1, std::memory_order_relaxed); +} -Context::Context(Context const& other) noexcept = default; +Context::Context(Context const& other) noexcept + : impl_(other.impl_) +{ + if (impl_) + { + impl_->context_refs.fetch_add(1, std::memory_order_relaxed); + } +} Context::~Context() { - // Clean up the JerryScript context before releasing impl_. - // DomValueHolder objects keep shared_ptr, so cleanup breaks that cycle. 
- if (impl_) + // Tear the interpreter down once the last `Context` referencing it goes + // away (`context_refs` counts live `Context` instances and their copies). + // A `dom::Function` obtained from a JS value holds only a weak reference, + // so it never keeps the interpreter alive on its own; code that needs such + // a function to outlive this `Context` (a corpus that stores a + // `register_generator` function) keeps a `Context` copy alive instead. The + // `DomValueHolder` / `FunctionHolder` objects keep a `shared_ptr`, so + // the interpreter owns them through a cycle; `cleanup()` breaks that cycle + // by tearing down the holders, which is why teardown is explicit here + // rather than left to `~Impl`. + if (impl_ && + impl_->context_refs.fetch_sub(1, std::memory_order_acq_rel) == 1) { impl_->cleanup(); } @@ -2085,9 +2114,21 @@ toDomValue(jerry_value_t v, std::shared_ptr const& impl) delete h; }); + // The function value does not own the interpreter: it captures a weak + // reference and locks it on each call. Code that needs the function to + // outlive the `Context` that produced it (a corpus that stores a + // `register_generator` function) keeps a `js::Context` alive + // separately; if nothing does, a later call reports an error rather + // than using a freed interpreter. 
return dom::makeVariadicInvocable( - [fnHandle, - impl](dom::Array const& args) -> Expected { + [fnHandle, weak_impl = std::weak_ptr(impl)]( + dom::Array const& args) -> Expected { + std::shared_ptr impl = weak_impl.lock(); + if (!impl || !impl->alive || !impl->jerry_ctx) + { + return Unexpected(Error( + "JavaScript interpreter is no longer available")); + } auto lock = lockContext(impl); std::vector jsArgs; jsArgs.reserve(args.size()); From 9bbf86219d8ecf3425ca3d39e93f0469e59b2ab5 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Fri, 12 Jun 2026 15:35:47 +0200 Subject: [PATCH 08/12] feat: declare output generators from extension scripts An extension script now defines an output generator with `register_generator(id, fn)`, alongside any `register_transform` it declares, rather than a generator directory shipping a mrdocs-generator.yml that names a script. Both hooks receive one `ctx` object: `ctx.corpus` and `ctx.config` for a transform, and additionally `ctx.output` for a generator. This replaces the positional `generate(corpus, output, config, params)` entry point and the separate manifest-script discovery pass. A registered generator is a `dom::Function` the corpus owns, because the generator registry is a process-global that is never cleared. The build populates the corpus with the registered generators while extensions run; `GenerateAction` then resolves the requested generator from the corpus before falling back to the registry. A single language-agnostic runner invokes the function, so the two per-language generator runners collapse into one path. The manifest now carries only the data-driven generator fields, its escape rules and the parent it extends; the `script` and `params` keys are gone. The search-index example moves under addons/extensions and declares its generator with `register_generator`. 
--- CMakeLists.txt | 1 + docs/mrdocs.schema.json | 2 +- .../addons/extensions/search_index.lua | 31 ++ .../generator/search-index/generate.lua | 20 - .../search-index/mrdocs-generator.yml | 1 - src/lib/ConfigOptions.json | 2 +- src/lib/CorpusImpl.cpp | 32 ++ src/lib/CorpusImpl.hpp | 44 ++ src/lib/Extensions/CorpusDom.cpp | 9 + src/lib/Extensions/CorpusDom.hpp | 15 +- src/lib/Extensions/JsBinding.cpp | 85 +++- src/lib/Extensions/JsBinding.hpp | 14 +- src/lib/Extensions/LuaBinding.cpp | 88 ++-- src/lib/Extensions/LuaBinding.hpp | 14 +- src/lib/Gen/GeneratorManifest.cpp | 81 ---- src/lib/Gen/GeneratorManifest.hpp | 57 +-- src/lib/Gen/hbs/DataDrivenGenerators.cpp | 18 +- src/lib/Gen/hbs/DataDrivenGenerators.hpp | 4 - src/lib/Gen/script/ScriptGenerator.cpp | 152 +++---- src/lib/Gen/script/ScriptGenerator.hpp | 92 ++--- src/lib/Gen/script/ScriptGeneratorJs.cpp | 102 ----- src/lib/Gen/script/ScriptGeneratorLua.cpp | 146 ------- src/lib/Gen/script/ScriptRunner.hpp | 68 --- src/test/TestRunner.cpp | 8 - src/test/lib/Gen/script/ScriptGenerator.cpp | 387 ++++++------------ src/tool/GenerateAction.cpp | 72 ++-- .../addons/extensions/transforms.js | 12 +- .../js-set-name/addons/extensions/rename.js | 6 +- .../lua-clear-doc/addons/extensions/clear.lua | 4 +- .../addons/primary/extensions/zzz-primary.lua | 4 +- .../extensions/aaa-supplemental.lua | 4 +- .../addons/extensions/transforms.lua | 8 +- .../lua-set-name/addons/extensions/rename.lua | 6 +- .../addons/extensions/replace_return.lua | 6 +- .../addons/extensions/brief_from_name.js | 6 +- .../addons/extensions/brief_from_name.lua | 4 +- .../entry-point/addons/extensions/noop.js | 4 +- .../entry-point/addons/extensions/noop.lua | 4 +- .../addons/extensions/parse_format_relates.js | 8 +- .../extensions/parse_format_relates.lua | 6 +- .../addons/extensions/subclass_tree.js | 12 +- .../addons/extensions/subclass_tree.lua | 12 +- 42 files changed, 602 insertions(+), 1049 deletions(-) create mode 100644 
examples/generators/script-driven/search-index/addons/extensions/search_index.lua delete mode 100644 examples/generators/script-driven/search-index/addons/generator/search-index/generate.lua delete mode 100644 examples/generators/script-driven/search-index/addons/generator/search-index/mrdocs-generator.yml delete mode 100644 src/lib/Gen/script/ScriptGeneratorJs.cpp delete mode 100644 src/lib/Gen/script/ScriptGeneratorLua.cpp delete mode 100644 src/lib/Gen/script/ScriptRunner.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a688422a3b..8a4eab2206 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -483,6 +483,7 @@ if (MRDOCS_BUILD_TESTS) "${PROJECT_BINARY_DIR}/src" ) target_link_libraries(mrdocs-test PUBLIC mrdocs-core) + target_link_libraries(mrdocs-test PRIVATE Lua::lua) if (MRDOCS_CLANG) target_compile_options(mrdocs-test PRIVATE -Wno-covered-switch-default) endif () diff --git a/docs/mrdocs.schema.json b/docs/mrdocs.schema.json index d95bf2a4e6..6dca45d2dc 100644 --- a/docs/mrdocs.schema.json +++ b/docs/mrdocs.schema.json @@ -282,7 +282,7 @@ "default": [ "html" ], - "description": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; script-driven generators instead ship a Lua or JavaScript script that produces the output. This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", + "description": "The generator is responsible for creating the documentation from the extracted symbols. 
The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; a script-driven generator is declared by an extension with `register_generator` and produces the output itself. This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", "title": "Generator(s) used to create the documentation" }, "global-namespace-index": { diff --git a/examples/generators/script-driven/search-index/addons/extensions/search_index.lua b/examples/generators/script-driven/search-index/addons/extensions/search_index.lua new file mode 100644 index 0000000000..5bea13fb69 --- /dev/null +++ b/examples/generators/script-driven/search-index/addons/extensions/search_index.lua @@ -0,0 +1,31 @@ +-- Declare a `search-index` generator: a script-defined generator that +-- aggregates every symbol into a single search-index.json, the kind of +-- artifact the per-page generators cannot produce. +-- +-- `register_generator(id, fn)` declares it next to any +-- `register_transform` a script might also declare; selecting +-- `generator: ` runs `fn` with one `ctx`. `ctx.corpus.symbols` is +-- every symbol (each tagged with a flat `_id` so the generator can form +-- stable per-symbol URLs) and `ctx.output.write` emits files under the +-- output directory. + +-- Quote a string as a JSON value. +local function json_string(s) + s = s:gsub('\\', '\\\\'):gsub('"', '\\"') + return '"' .. s .. 
'"' +end + +register_generator("search-index", function(ctx) + local entries = {} + for _, sym in ipairs(ctx.corpus.symbols) do + local name = sym.name or "" + if name ~= "" then + entries[#entries + 1] = + '{"name":' .. json_string(name) .. + ',"url":' .. json_string(sym._id .. ".html") .. "}" + end + end + ctx.output.write( + "search-index.json", + "[" .. table.concat(entries, ",") .. "]") +end) diff --git a/examples/generators/script-driven/search-index/addons/generator/search-index/generate.lua b/examples/generators/script-driven/search-index/addons/generator/search-index/generate.lua deleted file mode 100644 index d6a468ed4f..0000000000 --- a/examples/generators/script-driven/search-index/addons/generator/search-index/generate.lua +++ /dev/null @@ -1,20 +0,0 @@ --- Quote a string as a JSON value. -local function json_string(s) - s = s:gsub('\\', '\\\\'):gsub('"', '\\"') - return '"' .. s .. '"' -end - -function generate(corpus, output) - local entries = {} - for _, sym in ipairs(corpus.symbols) do - local name = sym.name or "" - if name ~= "" then - entries[#entries + 1] = - '{"name":' .. json_string(name) .. - ',"url":' .. json_string(sym._id .. ".html") .. "}" - end - end - output.write( - "search-index.json", - "[" .. table.concat(entries, ",") .. 
"]") -end diff --git a/examples/generators/script-driven/search-index/addons/generator/search-index/mrdocs-generator.yml b/examples/generators/script-driven/search-index/addons/generator/search-index/mrdocs-generator.yml deleted file mode 100644 index 79a2356a56..0000000000 --- a/examples/generators/script-driven/search-index/addons/generator/search-index/mrdocs-generator.yml +++ /dev/null @@ -1 +0,0 @@ -script: generate.lua diff --git a/src/lib/ConfigOptions.json b/src/lib/ConfigOptions.json index 96216f6a91..5ac8b6788e 100644 --- a/src/lib/ConfigOptions.json +++ b/src/lib/ConfigOptions.json @@ -443,7 +443,7 @@ { "name": "generator", "brief": "Generator(s) used to create the documentation", - "details": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; script-driven generators instead ship a Lua or JavaScript script that produces the output. This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", + "details": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; a script-driven generator is declared by an extension with `register_generator` and produces the output itself. 
This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", "type": "string-list", "default": ["html"] }, diff --git a/src/lib/CorpusImpl.cpp b/src/lib/CorpusImpl.cpp index 5877df7a6d..d650e32b7c 100644 --- a/src/lib/CorpusImpl.cpp +++ b/src/lib/CorpusImpl.cpp @@ -1102,4 +1102,36 @@ CorpusImpl::finalize() } } +void +CorpusImpl:: +registerScriptGenerator(std::string id, dom::Function fn) +{ + if (findScriptGenerator(id) == nullptr) + { + scriptGenerators_.emplace_back(std::move(id), std::move(fn)); + } +} + +dom::Function const* +CorpusImpl:: +findScriptGenerator(std::string_view id) const noexcept +{ + dom::Function const* result = nullptr; + for (auto const& entry : scriptGenerators_) + { + if (result == nullptr && std::string_view(entry.first) == id) + { + result = &entry.second; + } + } + return result; +} + +void +CorpusImpl:: +keepScriptVmAlive(std::shared_ptr keepAlive) +{ + scriptVmKeepAlives_.push_back(std::move(keepAlive)); +} + } // mrdocs diff --git a/src/lib/CorpusImpl.hpp b/src/lib/CorpusImpl.hpp index 567ef38ed0..a05295d9ed 100644 --- a/src/lib/CorpusImpl.hpp +++ b/src/lib/CorpusImpl.hpp @@ -20,14 +20,19 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include +#include +#include +#include namespace mrdocs { @@ -56,6 +61,22 @@ class CorpusImpl final : public Corpus // The value is another map from the name to the Info. std::map> lookupCache_; + // Output generators an extension script defined via + // `register_generator(id, fn)`. Each `fn` is a `dom::Function` that + // stays runnable until this corpus is destroyed (after extensions run, + // when a generator is selected). Its scripting VM is kept alive either + // by the function itself or by a matching entry in + // `scriptVmKeepAlives_`. 
First registration of an id wins; later ones + // are ignored. + std::vector> scriptGenerators_; + + // Strong references to scripting VMs that back `scriptGenerators_` but + // are only weakly held by the generator functions themselves (the + // JavaScript backend works this way). Keeping a reference here lets + // such a generator outlive the extension run that defined it, up to + // this corpus's destruction. + std::vector> scriptVmKeepAlives_; + friend class Corpus; friend class BaseMembersFinalizer; friend class OverloadsFinalizer; @@ -194,6 +215,29 @@ class CorpusImpl final : public Corpus void finalize(); + /** Register a script-defined output generator. + + Called from an extension's `register_generator(id, fn)`. The first + registration of a given id wins; later ones are ignored. + */ + void + registerScriptGenerator(std::string id, dom::Function fn); + + /** Return the script-defined generator with this id, or `nullptr`. + */ + dom::Function const* + findScriptGenerator(std::string_view id) const noexcept; + + /** Keep a scripting VM alive for the lifetime of this corpus. + + A generator registered via `register_generator` may hold only a + weak reference to the VM that defined it. The extension binding + hands the VM over here so it outlives the extension run and stays + usable when the generator is selected. + */ + void + keepScriptVmAlive(std::shared_ptr keepAlive); + private: /** Return the Info with the specified symbol ID. 
diff --git a/src/lib/Extensions/CorpusDom.cpp b/src/lib/Extensions/CorpusDom.cpp index 5f5cbb0191..8d7b09985d 100644 --- a/src/lib/Extensions/CorpusDom.cpp +++ b/src/lib/Extensions/CorpusDom.cpp @@ -151,4 +151,13 @@ buildCorpusDom(CorpusImpl& corpus) return dom::Value(std::move(corpusObj)); } +dom::Value +buildTransformContext(CorpusImpl& corpus) +{ + dom::Object ctx; + ctx.set("corpus", buildCorpusDom(corpus)); + ctx.set("config", dom::Value(corpus.config.object())); + return dom::Value(std::move(ctx)); +} + } // mrdocs diff --git a/src/lib/Extensions/CorpusDom.hpp b/src/lib/Extensions/CorpusDom.hpp index fd61f5b6e0..a2c4444a70 100644 --- a/src/lib/Extensions/CorpusDom.hpp +++ b/src/lib/Extensions/CorpusDom.hpp @@ -15,8 +15,7 @@ namespace mrdocs { class CorpusImpl; -/** Build the `corpus` argument passed to each registered corpus - transform. +/** Build the `ctx.corpus` object seen by extension scripts. The returned value is a small object: @@ -33,6 +32,18 @@ class CorpusImpl; dom::Value buildCorpusDom(CorpusImpl& corpus); +/** Build the `ctx` argument passed to each registered corpus transform. + + A transform receives one object so new capabilities can be added + without changing its signature: + + - `ctx.corpus` -- the navigable corpus (see @ref buildCorpusDom) the + transform reads and mutates in place. + - `ctx.config` -- the generation configuration. +*/ +dom::Value +buildTransformContext(CorpusImpl& corpus); + } // mrdocs #endif diff --git a/src/lib/Extensions/JsBinding.cpp b/src/lib/Extensions/JsBinding.cpp index f4bac662d2..080b9e95cd 100644 --- a/src/lib/Extensions/JsBinding.cpp +++ b/src/lib/Extensions/JsBinding.cpp @@ -20,19 +20,24 @@ #include #include +#include #include namespace mrdocs { namespace { -// Bind `register_transform` as the script-facing entry point before the -// script runs. A JavaScript function bridges to a `dom::Function`, so -// each registered callable is captured as a DOM value in `transforms`, -// in registration order. 
The collector outlives every invocation because -// the script (which does the registering) runs while it is in scope. +// Bind the `register_transform` and `register_generator` entry points +// before the script runs. A JavaScript function bridges to a +// `dom::Function`: transforms are captured in `transforms` (invoked once +// the script has run), generators are handed to the corpus, which keeps +// them runnable past this VM's lifetime. void -registerJsExtensionApi(js::Scope& scope, dom::Array& transforms) +registerJsExtensionApi( + js::Scope& scope, + CorpusImpl& corpus, + dom::Array& transforms, + std::size_t& generators) { scope.setGlobal( "register_transform", @@ -52,17 +57,41 @@ registerJsExtensionApi(js::Scope& scope, dom::Array& transforms) } return result; }))); + + scope.setGlobal( + "register_generator", + dom::Value(dom::makeVariadicInvocable( + [&corpus, &generators](dom::Array const& args) + -> Expected + { + Expected result; + if (args.size() < 2 || + !args.get(0).isString() || + !args.get(1).isFunction()) + { + result = Unexpected(Error( + "register_generator: expected (string id, function)")); + } + else + { + corpus.registerScriptGenerator( + std::string(args.get(0).getString().get()), + args.get(1).getFunction()); + ++generators; + } + return result; + }))); } -// Invoke one registered transform with the corpus, tagging any failure -// with the script path for context. +// Invoke one registered transform with the `ctx` object, tagging any +// failure with the script path for context. 
Expected invokeTransform( dom::Value const& transform, - dom::Value const& corpus, + dom::Value const& ctx, std::string const& scriptPath) { - Expected invoked = transform.getFunction().try_invoke(corpus); + Expected invoked = transform.getFunction().try_invoke(ctx); Expected result; if (!invoked.has_value()) { @@ -84,13 +113,17 @@ runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath) js::registerStdGlobals(scope); dom::Array transforms; - registerJsExtensionApi(scope, transforms); - - // The corpus argument is a small navigable object: an array of - // per-symbol proxies plus `get(id)` / `lookup(name)` functions. - // Everything else a script does runs through that proxy: direct - // reads via reflection, direct writes that mutate the live Symbol. - dom::Value corpusValue = buildCorpusDom(corpus); + // Generators are owned by the corpus, not held here, so this counts + // them only to tell whether the script registered anything at all. + std::size_t generators = 0; + registerJsExtensionApi(scope, corpus, transforms, generators); + + // Each transform receives one `ctx` object: `ctx.corpus` is the + // navigable corpus (an array of per-symbol proxies plus + // `get(id)` / `lookup(name)`), `ctx.config` the generation config. + // Everything a script does runs through that proxy: direct reads via + // reflection, direct writes that mutate the live Symbol. + dom::Value ctxValue = buildTransformContext(corpus); MRDOCS_TRY(std::string script, files::getFileText(scriptPath)); @@ -102,11 +135,12 @@ runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath) "extension '{}': {}", scriptPath, result.error().message())); } - else if (transforms.empty()) + else if (transforms.empty() && generators == 0) { // A discovered script that registers nothing is almost always a - // mistake (a misspelled `register_transform`, or a guard that - // skipped it), so flag it rather than silently doing nothing. 
+ // mistake (a misspelled `register_transform` / `register_generator`, + // or a guard that skipped it), so flag it rather than silently + // doing nothing. report::warn("extension '{}' registered nothing", scriptPath); } @@ -117,9 +151,18 @@ runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath) if (result.has_value()) { result = invokeTransform( - transforms.get(i), corpusValue, scriptPath); + transforms.get(i), ctxValue, scriptPath); } } + + // A generator registered above holds only a weak reference to this + // VM, so hand the corpus a strong reference. That keeps the VM, and + // therefore the generator, alive past this run until the corpus is + // destroyed. + if (generators > 0) + { + corpus.keepScriptVmAlive(std::make_shared(ctx)); + } return result; } diff --git a/src/lib/Extensions/JsBinding.hpp b/src/lib/Extensions/JsBinding.hpp index e761a96356..82eb15b92f 100644 --- a/src/lib/Extensions/JsBinding.hpp +++ b/src/lib/Extensions/JsBinding.hpp @@ -21,12 +21,14 @@ class CorpusImpl; /** Run one JavaScript extension script against the corpus. - Build a fresh JS context, evaluate the script, and run every corpus - transform it declares by calling `register_transform(fn)`. Each - registered function is invoked once, in registration order, with a - navigable DOM view of the corpus that it can read and mutate in place. - A script that registers nothing causes a warning and otherwise has no - effect, so an empty .js file is tolerated. + Build a fresh JS context and evaluate the script. The script declares + corpus transforms with `register_transform(fn)` and output generators + with `register_generator(id, fn)`, in either combination. Each transform + is invoked once, in registration order, with a navigable DOM view of the + corpus it can read and mutate in place; each generator is handed to the + corpus to run later, once one is selected. 
A script that registers + nothing causes a warning and otherwise has no effect, so an empty .js + file is tolerated. */ Expected runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath); diff --git a/src/lib/Extensions/LuaBinding.cpp b/src/lib/Extensions/LuaBinding.cpp index 17d0eb0e49..66c79281c7 100644 --- a/src/lib/Extensions/LuaBinding.cpp +++ b/src/lib/Extensions/LuaBinding.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -31,16 +32,20 @@ namespace mrdocs { namespace { -// Collects the corpus transforms an extension declares through -// `register_transform`. Each registered Lua function is anchored in the -// Lua registry (never a global) and exposed as a `dom::Function` over the -// live corpus, so both scripting languages funnel through one call path. -// The collector rides as the `register_transform` closure's single -// upvalue. +// Backing state for an extension's `register_*` calls. Transforms are +// collected here and invoked once the chunk has run; generators are handed +// straight to the corpus, which owns them past this VM's lifetime. Each +// registered Lua function is anchored in the Lua registry (never a global) +// and exposed as a `dom::Function`. This struct rides as the +// `register_transform` / `register_generator` closures' single upvalue. struct LuaRegistrations { lua::Context* ctx = nullptr; + CorpusImpl* corpus = nullptr; std::vector transforms; + // Generators are owned by the corpus, not held here, so this counts + // them only to tell whether the script registered anything at all. + std::size_t generators = 0; }; // `register_transform(fn)`: anchor `fn` in the registry and record it as @@ -65,28 +70,61 @@ luaRegisterTransform(lua_State* L) return result; } -// Bind `register_transform` as the script-facing entry point before the -// chunk runs, the same way a script generator's `output` object is bound. -// The collector pointer is carried as the closure's single upvalue. 
+// `register_generator(id, fn)`: anchor `fn` in the registry and hand it to +// the corpus under `id`. The corpus keeps it runnable until a generator is +// selected and run, long after this chunk's stack has unwound. +int +luaRegisterGenerator(lua_State* L) +{ + LuaRegistrations* regs = static_cast<LuaRegistrations*>( + lua_touserdata(L, lua_upvalueindex(1))); + int result = 0; + if (lua_type(L, 1) != LUA_TSTRING || lua_type(L, 2) != LUA_TFUNCTION) + { + result = luaL_error(L, + "register_generator: expected (string id, function)"); + } + else + { + std::size_t len = 0; + char const* data = lua_tolstring(L, 1, &len); + std::string id(data, len); + lua_pushvalue(L, 2); + int const ref = luaL_ref(L, LUA_REGISTRYINDEX); + regs->corpus->registerScriptGenerator( + std::move(id), lua::makeCallable(*regs->ctx, ref)); + ++regs->generators; + } + return result; +} + +// Bind the `register_transform` and `register_generator` entry points +// before the chunk runs. The shared registrations pointer is carried as +// each closure's single upvalue. void -registerLuaExtensionApi(lua::Context& ctx, LuaRegistrations& regs) +registerLuaExtensionApi( + lua::Context& ctx, CorpusImpl& corpus, LuaRegistrations& regs) { regs.ctx = &ctx; + regs.corpus = &corpus; lua_State* L = static_cast<lua_State*>(ctx.nativeState()); lua_pushlightuserdata(L, &regs); lua_pushcclosure(L, &luaRegisterTransform, 1); lua_setglobal(L, "register_transform"); + lua_pushlightuserdata(L, &regs); + lua_pushcclosure(L, &luaRegisterGenerator, 1); + lua_setglobal(L, "register_generator"); } -// Invoke one registered transform with the corpus, tagging any failure -// with the script path for context. +// Invoke one registered transform with the `ctx` object, tagging any +// failure with the script path for context. 
Expected invokeTransform( dom::Function const& transform, - dom::Value const& corpus, + dom::Value const& ctx, std::string const& scriptPath) { - Expected invoked = transform.try_invoke(corpus); + Expected invoked = transform.try_invoke(ctx); Expected result; if (!invoked.has_value()) { @@ -104,16 +142,17 @@ runOneLuaExtension(CorpusImpl& corpus, std::string const& scriptPath) { lua::Context ctx; LuaRegistrations regs; - registerLuaExtensionApi(ctx, regs); + registerLuaExtensionApi(ctx, corpus, regs); lua::Scope scope(ctx); - // The corpus argument is a small navigable object: an array of - // per-symbol proxies plus `get(id)` / `lookup(name)` functions. + // Each transform receives one `ctx` object: `ctx.corpus` is the + // navigable corpus (an array of per-symbol proxies plus + // `get(id)` / `lookup(name)`), `ctx.config` the generation config. // Reads run through reflection on the live C++ Symbol; writes - // (`sym.name = "..."`) mutate that Symbol directly through the - // `__newindex` metamethod's `dom::Object::set` path. - dom::Value corpusValue = buildCorpusDom(corpus); + // (`ctx.corpus.symbols[i].name = "..."`) mutate that Symbol directly + // through the `__newindex` metamethod's `dom::Object::set` path. + dom::Value ctxValue = buildTransformContext(corpus); MRDOCS_TRY(std::string script, files::getFileText(scriptPath)); MRDOCS_TRY(lua::Function chunk, scope.loadChunk(script, scriptPath)); @@ -123,9 +162,10 @@ runOneLuaExtension(CorpusImpl& corpus, std::string const& scriptPath) MRDOCS_TRY(chunk.call()); // A discovered script that registers nothing is almost always a - // mistake (a misspelled `register_transform`, or a guard that skipped - // it), so flag it rather than silently doing nothing. - if (regs.transforms.empty()) + // mistake (a misspelled `register_transform` / `register_generator`, + // or a guard that skipped it), so flag it rather than silently doing + // nothing. 
+ if (regs.transforms.empty() && regs.generators == 0) { report::warn("extension '{}' registered nothing", scriptPath); } @@ -137,7 +177,7 @@ runOneLuaExtension(CorpusImpl& corpus, std::string const& scriptPath) { if (result.has_value()) { - result = invokeTransform(transform, corpusValue, scriptPath); + result = invokeTransform(transform, ctxValue, scriptPath); } } return result; diff --git a/src/lib/Extensions/LuaBinding.hpp b/src/lib/Extensions/LuaBinding.hpp index 96c0841183..a4c2e3c0c6 100644 --- a/src/lib/Extensions/LuaBinding.hpp +++ b/src/lib/Extensions/LuaBinding.hpp @@ -21,12 +21,14 @@ class CorpusImpl; /** Run one Lua extension script against the corpus. - Build a fresh Lua context, evaluate the script, and run every corpus - transform it declares by calling `register_transform(fn)`. Each - registered function is invoked once, in registration order, with a - navigable DOM view of the corpus that it can read and mutate in place. - A script that registers nothing causes a warning and otherwise has no - effect, so an empty .lua file is tolerated. + Build a fresh Lua context and evaluate the script. The script declares + corpus transforms with `register_transform(fn)` and output generators + with `register_generator(id, fn)`, in either combination. Each transform + is invoked once, in registration order, with a navigable DOM view of the + corpus it can read and mutate in place; each generator is handed to the + corpus to run later, once one is selected. A script that registers + nothing causes a warning and otherwise has no effect, so an empty .lua + file is tolerated. 
*/ Expected runOneLuaExtension(CorpusImpl& corpus, std::string const& scriptPath); diff --git a/src/lib/Gen/GeneratorManifest.cpp b/src/lib/Gen/GeneratorManifest.cpp index 2e624ebdad..0c0309839a 100644 --- a/src/lib/Gen/GeneratorManifest.cpp +++ b/src/lib/Gen/GeneratorManifest.cpp @@ -9,7 +9,6 @@ // #include "GeneratorManifest.hpp" -#include #include #include #include @@ -30,58 +29,6 @@ scalarText(llvm::yaml::ScalarNode& node) return std::string(text.data(), text.size()); } -// Forward declaration for the recursive conversion below. -dom::Value yamlToDom(llvm::yaml::Node* node); - -// Add one key/value pair of a YAML mapping to `obj`, recursing on the -// value. A non-scalar key is skipped. -void -addMappingEntry(llvm::yaml::KeyValueNode& entry, dom::Object& obj) -{ - llvm::yaml::ScalarNode* const keyNode = - llvm::dyn_cast_or_null(entry.getKey()); - if (keyNode) - { - obj.set(scalarText(*keyNode), yamlToDom(entry.getValue())); - } -} - -// Convert a YAML node to a DOM value. -dom::Value -yamlToDom(llvm::yaml::Node* node) -{ - dom::Value result(nullptr); - if (node && !llvm::isa(node)) - { - if (llvm::yaml::ScalarNode* const scalar = - llvm::dyn_cast(node)) - { - result = dom::Value(scalarText(*scalar)); - } - else if (llvm::yaml::SequenceNode* const sequence = - llvm::dyn_cast(node)) - { - dom::Array array; - for (llvm::yaml::Node& element : *sequence) - { - array.emplace_back(yamlToDom(&element)); - } - result = dom::Value(std::move(array)); - } - else if (llvm::yaml::MappingNode* const mapping = - llvm::dyn_cast(node)) - { - dom::Object object; - for (llvm::yaml::KeyValueNode& entry : *mapping) - { - addMappingEntry(entry, object); - } - result = dom::Value(std::move(object)); - } - } - return result; -} - // Parse a YAML mapping whose entries are non-empty byte-sequence keys // mapped to replacement strings. An empty key is a hard error. 
Expected @@ -153,34 +100,6 @@ parseTopLevelEntry( result = parseEscape(*escNode, manifest, yamlPath); } } - else if (key == "script") - { - llvm::yaml::ScalarNode* const valNode = - llvm::dyn_cast_or_null(pair.getValue()); - if (!valNode) - { - result = Unexpected(formatError( - "{}: 'script' must be a scalar", yamlPath)); - } - else - { - manifest.script = scalarText(*valNode); - } - } - else if (key == "params") - { - llvm::yaml::MappingNode* const paramsNode = - llvm::dyn_cast_or_null(pair.getValue()); - if (!paramsNode) - { - result = Unexpected(formatError( - "{}: 'params' must be a mapping", yamlPath)); - } - else - { - manifest.params = yamlToDom(paramsNode).getObject(); - } - } else if (key == "extends") { llvm::yaml::ScalarNode* const valNode = diff --git a/src/lib/Gen/GeneratorManifest.hpp b/src/lib/Gen/GeneratorManifest.hpp index ac98f3da48..21675d46b6 100644 --- a/src/lib/Gen/GeneratorManifest.hpp +++ b/src/lib/Gen/GeneratorManifest.hpp @@ -11,10 +11,8 @@ #ifndef MRDOCS_LIB_GEN_GENERATORMANIFEST_HPP #define MRDOCS_LIB_GEN_GENERATORMANIFEST_HPP -#include #include #include -#include #include #include #include @@ -24,56 +22,28 @@ namespace mrdocs { /** The parsed contents of a generator manifest. - A manifest is the mrdocs-generator.yml that an addon directory - under /generator// exposes to declare a generator. The - two generator flavors read disjoint fields of the same file: - - @li A data-driven (Handlebars) generator reads the escape rules - and the parent it `extends`. - - @li A script-driven generator reads the script-file path and its - parameters. - - The presence of the `script` entry is what distinguishes the two: a - manifest that names a `script` is a script-driven generator, - otherwise it is data-driven. 
+ A manifest is the mrdocs-generator.yml that an addon directory under + /generator// exposes to declare a data-driven Handlebars + generator: it carries the escape rules the generator applies to + rendered output and the parent it `extends`. */ struct GeneratorManifest { - /** The entry file of a script-driven generator. - - Holds the value of the manifest's optional `script` key, a path - relative to the generator directory. Empty when the manifest - declares no `script`, in which case the directory is a - data-driven generator. - */ - std::optional script; - - /** The escape rules of a data-driven generator. + /** The escape rules of the generator. Each pair maps a byte-sequence source to its replacement string, in manifest order. Empty when no escape rules are declared. */ std::vector> escape; - /** The parent a data-driven generator inherits from. + /** The parent this generator inherits from. Holds the manifest's optional `extends` key: the id of another generator whose partials and helpers this one falls back to, after its own directory but before `common/`. Empty when no - parent is declared. A script-driven generator ignores this field. + parent is declared. */ std::string extends; - - /** The parameters of a script-driven generator. - - Holds the manifest's optional `params` mapping, passed to the - entry script as its `params` argument. Mapping values may be - nested objects or arrays; a scalar value is a string. Empty when - the manifest declares no `params`. A data-driven generator - ignores this field. - */ - dom::Object params; }; /** Parse a generator manifest into plain data. @@ -82,12 +52,9 @@ struct GeneratorManifest expected to contain a top-level mapping. The optional `escape` key holds a sub-mapping from byte-sequence keys to replacement strings; keys may be one or more bytes long, and an empty key is a hard error. - The optional `script` key holds the entry-file path as a scalar. 
The - optional `extends` key names a parent generator as a scalar. The - optional `params` key holds a mapping of generator-specific - parameters; its values may be nested, and a scalar value is read as a - string. Unknown top-level keys are ignored so future schema additions - are non-breaking. + The optional `extends` key names a parent generator as a scalar. + Unknown top-level keys are ignored so future schema additions are + non-breaking. An empty document (an empty file, comments only, or a literal `null`) yields an empty manifest. @@ -116,9 +83,7 @@ struct DiscoveredManifest its directory. Directories without a manifest (like the built-in common/) are skipped. - The presence of a `script` entry distinguishes the two generator - flavors, so a caller installs the flavor it owns and ignores the - other. Roots are searched in order, so the result preserves addon + Roots are searched in order, so the result preserves addon precedence. */ Expected> diff --git a/src/lib/Gen/hbs/DataDrivenGenerators.cpp b/src/lib/Gen/hbs/DataDrivenGenerators.cpp index 13f0839f5d..6da473d8b1 100644 --- a/src/lib/Gen/hbs/DataDrivenGenerators.cpp +++ b/src/lib/Gen/hbs/DataDrivenGenerators.cpp @@ -54,9 +54,6 @@ discoverDataDrivenGenerators(Config::Settings const& settings) discoverGeneratorManifests(addon_paths::addonRoots(settings))); for (DiscoveredManifest const& d : found) { - // A manifest that names a `script` is a script-driven generator; - // that flavor is installed by its own discovery pass. - // // The generator registry is process-global and is not cleared // between runs in the same process. `installGenerator` fails when // the id is already taken, whether by a built-in or by an @@ -64,15 +61,12 @@ discoverDataDrivenGenerators(Config::Settings const& settings) // first-writer-wins layering we want, so a duplicate id is a // silent skip rather than an error (a `null` generator is the only // other failure it reports, and we never pass one). 
- if (!d.manifest.script) - { - std::string const name(files::getFileName(d.dir)); - (void)installGenerator( - std::make_unique( - name, name, name, - toEscapeMap(d.manifest.escape), - d.manifest.extends)); - } + std::string const name(files::getFileName(d.dir)); + (void)installGenerator( + std::make_unique( + name, name, name, + toEscapeMap(d.manifest.escape), + d.manifest.extends)); } return {}; } diff --git a/src/lib/Gen/hbs/DataDrivenGenerators.hpp b/src/lib/Gen/hbs/DataDrivenGenerators.hpp index 0158f18396..92dc02a4b4 100644 --- a/src/lib/Gen/hbs/DataDrivenGenerators.hpp +++ b/src/lib/Gen/hbs/DataDrivenGenerators.hpp @@ -33,10 +33,6 @@ namespace mrdocs::hbs { (the built-in `common/` is the canonical example) don't declare a manifest and are skipped. - 3. Its manifest does not name a `script`. A manifest with a `script` - key declares a script-driven generator, which is installed by - `discoverScriptGenerators` instead, so it is skipped here. - For each accepted directory, a `HandlebarsGenerator` is constructed with id, file extension, and display name all set to ``, and installed into the global registry. Escape rules are read from diff --git a/src/lib/Gen/script/ScriptGenerator.cpp b/src/lib/Gen/script/ScriptGenerator.cpp index 8f1649343d..de7cee8186 100644 --- a/src/lib/Gen/script/ScriptGenerator.cpp +++ b/src/lib/Gen/script/ScriptGenerator.cpp @@ -9,32 +9,26 @@ // #include "ScriptGenerator.hpp" -#include "ScriptRunner.hpp" #include "OutputSink.hpp" -#include -#include + #include #include #include #include #include -#include -#include -#include -#include + #include -#include namespace mrdocs::script { namespace { -// Build the read-only corpus DOM a `generate(corpus, output)` entry -// point receives. This mirrors what an extension script sees: a -// `symbols` array of lazy per-symbol objects, each tagged with its flat -// `_id` so a script can form stable per-symbol URLs. 
+// Build the read-only corpus the generator's `ctx.corpus` exposes: a +// `symbols` array of the same per-symbol objects the templates see, each +// tagged with its flat `_id` so a generator can form stable per-symbol +// URLs. dom::Value -buildScriptCorpus(Corpus const& corpus, DomCorpus const& domCorpus) +buildGeneratorCorpus(Corpus const& corpus, DomCorpus const& domCorpus) { dom::Array symbols; for (Symbol const& sym : corpus) @@ -48,100 +42,76 @@ buildScriptCorpus(Corpus const& corpus, DomCorpus const& domCorpus) return dom::Value(std::move(corpusObj)); } -} // (anon) - -ScriptGenerator:: -ScriptGenerator(std::string id, std::string scriptPath, dom::Object params) - : id_(std::move(id)) - , scriptPath_(std::move(scriptPath)) - , params_(std::move(params)) -{ -} - -std::string_view -ScriptGenerator:: -id() const noexcept +// Build the `ctx.output` object and its `write(path, contents)` method. +// The method is a DOM invocable that routes to the sink; the same value +// is callable from both Lua and JavaScript, so one output API serves +// either language. The sink outlives the call (a local in +// `runScriptGenerator`), so capturing it by pointer is safe. +dom::Value +buildOutputApi(OutputSink& sink) { - return id_; + OutputSink* sinkPtr = &sink; + dom::Object api; + api.set("write", dom::Value(dom::makeVariadicInvocable( + [sinkPtr](dom::Array const& args) -> Expected + { + Expected result; + if (args.size() < 2 || + !args.get(0).isString() || + !args.get(1).isString()) + { + result = Unexpected(Error( + "output.write: expected (string path, string contents)")); + } + else if (Expected wrote = sinkPtr->write( + args.get(0).getString().get(), + args.get(1).getString().get()); + !wrote) + { + result = Unexpected(wrote.error()); + } + else + { + result = dom::Value(); + } + return result; + }))); + return dom::Value(std::move(api)); } -std::string_view -ScriptGenerator:: -displayName() const noexcept +// Assemble the single `ctx` object the generator receives. 
+dom::Value +buildGeneratorContext( + Corpus const& corpus, DomCorpus const& domCorpus, OutputSink& sink) { - return id_; + dom::Object ctx; + ctx.set("corpus", buildGeneratorCorpus(corpus, domCorpus)); + ctx.set("output", buildOutputApi(sink)); + ctx.set("config", dom::Value(corpus.config.object())); + return dom::Value(std::move(ctx)); } -std::string_view -ScriptGenerator:: -fileExtension() const noexcept -{ - // A script-driven generator names its own output files, so there's - // no single extension. Report the id for diagnostics. - return id_; -} +} // (anon) Expected -ScriptGenerator:: -build(std::string_view outputPath, Corpus const& corpus) const +runScriptGenerator( + dom::Function const& generate, + std::string_view id, + Corpus const& corpus, + std::string_view outputPath) { OutputSink sink(outputPath); DomCorpus domCorpus(corpus); - dom::Value corpusValue = buildScriptCorpus(corpus, domCorpus); - dom::Value const config(corpus.config.object()); - dom::Value const params(params_); + dom::Value ctx = buildGeneratorContext(corpus, domCorpus, sink); + + Expected invoked = generate.try_invoke(ctx); Expected result; - if (scriptPath_.ends_with(".lua")) - { - result = runLuaGenerator( - corpusValue, scriptPath_, sink, config, params); - } - else if (scriptPath_.ends_with(".js")) - { - result = runJsGenerator( - corpusValue, scriptPath_, sink, config, params); - } - else + if (!invoked) { result = Unexpected(formatError( - "generator '{}': script '{}' must be a .lua or .js file", - id_, scriptPath_)); + "generator '{}': {}", id, invoked.error().message())); } return result; } -Expected -ScriptGenerator:: -buildOne(std::ostream&, Corpus const&) const -{ - return Unexpected(formatError( - "generator '{}' is script-driven and does not support " - "single-page output", id_)); -} - -Expected -discoverScriptGenerators(Config::Settings const& settings) -{ - MRDOCS_TRY( - std::vector found, - discoverGeneratorManifests(hbs::addon_paths::addonRoots(settings))); - for 
(DiscoveredManifest const& d : found) - { - // Only manifests that name a `script` are script-driven - // generators; the data-driven pass installs the rest. - // First-writer-wins, exactly as the data-driven pass: a - // duplicate id is a silent skip, and we never pass a `null`. - if (d.manifest.script) - { - std::string const name(files::getFileName(d.dir)); - std::string scriptPath = - files::appendPath(d.dir, *d.manifest.script); - (void)installGenerator( - std::make_unique( - name, std::move(scriptPath), d.manifest.params)); - } - } - return {}; -} - } // mrdocs::script diff --git a/src/lib/Gen/script/ScriptGenerator.hpp b/src/lib/Gen/script/ScriptGenerator.hpp index ff7a2aa44b..5429bb78bc 100644 --- a/src/lib/Gen/script/ScriptGenerator.hpp +++ b/src/lib/Gen/script/ScriptGenerator.hpp @@ -11,90 +11,44 @@ #ifndef MRDOCS_LIB_GEN_SCRIPT_SCRIPTGENERATOR_HPP #define MRDOCS_LIB_GEN_SCRIPT_SCRIPTGENERATOR_HPP -#include +#include #include -#include #include #include -#include -#include #include namespace mrdocs::script { -/** A generator whose output is produced by a user script. +/** Run a script-defined output generator. - A script-driven generator hands the whole emit to a Lua or - JavaScript entry point of the form - `generate(corpus, output, config, params)`: the script traverses the - corpus and writes whatever files it wants through the `output` - object, optionally reading the resolved `config` and its own - `params`. Because the script owns the output structure, it can - produce shapes the per-page generators cannot, such as a single - artifact aggregated across all symbols (a search index, for example). -*/ -class ScriptGenerator - : public Generator -{ - std::string id_; - // The absolute path to the Lua or JavaScript entry script. - std::string scriptPath_; - // The generator's own parameters, from the manifest's `params` - // mapping; passed to the entry script as its `params` argument. 
- dom::Object params_; - -public: - /** Construct a script-driven generator. - - @param id The generator id, used to select it on the command - line. - @param scriptPath The absolute path to the entry script. - @param params The generator's own parameters, from its manifest. - */ - ScriptGenerator( - std::string id, - std::string scriptPath, - dom::Object params); - - std::string_view - id() const noexcept override; - - std::string_view - displayName() const noexcept override; - - std::string_view - fileExtension() const noexcept override; - - /** Run the entry script, which owns the whole emit. - */ - Expected - build( - std::string_view outputPath, - Corpus const& corpus) const override; + A generator declared with `register_generator(id, fn)` is a + `dom::Function` that owns the whole emit. Invoke it with one `ctx` + object, mirroring the shape a transform receives: - /** Reject single-page output. + @li `ctx.corpus` is the read-only corpus, a `symbols` array of the + same per-symbol objects the templates see, each tagged with its + flat `_id` so the generator can form stable per-symbol URLs. - A script-driven generator owns its output structure and writes - whatever files it wants, so there is no single-stream form. - */ - Expected - buildOne( - std::ostream& os, - Corpus const& corpus) const override; -}; + @li `ctx.output` exposes `write(path, contents)`, resolved under the + output directory; a path that escapes it is rejected. -/** Discover script-driven generators and install them. + @li `ctx.config` is the resolved configuration, as templates see it. - For each configured addon root, walk the immediate subdirectories of - /generator/. A subdirectory becomes a script-driven generator - when its `mrdocs-generator.yml` names an entry script. The generator - id, used to select it on the command line, is the subdirectory name. 
+ The function is language-agnostic: a `dom::Function` self-owns its + scripting VM, so one call drives a Lua or a JavaScript generator + without the host knowing which. - Should be called once after the configuration is resolved and before - a generator is looked up by id. + @param generate The registered generator function. + @param id The generator id, used to tag diagnostics. + @param corpus The finalized corpus to emit. + @param outputPath The output directory the generator writes under. */ Expected -discoverScriptGenerators(Config::Settings const& settings); +runScriptGenerator( + dom::Function const& generate, + std::string_view id, + Corpus const& corpus, + std::string_view outputPath); } // mrdocs::script diff --git a/src/lib/Gen/script/ScriptGeneratorJs.cpp b/src/lib/Gen/script/ScriptGeneratorJs.cpp deleted file mode 100644 index cd34361e29..0000000000 --- a/src/lib/Gen/script/ScriptGeneratorJs.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) -// -// Official repository: https://github.com/cppalliance/mrdocs -// - -#include "ScriptRunner.hpp" -#include "OutputSink.hpp" - -#include -#include -#include - -#include -#include - -namespace mrdocs::script { - -namespace { - -// Build the `output` object passed as the second argument to `generate`. -// The JavaScript wrapper exposes a `dom::Function` as a callable proxy, -// so unlike the Lua side this needs no escape hatch: `write` is a variadic -// invocable that routes to the sink. The sink outlives the call (it is a -// local in `runJsGenerator`), so capturing it by pointer is safe. 
-dom::Object -buildJsOutputApi(OutputSink& sink) -{ - OutputSink* sinkPtr = &sink; - dom::Object api; - api.set("write", dom::Value(dom::makeVariadicInvocable( - [sinkPtr](dom::Array const& args) -> Expected - { - if (args.size() < 2) - { - return Unexpected(Error( - "output.write: expected (path, contents)")); - } - dom::Value const path = args.get(0); - dom::Value const body = args.get(1); - if (!path.isString() || !body.isString()) - { - return Unexpected(Error( - "output.write: path and contents must be strings")); - } - Expected result = sinkPtr->write( - path.getString().get(), body.getString().get()); - if (!result) - { - return Unexpected(result.error()); - } - return dom::Value(); - }))); - return api; -} - -} // (anon) - -Expected -runJsGenerator( - dom::Value const& corpus, - std::string const& scriptPath, - OutputSink& sink, - dom::Value const& config, - dom::Value const& params) -{ - js::Context ctx; - js::Scope scope(ctx); - - MRDOCS_TRY(std::string script, files::getFileText(scriptPath)); - if (Expected exp = scope.script(script); !exp) - { - return Unexpected(formatError( - "generator '{}': {}", - scriptPath, exp.error().message())); - } - - // Unlike an extension, a generator must define `generate`. - Expected fn = scope.getGlobal("generate"); - if (!fn || !fn->isFunction()) - { - return Unexpected(formatError( - "generator '{}': script must define a 'generate' function", - scriptPath)); - } - - Expected result = - fn->call(corpus, buildJsOutputApi(sink), config, params); - if (!result) - { - return Unexpected(formatError( - "generator '{}': {}", - scriptPath, result.error().message())); - } - return {}; -} - -} // mrdocs::script diff --git a/src/lib/Gen/script/ScriptGeneratorLua.cpp b/src/lib/Gen/script/ScriptGeneratorLua.cpp deleted file mode 100644 index 306c2273f4..0000000000 --- a/src/lib/Gen/script/ScriptGeneratorLua.cpp +++ /dev/null @@ -1,146 +0,0 @@ -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) -// -// Official repository: https://github.com/cppalliance/mrdocs -// - -#include "ScriptRunner.hpp" -#include "OutputSink.hpp" - -#include -#include -#include - -#include -#include -#include -#include - -extern "C" { -#include -#include -} - -namespace mrdocs::script { - -namespace { - -// Lua adapter for `OutputSink::write`. On failure, the script aborts via -// `luaL_error`; the host turns that into an `Unexpected` when `lua_pcall` -// returns non-OK. The sink pointer is carried as the closure's single -// upvalue. -int -luaWrite(lua_State* L) -{ - OutputSink* sink = static_cast( - lua_touserdata(L, lua_upvalueindex(1))); - if (lua_type(L, 1) != LUA_TSTRING || - lua_type(L, 2) != LUA_TSTRING) - { - return luaL_error(L, - "output.write: expected (string path, string contents)"); - } - std::size_t pathLen = 0; - char const* pathData = lua_tolstring(L, 1, &pathLen); - std::size_t bodyLen = 0; - char const* bodyData = lua_tolstring(L, 2, &bodyLen); - - Expected result = sink->write( - std::string_view(pathData, pathLen), - std::string_view(bodyData, bodyLen)); - if (!result) - { - return luaL_error(L, "%s", result.error().message().c_str()); - } - return 0; -} - -// Build the `output` global table and bind its `write` method. -// -// We register the C closure directly on the raw `lua_State` (via the -// `Context::nativeState()` escape hatch) because the wrapper cannot carry -// a native callable through a DOM value: `domValue_push` has no function -// case. The closure carries the sink pointer as its single upvalue. 
-void -registerLuaOutputApi(lua_State* L, OutputSink& sink) -{ - lua_newtable(L); - - lua_pushlightuserdata(L, &sink); - lua_pushcclosure(L, &luaWrite, 1); - lua_setfield(L, -2, "write"); - - lua_setglobal(L, "output"); -} - -} // (anon) - -Expected -runLuaGenerator( - dom::Value const& corpus, - std::string const& scriptPath, - OutputSink& sink, - dom::Value const& config, - dom::Value const& params) -{ - lua::Context ctx; - - // Register the `output` global before loading the script so - // top-level code can reference it, and so we can pass it as the - // second argument below. - registerLuaOutputApi( - static_cast(ctx.nativeState()), sink); - - lua::Scope scope(ctx); - MRDOCS_TRY(std::string script, files::getFileText(scriptPath)); - MRDOCS_TRY(lua::Function chunk, scope.loadChunk(script, scriptPath)); - - Expected chunkResult = chunk.call(); - if (!chunkResult) - { - return Unexpected(chunkResult.error()); - } - - // Fetch the `output` global so it can be passed as the second - // argument. It must outlive the `generate` call below, so hold it - // here rather than moving it out. - Expected output = scope.getGlobal("output"); - if (!output) - { - return Unexpected(output.error()); - } - - auto callGenerate = - [&](lua::Function&& fn) -> Expected - { - Expected result = - fn.call(corpus, *output, config, params); - if (!result) - { - return Unexpected(formatError( - "generator '{}': {}", - scriptPath, result.error().message())); - } - return {}; - }; - - // A generator must define a global `generate` function, the same - // shape JavaScript requires. Accepting only the named global (rather - // than also a function the chunk returns) keeps one convention across - // both languages and leaves room for a script to expose more than one - // named entry point later. 
- Expected generateFn = scope.getGlobal("generate"); - if (!generateFn || !generateFn->isFunction()) - { - return Unexpected(formatError( - "generator '{}': script must define a 'generate' function", - scriptPath)); - } - return callGenerate(lua::Function(std::move(*generateFn))); -} - -} // mrdocs::script diff --git a/src/lib/Gen/script/ScriptRunner.hpp b/src/lib/Gen/script/ScriptRunner.hpp deleted file mode 100644 index a1dae4ac4f..0000000000 --- a/src/lib/Gen/script/ScriptRunner.hpp +++ /dev/null @@ -1,68 +0,0 @@ -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) -// -// Official repository: https://github.com/cppalliance/mrdocs -// - -#ifndef MRDOCS_LIB_GEN_SCRIPT_SCRIPTRUNNER_HPP -#define MRDOCS_LIB_GEN_SCRIPT_SCRIPTRUNNER_HPP - -#include -#include -#include -#include - -namespace mrdocs::script { - -class OutputSink; - -/** Run a Lua entry script's `generate(corpus, output, config, params)`. - - Build a Lua context, expose the output writer as the `output` global, - evaluate the script, and call its `generate` function with the - corpus, the writer, the resolved configuration, and the generator's - own parameters. A missing `generate` function is an error. - - @param corpus The read-only corpus DOM passed as the first argument. - @param scriptPath The absolute path to the Lua entry script. - @param sink The file-writing API exposed to the script. - @param config The resolved configuration DOM, as templates see it. - @param params The generator's own parameters, from its manifest. -*/ -Expected -runLuaGenerator( - dom::Value const& corpus, - std::string const& scriptPath, - OutputSink& sink, - dom::Value const& config, - dom::Value const& params); - -/** Run a JS entry script's `generate(corpus, output, config, params)`. 
- - Build a JavaScript context, evaluate the script, and call its - `generate` function with the corpus, an `output` object whose - `write` method routes to the writer, the resolved configuration, and - the generator's own parameters. A missing `generate` function is an - error. - - @param corpus The read-only corpus DOM passed as the first argument. - @param scriptPath The absolute path to the JavaScript entry script. - @param sink The file-writing API exposed to the script. - @param config The resolved configuration DOM, as templates see it. - @param params The generator's own parameters, from its manifest. -*/ -Expected -runJsGenerator( - dom::Value const& corpus, - std::string const& scriptPath, - OutputSink& sink, - dom::Value const& config, - dom::Value const& params); - -} // mrdocs::script - -#endif diff --git a/src/test/TestRunner.cpp b/src/test/TestRunner.cpp index 724696b1a5..4bf74862bb 100644 --- a/src/test/TestRunner.cpp +++ b/src/test/TestRunner.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -190,13 +189,6 @@ handleFile( { return report::error("{}: \"{}\"", discovered.error(), filePath); } - Expected scriptsDiscovered = - script::discoverScriptGenerators(loaded->settings); - if (!scriptsDiscovered) - { - return report::error( - "{}: \"{}\"", scriptsDiscovered.error(), filePath); - } // The generator(s) come from the test's merged configuration. Accept // the comma-separated form and fall back to a single xml run. 
diff --git a/src/test/lib/Gen/script/ScriptGenerator.cpp b/src/test/lib/Gen/script/ScriptGenerator.cpp index f6d32f3911..3e8a46c42e 100644 --- a/src/test/lib/Gen/script/ScriptGenerator.cpp +++ b/src/test/lib/Gen/script/ScriptGenerator.cpp @@ -8,47 +8,32 @@ // Official repository: https://github.com/cppalliance/mrdocs // -#include #include #include -#include #include #include #include #include -#include +#include +#include #include #include #include -#include #include #include -#include + +extern "C" { +#include +#include +} namespace mrdocs::script { namespace { -// Write `content` verbatim to `path`. Pre-existing files are truncated. -void -writeFile(std::string_view path, std::string_view content) -{ - std::ofstream os(std::string{path}, std::ios::binary | std::ios::trunc); - os.write(content.data(), - static_cast(content.size())); -} - -// The `config` and `params` arguments a generator receives. Tests that -// exercise only the `corpus` and `output` path pass empty objects. -dom::Value -emptyObject() -{ - return dom::Value(dom::Object()); -} - -// A minimal `Config` whose `object()` returns a canned DOM object. It -// lets a `build()`-level test assert what `generate` sees as `config` -// without creating a real `ConfigImpl`. +// A minimal `Config` whose `object()` returns a canned DOM object, so a +// test can assert what a generator sees as `ctx.config` without building +// a real `ConfigImpl`. struct StubConfig : Config { @@ -73,9 +58,9 @@ struct StubConfig } }; -// An empty corpus: `build()` iterates no symbols, so it never reflects -// a real `Symbol`; it carries only the `Config` that `build()` reads -// `config` from. +// An empty corpus: the runner iterates no symbols, so `ctx.corpus.symbols` +// is an empty array. It carries only the `Config` the runner reads +// `ctx.config` from. 
struct StubCorpus : Corpus { @@ -125,51 +110,51 @@ struct StubCorpus } }; -// A two-symbol corpus shaped like what `generate(corpus, output)` sees: -// a `symbols` array whose entries carry a `name` and a flat `_id`. -dom::Value -makeCorpus() +// Load `src`, which must define a global `generate(ctx)`, and return it as +// a callable `dom::Function`. The function is self-owning: it anchors the +// chunk in the Lua registry and carries a copy of the context, so it +// outlives the local VM here exactly as a `register_generator` function +// outlives the extension that declared it. +dom::Function +makeLuaGenerator(std::string_view src) { - dom::Object foo; - foo.set("name", std::string("foo")); - foo.set("_id", std::string("0001")); - dom::Object bar; - bar.set("name", std::string("bar")); - bar.set("_id", std::string("0002")); - dom::Array symbols; - symbols.emplace_back(dom::Value(std::move(foo))); - symbols.emplace_back(dom::Value(std::move(bar))); - dom::Object corpus; - corpus.set("symbols", std::move(symbols)); - return dom::Value(std::move(corpus)); + lua::Context ctx; + lua::Scope scope(ctx); + Expected chunk = + scope.loadChunk(std::string(src), "generator.lua"); + BOOST_TEST(chunk.has_value()); + if (chunk) + { + BOOST_TEST(chunk->call().has_value()); + } + lua_State* L = static_cast(ctx.nativeState()); + lua_getglobal(L, "generate"); + int const ref = luaL_ref(L, LUA_REGISTRYINDEX); + return lua::makeCallable(ctx, ref); } -// A Lua generator that emits one aggregated artifact across all symbols, -// the canonical thing a per-page generator cannot produce. -constexpr std::string_view luaIndex = R"LUA( -function generate(corpus, output) - local parts = {} - for _, sym in ipairs(corpus.symbols) do - parts[#parts + 1] = '{"name":"' .. sym.name .. '","id":"' .. sym._id .. '"}' - end - output.write("search-index.json", "[" .. table.concat(parts, ",") .. "]") -end -)LUA"; +// The JavaScript counterpart of `makeLuaGenerator`. 
A JS function holds only +// a weak reference to its interpreter, so - exactly as a corpus does for a +// `register_generator` function - the caller must keep the VM alive for as +// long as it intends to call the generator. `JsGenerator::keepAlive` does +// that; dropping it tears the interpreter down. (A Lua callable instead +// carries its own VM, so `makeLuaGenerator` needs no such companion.) +struct JsGenerator +{ + dom::Function generate; + js::Context keepAlive; +}; -// The same generator in JavaScript, using the global-function shape. -constexpr std::string_view jsIndex = R"JS( -function generate(corpus, output) { - var parts = []; - for (var i = 0; i < corpus.symbols.length; i++) { - var s = corpus.symbols[i]; - parts.push('{"name":"' + s.name + '","id":"' + s._id + '"}'); - } - output.write("search-index.json", "[" + parts.join(",") + "]"); +JsGenerator +makeJsGenerator(std::string_view src) +{ + js::Context ctx; + js::Scope scope(ctx); + BOOST_TEST(scope.script(std::string(src)).has_value()); + Expected fn = scope.getGlobal("generate"); + BOOST_TEST(fn.has_value()); + return JsGenerator{fn->getFunction(), ctx}; } -)JS"; - -constexpr std::string_view expectedJson = - R"([{"name":"foo","id":"0001"},{"name":"bar","id":"0002"}])"; } // (anon) @@ -218,248 +203,143 @@ struct ScriptGeneratorTest } // - // runLuaGenerator / runJsGenerator + // runScriptGenerator // - void - testLuaGenerator() + // Run `generate` over an empty stub corpus, writing into `outDir`, and + // return whether it succeeded. 
+ static Expected + runOver( + dom::Function const& generate, + std::string_view outDir) { - ScopedTempDirectory td("mrdocs-scriptgen"); - BOOST_TEST(td); - std::string const script = files::appendPath(td.path(), "g.lua"); - writeFile(script, luaIndex); - std::string const outDir = files::appendPath(td.path(), "out"); - OutputSink sink(outDir); - - Expected result = runLuaGenerator( - makeCorpus(), script, sink, emptyObject(), emptyObject()); - BOOST_TEST(result.has_value()); - Expected got = - files::getFileText(files::appendPath(outDir, "search-index.json")); - BOOST_TEST(got.has_value()); - if (got) - { - BOOST_TEST(*got == expectedJson); - } + StubConfig config; + config.configObject.set("multipage", true); + StubCorpus corpus(config); + return runScriptGenerator(generate, "selftest", corpus, outDir); } void - testJsGenerator() + testLuaGeneratorWrites() { ScopedTempDirectory td("mrdocs-scriptgen"); BOOST_TEST(td); - std::string const script = files::appendPath(td.path(), "g.js"); - writeFile(script, jsIndex); std::string const outDir = files::appendPath(td.path(), "out"); - OutputSink sink(outDir); - - Expected result = runJsGenerator( - makeCorpus(), script, sink, emptyObject(), emptyObject()); - BOOST_TEST(result.has_value()); + dom::Function gen = makeLuaGenerator(R"LUA( +function generate(ctx) + ctx.output.write("from-lua.txt", "hello from lua") +end +)LUA"); + BOOST_TEST(runOver(gen, outDir).has_value()); Expected got = - files::getFileText(files::appendPath(outDir, "search-index.json")); + files::getFileText(files::appendPath(outDir, "from-lua.txt")); BOOST_TEST(got.has_value()); if (got) { - BOOST_TEST(*got == expectedJson); + BOOST_TEST(*got == "hello from lua"); } } void - testLuaReadsMissingFieldAsNil() + testJsGeneratorWrites() { - // A symbol object without a `name` field: `get("name")` yields - // `Undefined`, which Lua must marshal as `nil` rather than abort. - // The global namespace has no name, so a real corpus hits this. 
- dom::Object noName; - noName.set("_id", std::string("0009")); - dom::Array symbols; - symbols.emplace_back(dom::Value(std::move(noName))); - dom::Object corpusObj; - corpusObj.set("symbols", std::move(symbols)); - dom::Value const corpus(std::move(corpusObj)); - ScopedTempDirectory td("mrdocs-scriptgen"); BOOST_TEST(td); - std::string const script = files::appendPath(td.path(), "g.lua"); - writeFile(script, R"LUA( -function generate(corpus, output) - local s = corpus.symbols[1] - output.write("out.txt", "name=" .. (s.name or "NONE")) -end -)LUA"); std::string const outDir = files::appendPath(td.path(), "out"); - OutputSink sink(outDir); - - Expected result = runLuaGenerator( - corpus, script, sink, emptyObject(), emptyObject()); - BOOST_TEST(result.has_value()); + JsGenerator gen = makeJsGenerator(R"JS( +function generate(ctx) { + ctx.output.write("from-js.txt", "hello from js"); +} +)JS"); + BOOST_TEST(runOver(gen.generate, outDir).has_value()); Expected got = - files::getFileText(files::appendPath(outDir, "out.txt")); + files::getFileText(files::appendPath(outDir, "from-js.txt")); BOOST_TEST(got.has_value()); if (got) { - BOOST_TEST(*got == "name=NONE"); + BOOST_TEST(*got == "hello from js"); } } void - testMissingGenerateIsError() - { - ScopedTempDirectory td("mrdocs-scriptgen"); - BOOST_TEST(td); - std::string const script = files::appendPath(td.path(), "empty.lua"); - writeFile(script, "-- this script defines no generate function\n"); - OutputSink sink(files::appendPath(td.path(), "out")); - // A generator must define `generate`; its absence is an error. 
- BOOST_TEST(!runLuaGenerator( - makeCorpus(), script, sink, emptyObject(), emptyObject()) - .has_value()); - } - - void - testLuaReceivesConfigAndParams() + testGeneratorReceivesConfig() { ScopedTempDirectory td("mrdocs-scriptgen"); BOOST_TEST(td); - std::string const script = files::appendPath(td.path(), "g.lua"); - writeFile(script, R"LUA( -function generate(corpus, output, config, params) - output.write("o.txt", tostring(config.multipage) .. "|" .. params.greeting) + std::string const outDir = files::appendPath(td.path(), "out"); + // `ctx.config` is the resolved configuration; `runOver` sets + // `multipage` to true on it. + dom::Function gen = makeLuaGenerator(R"LUA( +function generate(ctx) + ctx.output.write("config.txt", tostring(ctx.config.multipage)) end )LUA"); - std::string const outDir = files::appendPath(td.path(), "out"); - OutputSink sink(outDir); - - dom::Object config; - config.set("multipage", true); - dom::Object params; - params.set("greeting", std::string("hi")); - Expected result = runLuaGenerator( - makeCorpus(), script, sink, - dom::Value(std::move(config)), dom::Value(std::move(params))); - BOOST_TEST(result.has_value()); + BOOST_TEST(runOver(gen, outDir).has_value()); Expected got = - files::getFileText(files::appendPath(outDir, "o.txt")); + files::getFileText(files::appendPath(outDir, "config.txt")); BOOST_TEST(got.has_value()); if (got) { - BOOST_TEST(*got == "true|hi"); + BOOST_TEST(*got == "true"); } } void - testJsReceivesConfigAndParams() + testGeneratorIteratesCorpus() { ScopedTempDirectory td("mrdocs-scriptgen"); BOOST_TEST(td); - std::string const script = files::appendPath(td.path(), "g.js"); - writeFile(script, R"JS( -function generate(corpus, output, config, params) { - output.write("o.txt", String(config.multipage) + "|" + params.greeting); -} -)JS"); std::string const outDir = files::appendPath(td.path(), "out"); - OutputSink sink(outDir); - - dom::Object config; - config.set("multipage", true); - dom::Object params; - 
params.set("greeting", std::string("hi")); - Expected result = runJsGenerator( - makeCorpus(), script, sink, - dom::Value(std::move(config)), dom::Value(std::move(params))); - BOOST_TEST(result.has_value()); - Expected got = - files::getFileText(files::appendPath(outDir, "o.txt")); - BOOST_TEST(got.has_value()); - if (got) - { - BOOST_TEST(*got == "true|hi"); - } - } - - // The full `build()` path: `config` comes from - // `corpus.config.object()` and `params` from the generator's - // manifest, both reaching the script. - void - testBuildPassesConfigAndParams() - { - ScopedTempDirectory td("mrdocs-scriptgen-build"); - BOOST_TEST(td); - std::string const script = files::appendPath(td.path(), "g.lua"); - writeFile(script, R"LUA( -function generate(corpus, output, config, params) - output.write("o.txt", tostring(config.multipage) .. "|" .. params.greeting) + // `ctx.corpus.symbols` is iterable; over the empty stub corpus it + // yields nothing, so the aggregated artifact is an empty list. + dom::Function gen = makeLuaGenerator(R"LUA( +function generate(ctx) + local parts = {} + for _, sym in ipairs(ctx.corpus.symbols) do + parts[#parts + 1] = sym.name + end + ctx.output.write("index.txt", "[" .. table.concat(parts, ",") .. 
"]") end )LUA"); - - StubConfig config; - config.configObject.set("multipage", true); - StubCorpus corpus(config); - - dom::Object params; - params.set("greeting", std::string("hi")); - ScriptGenerator gen("build-selftest", script, std::move(params)); - - std::string const outDir = files::appendPath(td.path(), "out"); - Expected result = gen.build(outDir, corpus); - BOOST_TEST(result.has_value()); + BOOST_TEST(runOver(gen, outDir).has_value()); Expected got = - files::getFileText(files::appendPath(outDir, "o.txt")); + files::getFileText(files::appendPath(outDir, "index.txt")); BOOST_TEST(got.has_value()); if (got) { - BOOST_TEST(*got == "true|hi"); + BOOST_TEST(*got == "[]"); } } - // - // discoverScriptGenerators - // - void - testDiscoveryRegistersScriptGenerator() + testWriteEscapeIsError() { - ScopedTempDirectory td("mrdocs-scriptgen-disc"); + ScopedTempDirectory td("mrdocs-scriptgen"); BOOST_TEST(td); - // Lay out /generator// with a script manifest. The id - // is unusual so it does not collide with the process-global - // registry shared across the test binary. - std::string const id = "mrdocs-script-generator-selftest"; - std::string const genDir = - files::appendPath(td.path(), "generator", id); - BOOST_TEST(files::createDirectory(genDir).has_value()); - writeFile( - files::appendPath(genDir, "mrdocs-generator.yml"), - "script: g.lua\n"); - writeFile(files::appendPath(genDir, "g.lua"), luaIndex); - - Config::Settings settings; - settings.addons = std::string(td.path()); - BOOST_TEST(discoverScriptGenerators(settings).has_value()); - BOOST_TEST(findGenerator(id) != nullptr); + std::string const outDir = files::appendPath(td.path(), "out"); + // A write that escapes the output directory fails in the sink, and + // the failure surfaces back through the script as a runner error. 
+ dom::Function gen = makeLuaGenerator(R"LUA( +function generate(ctx) + ctx.output.write("../escaped.txt", "no") +end +)LUA"); + BOOST_TEST(!runOver(gen, outDir).has_value()); } void - testManifestParamsParsed() + testGeneratorErrorIsReported() { - ScopedTempDirectory td("mrdocs-scriptgen-manifest"); + ScopedTempDirectory td("mrdocs-scriptgen"); BOOST_TEST(td); - std::string const yml = - files::appendPath(td.path(), "mrdocs-generator.yml"); - writeFile(yml, "script: g.lua\nparams:\n greeting: hi\n"); - Expected manifest = loadGeneratorManifest(yml); - BOOST_TEST(manifest.has_value()); - if (manifest) - { - dom::Value const greeting = manifest->params.get("greeting"); - BOOST_TEST(greeting.isString()); - if (greeting.isString()) - { - BOOST_TEST(greeting.getString().get() == "hi"); - } - } + std::string const outDir = files::appendPath(td.path(), "out"); + // An error raised inside the generator is reported, not swallowed. + dom::Function gen = makeLuaGenerator(R"LUA( +function generate(ctx) + error("boom") +end +)LUA"); + BOOST_TEST(!runOver(gen, outDir).has_value()); } void @@ -468,15 +348,12 @@ end testSinkWritesUnderRoot(); testSinkRejectsAbsolutePath(); testSinkRejectsEscape(); - testLuaGenerator(); - testJsGenerator(); - testLuaReadsMissingFieldAsNil(); - testMissingGenerateIsError(); - testLuaReceivesConfigAndParams(); - testJsReceivesConfigAndParams(); - testBuildPassesConfigAndParams(); - testDiscoveryRegistersScriptGenerator(); - testManifestParamsParsed(); + testLuaGeneratorWrites(); + testJsGeneratorWrites(); + testGeneratorReceivesConfig(); + testGeneratorIteratesCorpus(); + testWriteEscapeIsError(); + testGeneratorErrorIsReported(); } }; diff --git a/src/tool/GenerateAction.cpp b/src/tool/GenerateAction.cpp index 50e148b8dc..3897d8a345 100644 --- a/src/tool/GenerateAction.cpp +++ b/src/tool/GenerateAction.cpp @@ -54,31 +54,18 @@ DoGenerateAction( // // -------------------------------------------------------------- // Each /generator// directory that 
ships a - // mrdocs-generator.yml is registered as an additional generator - // before the user-requested generator is looked up below. A manifest - // that declares `escape` rules is a data-driven Handlebars generator; - // a manifest that names a `script` is a script-driven generator. + // mrdocs-generator.yml with `escape` rules is registered as a + // data-driven Handlebars generator, ready alongside the built-ins + // when the requested generator is selected after extraction. A + // script-defined generator is declared differently: an extension + // registers it on the corpus while it is built, so it is resolved + // from the corpus rather than here. MRDOCS_TRY(hbs::discoverDataDrivenGenerators(config->settings())); - MRDOCS_TRY(script::discoverScriptGenerators(config->settings())); - // -------------------------------------------------------------- - // - // Load generator - // - // -------------------------------------------------------------- - auto& settings = config->settings(); - std::vector generators; - for (auto const& genId : settings.generator.values) - { - MRDOCS_TRY( - Generator const& generator, - findGenerator(genId), - formatError( - "the Generator \"{}\" was not found", - genId)); - generators.push_back(&generator); - } - MRDOCS_CHECK(!generators.empty(), "No generator was specified"); + Config::Settings const& settings = config->settings(); + MRDOCS_CHECK(settings.output, "The output path argument is missing"); + MRDOCS_CHECK( + !settings.generator.values.empty(), "No generator was specified"); // -------------------------------------------------------------- // @@ -124,17 +111,38 @@ DoGenerateAction( // Generate docs // // -------------------------------------------------------------- - // Normalize outputPath path - MRDOCS_CHECK(settings.output, "The output path argument is missing"); report::info("Generating docs"); - // Each generator decides how to resolve its own output (single vs - // multipage, directory vs file, existing target, and 
whether other - // generators share the output). GenerateAction does not have enough - // information to route per-generator output here, so it just builds - // each one against the configured output. - for (Generator const* generator : generators) + // Each configured generator shares the same output directory; + // GenerateAction does not route per-generator output, so each one + // resolves its own (single vs multipage, file vs directory) against + // it. For a given id a generator an extension registered on the + // corpus with register_generator wins over a built-in or data-driven + // one: it owns the whole emit through the script runner, while a + // registry generator renders the corpus page by page. + CorpusImpl const& corpusImpl = static_cast(*corpus); + for (std::string const& genId : settings.generator.values) { - MRDOCS_TRY(generator->build(*corpus)); + dom::Function const* scriptGenerator = + corpusImpl.findScriptGenerator(genId); + if (scriptGenerator != nullptr) + { + std::string const outputDir = files::normalizePath( + files::makeAbsolute( + corpus->config->output, + corpus->config->configDir())); + MRDOCS_TRY(script::runScriptGenerator( + *scriptGenerator, genId, *corpus, outputDir)); + } + else + { + MRDOCS_TRY( + Generator const& generator, + findGenerator(genId), + formatError( + "the Generator \"{}\" was not found", + genId)); + MRDOCS_TRY(generator.build(*corpus)); + } } // -------------------------------------------------------------- diff --git a/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js b/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js index 7de823947d..5f17eb3d34 100644 --- a/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js +++ b/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js @@ -3,11 +3,11 @@ // transforms. 
The first renames every function; the second rewrites // its brief. Mirrors the lua-register-transform fixture on the JS path. -register_transform(function(corpus) +register_transform(function(ctx) { - for (var i = 0; i < corpus.symbols.length; ++i) + for (var i = 0; i < ctx.corpus.symbols.length; ++i) { - var sym = corpus.symbols[i]; + var sym = ctx.corpus.symbols[i]; if (sym.kind === "function") { sym.name = "renamed_" + sym.name; @@ -15,11 +15,11 @@ register_transform(function(corpus) } }); -register_transform(function(corpus) +register_transform(function(ctx) { - for (var i = 0; i < corpus.symbols.length; ++i) + for (var i = 0; i < ctx.corpus.symbols.length; ++i) { - var sym = corpus.symbols[i]; + var sym = ctx.corpus.symbols[i]; if (sym.kind === "function") { sym.doc = { diff --git a/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js b/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js index 487a2e2c60..e831446a96 100644 --- a/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js +++ b/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js @@ -7,11 +7,11 @@ // proxy's `set` trap forwards each assignment into the live C++ // Symbol via reflection. 
-register_transform(function(corpus) +register_transform(function(ctx) { - for (var i = 0; i < corpus.symbols.length; ++i) + for (var i = 0; i < ctx.corpus.symbols.length; ++i) { - var sym = corpus.symbols[i]; + var sym = ctx.corpus.symbols[i]; if (sym.kind === "function") { sym.name = "renamed_" + sym.name; diff --git a/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua b/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua index 7b97f0dc58..f02b37ce71 100644 --- a/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua +++ b/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua @@ -3,8 +3,8 @@ -- doc-comment so the rendered output contains no doc-comment block -- for it. -register_transform(function(corpus) - for _, sym in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.doc = nil end diff --git a/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua b/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua index e97d1d1a66..1f7c89d132 100644 --- a/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua +++ b/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua @@ -5,8 +5,8 @@ -- so this script runs FIRST; its rename is overwritten by the -- supplemental's. 
-register_transform(function(corpus) - for _, sym in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.name = "from_primary" end diff --git a/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua b/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua index b093586d87..f621aca30e 100644 --- a/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua +++ b/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua @@ -3,8 +3,8 @@ -- overwrites the primary root's, so this is the name that must -- appear in the rendered output. -register_transform(function(corpus) - for _, sym in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.name = "from_supplemental" end diff --git a/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua b/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua index 04c28f2540..6811e2dd2b 100644 --- a/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua +++ b/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua @@ -3,16 +3,16 @@ -- transforms. The first renames every function; the second rewrites -- its brief. -register_transform(function(corpus) - for _, sym in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.name = "renamed_" .. 
sym.name end end end) -register_transform(function(corpus) - for _, sym in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.doc = { brief = { diff --git a/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua b/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua index eb9c511133..44f2c8669a 100644 --- a/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua +++ b/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua @@ -3,11 +3,11 @@ -- nested-object write whose leaves are `Polymorphic` values -- selected by a kebab-case `kind` tag. -- --- corpus.symbols is a regular Lua sequence: 1-indexed, with `#` and +-- ctx.corpus.symbols is a regular Lua sequence: 1-indexed, with `#` and -- `ipairs`/`pairs` support. -register_transform(function(corpus) - for _, sym in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.name = "renamed_" .. sym.name sym.doc = { diff --git a/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua b/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua index d1b84138f4..7d5e5f3d95 100644 --- a/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua +++ b/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua @@ -7,7 +7,7 @@ -- to the concept's documentation page): -- -- local awaitable_id = nil --- for _, s in ipairs(corpus.symbols) do +-- for _, s in ipairs(ctx.corpus.symbols) do -- if s.kind == "concept" and s.name == "Awaitable" then -- awaitable_id = s.id -- break @@ -26,8 +26,8 @@ -- This fixture omits the lookup and uses a bare identifier so the -- test is self-contained. 
-register_transform(function(corpus) - for _, sym in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" and sym.name == "target_function" then sym.returnType = { kind = "named", diff --git a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js index 133afd46c2..c210347dd9 100644 --- a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js +++ b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js @@ -7,9 +7,9 @@ // sentence on every declaration. Anything an author already wrote is // preserved: only missing fields are filled in. -register_transform(function(corpus) { - for (var i = 0; i < corpus.symbols.length; ++i) { - var sym = corpus.symbols[i]; +register_transform(function(ctx) { + for (var i = 0; i < ctx.corpus.symbols.length; ++i) { + var sym = ctx.corpus.symbols[i]; if (sym.kind === "function" && sym.name.indexOf("is_") === 0) { if (!sym.doc) { sym.doc = {}; } diff --git a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua index 6834b3d2cb..004f7ac651 100644 --- a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua +++ b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua @@ -7,8 +7,8 @@ -- sentence on every declaration. Anything an author already wrote is -- preserved: only missing fields are filled in. 
-register_transform(function(corpus) - for _, sym in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" and sym.name:sub(1, 3) == "is_" then if not sym.doc then sym.doc = {} end diff --git a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js index 1e57a7b4d3..95efe15417 100644 --- a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js +++ b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js @@ -1,3 +1,3 @@ -register_transform(function(corpus) { - // walk corpus.symbols, assign to the fields you want to change +register_transform(function(ctx) { + // walk ctx.corpus.symbols, assign to the fields you want to change }); diff --git a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua index 6869ab4abb..f18b6f8a19 100644 --- a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua +++ b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua @@ -1,3 +1,3 @@ -register_transform(function(corpus) - -- walk corpus.symbols, assign to the fields you want to change +register_transform(function(ctx) + -- walk ctx.corpus.symbols, assign to the fields you want to change end) diff --git a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js index b298e8aa22..2c662af1a6 100644 --- a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js +++ b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js @@ 
-15,12 +15,12 @@ function partnerName(name) { return partner; } -register_transform(function(corpus) { - for (var i = 0; i < corpus.symbols.length; ++i) { - var s = corpus.symbols[i]; +register_transform(function(ctx) { + for (var i = 0; i < ctx.corpus.symbols.length; ++i) { + var s = ctx.corpus.symbols[i]; if (s.kind === "function") { var pname = partnerName(s.name); - var partner = pname ? corpus.lookup(pname) : null; + var partner = pname ? ctx.corpus.lookup(pname) : null; if (partner) { s.doc = { sees: [{ diff --git a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua index d4a4efdabf..ba308e3ba8 100644 --- a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua +++ b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua @@ -15,12 +15,12 @@ local function partnerName(name) return partner end -register_transform(function(corpus) - for _, s in ipairs(corpus.symbols) do +register_transform(function(ctx) + for _, s in ipairs(ctx.corpus.symbols) do if s.kind == "function" then local pname = partnerName(s.name) if pname then - local partner = corpus.lookup(pname) + local partner = ctx.corpus.lookup(pname) if partner then s.doc = { sees = { diff --git a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js index 990a1d2c8d..77c14e445a 100644 --- a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js +++ b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js @@ -1,10 +1,10 @@ // Print the inheritance subtree rooted at a named class. 
// -// `corpus.lookup(name)` resolves the entry point once. From there the +// `ctx.corpus.lookup(name)` resolves the entry point once. From there the // only way down the tree is by id: each record carries a `derived` -// list of base16 ids, and `corpus.get(id)` turns each id back into a +// list of base16 ids, and `ctx.corpus.get(id)` turns each id back into a // live symbol proxy. The recursion walks the graph that single-pass -// iteration over `corpus.symbols` cannot reconstruct. +// iteration over `ctx.corpus.symbols` cannot reconstruct. function listSubclasses(corpus, sym, indent) { for (var i = 0; i < sym.derived.length; ++i) { @@ -16,10 +16,10 @@ function listSubclasses(corpus, sym, indent) { } } -register_transform(function(corpus) { - var base = corpus.lookup("Shape"); +register_transform(function(ctx) { + var base = ctx.corpus.lookup("Shape"); if (base) { console.log(base.name); - listSubclasses(corpus, base, " "); + listSubclasses(ctx.corpus, base, " "); } }); diff --git a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua index b2b53465a7..89912a8c21 100644 --- a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua +++ b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua @@ -1,10 +1,10 @@ -- Print the inheritance subtree rooted at a named class. -- --- `corpus.lookup(name)` resolves the entry point once. From there the +-- `ctx.corpus.lookup(name)` resolves the entry point once. From there the -- only way down the tree is by id: each record carries a `derived` --- list of base16 ids, and `corpus.get(id)` turns each id back into a +-- list of base16 ids, and `ctx.corpus.get(id)` turns each id back into a -- live symbol proxy. The recursion walks the graph that single-pass --- iteration over `corpus.symbols` cannot reconstruct. 
+-- iteration over `ctx.corpus.symbols` cannot reconstruct. local function listSubclasses(corpus, sym, indent) for _, id in ipairs(sym.derived) do @@ -16,10 +16,10 @@ local function listSubclasses(corpus, sym, indent) end end -register_transform(function(corpus) - local base = corpus.lookup("Shape") +register_transform(function(ctx) + local base = ctx.corpus.lookup("Shape") if base then print(base.name) - listSubclasses(corpus, base, " ") + listSubclasses(ctx.corpus, base, " ") end end) From 7e68b8b81e70acb52bc4ef9e5b6d064cbee48be1 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Fri, 12 Jun 2026 16:35:11 +0200 Subject: [PATCH 09/12] docs: merge generators into the extensions page The corpus-extensions page and the script-driven-generators page documented two halves of one feature: scripts under addons/extensions that declare transforms and generators through the `register_*` hooks. This folds the generator material into the extensions page, which now covers both hooks and the shared `ctx` object, and removes the separate page. 
--- docs/modules/ROOT/nav.adoc | 3 +- .../pages/extensions/corpus-extensions.adoc | 73 +++++++++++++++---- .../extensions/data-driven-generators.adoc | 2 +- .../extensions/script-driven-generators.adoc | 70 ------------------ 4 files changed, 60 insertions(+), 88 deletions(-) delete mode 100644 docs/modules/ROOT/pages/extensions/script-driven-generators.adoc diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 109e8db934..37d1d2fc14 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -23,10 +23,9 @@ ** xref:generators/adoc.adoc[AsciiDoc] ** xref:generators/xml.adoc[XML] * Extensions -** xref:extensions/corpus-extensions.adoc[Corpus Extensions] +** xref:extensions/corpus-extensions.adoc[Extensions] ** xref:extensions/handlebars-extensions.adoc[Handlebars Extensions] ** xref:extensions/data-driven-generators.adoc[Data-Driven Generators] -** xref:extensions/script-driven-generators.adoc[Script-Driven Generators] ** xref:extensions/antora.adoc[Antora Extensions] ** xref:extensions/as-library.adoc[Mr.Docs as a Library] ** xref:extensions/dom-reference.adoc[DOM Reference] diff --git a/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc b/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc index 625fd4ae86..8a49e4f3e5 100644 --- a/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc +++ b/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc @@ -1,6 +1,11 @@ -= Corpus Extensions += Extensions -Use an extension to rewrite metadata across many symbols at once: backfill briefs from a naming convention, tag symbols by group, mark generated code as "see below" in the output. Extensions run between extraction and rendering, so every generator sees the change. +An extension is a Lua or JavaScript script that runs as part of a Mr.Docs build and shapes the documentation in a way templates alone cannot. 
There are two kinds, and a single script may declare either or both: + +* A *corpus transform* rewrites metadata across many symbols at once: backfill briefs from a naming convention, tag symbols by group, mark generated code as "see below" in the output. Transforms run between extraction and rendering, so every generator sees the change. +* A *generator* owns the whole emit: instead of rendering one page per symbol, it traverses the corpus and writes whatever files it wants. That lets it produce shapes the per-page generators cannot, such as a single artifact aggregated across every symbol. + +A script declares a transform with `register_transform(fn)` and a generator with `register_generator(id, fn)`. Either way the registered function receives a single context object, `ctx`. == Languages and addon locations @@ -14,9 +19,21 @@ Both scripting languages reach the same `mrdocs` API. The choice is a trade-off, * *Lua* is the language designed to be embedded. Mr.Docs links it whole, so scripts have access to the entire Lua standard library (`string`, `table`, `math`, `io`, `os`) and can do filesystem work or text munging without leaving the script. The cost is that fewer people read Lua at a glance than read JavaScript. If you're already familiar with Lua, it is the more powerful choice. ==== -== Accessing the corpus +== The context object + +Every registered function receives one argument, `ctx`: + +* `ctx.corpus` is the corpus. Its `symbols` field is a flat array of every extracted symbol; what a script can do with those symbols depends on the kind of extension, described in the sections below. +* `ctx.config` is the resolved configuration: the same object the templates receive, holding every value from the config file and the command line. See xref:configuration/reference.adoc[the configuration reference] for the available keys. + +A generator's `ctx` carries one more field, `ctx.output`, covered under <<generators>>.
-A script extends Mr.Docs by calling `register_transform(fn)` with a function that takes the corpus. Mr.Docs invokes each registered function once, in registration order, with a flat view of the corpus. A script can register several transforms, or none at all; if it registers nothing, Mr.Docs warns that the script had no effect and moves on. +A script can register any number of transforms and generators, or none at all. If it registers nothing, Mr.Docs warns that the script had no effect and moves on. + +[#corpus-transforms] +== Corpus transforms + +A transform is a function passed to `register_transform`. Mr.Docs invokes each registered function once, in registration order, with the `ctx` object. A flat view of the corpus reaches the script through `ctx.corpus`. [tabs] ====== @@ -37,7 +54,7 @@ include::example$snippets/extensions/entry-point/addons/extensions/noop.lua[] ---- ====== -The `corpus` object provides functions that expose the symbol graph. The `corpus.symbols` field is a flat array containing every extracted symbol. Scripts that need queries like "all members of `X`" simply walk the array and filter. +The `ctx.corpus` object provides functions that expose the symbol graph. The `ctx.corpus.symbols` field is a flat array containing every extracted symbol. Scripts that need queries like "all members of `X`" simply walk the array and filter. 
For instance, the following scripts count the symbols of each kind and report the totals at the end of the run: @@ -48,10 +65,10 @@ JavaScript:: .`addons/extensions/count_by_kind.js` [source,javascript] ---- -register_transform(function(corpus) { +register_transform(function(ctx) { var counts = {}; - for (var i = 0; i < corpus.symbols.length; ++i) { - var k = corpus.symbols[i].kind; + for (var i = 0; i < ctx.corpus.symbols.length; ++i) { + var k = ctx.corpus.symbols[i].kind; counts[k] = (counts[k] || 0) + 1; } for (var k in counts) { @@ -65,9 +82,9 @@ Lua:: .`addons/extensions/count_by_kind.lua` [source,lua] ---- -register_transform(function(corpus) +register_transform(function(ctx) local counts = {} - for _, sym in ipairs(corpus.symbols) do + for _, sym in ipairs(ctx.corpus.symbols) do counts[sym.kind] = (counts[sym.kind] or 0) + 1 end for k, v in pairs(counts) do @@ -77,12 +94,12 @@ end) ---- ====== -Each entry in `corpus.symbols` is a proxy for a live Mr.Docs symbol. The fields of each object are at xref:extensions/dom-reference.adoc[the DOM reference]. +Each entry in `ctx.corpus.symbols` is a proxy for a live Mr.Docs symbol. The fields of each object are at xref:extensions/dom-reference.adoc[the DOM reference]. When a script knows a symbol's id and needs to act on that one symbol: -* `corpus.get(id)` returns the proxy for it or `null` if the id is unknown -* `corpus.lookup(name)` does a global-namespace name lookup and returns the proxy (or `null`) +* `ctx.corpus.get(id)` returns the proxy for it or `null` if the id is unknown +* `ctx.corpus.lookup(name)` does a global-namespace name lookup and returns the proxy (or `null`) .`subclass-tree.cpp` [source,cpp] @@ -122,7 +139,7 @@ Shape ---- [[modifying-the-corpus]] -== Modifying the corpus +=== Modifying the corpus Scripts modify the corpus by assigning to fields on a symbol proxy. Each assignment lands directly in the underlying Mr.Docs symbol. 
The runtime validates each assignment and raises an exception on an invalid value. An uncaught error in an extension aborts the build and includes the script's path and the error message. @@ -160,7 +177,7 @@ Every `is_foo_bar` function then ships with "Returns true if foo bar." Authors o include::example$snippets/extensions/brief-from-name/brief-from-name.adoc[tags=!footer] ======== -== Cross-linking Symbols +=== Cross-linking Symbols When the value being written needs to reference another symbol, the second symbol's `id` is what makes the link clickable in the rendered output rather than a plain string. @@ -203,3 +220,29 @@ The two-pass shape (index, then look up) is the idiom whenever a write needs to Notice in this example that `s.doc.sees` receives a list of polymorphic types that represent a paragraph in `s.doc.sees.children`. These polymorphic objects accept an object with a `kind:` selector that names the concrete derived class to construct. ==== +[[generators]] +== Generators + +The built-in generators render one page per symbol. When you need a different output structure, e.g. one file per namespace, or a single artifact aggregated across every symbol such as a search index, that page-per-symbol shape cannot express it. A generator hands the whole emit to the script instead: it traverses the corpus and writes whatever files it wants. No C++ and no templates are involved. + +A script declares a generator with `register_generator(id, fn)`. The `id` is the name you select on the command line with `--generator=`; a registered generator takes precedence over a built-in of the same name. Selecting a generator is a request for output, so its function does the work directly, the page-per-symbol fallback the built-ins provide does not apply. + +The function receives the same `ctx` a transform does, plus `ctx.output`: + +* `ctx.corpus.symbols` is the array of every symbol. 
Each carries the same fields the template and helper layers see, plus a flat `_id` string suitable as a stable per-symbol URL fragment. A generator reads the corpus rather than mutating it. +* `ctx.config` is the resolved configuration, as above. +* `ctx.output.write(relativePath, contents)` writes one file under the configured output directory, which is the path specified with `--output` on the command line, or with the `output` key in the config file; that's the same location the built-in generators write to. The path is resolved relative to that directory and may not escape it; an absolute path or one that climbs above the output directory is rejected. Parent directories are created as needed. + +Because the script owns the output, it also owns what a per-page generator would otherwise do for it: the URLs it emits, and any escaping of the content it writes. Mr.Docs does not apply an escape map to a generator's output. + +=== Example: a search index + +A complete, runnable example lives at `examples/generators/script-driven/search-index/`. The extension declares a `search-index` generator that emits a single search-index.json aggregating every symbol, an artifact no per-page generator can produce: + +.`addons/extensions/search_index.lua` +[source,lua] +---- +include::example$script-driven-generators/search-index/addons/extensions/search_index.lua[] +---- + +Select it with `--generator=search-index`; it writes search-index.json into the output directory. diff --git a/docs/modules/ROOT/pages/extensions/data-driven-generators.adoc b/docs/modules/ROOT/pages/extensions/data-driven-generators.adoc index 479e5c1cf9..e64a40affa 100644 --- a/docs/modules/ROOT/pages/extensions/data-driven-generators.adoc +++ b/docs/modules/ROOT/pages/extensions/data-driven-generators.adoc @@ -246,6 +246,6 @@ include::example$data-driven-generators/tex/simple.tex[] ---- ====== -To build the output structure yourself, e.g. 
one file per namespace or a single aggregated artifact like a search index, hand the whole emit to a script instead of rendering one page per symbol. See xref:extensions/script-driven-generators.adoc[Script-driven generators]. +To build the output structure yourself, e.g. one file per namespace or a single aggregated artifact like a search index, hand the whole emit to a script instead of rendering one page per symbol. See the xref:extensions/corpus-extensions.adoc#generators[generators] section of the extensions page. diff --git a/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc b/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc deleted file mode 100644 index 90b14d633f..0000000000 --- a/docs/modules/ROOT/pages/extensions/script-driven-generators.adoc +++ /dev/null @@ -1,70 +0,0 @@ -= Script-driven generators - -A data-driven generator renders one page per symbol from templates. When you need a different output structure, e.g. one file per namespace, or a single artifact aggregated across every symbol, such as a search index, a template generator cannot express it, because the page-per-symbol shape is fixed by the host. A script-driven generator hands the whole emit to a Lua or JavaScript script, which traverses the corpus and writes whatever files it wants. No C++ and no templates are involved. - -A generator directory is script-driven when its mrdocs-generator.yml names an entry script: - -[source,yaml] ----- -script: generator.lua ----- - -The `script` key holds a path to a Lua (.lua) or JavaScript (.js) file, relative to the generator directory. Naming a script is what distinguishes the two flavors: a manifest with a `script` key is script-driven, otherwise the directory is a data-driven (template) generator. As with template generators, the directory name is the generator id you select with `--generator`. 
- -== The `generate` entry point - -The script defines a single entry point, a function named `generate`: - -[source,lua] ----- -function generate(corpus, output, config, params) - -- ... -end ----- - -`corpus.symbols` is the array of every symbol. Each symbol carries the same fields the template and helper layers see, plus a flat `_id` string suitable as a stable per-symbol URL fragment. - -`output.write(relativePath, contents)` writes one file under the configured output directory, which is the path specified with `--output` on the command line, or with the `output` key in the config file; that's the same location the built-in generators write to. The path is resolved relative to that directory and may not escape it; an absolute path or one that climbs above the output directory is rejected. Parent directories are created as needed. - -Because the script owns the output, it also owns what a per-page generator would otherwise do for it: the URLs it emits, and any escaping of the content it writes. The host does not apply an escape map to a script-driven generator's output. - -`config` is the resolved configuration: the same object the templates receive, holding every value from the config file and the command line. See xref:configuration/reference.adoc[the configuration reference] for the available keys. - -`params` is this generator's own parameters, read from the optional `params:` mapping in its mrdocs-generator.yml. A scalar value is a string (a script coerces numbers or booleans itself); nested mappings and sequences become objects and arrays. It is an empty object when the manifest declares no parameters. For example: - -[source,yaml] ----- -script: generator.lua -params: - title: API Reference ----- - -makes `params.title` available to the script. - -`config` and `params` are trailing arguments, so a generator that needs neither can omit them, and use `function generate(corpus, output)`. 
- -Both Lua and JavaScript look up `generate` as a global function, so a generator must define one; a value the script returns is not used. Requiring the named global keeps one convention across the two languages and leaves room for a script to expose more than one named entry point later. - -Unlike a corpus-transform extension, whose hook is optional, a generator must define a `generate` function: selecting the generator is a request for output, so a missing entry point is an error. - -== Example: a search index - -A complete, runnable example lives at `examples/generators/script-driven/search-index/`. It emits a single search-index.json aggregating every symbol, an artifact no per-page generator can produce. - -The manifest names the script: - -.`addons/generator/search-index/mrdocs-generator.yml` -[source,yaml] ----- -include::example$script-driven-generators/search-index/addons/generator/search-index/mrdocs-generator.yml[] ----- - -The script itself: - -.`addons/generator/search-index/generate.lua` -[source,lua] ----- -include::example$script-driven-generators/search-index/addons/generator/search-index/generate.lua[] ----- - -Select it with `--generator=search-index`; it writes search-index.json into the output directory. 
From 2094838918d2c586bce83657ff34245723f4c324 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Fri, 12 Jun 2026 18:00:02 +0200 Subject: [PATCH 10/12] test: cover register_generator declaration and lookup --- src/test/lib/Gen/script/ScriptGenerator.cpp | 96 +++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/src/test/lib/Gen/script/ScriptGenerator.cpp b/src/test/lib/Gen/script/ScriptGenerator.cpp index 3e8a46c42e..d54aabe703 100644 --- a/src/test/lib/Gen/script/ScriptGenerator.cpp +++ b/src/test/lib/Gen/script/ScriptGenerator.cpp @@ -8,6 +8,10 @@ // Official repository: https://github.com/cppalliance/mrdocs // +#include +#include +#include +#include #include #include #include @@ -19,6 +23,9 @@ #include #include #include +#include +#include +#include #include #include @@ -156,6 +163,35 @@ makeJsGenerator(std::string_view src) return JsGenerator{fn->getFunction(), ctx}; } +// Write `content` verbatim to `path`. Pre-existing files are truncated. +void +writeFile(std::string_view path, std::string_view content) +{ + std::ofstream os(std::string{path}, std::ios::binary | std::ios::trunc); + os.write(content.data(), + static_cast(content.size())); +} + +// A generator function that ignores its argument and returns `value`, so two +// registrations can be told apart by what the resolved one returns. +dom::Function +makeConstGenerator(std::int64_t value) +{ + return dom::makeVariadicInvocable( + [value](dom::Array const&) -> Expected + { + return dom::Value(value); + }); +} + +// An empty in-memory configuration. The ThreadPool is stored by reference, +// so the caller must keep it alive at least as long as the config. 
+std::shared_ptr +makeConfig(ThreadPool& pool) +{ + return std::make_shared(ConfigImpl::access_token{}, pool); +} + } // (anon) struct ScriptGeneratorTest @@ -342,6 +378,63 @@ end BOOST_TEST(!runOver(gen, outDir).has_value()); } + // + // register_generator: corpus host and the script bindings + // + + void + testHostKeepsFirstRegistration() + { + ThreadPool pool; + CorpusImpl corpus(makeConfig(pool)); + + // The first registration of an id wins; a later one is ignored. + corpus.registerScriptGenerator("a", makeConstGenerator(1)); + corpus.registerScriptGenerator("a", makeConstGenerator(2)); + + BOOST_TEST(corpus.findScriptGenerator("missing") == nullptr); + dom::Function const* found = corpus.findScriptGenerator("a"); + BOOST_TEST(found != nullptr); + if (found) + { + Expected got = found->try_invoke(dom::Value()); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(got->getInteger() == 1); + } + } + } + + void + testRegisterGeneratorLua() + { + ThreadPool pool; + CorpusImpl corpus(makeConfig(pool)); + ScopedTempDirectory td("mrdocs-reggen"); + BOOST_TEST(td); + // A Lua extension that registers a generator leaves it findable on + // the corpus by its id, and does not warn about registering nothing. + std::string const script = files::appendPath(td.path(), "gen.lua"); + writeFile(script, "register_generator(\"my-gen\", function(ctx) end)\n"); + BOOST_TEST(runOneLuaExtension(corpus, script).has_value()); + BOOST_TEST(corpus.findScriptGenerator("my-gen") != nullptr); + } + + void + testRegisterGeneratorJs() + { + ThreadPool pool; + CorpusImpl corpus(makeConfig(pool)); + ScopedTempDirectory td("mrdocs-reggen"); + BOOST_TEST(td); + // The JavaScript counterpart. 
+ std::string const script = files::appendPath(td.path(), "gen.js"); + writeFile(script, "register_generator(\"my-gen\", function(ctx) {});\n"); + BOOST_TEST(runOneJsExtension(corpus, script).has_value()); + BOOST_TEST(corpus.findScriptGenerator("my-gen") != nullptr); + } + void run() { @@ -354,6 +447,9 @@ end testGeneratorIteratesCorpus(); testWriteEscapeIsError(); testGeneratorErrorIsReported(); + testHostKeepsFirstRegistration(); + testRegisterGeneratorLua(); + testRegisterGeneratorJs(); } }; From d6fc2cdddf9badd76819ac447decd5f1489b8aab Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Tue, 16 Jun 2026 16:08:17 +0200 Subject: [PATCH 11/12] fix: destroy C++ locals before raising a Lua error `luaL_error` and `lua_error` report a failure by longjmp-ing to the enclosing `lua_pcall`. The jump runs no C++ destructors on the frames it unwinds, so any local with a non-trivial destructor still alive at the point of the raise is leaked, and the unwind itself is undefined. Two of the Lua binding functions raised while such a local was live. The one invoked when a script assigns to a `dom::Object` field held the value being assigned and raised from inside a `catch`. The one invoked when a script calls a `dom::Function` held the argument `dom::Array`, the call result, and the function being invoked. Stage the outcome on the Lua stack first - push the result value, or build a location-prefixed message with `luaL_where` and `lua_concat` - then close the scope so every C++ local is destroyed, and only then raise. Lua copies a pushed string, so the message outlives the destructors. 
--- src/lib/Support/Lua.cpp | 65 +++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/src/lib/Support/Lua.cpp b/src/lib/Support/Lua.cpp index 4c7b45c2b7..38b3289cd3 100644 --- a/src/lib/Support/Lua.cpp +++ b/src/lib/Support/Lua.cpp @@ -598,18 +598,27 @@ domObject_push_metatable( [](lua_State* L) { Access A(L); - auto& obj = domObject_get(A, 1); - auto key = luaM_getstring(A, 2); - dom::Value value = luaValueToDom(L, 3); - try - { - obj.set(key, std::move(value)); - } - catch (std::exception const& ex) + // `lua_error` longjmps to the enclosing `pcall` and skips any + // pending C++ destructor on this frame, so scope the locals and + // stage the error message before raising. + bool raised = false; { - return luaL_error(L, "%s", ex.what()); + auto& obj = domObject_get(A, 1); + auto key = luaM_getstring(A, 2); + dom::Value value = luaValueToDom(L, 3); + try + { + obj.set(key, std::move(value)); + } + catch (std::exception const& ex) + { + luaL_where(A, 1); + lua_pushstring(A, ex.what()); + lua_concat(A, 2); + raised = true; + } } - return 0; + return raised ? lua_error(A) : 0; }); lua_settable(A, -3); @@ -740,20 +749,32 @@ domFunction_push_metatable( { Access A(L); int const top = lua_gettop(A); - dom::Array args; - for (int i = 2; i <= top; ++i) + // `lua_error` longjmps to the enclosing `pcall` and skips any + // pending C++ destructor on this frame, so stage the outcome - + // the result value, or a location-prefixed error message, on the + // Lua stack - and let every local here be destroyed before + // raising. + bool raised = false; { - args.push_back(luaValueToDom(L, i)); - } - dom::Function fn = domFunction_get(A, 1); - Expected result = fn.call(args); - if (! 
result) - { - return luaL_error(L, "%s", - result.error().reason().c_str()); + dom::Array args; + for (int i = 2; i <= top; ++i) + { + args.push_back(luaValueToDom(L, i)); + } + Expected<dom::Value> result = domFunction_get(A, 1).call(args); + if (result) + { + domValue_push(A, *result); + } + else + { + luaL_where(A, 1); + lua_pushstring(A, result.error().reason().c_str()); + lua_concat(A, 2); + raised = true; + } } - domValue_push(A, *result); - return 1; + return raised ? lua_error(A) : 1; }); lua_settable(A, -3); From 0ba2b4d66904bee9015e69f2692de76b49411aed Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 17 Jun 2026 10:36:26 +0200 Subject: [PATCH 12/12] refactor(extensions): register scripts through a `mrdocs` object, not free functions `register_transform` and `register_generator` were bare globals in the extension environment. This moves them onto a `mrdocs` global object, the way `console` is already provided, so a script writes `mrdocs.register_transform(fn)` / `mrdocs.register_generator(id, fn)`. Among Lua-embedding applications that expose a "register an extension" call, a host namespace object is the prevailing convention - darktable uses `dt.register_*`, Aegisub `aegisub.register_*`, Redis `redis.register_function` - while bare globals are the exception. Note that the per-invocation `ctx` (`ctx.corpus`, `ctx.output`, `ctx.config`) is unchanged: only load-time declarations go through `mrdocs`.
--- .../pages/extensions/corpus-extensions.adoc | 10 +++---- docs/mrdocs.schema.json | 2 +- .../addons/extensions/search_index.lua | 6 ++-- src/lib/ConfigOptions.json | 2 +- src/lib/CorpusImpl.hpp | 8 ++--- src/lib/Extensions/JsBinding.cpp | 28 ++++++++++------- src/lib/Extensions/JsBinding.hpp | 9 +++--- src/lib/Extensions/LuaBinding.cpp | 30 +++++++++++-------- src/lib/Extensions/LuaBinding.hpp | 9 +++--- src/lib/Extensions/RunExtensions.hpp | 2 +- src/lib/Gen/script/ScriptGenerator.hpp | 2 +- src/lib/Support/JavaScript.cpp | 9 +++--- src/test/lib/Gen/script/ScriptGenerator.cpp | 19 ++++++------ src/tool/GenerateAction.cpp | 2 +- .../addons/extensions/transforms.js | 6 ++-- .../js-set-name/addons/extensions/rename.js | 2 +- .../lua-clear-doc/addons/extensions/clear.lua | 2 +- .../addons/primary/extensions/zzz-primary.lua | 2 +- .../extensions/aaa-supplemental.lua | 2 +- .../addons/extensions/transforms.lua | 6 ++-- .../lua-set-name/addons/extensions/rename.lua | 2 +- .../addons/extensions/replace_return.lua | 2 +- .../addons/extensions/brief_from_name.js | 2 +- .../addons/extensions/brief_from_name.lua | 2 +- .../entry-point/addons/extensions/noop.js | 2 +- .../entry-point/addons/extensions/noop.lua | 2 +- .../addons/extensions/parse_format_relates.js | 2 +- .../extensions/parse_format_relates.lua | 2 +- .../addons/extensions/subclass_tree.js | 2 +- .../addons/extensions/subclass_tree.lua | 2 +- 30 files changed, 96 insertions(+), 82 deletions(-) diff --git a/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc b/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc index 8a49e4f3e5..9d11346378 100644 --- a/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc +++ b/docs/modules/ROOT/pages/extensions/corpus-extensions.adoc @@ -5,7 +5,7 @@ An extension is a Lua or JavaScript script that runs as part of a Mr.Docs build * A *corpus transform* rewrites metadata across many symbols at once: backfill briefs from a naming convention, tag symbols by group, 
mark generated code as "see below" in the output. Transforms run between extraction and rendering, so every generator sees the change. * A *generator* owns the whole emit: instead of rendering one page per symbol, it traverses the corpus and writes whatever files it wants. That lets it produce shapes the per-page generators cannot, such as a single artifact aggregated across every symbol. -A script declares a transform with `register_transform(fn)` and a generator with `register_generator(id, fn)`. Either way the registered function receives a single context object, `ctx`. +A script declares a transform with `mrdocs.register_transform(fn)` and a generator with `mrdocs.register_generator(id, fn)`. Either way the registered function receives a single context object, `ctx`. == Languages and addon locations @@ -33,7 +33,7 @@ A script can register any number of transforms and generators, or none at all. I [#corpus-transforms] == Corpus transforms -A transform is a function passed to `register_transform`. Mr.Docs invokes each registered function once, in registration order, with the `ctx` object. A flat view of the corpus reaches the script through `ctx.corpus`. +A transform is a function passed to `mrdocs.register_transform`. Mr.Docs invokes each registered function once, in registration order, with the `ctx` object. A flat view of the corpus reaches the script through `ctx.corpus`. 
[tabs] ====== @@ -65,7 +65,7 @@ JavaScript:: .`addons/extensions/count_by_kind.js` [source,javascript] ---- -register_transform(function(ctx) { +mrdocs.register_transform(function(ctx) { var counts = {}; for (var i = 0; i < ctx.corpus.symbols.length; ++i) { var k = ctx.corpus.symbols[i].kind; @@ -82,7 +82,7 @@ Lua:: .`addons/extensions/count_by_kind.lua` [source,lua] ---- -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) local counts = {} for _, sym in ipairs(ctx.corpus.symbols) do counts[sym.kind] = (counts[sym.kind] or 0) + 1 @@ -225,7 +225,7 @@ Notice in this example that `s.doc.sees` receives a list of polymorphic types th The built-in generators render one page per symbol. When you need a different output structure, e.g. one file per namespace, or a single artifact aggregated across every symbol such as a search index, that page-per-symbol shape cannot express it. A generator hands the whole emit to the script instead: it traverses the corpus and writes whatever files it wants. No C++ and no templates are involved. -A script declares a generator with `register_generator(id, fn)`. The `id` is the name you select on the command line with `--generator=`; a registered generator takes precedence over a built-in of the same name. Selecting a generator is a request for output, so its function does the work directly, the page-per-symbol fallback the built-ins provide does not apply. +A script declares a generator with `mrdocs.register_generator(id, fn)`. The `id` is the name you select on the command line with `--generator=`; a registered generator takes precedence over a built-in of the same name. Selecting a generator is a request for output, so its function does the work directly, the page-per-symbol fallback the built-ins provide does not apply. 
The function receives the same `ctx` a transform does, plus `ctx.output`: diff --git a/docs/mrdocs.schema.json b/docs/mrdocs.schema.json index 6dca45d2dc..95e1ca3ff7 100644 --- a/docs/mrdocs.schema.json +++ b/docs/mrdocs.schema.json @@ -282,7 +282,7 @@ "default": [ "html" ], - "description": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; a script-driven generator is declared by an extension with `register_generator` and produces the output itself. This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", + "description": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; a script-driven generator is declared by an extension with `mrdocs.register_generator` and produces the output itself. 
This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", "title": "Generator(s) used to create the documentation" }, "global-namespace-index": { diff --git a/examples/generators/script-driven/search-index/addons/extensions/search_index.lua b/examples/generators/script-driven/search-index/addons/extensions/search_index.lua index 5bea13fb69..341b7919e4 100644 --- a/examples/generators/script-driven/search-index/addons/extensions/search_index.lua +++ b/examples/generators/script-driven/search-index/addons/extensions/search_index.lua @@ -2,8 +2,8 @@ -- aggregates every symbol into a single search-index.json, the kind of -- artifact the per-page generators cannot produce. -- --- `register_generator(id, fn)` declares it next to any --- `register_transform` a script might also declare; selecting +-- `mrdocs.register_generator(id, fn)` declares it next to any +-- `mrdocs.register_transform` a script might also declare; selecting -- `generator: ` runs `fn` with one `ctx`. `ctx.corpus.symbols` is -- every symbol (each tagged with a flat `_id` so the generator can form -- stable per-symbol URLs) and `ctx.output.write` emits files under the @@ -15,7 +15,7 @@ local function json_string(s) return '"' .. s .. '"' end -register_generator("search-index", function(ctx) +mrdocs.register_generator("search-index", function(ctx) local entries = {} for _, sym in ipairs(ctx.corpus.symbols) do local name = sym.name or "" diff --git a/src/lib/ConfigOptions.json b/src/lib/ConfigOptions.json index 5ac8b6788e..7cbcb53952 100644 --- a/src/lib/ConfigOptions.json +++ b/src/lib/ConfigOptions.json @@ -443,7 +443,7 @@ { "name": "generator", "brief": "Generator(s) used to create the documentation", - "details": "The generator is responsible for creating the documentation from the extracted symbols. 
The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; a script-driven generator is declared by an extension with `register_generator` and produces the output itself. This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", + "details": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, `xml`, and `noop` (which extracts but writes no output, useful for checking extraction warnings); data-driven generators can be added by dropping a template folder under /generator//; a script-driven generator is declared by an extension with `mrdocs.register_generator` and produces the output itself. This option accepts a single generator (`generator: xml`), a list (`generator: [xml, adoc]`), or a comma-separated string (`generator: \"xml,adoc\"`); when more than one is given the documentation is produced once per generator.", "type": "string-list", "default": ["html"] }, diff --git a/src/lib/CorpusImpl.hpp b/src/lib/CorpusImpl.hpp index a05295d9ed..091d36d03c 100644 --- a/src/lib/CorpusImpl.hpp +++ b/src/lib/CorpusImpl.hpp @@ -62,7 +62,7 @@ class CorpusImpl final : public Corpus std::map> lookupCache_; // Output generators an extension script defined via - // `register_generator(id, fn)`. Each `fn` is a `dom::Function` that + // `mrdocs.register_generator(id, fn)`. 
Each `fn` is a `dom::Function` that // stays runnable until this corpus is destroyed (after extensions run, // when a generator is selected). Its scripting VM is kept alive either // by the function itself or by a matching entry in @@ -217,8 +217,8 @@ class CorpusImpl final : public Corpus /** Register a script-defined output generator. - Called from an extension's `register_generator(id, fn)`. The first - registration of a given id wins; later ones are ignored. + Called from an extension's `mrdocs.register_generator(id, fn)`. + The first registration of a given id wins; later ones are ignored. */ void registerScriptGenerator(std::string id, dom::Function fn); @@ -230,7 +230,7 @@ class CorpusImpl final : public Corpus /** Keep a scripting VM alive for the lifetime of this corpus. - A generator registered via `register_generator` may hold only a + A generator registered via `mrdocs.register_generator` may hold only a weak reference to the VM that defined it. The extension binding hands the VM over here so it outlives the extension run and stays usable when the generator is selected. diff --git a/src/lib/Extensions/JsBinding.cpp b/src/lib/Extensions/JsBinding.cpp index 080b9e95cd..d5f6a7ac1e 100644 --- a/src/lib/Extensions/JsBinding.cpp +++ b/src/lib/Extensions/JsBinding.cpp @@ -27,11 +27,11 @@ namespace mrdocs { namespace { -// Bind the `register_transform` and `register_generator` entry points -// before the script runs. A JavaScript function bridges to a -// `dom::Function`: transforms are captured in `transforms` (invoked once -// the script has run), generators are handed to the corpus, which keeps -// them runnable past this VM's lifetime. +// Install the `mrdocs` global carrying the `register_transform` and +// `register_generator` entry points before the script runs. 
A JavaScript +// function bridges to a `dom::Function`: transforms are captured in +// `transforms` (invoked once the script has run), generators are handed to +// the corpus, which keeps them runnable past this VM's lifetime. void registerJsExtensionApi( js::Scope& scope, @@ -39,7 +39,8 @@ registerJsExtensionApi( dom::Array& transforms, std::size_t& generators) { - scope.setGlobal( + dom::Object api; + api.set( "register_transform", dom::Value(dom::makeVariadicInvocable( [&transforms](dom::Array const& args) @@ -49,7 +50,8 @@ registerJsExtensionApi( if (args.empty() || !args.get(0).isFunction()) { result = Unexpected(Error( - "register_transform: expected a function argument")); + "mrdocs.register_transform: expected a function " + "argument")); } else { @@ -58,7 +60,7 @@ registerJsExtensionApi( return result; }))); - scope.setGlobal( + api.set( "register_generator", dom::Value(dom::makeVariadicInvocable( [&corpus, &generators](dom::Array const& args) @@ -70,7 +72,8 @@ registerJsExtensionApi( !args.get(1).isFunction()) { result = Unexpected(Error( - "register_generator: expected (string id, function)")); + "mrdocs.register_generator: expected (string id, " + "function)")); } else { @@ -81,6 +84,8 @@ registerJsExtensionApi( } return result; }))); + + scope.setGlobal("mrdocs", dom::Value(std::move(api))); } // Invoke one registered transform with the `ctx` object, tagging any @@ -127,7 +132,7 @@ runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath) MRDOCS_TRY(std::string script, files::getFileText(scriptPath)); - // Running the script is what calls `register_transform`. + // Running the script is what calls `mrdocs.register_transform`. 
Expected result = scope.script(script); if (!result.has_value()) { @@ -138,7 +143,8 @@ runOneJsExtension(CorpusImpl& corpus, std::string const& scriptPath) else if (transforms.empty() && generators == 0) { // A discovered script that registers nothing is almost always a - // mistake (a misspelled `register_transform` / `register_generator`, + // mistake (a misspelled `mrdocs.register_transform` / + // `mrdocs.register_generator`, // or a guard that skipped it), so flag it rather than silently // doing nothing. report::warn("extension '{}' registered nothing", scriptPath); diff --git a/src/lib/Extensions/JsBinding.hpp b/src/lib/Extensions/JsBinding.hpp index 82eb15b92f..6db5735ea2 100644 --- a/src/lib/Extensions/JsBinding.hpp +++ b/src/lib/Extensions/JsBinding.hpp @@ -22,10 +22,11 @@ class CorpusImpl; /** Run one JavaScript extension script against the corpus. Build a fresh JS context and evaluate the script. The script declares - corpus transforms with `register_transform(fn)` and output generators - with `register_generator(id, fn)`, in either combination. Each transform - is invoked once, in registration order, with a navigable DOM view of the - corpus it can read and mutate in place; each generator is handed to the + corpus transforms with `mrdocs.register_transform(fn)` and output + generators with `mrdocs.register_generator(id, fn)`, in either + combination. Each transform is invoked once, in registration order, + with a navigable DOM view of the corpus it can read and mutate in + place; each generator is handed to the corpus to run later, once one is selected. A script that registers nothing causes a warning and otherwise has no effect, so an empty .js file is tolerated. 
diff --git a/src/lib/Extensions/LuaBinding.cpp b/src/lib/Extensions/LuaBinding.cpp index 66c79281c7..a32647a3f7 100644 --- a/src/lib/Extensions/LuaBinding.cpp +++ b/src/lib/Extensions/LuaBinding.cpp @@ -48,7 +48,7 @@ struct LuaRegistrations std::size_t generators = 0; }; -// `register_transform(fn)`: anchor `fn` in the registry and record it as +// `mrdocs.register_transform(fn)`: anchor `fn` in the registry and record it as // a callable, so the host can invoke it once the chunk has run. int luaRegisterTransform(lua_State* L) @@ -59,7 +59,7 @@ luaRegisterTransform(lua_State* L) if (lua_type(L, 1) != LUA_TFUNCTION) { result = luaL_error(L, - "register_transform: expected a function argument"); + "mrdocs.register_transform: expected a function argument"); } else { @@ -70,9 +70,10 @@ luaRegisterTransform(lua_State* L) return result; } -// `register_generator(id, fn)`: anchor `fn` in the registry and hand it to -// the corpus under `id`. The corpus keeps it runnable until a generator is -// selected and run, long after this chunk's stack has unwound. +// `mrdocs.register_generator(id, fn)`: anchor `fn` in the registry and +// hand it to the corpus under `id`. The corpus keeps it runnable until a +// generator is selected and run, long after this chunk's stack has +// unwound. int luaRegisterGenerator(lua_State* L) { @@ -82,7 +83,7 @@ luaRegisterGenerator(lua_State* L) if (lua_type(L, 1) != LUA_TSTRING || lua_type(L, 2) != LUA_TFUNCTION) { result = luaL_error(L, - "register_generator: expected (string id, function)"); + "mrdocs.register_generator: expected (string id, function)"); } else { @@ -98,9 +99,9 @@ luaRegisterGenerator(lua_State* L) return result; } -// Bind the `register_transform` and `register_generator` entry points -// before the chunk runs. The shared registrations pointer is carried as -// each closure's single upvalue. +// Install the `mrdocs` global carrying the `register_transform` and +// `register_generator` entry points before the chunk runs. 
The shared +// registrations pointer is carried as each closure's single upvalue. void registerLuaExtensionApi( lua::Context& ctx, CorpusImpl& corpus, LuaRegistrations& regs) @@ -108,12 +109,14 @@ registerLuaExtensionApi( regs.ctx = &ctx; regs.corpus = &corpus; lua_State* L = static_cast<lua_State*>(ctx.nativeState()); + lua_newtable(L); lua_pushlightuserdata(L, &regs); lua_pushcclosure(L, &luaRegisterTransform, 1); - lua_setglobal(L, "register_transform"); + lua_setfield(L, -2, "register_transform"); lua_pushlightuserdata(L, &regs); lua_pushcclosure(L, &luaRegisterGenerator, 1); - lua_setglobal(L, "register_generator"); + lua_setfield(L, -2, "register_generator"); + lua_setglobal(L, "mrdocs"); } // Invoke one registered transform with the `ctx` object, tagging any @@ -158,11 +161,12 @@ runOneLuaExtension(CorpusImpl& corpus, std::string const& scriptPath) MRDOCS_TRY(lua::Function chunk, scope.loadChunk(script, scriptPath)); // Running the chunk's top-level code is what calls - // `register_transform`; the chunk's own return value is unused. + // `mrdocs.register_transform`; the chunk's own return value is unused. MRDOCS_TRY(chunk.call()); // A discovered script that registers nothing is almost always a - // mistake (a misspelled `register_transform` / `register_generator`, + // mistake (a misspelled `mrdocs.register_transform` / + // `mrdocs.register_generator`, // or a guard that skipped it), so flag it rather than silently doing // nothing. if (regs.transforms.empty() && regs.generators == 0) diff --git a/src/lib/Extensions/LuaBinding.hpp b/src/lib/Extensions/LuaBinding.hpp index a4c2e3c0c6..3f215807e6 100644 --- a/src/lib/Extensions/LuaBinding.hpp +++ b/src/lib/Extensions/LuaBinding.hpp @@ -22,10 +22,11 @@ class CorpusImpl; /** Run one Lua extension script against the corpus. Build a fresh Lua context and evaluate the script. The script declares - corpus transforms with `register_transform(fn)` and output generators - with `register_generator(id, fn)`, in either combination.
Each transform - is invoked once, in registration order, with a navigable DOM view of the - corpus it can read and mutate in place; each generator is handed to the + corpus transforms with `mrdocs.register_transform(fn)` and output + generators with `mrdocs.register_generator(id, fn)`, in either + combination. Each transform is invoked once, in registration order, + with a navigable DOM view of the corpus it can read and mutate in + place; each generator is handed to the corpus to run later, once one is selected. A script that registers nothing causes a warning and otherwise has no effect, so an empty .lua file is tolerated. diff --git a/src/lib/Extensions/RunExtensions.hpp b/src/lib/Extensions/RunExtensions.hpp index b63506b36e..bc3c10adbe 100644 --- a/src/lib/Extensions/RunExtensions.hpp +++ b/src/lib/Extensions/RunExtensions.hpp @@ -23,7 +23,7 @@ class CorpusImpl; Extensions are discovered under each addon root's extensions/ directory (the primary addons plus addons-supplemental): a .lua or .js file is an extension. Each script declares corpus transforms by - calling `register_transform(fn)`; every registered function is + calling `mrdocs.register_transform(fn)`; every registered function is invoked once, in registration order, with a navigable DOM view of the corpus. A transform reads the corpus through that view and mutates it by assigning to symbol fields (for example `sym.name = "..."`), which diff --git a/src/lib/Gen/script/ScriptGenerator.hpp b/src/lib/Gen/script/ScriptGenerator.hpp index 5429bb78bc..ea95e1de40 100644 --- a/src/lib/Gen/script/ScriptGenerator.hpp +++ b/src/lib/Gen/script/ScriptGenerator.hpp @@ -21,7 +21,7 @@ namespace mrdocs::script { /** Run a script-defined output generator. - A generator declared with `register_generator(id, fn)` is a + A generator declared with `mrdocs.register_generator(id, fn)` is a `dom::Function` that owns the whole emit. 
Invoke it with one `ctx` object, mirroring the shape a transform receives: diff --git a/src/lib/Support/JavaScript.cpp b/src/lib/Support/JavaScript.cpp index e7ff819600..fc850c8fa3 100644 --- a/src/lib/Support/JavaScript.cpp +++ b/src/lib/Support/JavaScript.cpp @@ -706,9 +706,10 @@ Context::~Context() // A `dom::Function` obtained from a JS value holds only a weak reference, // so it never keeps the interpreter alive on its own; code that needs such // a function to outlive this `Context` (a corpus that stores a - // `register_generator` function) keeps a `Context` copy alive instead. The - // `DomValueHolder` / `FunctionHolder` objects keep a `shared_ptr`, so - // the interpreter owns them through a cycle; `cleanup()` breaks that cycle + // `mrdocs.register_generator` function) keeps a `Context` copy alive + // instead. The `DomValueHolder` / `FunctionHolder` objects keep a + // `shared_ptr`, so the interpreter owns them through a cycle; + // `cleanup()` breaks that cycle // by tearing down the holders, which is why teardown is explicit here // rather than left to `~Impl`. if (impl_ && @@ -2117,7 +2118,7 @@ toDomValue(jerry_value_t v, std::shared_ptr const& impl) // The function value does not own the interpreter: it captures a weak // reference and locks it on each call. Code that needs the function to // outlive the `Context` that produced it (a corpus that stores a - // `register_generator` function) keeps a `js::Context` alive + // `mrdocs.register_generator` function) keeps a `js::Context` alive // separately; if nothing does, a later call reports an error rather // than using a freed interpreter. 
return dom::makeVariadicInvocable( diff --git a/src/test/lib/Gen/script/ScriptGenerator.cpp b/src/test/lib/Gen/script/ScriptGenerator.cpp index d54aabe703..1d76c527d9 100644 --- a/src/test/lib/Gen/script/ScriptGenerator.cpp +++ b/src/test/lib/Gen/script/ScriptGenerator.cpp @@ -120,7 +120,7 @@ struct StubCorpus // Load `src`, which must define a global `generate(ctx)`, and return it as // a callable `dom::Function`. The function is self-owning: it anchors the // chunk in the Lua registry and carries a copy of the context, so it -// outlives the local VM here exactly as a `register_generator` function +// outlives the local VM here exactly as a `mrdocs.register_generator` function // outlives the extension that declared it. dom::Function makeLuaGenerator(std::string_view src) @@ -140,11 +140,12 @@ makeLuaGenerator(std::string_view src) return lua::makeCallable(ctx, ref); } -// The JavaScript counterpart of `makeLuaGenerator`. A JS function holds only -// a weak reference to its interpreter, so - exactly as a corpus does for a -// `register_generator` function - the caller must keep the VM alive for as -// long as it intends to call the generator. `JsGenerator::keepAlive` does -// that; dropping it tears the interpreter down. (A Lua callable instead +// The JavaScript counterpart of `makeLuaGenerator`. A JS function holds +// only a weak reference to its interpreter, so - exactly as a corpus does +// for a `mrdocs.register_generator` function - the caller must keep the VM +// alive for as long as it intends to call the generator. +// `JsGenerator::keepAlive` does that; dropping it tears the interpreter +// down. (A Lua callable instead // carries its own VM, so `makeLuaGenerator` needs no such companion.) 
struct JsGenerator { @@ -379,7 +380,7 @@ end } // - // register_generator: corpus host and the script bindings + // mrdocs.register_generator: corpus host and the script bindings // void @@ -416,7 +417,7 @@ end // A Lua extension that registers a generator leaves it findable on // the corpus by its id, and does not warn about registering nothing. std::string const script = files::appendPath(td.path(), "gen.lua"); - writeFile(script, "register_generator(\"my-gen\", function(ctx) end)\n"); + writeFile(script, "mrdocs.register_generator(\"my-gen\", function(ctx) end)\n"); BOOST_TEST(runOneLuaExtension(corpus, script).has_value()); BOOST_TEST(corpus.findScriptGenerator("my-gen") != nullptr); } @@ -430,7 +431,7 @@ end BOOST_TEST(td); // The JavaScript counterpart. std::string const script = files::appendPath(td.path(), "gen.js"); - writeFile(script, "register_generator(\"my-gen\", function(ctx) {});\n"); + writeFile(script, "mrdocs.register_generator(\"my-gen\", function(ctx) {});\n"); BOOST_TEST(runOneJsExtension(corpus, script).has_value()); BOOST_TEST(corpus.findScriptGenerator("my-gen") != nullptr); } diff --git a/src/tool/GenerateAction.cpp b/src/tool/GenerateAction.cpp index 3897d8a345..ba07174e68 100644 --- a/src/tool/GenerateAction.cpp +++ b/src/tool/GenerateAction.cpp @@ -116,7 +116,7 @@ DoGenerateAction( // GenerateAction does not route per-generator output, so each one // resolves its own (single vs multipage, file vs directory) against // it. For a given id a generator an extension registered on the - // corpus with register_generator wins over a built-in or data-driven + // corpus with mrdocs.register_generator wins over a built-in or data-driven // one: it owns the whole emit through the script runner, while a // registry generator renders the corpus page by page. 
CorpusImpl const& corpusImpl = static_cast<CorpusImpl const&>(*corpus); diff --git a/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js b/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js index 5f17eb3d34..964e12b87d 100644 --- a/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js +++ b/test-files/golden-tests/extensions/js-register-transform/addons/extensions/transforms.js @@ -1,9 +1,9 @@ -// Declare two corpus transforms with `register_transform`. Both run, in +// Declare two corpus transforms with `mrdocs.register_transform`. Both run, in // registration order, so one extension can contribute several // transforms. The first renames every function; the second rewrites // its brief. Mirrors the lua-register-transform fixture on the JS path. -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) { for (var i = 0; i < ctx.corpus.symbols.length; ++i) { @@ -15,7 +15,7 @@ register_transform(function(ctx) } }); -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) { for (var i = 0; i < ctx.corpus.symbols.length; ++i) { diff --git a/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js b/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js index e831446a96..642e1109b8 100644 --- a/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js +++ b/test-files/golden-tests/extensions/js-set-name/addons/extensions/rename.js @@ -7,7 +7,7 @@ // proxy's `set` trap forwards each assignment into the live C++ // Symbol via reflection.
-register_transform(function(ctx) +mrdocs.register_transform(function(ctx) { for (var i = 0; i < ctx.corpus.symbols.length; ++i) { diff --git a/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua b/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua index f02b37ce71..2cf217a97d 100644 --- a/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua +++ b/test-files/golden-tests/extensions/lua-clear-doc/addons/extensions/clear.lua @@ -3,7 +3,7 @@ -- doc-comment so the rendered output contains no doc-comment block -- for it. -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.doc = nil diff --git a/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua b/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua index 1f7c89d132..6e153a5493 100644 --- a/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua +++ b/test-files/golden-tests/extensions/lua-extension-ordering/addons/primary/extensions/zzz-primary.lua @@ -5,7 +5,7 @@ -- so this script runs FIRST; its rename is overwritten by the -- supplemental's. 
-register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.name = "from_primary" diff --git a/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua b/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua index f621aca30e..0eb927b0f6 100644 --- a/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua +++ b/test-files/golden-tests/extensions/lua-extension-ordering/addons/supplemental/extensions/aaa-supplemental.lua @@ -3,7 +3,7 @@ -- overwrites the primary root's, so this is the name that must -- appear in the rendered output. -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.name = "from_supplemental" diff --git a/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua b/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua index 6811e2dd2b..e9fb3b1753 100644 --- a/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua +++ b/test-files/golden-tests/extensions/lua-register-transform/addons/extensions/transforms.lua @@ -1,9 +1,9 @@ --- Declare two corpus transforms with `register_transform`. Both run, in +-- Declare two corpus transforms with `mrdocs.register_transform`. Both run, in -- registration order, so one extension can contribute several -- transforms. The first renames every function; the second rewrites -- its brief. -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.name = "renamed_" .. 
sym.name @@ -11,7 +11,7 @@ register_transform(function(ctx) end end) -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.doc = { diff --git a/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua b/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua index 44f2c8669a..d3dfc4589d 100644 --- a/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua +++ b/test-files/golden-tests/extensions/lua-set-name/addons/extensions/rename.lua @@ -6,7 +6,7 @@ -- ctx.corpus.symbols is a regular Lua sequence: 1-indexed, with `#` and -- `ipairs`/`pairs` support. -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" then sym.name = "renamed_" .. sym.name diff --git a/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua b/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua index 7d5e5f3d95..2a60ef748a 100644 --- a/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua +++ b/test-files/golden-tests/extensions/lua-set-return-type/addons/extensions/replace_return.lua @@ -26,7 +26,7 @@ -- This fixture omits the lookup and uses a bare identifier so the -- test is self-contained. 
-register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" and sym.name == "target_function" then sym.returnType = { diff --git a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js index c210347dd9..dda17fb7e8 100644 --- a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js +++ b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.js @@ -7,7 +7,7 @@ // sentence on every declaration. Anything an author already wrote is // preserved: only missing fields are filled in. -register_transform(function(ctx) { +mrdocs.register_transform(function(ctx) { for (var i = 0; i < ctx.corpus.symbols.length; ++i) { var sym = ctx.corpus.symbols[i]; if (sym.kind === "function" && sym.name.indexOf("is_") === 0) { diff --git a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua index 004f7ac651..4b3d76068f 100644 --- a/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua +++ b/test-files/golden-tests/snippets/extensions/brief-from-name/addons/extensions/brief_from_name.lua @@ -7,7 +7,7 @@ -- sentence on every declaration. Anything an author already wrote is -- preserved: only missing fields are filled in. 
-register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, sym in ipairs(ctx.corpus.symbols) do if sym.kind == "function" and sym.name:sub(1, 3) == "is_" then diff --git a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js index 95efe15417..19f98a5087 100644 --- a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js +++ b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.js @@ -1,3 +1,3 @@ -register_transform(function(ctx) { +mrdocs.register_transform(function(ctx) { // walk ctx.corpus.symbols, assign to the fields you want to change }); diff --git a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua index f18b6f8a19..51f10acce6 100644 --- a/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua +++ b/test-files/golden-tests/snippets/extensions/entry-point/addons/extensions/noop.lua @@ -1,3 +1,3 @@ -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) -- walk ctx.corpus.symbols, assign to the fields you want to change end) diff --git a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js index 2c662af1a6..092b322df2 100644 --- a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js +++ b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.js @@ -15,7 +15,7 @@ function partnerName(name) { return partner; } -register_transform(function(ctx) { +mrdocs.register_transform(function(ctx) { for (var i = 0; i < ctx.corpus.symbols.length; ++i) { var s = 
ctx.corpus.symbols[i]; if (s.kind === "function") { diff --git a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua index ba308e3ba8..e9c920c159 100644 --- a/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua +++ b/test-files/golden-tests/snippets/extensions/parse-format-relates/addons/extensions/parse_format_relates.lua @@ -15,7 +15,7 @@ local function partnerName(name) return partner end -register_transform(function(ctx) +mrdocs.register_transform(function(ctx) for _, s in ipairs(ctx.corpus.symbols) do if s.kind == "function" then local pname = partnerName(s.name) diff --git a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js index 77c14e445a..8627ae29c6 100644 --- a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js +++ b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.js @@ -16,7 +16,7 @@ function listSubclasses(corpus, sym, indent) { } } -register_transform(function(ctx) { +mrdocs.register_transform(function(ctx) { var base = ctx.corpus.lookup("Shape"); if (base) { console.log(base.name); diff --git a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua index 89912a8c21..def95d5e4f 100644 --- a/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua +++ b/test-files/golden-tests/snippets/extensions/subclass-tree/addons/extensions/subclass_tree.lua @@ -16,7 +16,7 @@ local function listSubclasses(corpus, sym, indent) end end 
-register_transform(function(ctx) +mrdocs.register_transform(function(ctx) local base = ctx.corpus.lookup("Shape") if base then print(base.name)