Skip to content

Commit d325c7a

Browse files
committed
feat: support script-driven generators
This adds a generator flavor backed by a user script. A directory under <addon>/generator/<name>/ whose mrdocs-generator.yml names a script entry installs a generator that hands the whole emit to a Lua or JavaScript `generate(corpus, output)` function: the script walks the corpus and writes files through the output object, so it can produce output shapes a per-page generator cannot, such as a single artifact aggregated across every symbol. The manifest parser moves into a shared `GeneratorManifest`, so the data-driven and script-driven discovery passes read the same file. A manifest that names a script is skipped by the data-driven pass and installed by the script pass. The output object exposes a single write method, resolved under the output directory and forbidden from escaping it. Both languages receive it as the second argument to generate; on the Lua side it is also bound as a global and passed from there, because the Lua bridge cannot carry a callable as a plain value.
1 parent bd1d237 commit d325c7a

13 files changed

Lines changed: 1022 additions & 156 deletions

src/lib/Gen/GeneratorManifest.cpp

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
//
2+
// Licensed under the Apache License v2.0 with LLVM Exceptions.
3+
// See https://llvm.org/LICENSE.txt for license information.
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
//
6+
// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com)
7+
//
8+
// Official repository: https://github.com/cppalliance/mrdocs
9+
//
10+
11+
#include "GeneratorManifest.hpp"
12+
#include <mrdocs/Support/Path.hpp>
13+
#include <llvm/ADT/SmallString.h>
14+
#include <llvm/Support/Casting.h>
15+
#include <llvm/Support/SourceMgr.h>
16+
#include <llvm/Support/YAMLParser.h>
17+
#include <filesystem>
18+
19+
namespace mrdocs {
20+
21+
namespace {
22+
23+
// Read a scalar node into an owned string.
24+
std::string
25+
scalarText(llvm::yaml::ScalarNode& node)
26+
{
27+
llvm::SmallString<32> buf;
28+
llvm::StringRef const text = node.getValue(buf);
29+
return std::string(text.data(), text.size());
30+
}
31+
32+
// Parse a YAML mapping whose entries are non-empty byte-sequence keys
33+
// mapped to replacement strings. An empty key is a hard error.
34+
Expected<void>
35+
parseEscape(
36+
llvm::yaml::MappingNode& node,
37+
GeneratorManifest& manifest,
38+
std::string_view yamlPath)
39+
{
40+
for (llvm::yaml::KeyValueNode& entry : node)
41+
{
42+
llvm::yaml::ScalarNode* const keyNode =
43+
llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(entry.getKey());
44+
llvm::yaml::ScalarNode* const valNode =
45+
llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(entry.getValue());
46+
if (!keyNode || !valNode)
47+
{
48+
return Unexpected(formatError(
49+
"{}: each 'escape' entry must be a scalar->scalar mapping",
50+
yamlPath));
51+
}
52+
std::string key = scalarText(*keyNode);
53+
if (key.empty())
54+
{
55+
return Unexpected(formatError(
56+
"{}: escape key must not be empty", yamlPath));
57+
}
58+
manifest.escape.emplace_back(
59+
std::move(key), scalarText(*valNode));
60+
}
61+
return {};
62+
}
63+
64+
// Dispatch a single top-level manifest key to its handler. Unknown keys
65+
// are ignored so future schema additions stay non-breaking.
66+
Expected<void>
67+
parseTopLevelEntry(
68+
llvm::yaml::KeyValueNode& pair,
69+
GeneratorManifest& manifest,
70+
std::string_view yamlPath)
71+
{
72+
llvm::yaml::ScalarNode* const keyNode =
73+
llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(pair.getKey());
74+
if (!keyNode)
75+
{
76+
return {};
77+
}
78+
llvm::SmallString<16> keyBuf;
79+
llvm::StringRef const key = keyNode->getValue(keyBuf);
80+
if (key == "escape")
81+
{
82+
llvm::yaml::MappingNode* const escNode =
83+
llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(pair.getValue());
84+
if (!escNode)
85+
{
86+
return Unexpected(formatError(
87+
"{}: 'escape' must be a mapping", yamlPath));
88+
}
89+
return parseEscape(*escNode, manifest, yamlPath);
90+
}
91+
if (key == "script")
92+
{
93+
llvm::yaml::ScalarNode* const valNode =
94+
llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(pair.getValue());
95+
if (!valNode)
96+
{
97+
return Unexpected(formatError(
98+
"{}: 'script' must be a scalar", yamlPath));
99+
}
100+
manifest.script = scalarText(*valNode);
101+
}
102+
return {};
103+
}
104+
105+
} // (anon)
106+
107+
// Read the manifest at `yamlPath` and return its parsed contents.
//
// Only the first YAML document of the file is read. An empty document
// (no content, comments only, or a literal `null`) yields a
// default-constructed manifest. Errors are returned when the file cannot
// be read, when the YAML is syntactically invalid, when the top-level
// node is not a mapping, or when a recognized entry is ill-formed.
Expected<GeneratorManifest>
loadGeneratorManifest(std::string_view yamlPath)
{
    MRDOCS_TRY(std::string text, files::getFileText(yamlPath));
    llvm::SourceMgr sm;
    llvm::yaml::Stream stream(text, sm);

    GeneratorManifest manifest;
    llvm::yaml::document_iterator docIt = stream.begin();
    if (docIt == stream.end())
    {
        return manifest;
    }
    llvm::yaml::Node* const rootNode = docIt->getRoot();
    // A syntax error can also surface as a null root. Check the stream's
    // failure state first so a broken manifest is not mistaken for an
    // empty one.
    if (stream.failed())
    {
        return Unexpected(formatError(
            "{}: invalid YAML syntax", yamlPath));
    }
    if (rootNode == nullptr ||
        llvm::isa<llvm::yaml::NullNode>(rootNode))
    {
        // Empty document: a file with no content, only comments, or a
        // literal `null`. All of these mean "no rules".
        return manifest;
    }
    llvm::yaml::MappingNode* const root =
        llvm::dyn_cast<llvm::yaml::MappingNode>(rootNode);
    if (!root)
    {
        return Unexpected(formatError(
            "{}: top-level YAML node must be a mapping", yamlPath));
    }
    for (llvm::yaml::KeyValueNode& pair : *root)
    {
        MRDOCS_TRY(parseTopLevelEntry(pair, manifest, yamlPath));
    }
    // The mapping is parsed lazily while it is iterated, and a syntax
    // error ends the iteration early. Without this check a truncated
    // manifest would be returned as if it were complete.
    if (stream.failed())
    {
        return Unexpected(formatError(
            "{}: invalid YAML syntax", yamlPath));
    }
    return manifest;
}
141+
142+
namespace {
143+
144+
constexpr std::string_view metadataFileName = "mrdocs-generator.yml";
145+
146+
// Append every manifested subdirectory of `generatorDir` to `out`.
147+
Expected<void>
148+
scanGeneratorDir(
149+
std::string_view generatorDir,
150+
std::vector<DiscoveredManifest>& out)
151+
{
152+
namespace fs = std::filesystem;
153+
std::error_code iterEc;
154+
fs::directory_iterator const end{};
155+
for (fs::directory_iterator it(generatorDir, iterEc);
156+
!iterEc && it != end;
157+
it.increment(iterEc))
158+
{
159+
std::error_code typeEc;
160+
if (!it->is_directory(typeEc))
161+
{
162+
continue;
163+
}
164+
std::string const dir = it->path().string();
165+
std::string const yamlPath = files::appendPath(
166+
dir, std::string(metadataFileName));
167+
if (!files::exists(yamlPath))
168+
{
169+
continue;
170+
}
171+
MRDOCS_TRY(GeneratorManifest manifest, loadGeneratorManifest(yamlPath));
172+
out.push_back(DiscoveredManifest{ dir, std::move(manifest) });
173+
}
174+
return {};
175+
}
176+
177+
} // (anon)
178+
179+
// Gather the manifested generator directories of every addon root.
//
// Roots are visited in the order given. A root with no `generator`
// subdirectory contributes nothing. The first manifest that fails to
// load stops discovery and its error is returned.
Expected<std::vector<DiscoveredManifest>>
discoverGeneratorManifests(std::vector<std::string> const& roots)
{
    std::vector<DiscoveredManifest> found;
    for (std::string const& addonRoot : roots)
    {
        std::string const generatorDir =
            files::appendPath(addonRoot, "generator");
        if (files::exists(generatorDir))
        {
            MRDOCS_TRY(scanGeneratorDir(generatorDir, found));
        }
    }
    return found;
}
194+
195+
} // mrdocs

src/lib/Gen/GeneratorManifest.hpp

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
//
2+
// Licensed under the Apache License v2.0 with LLVM Exceptions.
3+
// See https://llvm.org/LICENSE.txt for license information.
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
//
6+
// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com)
7+
//
8+
// Official repository: https://github.com/cppalliance/mrdocs
9+
//
10+
11+
#ifndef MRDOCS_LIB_GEN_GENERATORMANIFEST_HPP
12+
#define MRDOCS_LIB_GEN_GENERATORMANIFEST_HPP
13+
14+
#include <mrdocs/Support/Error.hpp>
15+
#include <mrdocs/Support/Expected.hpp>
16+
#include <optional>
17+
#include <string>
18+
#include <string_view>
19+
#include <utility>
20+
#include <vector>
21+
22+
namespace mrdocs {
23+
24+
/** Plain-data view of a generator manifest.

    The manifest is the `mrdocs-generator.yml` file shipped by an addon
    directory of the form <root>/generator/<name>/ to declare a
    generator. One file format serves both generator flavors, and each
    flavor consumes its own field:

    @li `escape` is consumed by a data-driven (Handlebars) generator.

    @li `script` is consumed by a script-driven generator.

    Whether `script` is present decides the flavor: a manifest with a
    `script` entry declares a script-driven generator, and any other
    manifest declares a data-driven one.
*/
struct GeneratorManifest
{
    /** Entry file of a script-driven generator.

        The value of the manifest's optional `script` key: a path
        relative to the generator directory. Holds no value when the
        manifest has no `script` key, which marks the directory as a
        data-driven generator.
    */
    std::optional<std::string> script;

    /** Escape rules of a data-driven generator.

        One pair per rule, in the order the manifest lists them: the
        byte sequence to replace, then its replacement string. Empty
        when the manifest declares no escape rules.
    */
    std::vector<std::pair<std::string, std::string>> escape;
};
56+
57+
/** Parse a generator manifest into plain data.
58+
59+
Read the file at `yamlPath` and return its contents. The file is
60+
expected to contain a top-level mapping. The optional `escape` key
61+
holds a sub-mapping from byte-sequence keys to replacement strings;
62+
keys may be one or more bytes long, and an empty key is a hard error.
63+
The optional `script` key holds the entry-file path as a scalar.
64+
Unknown top-level keys are ignored so future schema additions are
65+
non-breaking.
66+
67+
An empty document (an empty file, comments only, or a literal `null`)
68+
yields an empty manifest.
69+
*/
70+
Expected<GeneratorManifest>
71+
loadGeneratorManifest(std::string_view yamlPath);
72+
73+
/** A generator directory paired with its parsed manifest.
74+
*/
75+
struct DiscoveredManifest
76+
{
77+
/** The generator directory, of the form <root>/generator/<name>.
78+
*/
79+
std::string dir;
80+
81+
/** The parsed contents of the directory's manifest.
82+
*/
83+
GeneratorManifest manifest;
84+
};
85+
86+
/** Find every addon generator directory that ships a manifest.
87+
88+
For each addon root, walk the immediate subdirectories of
89+
<root>/generator/. A subdirectory is reported when it ships an
90+
`mrdocs-generator.yml`; the manifest is parsed and returned alongside
91+
its directory. Directories without a manifest (the built-in shared
92+
common/ is the canonical example) are skipped.
93+
94+
The presence of a `script` entry distinguishes the two generator
95+
flavors, so a caller installs the flavor it owns and ignores the
96+
other. Roots are searched in order, so the result preserves addon
97+
precedence.
98+
*/
99+
Expected<std::vector<DiscoveredManifest>>
100+
discoverGeneratorManifests(std::vector<std::string> const& roots);
101+
102+
} // mrdocs
103+
104+
#endif

0 commit comments

Comments
 (0)