Skip to content

Commit c8a55d0

Browse files
committed
Merge remote-tracking branch 'origin/30-backend-unified-dispatcher' into staging
2 parents 5bfb3e9 + b0f6eb2 commit c8a55d0

5 files changed

Lines changed: 504 additions & 25 deletions

File tree

src/h5_attr_reader.hpp

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/* Copyright (c) 2018-2026 Steven Varga, steven@vargalabs.com Toronto, ON Canada */
2+
3+
#pragma once
4+
5+
// Issue #32 — shared attribute readers for h5::* annotations.
6+
//
7+
// The user-facing rewriter (src/h5_attr_translator.hpp) lowers every
8+
// [[h5::xxx(args)]] into a [[clang::annotate("h5::xxx", args)]] before
9+
// Clang sees the source, so the AST consumer sees a uniform shape:
10+
// a `clang::AnnotateAttr` whose first string is "h5::<kind>" and whose
11+
// trailing args are clang::Expr* nodes.
12+
13+
#include <clang/AST/Attr.h>
14+
#include <clang/AST/ASTContext.h>
15+
#include <clang/AST/Decl.h>
16+
#include <clang/AST/DeclCXX.h>
17+
#include <clang/AST/Expr.h>
18+
#include <llvm/ADT/SmallVector.h>
19+
#include <llvm/ADT/StringRef.h>
20+
#include <llvm/Support/Casting.h>
21+
#include <llvm/Support/raw_ostream.h>
22+
23+
#include <cstdint>
24+
#include <optional>
25+
#include <string>
26+
#include <vector>
27+
28+
namespace h5_attr_reader {
29+
30+
// Return the first `clang::AnnotateAttr` attached to `d` whose annotation
// string equals `kind` (e.g. "h5::name"), or nullptr when `d` is null or no
// such annotation is present.
inline const clang::AnnotateAttr*
find_annotate(const clang::Decl* d, llvm::StringRef kind) {
    if (d == nullptr) return nullptr;
    // specific_attrs<> yields only the AnnotateAttr entries, in declaration order.
    for (const clang::AnnotateAttr* candidate : d->specific_attrs<clang::AnnotateAttr>()) {
        if (candidate->getAnnotation() == kind) return candidate;
    }
    return nullptr;
}
40+
41+
// True when `d` carries an annotation whose string equals `kind`.
// A null `d` yields false (find_annotate returns nullptr for it).
inline bool has_attr(const clang::Decl* d, llvm::StringRef kind) {
    const clang::AnnotateAttr* const hit = find_annotate(d, kind);
    return hit != nullptr;
}
44+
45+
// Return the text of the first annotation argument that is a string literal
// (after stripping parentheses and implicit casts). Returns an empty string
// when `ann` is null or none of its arguments is a string literal.
inline std::string read_string_arg(const clang::AnnotateAttr* ann) {
    std::string first;
    if (ann == nullptr) return first;
    for (const clang::Expr* expr : ann->args()) {
        const clang::Expr* stripped = expr->IgnoreParenImpCasts();
        const auto* literal = llvm::dyn_cast<clang::StringLiteral>(stripped);
        if (literal == nullptr) continue;  // not a string literal — keep looking
        first = literal->getString().str();
        break;
    }
    return first;
}
55+
56+
inline std::vector<std::string>
57+
read_string_args(const clang::AnnotateAttr* ann) {
58+
std::vector<std::string> out;
59+
if (!ann) return out;
60+
for (const clang::Expr* arg : ann->args()) {
61+
if (const auto* sl =
62+
llvm::dyn_cast<clang::StringLiteral>(arg->IgnoreParenImpCasts())) {
63+
out.push_back(sl->getString().str());
64+
}
65+
}
66+
return out;
67+
}
68+
69+
inline std::vector<std::uint32_t>
70+
read_int_args(const clang::AnnotateAttr* ann, clang::ASTContext& ctx) {
71+
std::vector<std::uint32_t> out;
72+
if (!ann) return out;
73+
for (const clang::Expr* arg : ann->args()) {
74+
clang::Expr::EvalResult res;
75+
if (arg->EvaluateAsInt(res, ctx)) {
76+
out.push_back(static_cast<std::uint32_t>(res.Val.getInt().getZExtValue()));
77+
}
78+
}
79+
return out;
80+
}
81+
82+
inline std::optional<std::uint32_t>
83+
read_int_arg(const clang::AnnotateAttr* ann, clang::ASTContext& ctx) {
84+
auto v = read_int_args(ann, ctx);
85+
if (v.empty()) return std::nullopt;
86+
return v.front();
87+
}
88+
89+
inline std::optional<std::string>
90+
read_first_arg_text(const clang::AnnotateAttr* ann, clang::ASTContext& ctx) {
91+
if (!ann || ann->args().empty()) return std::nullopt;
92+
const clang::Expr* arg = ann->args().begin()[0];
93+
std::string out;
94+
llvm::raw_string_ostream os(out);
95+
arg->printPretty(os, nullptr, clang::PrintingPolicy{ctx.getLangOpts()});
96+
return out;
97+
}
98+
99+
// Convenience wrappers
100+
101+
inline std::string
102+
read_field_string(const clang::FieldDecl* fld, llvm::StringRef kind) {
103+
return read_string_arg(find_annotate(fld, kind));
104+
}
105+
106+
inline std::string
107+
read_class_string(const clang::CXXRecordDecl* rec, llvm::StringRef kind) {
108+
return read_string_arg(find_annotate(rec, kind));
109+
}
110+
111+
inline std::vector<std::uint32_t>
112+
read_field_ints(const clang::FieldDecl* fld, llvm::StringRef kind) {
113+
return read_int_args(find_annotate(fld, kind), fld->getASTContext());
114+
}
115+
116+
inline std::vector<std::uint32_t>
117+
read_class_ints(const clang::CXXRecordDecl* rec, llvm::StringRef kind) {
118+
return read_int_args(find_annotate(rec, kind), rec->getASTContext());
119+
}
120+
121+
inline std::optional<std::uint32_t>
122+
read_class_int(const clang::CXXRecordDecl* rec, llvm::StringRef kind) {
123+
return read_int_arg(find_annotate(rec, kind), rec->getASTContext());
124+
}
125+
126+
} // namespace h5_attr_reader

src/h5_attr_translator.hpp

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
/* Copyright (c) 2018-2026 Steven Varga, steven@vargalabs.com Toronto, ON Canada */
2+
3+
#pragma once
4+
5+
// Issue #32 — source-level translator for the clean `[[h5::xxx(...)]]`
6+
// attribute syntax.
7+
//
8+
// Clang 20's standard-attribute parser drops the argument list for plugin-
9+
// registered namespace-scoped attributes when they use C++11 `[[ns::name(...)]]`
10+
// syntax. To deliver the clean user-facing surface, h5cpp-compiler rewrites the
11+
// source before handing it to Clang Tooling:
12+
//
13+
// [[h5::name("x")]] → [[clang::annotate("h5::name", "x")]]
14+
// [[h5::ignore]] → [[clang::annotate("h5::ignore")]]
15+
// [[h5::chunk(1024)]] → [[clang::annotate("h5::chunk", 1024)]]
16+
// [[h5::compress(gzip, 6)]]
17+
// → [[clang::annotate("h5::compress", gzip, 6)]]
18+
//
19+
// The rewritten source goes through Clang as a standard `clang::annotate`
20+
// annotation. The user only ever sees the clean syntax; the wrapper is internal.
21+
22+
#include <clang/Tooling/Tooling.h>
#include <llvm/ADT/SmallVector.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/MemoryBuffer.h>

#include <cctype>
#include <cstddef>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
31+
32+
namespace h5_attr_translator {
33+
34+
// Identifiers we recognize after `h5::` inside an attribute. Anything else
35+
// is left verbatim so user code can carry unrelated attributes alongside.
36+
inline bool is_h5_attr_name(llvm::StringRef name) {
37+
return name == "name" || name == "ignore" || name == "chunk"
38+
|| name == "compress" || name == "doc" || name == "on_missing"
39+
|| name == "alias" || name == "version" || name == "name_all"
40+
|| name == "serialize_full";
41+
}
42+
43+
// Skip whitespace and comments at position `i` in `src`. Returns the new
44+
// position. Comments are emitted to `out` verbatim.
45+
inline std::size_t skip_ws(llvm::StringRef src, std::size_t i, std::string& out) {
46+
while (i < src.size()) {
47+
char c = src[i];
48+
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
49+
out.push_back(c); ++i;
50+
} else if (c == '/' && i + 1 < src.size() && src[i+1] == '/') {
51+
while (i < src.size() && src[i] != '\n') { out.push_back(src[i]); ++i; }
52+
} else if (c == '/' && i + 1 < src.size() && src[i+1] == '*') {
53+
out.push_back(src[i++]); out.push_back(src[i++]);
54+
while (i + 1 < src.size() && !(src[i] == '*' && src[i+1] == '/')) {
55+
out.push_back(src[i++]);
56+
}
57+
if (i + 1 < src.size()) { out.push_back(src[i++]); out.push_back(src[i++]); }
58+
} else {
59+
break;
60+
}
61+
}
62+
return i;
63+
}
64+
65+
// Find the position of the closing `)` matching the `(` at `start`. Returns
66+
// `start` on failure. Respects nested parens and string/char literals.
67+
inline std::size_t find_matching_paren(llvm::StringRef src, std::size_t start) {
68+
if (start >= src.size() || src[start] != '(') return start;
69+
int depth = 1;
70+
std::size_t i = start + 1;
71+
while (i < src.size() && depth > 0) {
72+
char c = src[i];
73+
if (c == '(') ++depth;
74+
else if (c == ')') --depth;
75+
else if (c == '"' || c == '\'') {
76+
char q = c;
77+
++i;
78+
while (i < src.size() && src[i] != q) {
79+
if (src[i] == '\\' && i + 1 < src.size()) ++i;
80+
++i;
81+
}
82+
}
83+
if (depth == 0) return i;
84+
++i;
85+
}
86+
return start;
87+
}
88+
89+
// Find the matching `]]` for the `[[` at `start`. Returns `start` on failure.
90+
inline std::size_t find_attr_close(llvm::StringRef src, std::size_t start) {
91+
if (start + 1 >= src.size() || src[start] != '[' || src[start+1] != '[') return start;
92+
std::size_t i = start + 2;
93+
int paren_depth = 0;
94+
while (i + 1 < src.size()) {
95+
char c = src[i];
96+
if (c == '(' || c == '[' || c == '{') ++paren_depth;
97+
else if (c == ')' || c == '}') --paren_depth;
98+
else if (c == ']') {
99+
if (paren_depth == 0 && src[i+1] == ']') return i;
100+
--paren_depth;
101+
}
102+
else if (c == '"' || c == '\'') {
103+
char q = c;
104+
++i;
105+
while (i < src.size() && src[i] != q) {
106+
if (src[i] == '\\' && i + 1 < src.size()) ++i;
107+
++i;
108+
}
109+
}
110+
++i;
111+
}
112+
return start;
113+
}
114+
115+
// Split an attribute block's contents by top-level commas.
116+
inline std::vector<llvm::StringRef> split_attrs(llvm::StringRef block) {
117+
std::vector<llvm::StringRef> out;
118+
int depth = 0;
119+
std::size_t start = 0;
120+
for (std::size_t i = 0; i < block.size(); ++i) {
121+
char c = block[i];
122+
if (c == '(' || c == '[' || c == '{') ++depth;
123+
else if (c == ')' || c == ']' || c == '}') --depth;
124+
else if (c == '"' || c == '\'') {
125+
char q = c;
126+
++i;
127+
while (i < block.size() && block[i] != q) {
128+
if (block[i] == '\\' && i + 1 < block.size()) ++i;
129+
++i;
130+
}
131+
}
132+
else if (c == ',' && depth == 0) {
133+
out.push_back(block.substr(start, i - start));
134+
start = i + 1;
135+
}
136+
}
137+
out.push_back(block.substr(start));
138+
return out;
139+
}
140+
141+
// Rewrite one attribute spec. If it starts with `h5::<recognized-name>`,
142+
// convert to `clang::annotate("h5::<name>", <args>)`. Otherwise leave verbatim.
143+
inline std::string rewrite_one_attr(llvm::StringRef spec) {
144+
std::size_t start = 0;
145+
while (start < spec.size() && std::isspace(static_cast<unsigned char>(spec[start]))) ++start;
146+
llvm::StringRef leading = spec.substr(0, start);
147+
llvm::StringRef body = spec.substr(start);
148+
149+
constexpr llvm::StringRef ns = "h5::";
150+
if (!body.starts_with(ns)) return spec.str();
151+
152+
body = body.drop_front(ns.size());
153+
std::size_t i = 0;
154+
while (i < body.size()
155+
&& (std::isalnum(static_cast<unsigned char>(body[i])) || body[i] == '_')) ++i;
156+
if (i == 0) return spec.str();
157+
llvm::StringRef name = body.substr(0, i);
158+
if (!is_h5_attr_name(name)) return spec.str();
159+
160+
while (i < body.size() && std::isspace(static_cast<unsigned char>(body[i]))) ++i;
161+
162+
std::string out;
163+
out.append(leading.str());
164+
if (i >= body.size() || body[i] != '(') {
165+
out.append("clang::annotate(\"h5::");
166+
out.append(name.str());
167+
out.append("\")");
168+
out.append(body.substr(i).str());
169+
return out;
170+
}
171+
std::size_t paren_end = find_matching_paren(body, i);
172+
if (paren_end == i) return spec.str();
173+
llvm::StringRef args_inside = body.substr(i + 1, paren_end - i - 1);
174+
out.append("clang::annotate(\"h5::");
175+
out.append(name.str());
176+
out.append("\"");
177+
if (!args_inside.trim().empty()) {
178+
out.append(", ");
179+
out.append(args_inside.str());
180+
}
181+
out.append(")");
182+
if (paren_end + 1 < body.size()) {
183+
out.append(body.substr(paren_end + 1).str());
184+
}
185+
return out;
186+
}
187+
188+
// Whole-file rewrite. Respects // and /* */ comments and string/char literals.
189+
inline std::string rewrite(llvm::StringRef src) {
190+
std::string out;
191+
out.reserve(src.size() + src.size() / 16);
192+
std::size_t i = 0, n = src.size();
193+
while (i < n) {
194+
char c = src[i];
195+
if (c == '/' && i + 1 < n && src[i+1] == '/') {
196+
while (i < n && src[i] != '\n') out.push_back(src[i++]);
197+
continue;
198+
}
199+
if (c == '/' && i + 1 < n && src[i+1] == '*') {
200+
out.push_back(src[i++]); out.push_back(src[i++]);
201+
while (i + 1 < n && !(src[i] == '*' && src[i+1] == '/')) out.push_back(src[i++]);
202+
if (i + 1 < n) { out.push_back(src[i++]); out.push_back(src[i++]); }
203+
continue;
204+
}
205+
if (c == '"' || c == '\'') {
206+
char q = c;
207+
out.push_back(src[i++]);
208+
while (i < n && src[i] != q) {
209+
if (src[i] == '\\' && i + 1 < n) {
210+
out.push_back(src[i++]);
211+
out.push_back(src[i++]);
212+
continue;
213+
}
214+
out.push_back(src[i++]);
215+
}
216+
if (i < n) out.push_back(src[i++]);
217+
continue;
218+
}
219+
if (c == '[' && i + 1 < n && src[i+1] == '[') {
220+
std::size_t close = find_attr_close(src, i);
221+
if (close == i) {
222+
out.push_back(src[i++]);
223+
continue;
224+
}
225+
llvm::StringRef block = src.substr(i + 2, close - i - 2);
226+
if (block.contains("h5::")) {
227+
auto attrs = split_attrs(block);
228+
out.append("[[");
229+
for (std::size_t k = 0; k < attrs.size(); ++k) {
230+
if (k) out.append(",");
231+
out.append(rewrite_one_attr(attrs[k]));
232+
}
233+
out.append("]]");
234+
} else {
235+
out.append(src.substr(i, close + 2 - i).str());
236+
}
237+
i = close + 2;
238+
continue;
239+
}
240+
out.push_back(src[i++]);
241+
}
242+
return out;
243+
}
244+
245+
// Read the entire file at `path` and return its contents. Returns an empty
// string when the file cannot be opened, which is indistinguishable from a
// file that exists but is empty.
inline std::string read_file(const std::string& path) {
    std::ifstream input(path);
    if (input.fail()) return std::string();
    std::ostringstream buffer;
    buffer << input.rdbuf();
    return buffer.str();
}
251+
252+
// For every path in `paths` whose file can be read and mentions `h5::`,
// rewrite the file contents (see rewrite) and register the result with `Tool`
// as a virtual file under the same path. Unreadable or empty files and files
// without `h5::` are skipped, so the tool reads those from disk as usual.
//
// The rewritten texts are appended to `storage`, which owns them.
// ClangTool::mapVirtualFile takes llvm::StringRef arguments, so the tool
// presumably keeps referring to the path and content buffers rather than
// copying them — the caller should keep `paths` and `storage` alive and
// unmodified until the tool has run (verify against the ClangTool docs).
inline void install_virtual_files(clang::tooling::ClangTool& Tool,
                                  const std::vector<std::string>& paths,
                                  std::vector<std::string>& storage) {
    // Reserve room for the worst case on top of what `storage` already holds,
    // so that no push_back below reallocates the vector and moves strings
    // whose buffers were already handed to the tool.
    storage.reserve(storage.size() + paths.size());
    for (const auto& p : paths) {
        const std::string content = read_file(p);
        // An empty (or unreadable) file cannot contain the marker either.
        if (content.find("h5::") == std::string::npos) continue;
        storage.push_back(rewrite(content));
        Tool.mapVirtualFile(p, storage.back());
    }
}
264+
265+
} // namespace h5_attr_translator

0 commit comments

Comments
 (0)