|
| 1 | +/* Copyright (c) 2018-2026 Steven Varga, steven@vargalabs.com Toronto, ON Canada */ |
| 2 | + |
| 3 | +#pragma once |
| 4 | + |
| 5 | +// Issue #32 — source-level translator for the clean `[[h5::xxx(...)]]` |
| 6 | +// attribute syntax. |
| 7 | +// |
| 8 | +// Clang 20's standard-attribute parser drops the argument list for plugin- |
| 9 | +// registered namespace-scoped attributes when they use C++11 `[[ns::name(...)]]` |
| 10 | +// syntax. To deliver the clean user-facing surface, h5cpp-compiler rewrites the |
| 11 | +// source before handing it to Clang Tooling: |
| 12 | +// |
| 13 | +// [[h5::name("x")]] → [[clang::annotate("h5::name", "x")]] |
| 14 | +// [[h5::ignore]] → [[clang::annotate("h5::ignore")]] |
| 15 | +// [[h5::chunk(1024)]] → [[clang::annotate("h5::chunk", 1024)]] |
| 16 | +// [[h5::compress(gzip, 6)]] |
| 17 | +// → [[clang::annotate("h5::compress", gzip, 6)]] |
| 18 | +// |
| 19 | +// The rewritten source goes through Clang as a standard `clang::annotate` |
| 20 | +// annotation. The user only ever sees the clean syntax; the wrapper is internal. |
| 21 | + |
#include <clang/Tooling/Tooling.h>
#include <llvm/ADT/SmallVector.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/MemoryBuffer.h>

#include <cctype>
#include <cstddef>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
| 31 | + |
| 32 | +namespace h5_attr_translator { |
| 33 | + |
| 34 | +// Identifiers we recognize after `h5::` inside an attribute. Anything else |
| 35 | +// is left verbatim so user code can carry unrelated attributes alongside. |
| 36 | +inline bool is_h5_attr_name(llvm::StringRef name) { |
| 37 | + return name == "name" || name == "ignore" || name == "chunk" |
| 38 | + || name == "compress" || name == "doc" || name == "on_missing" |
| 39 | + || name == "alias" || name == "version" || name == "name_all" |
| 40 | + || name == "serialize_full"; |
| 41 | +} |
| 42 | + |
| 43 | +// Skip whitespace and comments at position `i` in `src`. Returns the new |
| 44 | +// position. Comments are emitted to `out` verbatim. |
| 45 | +inline std::size_t skip_ws(llvm::StringRef src, std::size_t i, std::string& out) { |
| 46 | + while (i < src.size()) { |
| 47 | + char c = src[i]; |
| 48 | + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
| 49 | + out.push_back(c); ++i; |
| 50 | + } else if (c == '/' && i + 1 < src.size() && src[i+1] == '/') { |
| 51 | + while (i < src.size() && src[i] != '\n') { out.push_back(src[i]); ++i; } |
| 52 | + } else if (c == '/' && i + 1 < src.size() && src[i+1] == '*') { |
| 53 | + out.push_back(src[i++]); out.push_back(src[i++]); |
| 54 | + while (i + 1 < src.size() && !(src[i] == '*' && src[i+1] == '/')) { |
| 55 | + out.push_back(src[i++]); |
| 56 | + } |
| 57 | + if (i + 1 < src.size()) { out.push_back(src[i++]); out.push_back(src[i++]); } |
| 58 | + } else { |
| 59 | + break; |
| 60 | + } |
| 61 | + } |
| 62 | + return i; |
| 63 | +} |
| 64 | + |
| 65 | +// Find the position of the closing `)` matching the `(` at `start`. Returns |
| 66 | +// `start` on failure. Respects nested parens and string/char literals. |
| 67 | +inline std::size_t find_matching_paren(llvm::StringRef src, std::size_t start) { |
| 68 | + if (start >= src.size() || src[start] != '(') return start; |
| 69 | + int depth = 1; |
| 70 | + std::size_t i = start + 1; |
| 71 | + while (i < src.size() && depth > 0) { |
| 72 | + char c = src[i]; |
| 73 | + if (c == '(') ++depth; |
| 74 | + else if (c == ')') --depth; |
| 75 | + else if (c == '"' || c == '\'') { |
| 76 | + char q = c; |
| 77 | + ++i; |
| 78 | + while (i < src.size() && src[i] != q) { |
| 79 | + if (src[i] == '\\' && i + 1 < src.size()) ++i; |
| 80 | + ++i; |
| 81 | + } |
| 82 | + } |
| 83 | + if (depth == 0) return i; |
| 84 | + ++i; |
| 85 | + } |
| 86 | + return start; |
| 87 | +} |
| 88 | + |
| 89 | +// Find the matching `]]` for the `[[` at `start`. Returns `start` on failure. |
| 90 | +inline std::size_t find_attr_close(llvm::StringRef src, std::size_t start) { |
| 91 | + if (start + 1 >= src.size() || src[start] != '[' || src[start+1] != '[') return start; |
| 92 | + std::size_t i = start + 2; |
| 93 | + int paren_depth = 0; |
| 94 | + while (i + 1 < src.size()) { |
| 95 | + char c = src[i]; |
| 96 | + if (c == '(' || c == '[' || c == '{') ++paren_depth; |
| 97 | + else if (c == ')' || c == '}') --paren_depth; |
| 98 | + else if (c == ']') { |
| 99 | + if (paren_depth == 0 && src[i+1] == ']') return i; |
| 100 | + --paren_depth; |
| 101 | + } |
| 102 | + else if (c == '"' || c == '\'') { |
| 103 | + char q = c; |
| 104 | + ++i; |
| 105 | + while (i < src.size() && src[i] != q) { |
| 106 | + if (src[i] == '\\' && i + 1 < src.size()) ++i; |
| 107 | + ++i; |
| 108 | + } |
| 109 | + } |
| 110 | + ++i; |
| 111 | + } |
| 112 | + return start; |
| 113 | +} |
| 114 | + |
| 115 | +// Split an attribute block's contents by top-level commas. |
| 116 | +inline std::vector<llvm::StringRef> split_attrs(llvm::StringRef block) { |
| 117 | + std::vector<llvm::StringRef> out; |
| 118 | + int depth = 0; |
| 119 | + std::size_t start = 0; |
| 120 | + for (std::size_t i = 0; i < block.size(); ++i) { |
| 121 | + char c = block[i]; |
| 122 | + if (c == '(' || c == '[' || c == '{') ++depth; |
| 123 | + else if (c == ')' || c == ']' || c == '}') --depth; |
| 124 | + else if (c == '"' || c == '\'') { |
| 125 | + char q = c; |
| 126 | + ++i; |
| 127 | + while (i < block.size() && block[i] != q) { |
| 128 | + if (block[i] == '\\' && i + 1 < block.size()) ++i; |
| 129 | + ++i; |
| 130 | + } |
| 131 | + } |
| 132 | + else if (c == ',' && depth == 0) { |
| 133 | + out.push_back(block.substr(start, i - start)); |
| 134 | + start = i + 1; |
| 135 | + } |
| 136 | + } |
| 137 | + out.push_back(block.substr(start)); |
| 138 | + return out; |
| 139 | +} |
| 140 | + |
| 141 | +// Rewrite one attribute spec. If it starts with `h5::<recognized-name>`, |
| 142 | +// convert to `clang::annotate("h5::<name>", <args>)`. Otherwise leave verbatim. |
| 143 | +inline std::string rewrite_one_attr(llvm::StringRef spec) { |
| 144 | + std::size_t start = 0; |
| 145 | + while (start < spec.size() && std::isspace(static_cast<unsigned char>(spec[start]))) ++start; |
| 146 | + llvm::StringRef leading = spec.substr(0, start); |
| 147 | + llvm::StringRef body = spec.substr(start); |
| 148 | + |
| 149 | + constexpr llvm::StringRef ns = "h5::"; |
| 150 | + if (!body.starts_with(ns)) return spec.str(); |
| 151 | + |
| 152 | + body = body.drop_front(ns.size()); |
| 153 | + std::size_t i = 0; |
| 154 | + while (i < body.size() |
| 155 | + && (std::isalnum(static_cast<unsigned char>(body[i])) || body[i] == '_')) ++i; |
| 156 | + if (i == 0) return spec.str(); |
| 157 | + llvm::StringRef name = body.substr(0, i); |
| 158 | + if (!is_h5_attr_name(name)) return spec.str(); |
| 159 | + |
| 160 | + while (i < body.size() && std::isspace(static_cast<unsigned char>(body[i]))) ++i; |
| 161 | + |
| 162 | + std::string out; |
| 163 | + out.append(leading.str()); |
| 164 | + if (i >= body.size() || body[i] != '(') { |
| 165 | + out.append("clang::annotate(\"h5::"); |
| 166 | + out.append(name.str()); |
| 167 | + out.append("\")"); |
| 168 | + out.append(body.substr(i).str()); |
| 169 | + return out; |
| 170 | + } |
| 171 | + std::size_t paren_end = find_matching_paren(body, i); |
| 172 | + if (paren_end == i) return spec.str(); |
| 173 | + llvm::StringRef args_inside = body.substr(i + 1, paren_end - i - 1); |
| 174 | + out.append("clang::annotate(\"h5::"); |
| 175 | + out.append(name.str()); |
| 176 | + out.append("\""); |
| 177 | + if (!args_inside.trim().empty()) { |
| 178 | + out.append(", "); |
| 179 | + out.append(args_inside.str()); |
| 180 | + } |
| 181 | + out.append(")"); |
| 182 | + if (paren_end + 1 < body.size()) { |
| 183 | + out.append(body.substr(paren_end + 1).str()); |
| 184 | + } |
| 185 | + return out; |
| 186 | +} |
| 187 | + |
| 188 | +// Whole-file rewrite. Respects // and /* */ comments and string/char literals. |
| 189 | +inline std::string rewrite(llvm::StringRef src) { |
| 190 | + std::string out; |
| 191 | + out.reserve(src.size() + src.size() / 16); |
| 192 | + std::size_t i = 0, n = src.size(); |
| 193 | + while (i < n) { |
| 194 | + char c = src[i]; |
| 195 | + if (c == '/' && i + 1 < n && src[i+1] == '/') { |
| 196 | + while (i < n && src[i] != '\n') out.push_back(src[i++]); |
| 197 | + continue; |
| 198 | + } |
| 199 | + if (c == '/' && i + 1 < n && src[i+1] == '*') { |
| 200 | + out.push_back(src[i++]); out.push_back(src[i++]); |
| 201 | + while (i + 1 < n && !(src[i] == '*' && src[i+1] == '/')) out.push_back(src[i++]); |
| 202 | + if (i + 1 < n) { out.push_back(src[i++]); out.push_back(src[i++]); } |
| 203 | + continue; |
| 204 | + } |
| 205 | + if (c == '"' || c == '\'') { |
| 206 | + char q = c; |
| 207 | + out.push_back(src[i++]); |
| 208 | + while (i < n && src[i] != q) { |
| 209 | + if (src[i] == '\\' && i + 1 < n) { |
| 210 | + out.push_back(src[i++]); |
| 211 | + out.push_back(src[i++]); |
| 212 | + continue; |
| 213 | + } |
| 214 | + out.push_back(src[i++]); |
| 215 | + } |
| 216 | + if (i < n) out.push_back(src[i++]); |
| 217 | + continue; |
| 218 | + } |
| 219 | + if (c == '[' && i + 1 < n && src[i+1] == '[') { |
| 220 | + std::size_t close = find_attr_close(src, i); |
| 221 | + if (close == i) { |
| 222 | + out.push_back(src[i++]); |
| 223 | + continue; |
| 224 | + } |
| 225 | + llvm::StringRef block = src.substr(i + 2, close - i - 2); |
| 226 | + if (block.contains("h5::")) { |
| 227 | + auto attrs = split_attrs(block); |
| 228 | + out.append("[["); |
| 229 | + for (std::size_t k = 0; k < attrs.size(); ++k) { |
| 230 | + if (k) out.append(","); |
| 231 | + out.append(rewrite_one_attr(attrs[k])); |
| 232 | + } |
| 233 | + out.append("]]"); |
| 234 | + } else { |
| 235 | + out.append(src.substr(i, close + 2 - i).str()); |
| 236 | + } |
| 237 | + i = close + 2; |
| 238 | + continue; |
| 239 | + } |
| 240 | + out.push_back(src[i++]); |
| 241 | + } |
| 242 | + return out; |
| 243 | +} |
| 244 | + |
// Loads the entire file at `path` into a string. A file that cannot be
// opened produces an empty string, so callers cannot distinguish that case
// from a file that is genuinely empty.
inline std::string read_file(const std::string& path) {
    std::string text;
    std::ifstream input(path);
    if (input) {
        std::ostringstream buffer;
        buffer << input.rdbuf();
        text = buffer.str();
    }
    return text;
}
| 251 | + |
| 252 | +inline void install_virtual_files(clang::tooling::ClangTool& Tool, |
| 253 | + const std::vector<std::string>& paths, |
| 254 | + std::vector<std::string>& storage) { |
| 255 | + storage.reserve(paths.size()); |
| 256 | + for (const auto& p : paths) { |
| 257 | + std::string content = read_file(p); |
| 258 | + if (content.empty()) continue; |
| 259 | + if (content.find("h5::") == std::string::npos) continue; |
| 260 | + storage.push_back(rewrite(content)); |
| 261 | + Tool.mapVirtualFile(p, storage.back()); |
| 262 | + } |
| 263 | +} |
| 264 | + |
| 265 | +} // namespace h5_attr_translator |
0 commit comments