Skip to content

Commit ba848d5

Browse files
committed
Merge remote-tracking branch 'origin/30-pb-producer' into staging
2 parents 5c77c8f + aeb3727 commit ba848d5

17 files changed

Lines changed: 882 additions & 3 deletions

src/consumer_pb.hpp

Lines changed: 403 additions & 0 deletions
Large diffs are not rendered by default.

src/h5cpp.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
#include "consumer_rlp.hpp"
2222
#include "consumer_bson.hpp"
2323
#include "consumer_avro.hpp"
24+
#include "producer_pb.hpp"
25+
#include "consumer_pb.hpp"
2426
#include "h5_attr_translator.hpp"
2527
#include "consumer_json.hpp"
2628
#include "consumer_msgpack.hpp"
@@ -52,6 +54,24 @@ clang::ast_matchers::StatementMatcher h5templateMatcher = clang::ast_matchers::c
5254

5355
enum class OutputFormat { hdf5, protobuf, json, msgpack, cbor, bson, avro, rlp,
5456
sql_postgres, sql_mysql, sql_lite3 };
57+
// pbTemplateMatcher: same shape as h5templateMatcher, but triggers on the
58+
// pb.hpp public surface. Covers pb::encode, pb::decode, and pb::encode_into.
59+
clang::ast_matchers::StatementMatcher pbTemplateMatcher = clang::ast_matchers::callExpr( clang::ast_matchers::allOf(
60+
clang::ast_matchers::hasDescendant( clang::ast_matchers::declRefExpr( clang::ast_matchers::to( clang::ast_matchers::varDecl().bind("variableDecl") ) ) ),
61+
clang::ast_matchers::hasDescendant( clang::ast_matchers::declRefExpr( clang::ast_matchers::to(
62+
clang::ast_matchers::functionDecl( clang::ast_matchers::allOf(
63+
clang::ast_matchers::eachOf(
64+
clang::ast_matchers::hasName("pb::encode"), clang::ast_matchers::hasName("pb::decode"),
65+
clang::ast_matchers::hasName("pb::encode_into")
66+
),
67+
clang::ast_matchers::hasTemplateArgument(0, clang::ast_matchers::refersToType( clang::ast_matchers::qualType(
68+
clang::ast_matchers::hasDeclaration( clang::ast_matchers::cxxRecordDecl(clang::ast_matchers::isStruct()).bind("cxxRecordDecl"))
69+
) )),
70+
clang::ast_matchers::isTemplateInstantiation()
71+
)) )))
72+
));
73+
74+
enum class OutputFormat { hdf5, protobuf, json, msgpack, cbor, bson, avro, rlp };
5575

5676
static llvm::cl::OptionCategory MyToolCategory("h5cpp options");
5777
static llvm::cl::extrahelp CommonHelp(clang::tooling::CommonOptionsParser::HelpMessage);
@@ -123,14 +143,21 @@ int main(int argc, const char **argv) {
123143
rc = Tool.run(clang::tooling::newFrontendActionFactory(&Finder).get());
124144
break;
125145
}
126-
case OutputFormat::protobuf:
127-
llvm::errs() << "h5cpp-compiler: --format protobuf not yet implemented\n";
128-
rc = 1;
146+
case OutputFormat::protobuf: {
147+
PbTemplateCallback<PbProducer> callback(work_path);
148+
clang::ast_matchers::MatchFinder Finder;
149+
Finder.addMatcher(pbTemplateMatcher, &callback);
150+
rc = Tool.run(clang::tooling::newFrontendActionFactory(&Finder).get());
151+
if (rc == 0 && callback.error()) rc = 1;
129152
break;
130153
case OutputFormat::json: {
131154
JsonTemplateCallback callback(work_path);
132155
Finder.addMatcher(h5templateMatcher, &callback);
133156
rc = Tool.run(clang::tooling::newFrontendActionFactory(&Finder).get());
157+
}
158+
case OutputFormat::json:
159+
llvm::errs() << "h5cpp-compiler: --format json not yet implemented\n";
160+
rc = 1;
134161
break;
135162
}
136163
case OutputFormat::msgpack:

src/producer_pb.hpp

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/* Copyright (c) 2018-2026 Steven Varga, steven@vargalabs.com Toronto, ON Canada */
2+
3+
#pragma once
4+
5+
#include "producer.hpp"
6+
7+
#include <cstdint>
8+
#include <map>
9+
#include <ostream>
10+
#include <string>
11+
#include <utility>
12+
#include <vector>
13+
14+
// Tier-1 backend for vargalabs/sandbox/pb.hpp. Emits:
15+
// template<> struct pb::meta::descriptor_t<T> {
16+
// static constexpr auto fields = std::tuple{
17+
// pb::field<N, &T::m>{}, ...
18+
// };
19+
// };
20+
// One specialization per encountered record type, in topological order
21+
// (nested types before outer types). Field numbers come from per-field
22+
// [[clang::annotate("pb::field=N")]] attributes — the consumer (consumer_pb.hpp)
23+
// parses the annotations and supplies N to type_insert.
24+
/// Protocol-buffers backend for the CRTP Producer<> driver.
///
/// For every record it is handed, writes one
/// `template<> struct pb::meta::descriptor_t<Record>` specialization whose
/// `fields` member is a `std::tuple` of entries (pb::field, pb::oneof,
/// pb::adapter_field). All text is written to `io`, which is not declared in
/// this struct (presumably the output stream supplied by Producer<> — confirm).
///
/// Entries are comma-separated; `first_field` tracks whether the next entry is
/// the first one of the current descriptor and therefore needs no separator.
struct PbProducer : Producer<PbProducer> {

    /// Writes the generated-file preamble: banner, include guard, pb.hpp include.
    void file_begin_impl(){
        // NOTE(review): the banner names `--protocol-buffers`, whereas the driver's
        // diagnostics spell the option `--format protobuf` — confirm which is intended.
        io << "/* Generated by h5cpp-compiler --protocol-buffers. Do not edit. */\n";
        io << "#pragma once\n\n";
        io << "#include <pb.hpp>\n\n";
    }

    /// Writes the generated-file trailer (a single newline).
    void file_end_impl(){
        io << "\n";
    }

    /// Opens the descriptor for one record.
    /// @param record qualified type name (e.g. "::sn::sensor::reading_t"); each
    ///               record's descriptor_t is a standalone specialization at
    ///               file scope.
    void template_decl_impl(const std::string& record){
        record_name = record;
        first_field = true;   // a fresh tuple: the next entry gets no leading comma
        io << "template<> struct pb::meta::descriptor_t<" << record << "> {\n";
        io << " static constexpr auto fields = std::tuple{";
    }

    /// Hook for topologically-prior (nested) records. Intentionally a no-op:
    /// nested message types get their own descriptor_t specialization, so
    /// nothing is emitted here. Both arguments are ignored.
    void record_decl_impl(const std::string& var, const std::string& /*record_name*/){
        (void)var;
    }

    /// Hook for C array members (T[N]), which this backend does not support.
    /// Writes an error marker into the generated tuple instead of an entry.
    ///
    /// The marker is a block comment on purpose: the next entry writes its ","
    /// separator immediately after whatever was emitted last, so a `//` line
    /// comment here would swallow that comma and leave two tuple entries with
    /// no separator between them.
    void array_decl_impl(const std::string& var, const std::string& /*type*/, uint64_t /*size*/){
        (void)var;
        io << "\n /* ERROR: C arrays not supported by pb backend (use std::vector<T> instead) */";
    }

    /// Emits one `pb::field<N, &Record::member>{}` entry.
    /// @param var        field number as a decimal string ("1", "2", ...); the
    ///                   consumer overloads this slot instead of passing an
    ///                   HDF5 variable name
    /// @param field_name member identifier
    /// @param rec_name   qualified record type
    void type_insert_impl(const std::string& var, const std::string& field_name,
                          const std::string& rec_name, const std::string& /*type*/){
        if (!first_field) io << ",";
        io << "\n pb::field<" << var << ", &" << rec_name << "::" << field_name << ">{}";
        first_field = false;
    }

    /// Closes the tuple and the descriptor_t specialization opened by
    /// template_decl_impl. The argument is ignored.
    void return_type_impl(const std::string& /*var*/){
        io << "\n };\n};\n\n";
    }

    /// No-op: descriptor_t entries are pure compile-time, so there is no
    /// resource-id lifetime to release.
    void type_release_impl(){
    }

    // The cache contract is HDF5-shaped (cpp-type -> hid_t-var mapping), which
    // this backend does not need. Identity stubs keep the shared consumer
    // driver compiling.

    /// Always reports success; nothing is stored.
    bool cache_add_impl(const std::string& /*key*/, const std::string& /*type*/){ return true; }
    /// Returns `type` unchanged.
    std::string cache_impl(const std::string& type){ return type; }

    /// Emits a `pb::oneof<&R::m, pb::alt<T1,N1>, ...>{}` entry for a
    /// std::variant member. Called directly by the consumer rather than through
    /// the CRTP hook surface.
    /// @param member_name variant member identifier
    /// @param record_name qualified record type (shadows the private member of
    ///                    the same name; only the argument is used here)
    /// @param alt_types   alternative type names, one per pb::alt
    /// @param tags        field numbers, expected to pair 1:1 with alt_types
    ///
    /// Only the first min(alt_types.size(), tags.size()) pairs are written, so
    /// a short list can never be indexed out of range; a length mismatch is
    /// surfaced as an error marker inside the generated entry.
    void emit_oneof(const std::string& member_name,
                    const std::string& record_name,
                    const std::vector<std::string>& alt_types,
                    const std::vector<std::uint32_t>& tags){
        if (!first_field) io << ",";
        io << "\n pb::oneof<&" << record_name << "::" << member_name;
        const std::size_t paired = alt_types.size() < tags.size() ? alt_types.size()
                                                                  : tags.size();
        for (std::size_t i = 0; i < paired; ++i) {
            io << ", pb::alt<" << alt_types[i] << ", " << tags[i] << ">";
        }
        if (alt_types.size() != tags.size()) {
            io << " /* ERROR: oneof alternative/tag count mismatch */";
        }
        io << ">{}";
        first_field = false;
    }

    /// Emits `pb::field<N, &Record::member, pb::wire::<spec>_t>{}` — the same
    /// shape as type_insert_impl plus an explicit wire-spec template argument.
    /// @param tag         field number
    /// @param field_name  member identifier
    /// @param record_name qualified record type
    /// @param wire_spec   spec name without the `_t` suffix, e.g. "sint32",
    ///                    "fixed64", "sfixed32"
    void emit_field_with_spec(std::uint32_t tag,
                              const std::string& field_name,
                              const std::string& record_name,
                              const std::string& wire_spec){
        if (!first_field) io << ",";
        io << "\n pb::field<" << tag
           << ", &" << record_name << "::" << field_name
           << ", pb::wire::" << wire_spec << "_t>{}";
        first_field = false;
    }

    /// Emits `pb::adapter_field<N, &Record::member, pb::<Name>_adapter>{}`.
    /// @param tag          field number
    /// @param field_name   member identifier
    /// @param record_name  qualified record type
    /// @param adapter_name adapter shorthand ("Timestamp", "Duration"); the
    ///                     `_adapter` suffix is appended here
    void emit_adapter_field(std::uint32_t tag,
                            const std::string& field_name,
                            const std::string& record_name,
                            const std::string& adapter_name){
        if (!first_field) io << ",";
        io << "\n pb::adapter_field<" << tag
           << ", &" << record_name << "::" << field_name
           << ", pb::" << adapter_name << "_adapter>{}";
        first_field = false;
    }

private:
    std::string record_name;   // record of the descriptor currently being emitted
    bool first_field = true;   // true until the current tuple has received an entry
};

tests/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,19 @@ foreach(fixture IN LISTS FIXTURES)
7777
add_h5cpp_fixture_test(${fixture})
7878
endforeach()
7979

80+
set(PB_FIXTURES
81+
pb_primitives
82+
pb_strings_enums
83+
pb_composites
84+
pb_variant
85+
pb_wire_specs
86+
pb_chrono
87+
)
88+
89+
foreach(fixture IN LISTS PB_FIXTURES)
90+
add_h5cpp_backend_fixture_test(${fixture} protobuf)
91+
endforeach()
92+
8093
set(RLP_FIXTURES
8194
rlp_primitives
8295
rlp_arrays

tests/fixtures/pb_chrono.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#include "pb_stub.hpp"
2+
3+
#include <chrono>
4+
#include <cstdint>
5+
6+
// Tier-4 fixture (stage 6): chrono adapter annotation. A user struct with
7+
// std::chrono::system_clock::time_point + std::chrono::nanoseconds members
8+
// gets pb::adapter_field<N, Ptr, pb::Timestamp_adapter> / Duration_adapter
9+
// entries in the emitted descriptor. The library bridges the C++ types
10+
// to proto3 google.protobuf.Timestamp / Duration wire messages.
11+
namespace sn::pb_test {
12+
13+
struct event_t {
14+
[[clang::annotate("pb::field=1")]]
15+
std::int64_t id;
16+
17+
[[clang::annotate("pb::field=2")]]
18+
[[clang::annotate("pb::adapter=Timestamp")]]
19+
std::chrono::system_clock::time_point when;
20+
21+
[[clang::annotate("pb::field=3")]]
22+
[[clang::annotate("pb::adapter=Duration")]]
23+
std::chrono::nanoseconds ttl;
24+
};
25+
26+
} // namespace sn::pb_test
27+
28+
void use() {
29+
sn::pb_test::event_t e{};
30+
auto buf = pb::encode(e);
31+
(void)buf;
32+
}

tests/fixtures/pb_composites.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "pb_stub.hpp"
2+
3+
#include <cstdint>
4+
#include <optional>
5+
#include <string>
6+
#include <vector>
7+
8+
// Tier-3 fixture (stage 3): composite shapes — std::vector (repeated),
9+
// std::optional (proto3 optional explicit presence). The compiler emits the
10+
// same pb::field<N, &T::m>{} entries; pb.hpp's trait dispatch handles the
11+
// shape at compile time via is_pb_repeated_v and is_pb_optional_v.
12+
//
13+
// stdlib types are NOT walked into for descriptor emission (only the outer
14+
// user record gets a descriptor_t<>).
15+
namespace sn::pb_test {
16+
17+
struct profile_t {
18+
[[clang::annotate("pb::field=1")]] std::string name;
19+
[[clang::annotate("pb::field=2")]] std::vector<std::int32_t> scores;
20+
[[clang::annotate("pb::field=3")]] std::optional<std::string> nickname;
21+
[[clang::annotate("pb::field=4")]] std::vector<std::string> aliases;
22+
[[clang::annotate("pb::field=5")]] std::optional<std::int64_t> user_id;
23+
};
24+
25+
} // namespace sn::pb_test
26+
27+
void use() {
28+
sn::pb_test::profile_t p{};
29+
auto buf = pb::encode(p);
30+
(void)buf;
31+
}

tests/fixtures/pb_primitives.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#include "pb_stub.hpp"
2+
3+
#include <cstdint>
4+
5+
// Tier-1 fixture: numeric scalars only. Every member carries
6+
// [[clang::annotate("pb::field=N")]]; the compiler emits a
7+
// pb::meta::descriptor_t<T> specialization with one pb::field<N,&T::m>{}
8+
// entry per member, in source-declaration order.
9+
namespace sn::pb_test {
10+
struct primitives_t {
11+
[[clang::annotate("pb::field=1")]] bool _bool;
12+
[[clang::annotate("pb::field=2")]] std::int32_t _i32;
13+
[[clang::annotate("pb::field=3")]] std::int64_t _i64;
14+
[[clang::annotate("pb::field=4")]] std::uint32_t _u32;
15+
[[clang::annotate("pb::field=5")]] std::uint64_t _u64;
16+
[[clang::annotate("pb::field=6")]] float _f;
17+
[[clang::annotate("pb::field=7")]] double _d;
18+
};
19+
} // namespace sn::pb_test
20+
21+
void use() {
22+
sn::pb_test::primitives_t p{};
23+
auto buf = pb::encode(p);
24+
(void)buf;
25+
}

tests/fixtures/pb_strings_enums.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#include "pb_stub.hpp"
2+
3+
#include <cstdint>
4+
#include <string>
5+
6+
// Tier-1 fixture (stage 2): non-POD members — std::string is the canonical
7+
// length-delimited scalar; enum class is the canonical proto3 enum mapping.
8+
// The compiler emits a single descriptor_t<log_entry_t> with one pb::field
9+
// entry per member; std::string is NOT walked into (it's a stdlib leaf).
10+
namespace sn::pb_test {
11+
12+
enum class severity_e : std::int32_t {
13+
UNSPECIFIED = 0,
14+
INFO = 1,
15+
WARN = 2,
16+
ERROR = 3,
17+
};
18+
19+
struct log_entry_t {
20+
[[clang::annotate("pb::field=1")]] std::int64_t timestamp_ns;
21+
[[clang::annotate("pb::field=2")]] std::string message;
22+
[[clang::annotate("pb::field=3")]] severity_e level;
23+
[[clang::annotate("pb::field=4")]] double elapsed_ms;
24+
};
25+
26+
} // namespace sn::pb_test
27+
28+
void use() {
29+
sn::pb_test::log_entry_t e{};
30+
auto buf = pb::encode(e);
31+
(void)buf;
32+
}

tests/fixtures/pb_variant.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include "pb_stub.hpp"
2+
3+
#include <cstdint>
4+
#include <string>
5+
#include <variant>
6+
7+
// Tier-3 fixture (stage 4): std::variant as proto3 oneof. The variant must
8+
// start with std::monostate (the "absent" state); each subsequent alternative
9+
// gets a field number from the [[clang::annotate("pb::oneof_tags=N1,N2,…")]]
10+
// list on the variant member. Compiler emits a pb::oneof<&T::v,
11+
// pb::alt<T1, N1>, ...>{} entry in the descriptor.
12+
namespace sn::pb_test {
13+
14+
struct event_t {
15+
[[clang::annotate("pb::field=1")]] std::int64_t timestamp_ns;
16+
[[clang::annotate("pb::oneof_tags=5,6,7")]]
17+
std::variant<std::monostate, std::string, std::int64_t, double> payload;
18+
};
19+
20+
} // namespace sn::pb_test
21+
22+
void use() {
23+
sn::pb_test::event_t e{};
24+
auto buf = pb::encode(e);
25+
(void)buf;
26+
}

0 commit comments

Comments
 (0)