Skip to content

Commit 2b7d775

Browse files
facontidavideclaude
andcommitted
feat(pj_base): align VideoFrame proto to Foxglove + add zero-copy view decode
Swap PJ.VideoFrame field numbers to match foxglove.CompressedVideo (timestamp=1, frame_id=2, data=3, format=4) so a single decoder serves both the canonical and Foxglove video schemas. Add deserializeVideoFrameView(), which returns a frame whose `data` aliases the input buffer (no copy of the H.264/H.265 bitstream) and carries the caller-supplied anchor; the owning deserializeVideoFrame() is unchanged. BREAKING (wire format): canonical PJ.VideoFrame payloads written before this change (format=3, data=4) are no longer read correctly and must be regenerated. The C++ sdk::VideoFrame struct layout is unchanged, so abidiff stays clean. Per the versioning policy a canonical-schema wire-format change is a MAJOR; the maintainer should cut the corresponding release on merge. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 4d39204 commit 2b7d775

5 files changed

Lines changed: 156 additions & 30 deletions

File tree

pj_base/include/pj_base/builtin/video_frame_codec.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <string_view>
88
#include <vector>
99

10+
#include "pj_base/buffer_anchor.hpp"
1011
#include "pj_base/builtin/video_frame.hpp"
1112
#include "pj_base/expected.hpp"
1213

@@ -22,7 +23,17 @@ inline constexpr std::string_view kSchemaVideoFrame = "PJ.VideoFrame";
2223
[[nodiscard]] std::vector<uint8_t> serializeVideoFrame(const sdk::VideoFrame& frame);
2324

2425
/// Decodes canonical PJ.VideoFrame wire bytes into sdk::VideoFrame. The
25-
/// returned frame owns its bytes via `anchor`.
26+
/// returned frame owns its bytes via `anchor` (a fresh copy of the `data`
27+
/// field). Use this when the wire buffer does not outlive the call.
2628
[[nodiscard]] Expected<sdk::VideoFrame> deserializeVideoFrame(const uint8_t* data, size_t size);
2729

30+
/// Decodes canonical PJ.VideoFrame / foxglove.CompressedVideo wire bytes into
31+
/// sdk::VideoFrame without copying the compressed bitstream. The returned
32+
/// frame's `data` ALIASES the input buffer and its `anchor` is set to the
33+
/// supplied `anchor`, which the caller must keep alive for as long as the frame
34+
/// (and its `data` span) is used. The two schemas are wire-identical, so this
35+
/// one decoder serves both.
36+
[[nodiscard]] Expected<sdk::VideoFrame> deserializeVideoFrameView(
37+
const uint8_t* data, size_t size, sdk::BufferAnchor anchor);
38+
2839
} // namespace PJ

pj_base/proto/pj/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ rationale.
4040
- `OccupancyGridUpdate`
4141
- **`Log.proto`** — a single textual log message (severity level + text + originating name) for a log/console panel; mirrors the core of Foxglove's `Log` (file/line omitted).
4242
- `Log`
43-
- **`VideoFrame.proto`** — one frame of an inter-frame-coded video stream (`h264`, `h265`, `vp9`, `av1`) when per-frame `Image` messages would be wasteful.
43+
- **`VideoFrame.proto`** — one frame of an inter-frame-coded video stream (`h264`, `h265`, `vp9`, `av1`) when per-frame `Image` messages would be wasteful. Field layout is wire-identical to `foxglove.CompressedVideo` (timestamp=1, frame_id=2, data=3, format=4), so one decoder parses both.
4444
- `VideoFrame`
4545
- **`AssetVideo.proto`** — reference to a file-backed video plus typed playback metadata (path, MIME type, dimensions, frame rate) so consumers can size playback windows without opening the file.
4646
- `AssetVideo`

pj_base/proto/pj/VideoFrame.proto

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ package PJ;
3030
// On the SDK side, `data` is exposed as `Span<const uint8_t>` plus a `BufferAnchor` that keeps the underlying
3131
// allocation alive (same byte-backed view pattern as Image, DepthImage, and PointCloud). The anchor is a C++ lifetime
3232
// concept with no wire-format equivalent.
33+
//
34+
// The field layout is wire-identical to `foxglove.CompressedVideo` (timestamp=1, frame_id=2, data=3, format=4), so a
35+
// single decoder parses both this canonical schema and the Foxglove one.
3336
message VideoFrame {
3437
// Timestamp of the frame
3538
google.protobuf.Timestamp timestamp = 1;
@@ -38,9 +41,9 @@ message VideoFrame {
3841
// the camera (into the scene).
3942
string frame_id = 2;
4043

41-
// Codec identifier, lowercase. Recognized values: "h264", "h265", "vp9", "av1".
42-
string format = 3;
43-
4444
// Compressed bitstream containing exactly one frame given prior stream state.
45-
bytes data = 4;
45+
bytes data = 3;
46+
47+
// Codec identifier, lowercase. Recognized values: "h264", "h265", "vp9", "av1".
48+
string format = 4;
4649
}

pj_base/src/builtin/video_frame_codec.cpp

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,13 @@ using builtin_wire::Reader;
2020
using builtin_wire::Tag;
2121
using builtin_wire::WireType;
2222
using builtin_wire::Writer;
23+
using sdk::BufferAnchor;
2324
using sdk::VideoFrame;
2425

25-
bool readBytesIntoFrame(Reader& reader, VideoFrame& out) {
26+
// Reads the length-delimited `data` field (field 3) into an owning copy. The
27+
// returned frame's `anchor` owns a fresh vector, so `data` stays valid past the
28+
// lifetime of the wire buffer.
29+
bool readBytesOwning(Reader& reader, VideoFrame& out) {
2630
const uint8_t* data = nullptr;
2731
size_t size = 0;
2832
if (!reader.readBytes(data, size)) {
@@ -34,29 +38,26 @@ bool readBytesIntoFrame(Reader& reader, VideoFrame& out) {
3438
return true;
3539
}
3640

37-
} // namespace
38-
39-
std::vector<uint8_t> serializeVideoFrame(const VideoFrame& frame) {
40-
std::vector<uint8_t> out;
41-
Writer writer(out);
42-
43-
writer.message(1, [&](Writer& nested) { builtin_wire::writeTimestamp(nested, frame.timestamp_ns); });
44-
writer.string(2, frame.frame_id);
45-
writer.string(3, frame.format);
46-
writer.bytes(4, frame.data.data(), frame.data.size());
47-
48-
return out;
49-
}
50-
51-
Expected<sdk::VideoFrame> deserializeVideoFrame(const uint8_t* data, size_t size) {
52-
if (data == nullptr || size == 0) {
53-
return unexpected(std::string("VideoFrame wire: empty buffer"));
41+
// Reads the length-delimited `data` field (field 3) as a non-owning view that
42+
// ALIASES the wire buffer. The caller-supplied `anchor` keeps that buffer alive;
43+
// no copy of the bitstream is made.
44+
bool readBytesView(Reader& reader, const BufferAnchor& anchor, VideoFrame& out) {
45+
const uint8_t* data = nullptr;
46+
size_t size = 0;
47+
if (!reader.readBytes(data, size)) {
48+
return false;
5449
}
50+
out.data = Span<const uint8_t>(data, size);
51+
out.anchor = anchor;
52+
return true;
53+
}
5554

56-
Reader reader(data, size);
57-
sdk::VideoFrame frame;
58-
59-
const bool ok = parseFields(reader, [&](Tag tag, Reader& r) {
55+
// Drives the shared field dispatch. `read_data` consumes the `data` field
56+
// (field 3); the two deserialize entry points differ only in whether that
57+
// callback copies or aliases the wire bytes. All other fields are identical.
58+
template <typename ReadData>
59+
bool parseVideoFrame(Reader& reader, VideoFrame& frame, ReadData&& read_data) {
60+
return parseFields(reader, [&](Tag tag, Reader& r) {
6061
switch (tag.field) {
6162
case 1:
6263
if (tag.type != WireType::kLengthDelimited) {
@@ -72,16 +73,58 @@ Expected<sdk::VideoFrame> deserializeVideoFrame(const uint8_t* data, size_t size
7273
if (tag.type != WireType::kLengthDelimited) {
7374
return false;
7475
}
75-
return r.readString(frame.format);
76+
return read_data(r, frame);
7677
case 4:
7778
if (tag.type != WireType::kLengthDelimited) {
7879
return false;
7980
}
80-
return readBytesIntoFrame(r, frame);
81+
return r.readString(frame.format);
8182
default:
8283
return false;
8384
}
8485
});
86+
}
87+
88+
} // namespace
89+
90+
std::vector<uint8_t> serializeVideoFrame(const VideoFrame& frame) {
91+
std::vector<uint8_t> out;
92+
Writer writer(out);
93+
94+
writer.message(1, [&](Writer& nested) { builtin_wire::writeTimestamp(nested, frame.timestamp_ns); });
95+
writer.string(2, frame.frame_id);
96+
writer.bytes(3, frame.data.data(), frame.data.size());
97+
writer.string(4, frame.format);
98+
99+
return out;
100+
}
101+
102+
Expected<sdk::VideoFrame> deserializeVideoFrame(const uint8_t* data, size_t size) {
103+
if (data == nullptr || size == 0) {
104+
return unexpected(std::string("VideoFrame wire: empty buffer"));
105+
}
106+
107+
Reader reader(data, size);
108+
sdk::VideoFrame frame;
109+
110+
const bool ok = parseVideoFrame(reader, frame, [](Reader& r, VideoFrame& f) { return readBytesOwning(r, f); });
111+
112+
if (!ok) {
113+
return unexpected(std::string("VideoFrame wire: decode failed"));
114+
}
115+
116+
return frame;
117+
}
118+
119+
Expected<sdk::VideoFrame> deserializeVideoFrameView(const uint8_t* data, size_t size, sdk::BufferAnchor anchor) {
120+
if (data == nullptr || size == 0) {
121+
return unexpected(std::string("VideoFrame wire: empty buffer"));
122+
}
123+
124+
Reader reader(data, size);
125+
sdk::VideoFrame frame;
126+
127+
const bool ok = parseVideoFrame(reader, frame, [&](Reader& r, VideoFrame& f) { return readBytesView(r, anchor, f); });
85128

86129
if (!ok) {
87130
return unexpected(std::string("VideoFrame wire: decode failed"));

pj_base/tests/video_frame_codec_test.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <cstdint>
99
#include <cstring>
10+
#include <memory>
1011
#include <vector>
1112

1213
#include "protobuf_wire_test_helpers.hpp"
@@ -43,5 +44,73 @@ TEST(VideoFrameCodecTest, RoundTripRealisticPayload) {
4344
EXPECT_EQ(std::memcmp(out->data.data(), payload.data(), payload.size()), 0);
4445
}
4546

47+
// Locks the on-wire field layout to match foxglove.CompressedVideo:
48+
// timestamp=1, frame_id=2, data=3 (bytes), format=4 (string). The golden bytes
49+
// are built independently of the codec so a future field-number regression is
50+
// caught here.
51+
TEST(VideoFrameCodecTest, WireLayoutMatchesFoxglove) {
52+
VideoFrame in;
53+
in.timestamp_ns = 1'700'000'000'500'000'000LL;
54+
in.frame_id = "cam0";
55+
in.format = "h265";
56+
const std::vector<uint8_t> payload = {0xDE, 0xAD, 0xBE, 0xEF};
57+
in.data = Span<const uint8_t>(payload.data(), payload.size());
58+
59+
std::vector<uint8_t> expected;
60+
pb::appendTag(expected, 1, 2); // timestamp (message)
61+
pb::appendLenDelim(expected, pb::encodeTimestamp(in.timestamp_ns));
62+
pb::appendTag(expected, 2, 2); // frame_id (string)
63+
pb::appendString(expected, in.frame_id);
64+
pb::appendTag(expected, 3, 2); // data (bytes)
65+
pb::appendBytes(expected, payload.data(), payload.size());
66+
pb::appendTag(expected, 4, 2); // format (string)
67+
pb::appendString(expected, in.format);
68+
69+
const auto bytes = serializeVideoFrame(in);
70+
EXPECT_EQ(bytes, expected);
71+
}
72+
73+
// deserializeVideoFrameView must NOT copy the compressed bitstream: the
74+
// returned data span has to point straight into the wire buffer, and the frame
75+
// must keep the supplied anchor alive.
76+
TEST(VideoFrameCodecTest, ViewAliasesInputBuffer) {
77+
VideoFrame in;
78+
in.timestamp_ns = 42;
79+
in.frame_id = "cam";
80+
in.format = "av1";
81+
const std::vector<uint8_t> payload = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06};
82+
in.data = Span<const uint8_t>(payload.data(), payload.size());
83+
84+
// Own the wire bytes through a shared_ptr so it can double as the anchor.
85+
auto wire = std::make_shared<std::vector<uint8_t>>(serializeVideoFrame(in));
86+
sdk::BufferAnchor anchor = wire;
87+
88+
auto out = deserializeVideoFrameView(wire->data(), wire->size(), anchor);
89+
ASSERT_TRUE(out.has_value());
90+
EXPECT_EQ(out->timestamp_ns, in.timestamp_ns);
91+
EXPECT_EQ(out->frame_id, in.frame_id);
92+
EXPECT_EQ(out->format, in.format);
93+
94+
// Round-trips the payload contents...
95+
ASSERT_EQ(out->data.size(), payload.size());
96+
EXPECT_EQ(std::memcmp(out->data.data(), payload.data(), payload.size()), 0);
97+
98+
// ...and aliases the input buffer: the span points inside `wire`, not at a
99+
// fresh copy.
100+
const uint8_t* wire_begin = wire->data();
101+
const uint8_t* wire_end = wire->data() + wire->size();
102+
EXPECT_GE(out->data.data(), wire_begin);
103+
EXPECT_LE(out->data.data() + out->data.size(), wire_end);
104+
105+
// The frame's anchor must reference the same allocation we handed in, keeping
106+
// the aliased bytes alive.
107+
EXPECT_EQ(out->anchor, anchor);
108+
}
109+
110+
TEST(VideoFrameCodecTest, ViewEmptyBufferProducesError) {
111+
sdk::BufferAnchor anchor;
112+
EXPECT_FALSE(deserializeVideoFrameView(nullptr, 0, anchor).has_value());
113+
}
114+
46115
} // namespace
47116
} // namespace PJ

0 commit comments

Comments
 (0)