Skip to content

Commit 82f1ed6

Browse files
data_load_ulog: harden raw record extraction and test the real decode path (#167)
The importer flattens each ULog MessageFormat and reads raw bytes at resolved field offsets, rather than using ulog_cpp's typed accessors. That offset/array/nesting logic lived inline in three hand-synchronized traversals and was only ever validated indirectly (the tests exercised ulog_cpp's own accessors, never the plugin's raw path). - Extract the SDK-free traversal into ulog_flatten.hpp: a single forEachFlatLeaf primitive (yielding absolute offset, type, size) plus collectFlatFieldNames, with one isSkippedField rule. The type/value passes become thin visitors, so the skip/array/nested-offset logic exists in exactly one place instead of three. - Bounds-check value extraction: a leaf whose offset+size exceeds the record size now pushes NullValue instead of reading out of bounds, preserving name<->value alignment on truncated/corrupt records. - Add <cstdio>, <map>, <memory> (previously pulled in transitively). - Add tests that drive the real traversal against a nested+array+scalar record with known bytes, asserting names, decoded values at the reported offsets, the sizeBytes() boundary, and name<->leaf alignment. No behavior change on valid logs. All 10 tests pass. Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent ab94211 commit 82f1ed6

3 files changed

Lines changed: 265 additions & 102 deletions

File tree

data_load_ulog/tests/ulog_source_test.cpp

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
#include <gtest/gtest.h>
22

3+
#include <algorithm>
4+
#include <cstddef>
5+
#include <cstdint>
36
#include <cstring>
47
#include <memory>
58
#include <string>
69
#include <ulog_cpp/data_container.hpp>
710
#include <ulog_cpp/reader.hpp>
811
#include <vector>
912

13+
#include "../ulog_flatten.hpp"
14+
1015
namespace {
1116

1217
// Helper to build a binary ULog stream in memory.
@@ -357,4 +362,149 @@ TEST(ULogSourceTest, AllNumericTypes) {
357362
EXPECT_EQ(sample["flag"].as<bool>(), true);
358363
}
359364

365+
// --- Tests for the plugin's own flatten/extract path (ulog_flatten.hpp) ---
366+
//
367+
// The tests above validate ulog_cpp's typed accessors. The importer does NOT use
368+
// those at runtime: it flattens the format and reads raw bytes at resolved
369+
// offsets. These tests drive that production path directly.
370+
371+
// Decode a scalar leaf from raw bytes the same way the plugin does, returning a
372+
// double for comparison against known inputs.
373+
double decodeLeaf(const std::vector<uint8_t>& raw, size_t offset, ulog_cpp::Field::BasicType type) {
374+
const uint8_t* p = raw.data() + offset;
375+
switch (type) {
376+
case ulog_cpp::Field::BasicType::INT8:
377+
return static_cast<double>(*reinterpret_cast<const int8_t*>(p));
378+
case ulog_cpp::Field::BasicType::UINT8:
379+
case ulog_cpp::Field::BasicType::CHAR:
380+
return static_cast<double>(*p);
381+
case ulog_cpp::Field::BasicType::BOOL:
382+
return static_cast<double>(*p != 0);
383+
case ulog_cpp::Field::BasicType::INT16: {
384+
int16_t v;
385+
std::memcpy(&v, p, sizeof(v));
386+
return v;
387+
}
388+
case ulog_cpp::Field::BasicType::UINT16: {
389+
uint16_t v;
390+
std::memcpy(&v, p, sizeof(v));
391+
return v;
392+
}
393+
case ulog_cpp::Field::BasicType::INT32: {
394+
int32_t v;
395+
std::memcpy(&v, p, sizeof(v));
396+
return v;
397+
}
398+
case ulog_cpp::Field::BasicType::UINT32: {
399+
uint32_t v;
400+
std::memcpy(&v, p, sizeof(v));
401+
return v;
402+
}
403+
case ulog_cpp::Field::BasicType::INT64: {
404+
int64_t v;
405+
std::memcpy(&v, p, sizeof(v));
406+
return static_cast<double>(v);
407+
}
408+
case ulog_cpp::Field::BasicType::UINT64: {
409+
uint64_t v;
410+
std::memcpy(&v, p, sizeof(v));
411+
return static_cast<double>(v);
412+
}
413+
case ulog_cpp::Field::BasicType::FLOAT: {
414+
float v;
415+
std::memcpy(&v, p, sizeof(v));
416+
return v;
417+
}
418+
case ulog_cpp::Field::BasicType::DOUBLE: {
419+
double v;
420+
std::memcpy(&v, p, sizeof(v));
421+
return v;
422+
}
423+
default:
424+
return 0.0;
425+
}
426+
}
427+
428+
// Build a message that exercises nesting + arrays + a scalar tail, then verify the
429+
// flattened names and the values decoded at the offsets forEachFlatLeaf reports.
430+
// This locks in the nested base-offset accumulation the importer relies on.
431+
TEST(ULogFlattenTest, NamesAndOffsetsMatchKnownRecord) {
432+
ULogBuilder builder;
433+
builder.writeHeader(0);
434+
builder.writeFlagBits();
435+
builder.writeFormat("vec3:float x;float y;float z");
436+
builder.writeFormat("sample:uint64_t timestamp;vec3 pos;uint8_t mode;float[2] arr;int32_t code");
437+
builder.writeSubscription(1, 0, "sample");
438+
439+
{
440+
FieldDataBuilder fields;
441+
fields.append<uint64_t>(7000000); // timestamp (skipped)
442+
fields.append<float>(1.0f); // pos.x
443+
fields.append<float>(2.0f); // pos.y
444+
fields.append<float>(3.0f); // pos.z
445+
fields.append<uint8_t>(42); // mode
446+
fields.append<float>(4.0f); // arr.00
447+
fields.append<float>(5.0f); // arr.01
448+
fields.append<int32_t>(-77); // code
449+
builder.writeData(1, fields.build());
450+
}
451+
452+
auto container = parseBuilder(builder);
453+
ASSERT_FALSE(container->hadFatalError());
454+
auto sub = container->subscription("sample");
455+
ASSERT_NE(sub, nullptr);
456+
ASSERT_EQ(sub->size(), 1u);
457+
458+
// Names, in flattened order.
459+
std::vector<std::string> names;
460+
ulog_flatten::collectFlatFieldNames(*sub->format(), {}, names);
461+
const std::vector<std::string> expected_names = {"pos.x", "pos.y", "pos.z", "mode", "arr.00", "arr.01", "code"};
462+
EXPECT_EQ(names, expected_names);
463+
464+
// Values decoded at the offsets forEachFlatLeaf reports, in the same order.
465+
const auto& raw = sub->rawSamples()[0].data();
466+
std::vector<double> values;
467+
size_t max_end = 0;
468+
ulog_flatten::forEachFlatLeaf(
469+
*sub->format(), 0, [&](size_t offset, ulog_cpp::Field::BasicType type, size_t elem_size) {
470+
ASSERT_LE(offset + elem_size, raw.size()); // offsets stay inside the record
471+
max_end = std::max(max_end, offset + elem_size);
472+
values.push_back(decodeLeaf(raw, offset, type));
473+
});
474+
475+
const std::vector<double> expected_values = {1.0, 2.0, 3.0, 42.0, 4.0, 5.0, -77.0};
476+
ASSERT_EQ(values.size(), expected_values.size());
477+
for (size_t i = 0; i < values.size(); ++i) {
478+
EXPECT_DOUBLE_EQ(values[i], expected_values[i]) << "leaf " << i << " (" << names[i] << ")";
479+
}
480+
481+
// The furthest byte any leaf reads must equal the record size, so a caller can
482+
// bound-check with `offset + size <= raw_size` and a truncated record trips it.
483+
EXPECT_EQ(max_end, raw.size());
484+
EXPECT_EQ(max_end, static_cast<size_t>(sub->format()->sizeBytes()));
485+
}
486+
487+
// Names and leaf count must stay in lockstep so the importer can zip them.
488+
TEST(ULogFlattenTest, NameAndLeafCountsAgree) {
489+
ULogBuilder builder;
490+
builder.writeHeader(0);
491+
builder.writeFlagBits();
492+
builder.writeFormat("vec3:float x;float y;float z");
493+
builder.writeFormat("sample:uint64_t timestamp;vec3 pos;float[2] arr;int32_t code");
494+
builder.writeSubscription(1, 0, "sample");
495+
496+
auto container = parseBuilder(builder);
497+
ASSERT_FALSE(container->hadFatalError());
498+
auto sub = container->subscription("sample");
499+
ASSERT_NE(sub, nullptr);
500+
501+
std::vector<std::string> names;
502+
ulog_flatten::collectFlatFieldNames(*sub->format(), {}, names);
503+
504+
size_t leaf_count = 0;
505+
ulog_flatten::forEachFlatLeaf(*sub->format(), 0, [&](size_t, ulog_cpp::Field::BasicType, size_t) { ++leaf_count; });
506+
507+
EXPECT_EQ(names.size(), leaf_count);
508+
}
509+
360510
} // namespace

data_load_ulog/ulog_flatten.hpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#pragma once
2+
3+
// SDK-free helpers that flatten a ulog_cpp MessageFormat into the ordered list
4+
// of scalar leaves PlotJuggler ingests. The traversal logic (field skipping,
5+
// array expansion, and the nested base-offset accumulation) lives here so it can
6+
// be unit-tested directly, without pulling in the plugin SDK. The SDK-coupled
7+
// pieces (ValueRef construction, PrimitiveType mapping) stay in ulog_source.cpp.
8+
9+
#include <cstddef>
10+
#include <cstdio>
11+
#include <string>
12+
#include <ulog_cpp/messages.hpp>
13+
#include <vector>
14+
15+
namespace ulog_flatten {
16+
17+
/// Fields the importer never emits as time series: the leading message timestamp
18+
/// (used as the time axis) and ULog alignment padding.
19+
inline bool isSkippedField(const std::string& name) {
20+
return name == "timestamp" || name.starts_with("_padding");
21+
}
22+
23+
/// Recursively collect flattened field names for a ulog_cpp MessageFormat.
24+
/// Nested fields are separated by "." and array elements get ".00", ".01", etc.
25+
inline void collectFlatFieldNames(
26+
const ulog_cpp::MessageFormat& format, const std::string& prefix, std::vector<std::string>& out) {
27+
for (const auto& field_ptr : format.fields()) {
28+
const auto& field = *field_ptr;
29+
const std::string& name = field.name();
30+
if (isSkippedField(name)) {
31+
continue;
32+
}
33+
34+
std::string new_prefix = prefix.empty() ? name : prefix + "." + name;
35+
36+
int arr_len = field.arrayLength();
37+
int count = (arr_len < 0) ? 1 : arr_len;
38+
bool is_array = (arr_len > 0);
39+
40+
for (int i = 0; i < count; ++i) {
41+
std::string elem_name = new_prefix;
42+
if (is_array) {
43+
char buf[16];
44+
std::snprintf(buf, sizeof(buf), ".%02d", i);
45+
elem_name += buf;
46+
}
47+
48+
if (field.type().type == ulog_cpp::Field::BasicType::NESTED) {
49+
auto nested_fmt = field.nestedFormat();
50+
if (nested_fmt) {
51+
collectFlatFieldNames(*nested_fmt, elem_name, out);
52+
}
53+
} else {
54+
out.push_back(elem_name);
55+
}
56+
}
57+
}
58+
}
59+
60+
/// Visit every scalar leaf of a MessageFormat in the same order as
61+
/// collectFlatFieldNames, invoking `visit(absolute_byte_offset, basic_type,
62+
/// element_size)`. `base_offset` is the byte offset of `format` within the raw
63+
/// record (0 at the top level); nested structs accumulate their start offset so
64+
/// each leaf's offset is absolute, matching ulog_cpp's own resolution scheme
65+
/// (MessageFormat::resolveDefinition assigns per-format 0-based field offsets).
66+
template <typename Visitor>
67+
void forEachFlatLeaf(const ulog_cpp::MessageFormat& format, size_t base_offset, Visitor&& visit) {
68+
for (const auto& field_ptr : format.fields()) {
69+
const auto& field = *field_ptr;
70+
if (isSkippedField(field.name())) {
71+
continue;
72+
}
73+
74+
int arr_len = field.arrayLength();
75+
int count = (arr_len < 0) ? 1 : arr_len;
76+
size_t field_offset = base_offset + static_cast<size_t>(field.offsetInMessage());
77+
78+
if (field.type().type == ulog_cpp::Field::BasicType::NESTED) {
79+
auto nested_fmt = field.nestedFormat();
80+
if (!nested_fmt) {
81+
continue;
82+
}
83+
auto nested_size = static_cast<size_t>(nested_fmt->sizeBytes());
84+
for (size_t i = 0; i < static_cast<size_t>(count); ++i) {
85+
forEachFlatLeaf(*nested_fmt, field_offset + i * nested_size, visit);
86+
}
87+
} else {
88+
size_t elem_size = static_cast<size_t>(field.type().size);
89+
for (size_t i = 0; i < static_cast<size_t>(count); ++i) {
90+
visit(field_offset + i * elem_size, field.type().type, elem_size);
91+
}
92+
}
93+
}
94+
}
95+
96+
} // namespace ulog_flatten

0 commit comments

Comments
 (0)