From dc3bafab9e006335fd9494f77050bfe24f3dd249 Mon Sep 17 00:00:00 2001 From: silverweed Date: Tue, 28 Apr 2026 11:44:23 +0200 Subject: [PATCH 1/6] [ntuple] Fix RFieldFundamental::GenerateColumns using the wrong repr idx (cherry picked from commit b73cca9c6584592648ca68aa08e229cc204ab6c5) --- .../inc/ROOT/RField/RFieldFundamental.hxx | 4 +-- tree/ntuple/test/ntuple_multi_column.cxx | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx b/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx index 3fcaf295b5cabd..7e596770d4b845 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx @@ -400,11 +400,11 @@ protected: fAvailableColumns.emplace_back(ROOT::Internal::RColumn::Create(onDiskTypes[0], 0, representationIndex)); if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Trunc) { const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId()); - const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]); + const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]); column->SetBitsOnStorage(coldesc.GetBitsOnStorage()); } else if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Quant) { const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId()); - const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]); + const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]); assert(coldesc.GetValueRange().has_value()); const auto [valMin, valMax] = *coldesc.GetValueRange(); column->SetBitsOnStorage(coldesc.GetBitsOnStorage()); diff --git a/tree/ntuple/test/ntuple_multi_column.cxx b/tree/ntuple/test/ntuple_multi_column.cxx index 7a4e6bb6eb4a43..4cc69ec5cfeff0 100644 --- a/tree/ntuple/test/ntuple_multi_column.cxx +++ b/tree/ntuple/test/ntuple_multi_column.cxx @@ -377,6 +377,36 @@ TEST(RNTuple, MultiColumnRepresentationBulk) EXPECT_FLOAT_EQ(2.0, arr[0]); } +TEST(RNTuple, MultiColumnRepresentationVariableBitWidth) +{ + FileRaii fileGuard("test_ntuple_multi_column_representation_varbitwidth.root"); + + { + auto model = RNTupleModel::Create(); + auto fldPx = std::make_unique>("px"); + fldPx->SetTruncated(26); + fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal32}, {ROOT::ENTupleColumnType::kReal32Trunc}}); + model->AddField(std::move(fldPx)); + auto ptrPx = model->GetDefaultEntry().GetPtr("px"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + *ptrPx = 1.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetConstField("px")), 1); + *ptrPx = 2.0; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + auto fldPx = reader->GetModel().GetDefaultEntry().GetPtr("px"); + + reader->LoadEntry(0); + EXPECT_FLOAT_EQ(1.0, *fldPx); + reader->LoadEntry(1); + EXPECT_FLOAT_EQ(2.0, *fldPx); +} + TEST(RNTuple, MultiColumnRepresentationDedup) { FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root"); From 66307bdba8b8caae6334cc0d4cc1954e5c57afc5 Mon Sep 17 00:00:00 2001 From: silverweed Date: Fri, 17 Apr 2026 11:08:30 +0200 Subject: [PATCH 2/6] [ntuple] update merger test to make sure we test nRepetitions (cherry picked from commit 3ad754799b8cdcef52b887fd218b9a928f9950af) --- tree/ntuple/test/ntuple_merger.cxx | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tree/ntuple/test/ntuple_merger.cxx b/tree/ntuple/test/ntuple_merger.cxx index de27ead34d3f95..8e1279e3ed1438 100644 --- a/tree/ntuple/test/ntuple_merger.cxx +++ b/tree/ntuple/test/ntuple_merger.cxx @@ -1015,12 +1015,13 @@ TEST(RNTupleMerger, MergeLateModelExtension) { auto model = RNTupleModel::Create(); auto fieldFoo = model->MakeField>("foo"); - auto fieldVfoo = model->MakeField>("vfoo"); + auto fieldVfoo = model->MakeField[3]>("vfoo"); auto fieldBar = model->MakeField("bar"); auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard1.GetPath(), RNTupleWriteOptions()); for (size_t i = 0; i < 10; ++i) { fieldFoo->insert(std::make_pair(std::to_string(i), i * 123)); - *fieldVfoo = {(int)i * 123}; + fieldVfoo[0] = {(int)i * 123}; + fieldVfoo[2] = {(int)i * 345}; *fieldBar = i * 321; ntuple->Fill(); } @@ -1031,14 +1032,15 @@ TEST(RNTupleMerger, MergeLateModelExtension) auto model = RNTupleModel::Create(); auto fieldBaz = model->MakeField("baz"); auto fieldFoo = model->MakeField>("foo"); - auto fieldVfoo = model->MakeField>("vfoo"); + auto fieldVfoo = model->MakeField[3]>("vfoo"); auto wopts = RNTupleWriteOptions(); wopts.SetCompression(0); auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard2.GetPath(), wopts); for (size_t i = 0; i < 10; ++i) { *fieldBaz = i * 567; fieldFoo->insert(std::make_pair(std::to_string(i), i * 765)); - *fieldVfoo = {(int)i * 765}; + fieldVfoo[0] = {(int)i * 765}; + fieldVfoo[2] = {(int)i * 987}; ntuple->Fill(); } } @@ -1072,21 +1074,25 @@ TEST(RNTupleMerger, MergeLateModelExtension) auto ntuple = RNTupleReader::Open("ntuple", fileGuard3.GetPath()); EXPECT_EQ(ntuple->GetNEntries(), 20); auto foo = ntuple->GetModel().GetDefaultEntry().GetPtr>("foo"); - auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr>("vfoo"); + auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr[3]>("vfoo"); auto bar = ntuple->GetModel().GetDefaultEntry().GetPtr("bar"); auto baz = ntuple->GetModel().GetDefaultEntry().GetPtr("baz"); for (int i = 0; i < 10; ++i) { ntuple->LoadEntry(i); ASSERT_EQ((*foo)[std::to_string(i)], i * 123); - ASSERT_EQ((*vfoo)[0], i * 123); + ASSERT_EQ(vfoo[0][0], i * 123); + ASSERT_EQ(vfoo[2][0], i * 345); + ASSERT_TRUE(vfoo[1].empty()); ASSERT_EQ(*bar, i * 321); ASSERT_EQ(*baz, 0); } for (int i = 10; i < 20; ++i) { ntuple->LoadEntry(i); ASSERT_EQ((*foo)[std::to_string(i - 10)], (i - 10) * 765); - ASSERT_EQ((*vfoo)[0], (i - 10) * 765); + ASSERT_EQ(vfoo[0][0], (i - 10) * 765); + ASSERT_EQ(vfoo[2][0], (i - 10) * 987); + ASSERT_TRUE(vfoo[1].empty()); ASSERT_EQ(*bar, 0); ASSERT_EQ(*baz, (i - 10) * 567); } From 7dd1dff4fda8d72cf0728feb1f6a194607d9538a Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 22 Apr 2026 14:47:11 +0200 Subject: [PATCH 3/6] [ntuple] Clarify a bit RFieldBase::EntryToColumnElementIndex Also fix the type of result (cherry picked from commit 38883b359b58eb7b3ba7c4733bbe1efa53e4f1b6) --- tree/ntuple/inc/ROOT/RFieldBase.hxx | 7 ++++--- tree/ntuple/src/RFieldBase.cxx | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RFieldBase.hxx b/tree/ntuple/inc/ROOT/RFieldBase.hxx index 3bd6175c2f5671..e6996b97be5a41 100644 --- a/tree/ntuple/inc/ROOT/RFieldBase.hxx +++ b/tree/ntuple/inc/ROOT/RFieldBase.hxx @@ -247,14 +247,15 @@ private: func(target); } - /// Translate an entry index to a column element index of the principal column and vice versa. These functions - /// take into account the role and number of repetitions on each level of the field hierarchy as follows: + /// Translate an entry index to a column element index of the principal column. This function + /// takes into account the role and number of repetitions on each level of the field hierarchy as follows: /// - Top level fields: element index == entry index /// - Record fields propagate their principal column index to the principal columns of direct descendant fields /// - Collection and variant fields set the principal column index of their children to 0 /// /// The column element index also depends on the number of repetitions of each field in the hierarchy, e.g., given a - /// field with type `std::array, 2>`, this function returns 8 for the innermost field. + /// field with type `std::array, 2>`, this function called with `globalIndex == 1` + /// returns 8 for the innermost field. ROOT::NTupleSize_t EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const; /// Flushes data from active columns diff --git a/tree/ntuple/src/RFieldBase.cxx b/tree/ntuple/src/RFieldBase.cxx index 0fd55c0183a678..91296069f0063f 100644 --- a/tree/ntuple/src/RFieldBase.cxx +++ b/tree/ntuple/src/RFieldBase.cxx @@ -668,14 +668,14 @@ void ROOT::RFieldBase::Attach(std::unique_ptr child, std::stri ROOT::NTupleSize_t ROOT::RFieldBase::EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const { - std::size_t result = globalIndex; + ROOT::NTupleSize_t result = globalIndex; for (auto f = this; f != nullptr; f = f->GetParent()) { auto parent = f->GetParent(); if (parent && (parent->GetStructure() == ROOT::ENTupleStructure::kCollection || parent->GetStructure() == ROOT::ENTupleStructure::kVariant)) { return 0U; } - result *= std::max(f->GetNRepetitions(), std::size_t{1U}); + result *= std::max(f->GetNRepetitions(), ROOT::NTupleSize_t{1U}); } return result; } From 9c7dbdaa8719902d9c233d7a2084edd8806f0601 Mon Sep 17 00:00:00 2001 From: silverweed Date: Mon, 4 May 2026 14:45:20 +0200 Subject: [PATCH 4/6] [ntuple] Introduce feature flag 0 (nested deferred columns) In code, this feature is represented by an update in the logic of RClusterDescriptorBuilder::AddExtendedColumnRanges(), which now handles properly the case where a column is added in a later cluster and therefore cannot rely on the (NEntries * NRepetitions) calculation, so it instead copies its FirstElementIndex and NElements from the 0th representation (which is guaranteed to have valid numbers for them). (cherry picked from commit 6c8f6ce07f36b504c4360c348fb34e3525d412d3) --- tree/ntuple/doc/BinaryFormatSpecification.md | 9 +++++++-- tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx | 19 ++++++++++++++++++- tree/ntuple/src/RNTupleDescriptor.cxx | 17 +++++++++++++++-- tree/ntuple/test/ntuple_compat.cxx | 4 ++-- 4 files changed, 42 insertions(+), 7 deletions(-) diff --git a/tree/ntuple/doc/BinaryFormatSpecification.md b/tree/ntuple/doc/BinaryFormatSpecification.md index 2a87e514a87527..bc52bfc4a761f5 100644 --- a/tree/ntuple/doc/BinaryFormatSpecification.md +++ b/tree/ntuple/doc/BinaryFormatSpecification.md @@ -1,4 +1,4 @@ -# RNTuple Binary Format Specification 1.0.2.0 +# RNTuple Binary Format Specification 1.0.2.1 ## Versioning Notes @@ -167,7 +167,12 @@ That means that readers need to continue reading feature flags as long as their Readers should gracefully abort reading when they encounter unknown bits set. -At the moment, there are no feature flag bits defined. +Here is the list of all currently-defined feature flags. Note that the flag name is only for informational purposes +and is not normative. + +| Flag Bit | Introduced in | Name | Meaning | +|----------|---------------|-------------------------|----------------------------------------------| +| 0 | 1.0.2.1 | Nested Deferred Columns | Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended
(i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field
backed by columns with different encoding, e.g. a `vector` whose elements are represented by SplitReal32
in the first ntuple and by Real32 in the second. | ## Frames diff --git a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx index a5a736010c223b..dcd5e0432a7b1a 100644 --- a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx @@ -769,7 +769,24 @@ private: RNTupleDescriptor CloneSchema() const; public: - static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing + /// All known feature flags. + /// Note that the flag values represent the bit _index_, not the already-bitshifted integer. + enum EFeatureFlags { + /// Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended + /// (i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field + /// backed by columns with different encoding, e.g. a vector whose elements are represented by SplitReal32 + /// in the first ntuple and by Real32 in the second. + /// Added in version 1.1.0.0 of the binary format. + kFeatureFlag_NestedDeferredColumns = 0, + // Insert new feature flags here, with contiguous values. If at any point a "hole" appears in the valid feature + // flags values, the check in RNTupleSerialize must be updated. + + // End of regular feature flags + kFeatureFlag_COUNT, + + /// Reserved for forward-compatibility testing + kFeatureFlag_Test = 137 + }; class RColumnDescriptorIterable; class RFieldDescriptorIterable; diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx index f4a8b72b62c8bd..bae1d3ad0ae60f 100644 --- a/tree/ntuple/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/src/RNTupleDescriptor.cxx @@ -961,8 +961,21 @@ ROOT::Internal::RClusterDescriptorBuilder::AddExtendedColumnRanges(const RNTuple // `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the // field zero excluding subfields of collection and variant fields. if (c.IsDeferredColumn()) { - columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions); - columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions); + if (c.GetRepresentationIndex() == 0) { + columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions); + columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions); + } else { + // Deferred representations which are not the first cannot count on the number of elements being + // equal to Entries * nRepetitions because they might have been added in a later cluster. But they + // can rely on the first representation having the correct FirstElement/NElements (by definition + // the first representation cannot be an "extended" one), therefore they can just copy the value + // from it. + const auto &field = desc.GetFieldDescriptor(fieldId); + const auto firstReprColumnId = field.GetLogicalColumnIds()[c.GetIndex()]; + const auto &firstReprColumnRange = fCluster.fColumnRanges[firstReprColumnId]; + columnRange.SetFirstElementIndex(firstReprColumnRange.GetFirstElementIndex()); + columnRange.SetNElements(firstReprColumnRange.GetNElements()); + } if (!columnRange.IsSuppressed()) { auto &pageRange = fCluster.fPageRanges[physicalId]; pageRange.fPhysicalColumnId = physicalId; diff --git a/tree/ntuple/test/ntuple_compat.cxx b/tree/ntuple/test/ntuple_compat.cxx index 7b5abb7b5932db..3f0e1097b68fae 100644 --- a/tree/ntuple/test/ntuple_compat.cxx +++ b/tree/ntuple/test/ntuple_compat.cxx @@ -36,7 +36,7 @@ TEST(RNTupleCompat, FeatureFlag) RNTupleDescriptorBuilder descBuilder; descBuilder.SetVersionForWriting(); descBuilder.SetNTuple("ntpl", ""); - descBuilder.SetFeature(RNTupleDescriptor::kFeatureFlagTest); + descBuilder.SetFeature(RNTupleDescriptor::kFeatureFlag_Test); descBuilder.AddField(RFieldDescriptorBuilder::FromField(ROOT::RFieldZero()).FieldId(0).MakeDescriptor().Unwrap()); ASSERT_TRUE(static_cast(descBuilder.EnsureValidDescriptor())); @@ -87,7 +87,7 @@ TEST(RNTupleCompat, FeatureFlagInFooter) writer->WriteNTupleHeader(buffer.get(), ctx.GetHeaderSize(), ctx.GetHeaderSize()); // Write the feature flags in the footer - descBuilder.SetFeature(RNTupleDescriptor::kFeatureFlagTest); + descBuilder.SetFeature(RNTupleDescriptor::kFeatureFlag_Test); auto szFooter = serializer.SerializeFooter(nullptr, descBuilder.GetDescriptor(), ctx).Unwrap(); buffer = std::make_unique(szFooter); From 3a42a1e0dd24632d2411c5ef6a4269ace030a85b Mon Sep 17 00:00:00 2001 From: silverweed Date: Thu, 18 Jun 2026 10:25:33 +0200 Subject: [PATCH 5/6] [ntuple] patch up column ids in the header extension when shifting (cherry picked from commit d32b64cf4a198dd4802e1856cb34b74d1d04028f) --- tree/ntuple/src/RNTupleDescriptor.cxx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx index bae1d3ad0ae60f..95f104e01c038c 100644 --- a/tree/ntuple/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/src/RNTupleDescriptor.cxx @@ -1393,6 +1393,14 @@ void ROOT::Internal::RNTupleDescriptorBuilder::ShiftAliasColumns(std::uint32_t o R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0); fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c)); } + + // Patch up column ids in the header extension + if (auto &xHeader = fDescriptor.fHeaderExtension) { + for (auto &columnId : xHeader->fExtendedColumnRepresentations) { + if (columnId >= fDescriptor.GetNPhysicalColumns()) + columnId += offset; + } + } } ROOT::RResult ROOT::Internal::RNTupleDescriptorBuilder::AddCluster(RClusterDescriptor &&clusterDesc) From 609ba8123a7aea17824551f845071bf3b294f555 Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 17 Jun 2026 17:02:01 +0200 Subject: [PATCH 6/6] [ntuple] Allow adding duplicate column representations This is necessary to support multiple representations that use the same column type but different metadata (e.g. different bit width on Real32Trunc columns) (cherry picked from commit 6bf5355cc45d8566ae4558e535637088b5ef4d9b) --- tree/ntuple/src/RFieldBase.cxx | 5 +---- tree/ntuple/test/ntuple_multi_column.cxx | 9 --------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/tree/ntuple/src/RFieldBase.cxx b/tree/ntuple/src/RFieldBase.cxx index 91296069f0063f..c67b4941570337 100644 --- a/tree/ntuple/src/RFieldBase.cxx +++ b/tree/ntuple/src/RFieldBase.cxx @@ -835,10 +835,7 @@ void ROOT::RFieldBase::SetColumnRepresentatives(const RColumnRepresentations::Se if (itRepresentative == std::end(validTypes)) throw RException(R__FAIL("invalid column representative")); - // don't add a duplicate representation - if (std::find_if(fColumnRepresentatives.begin(), fColumnRepresentatives.end(), - [&r](const auto &rep) { return r == rep.get(); }) == fColumnRepresentatives.end()) - fColumnRepresentatives.emplace_back(*itRepresentative); + fColumnRepresentatives.emplace_back(*itRepresentative); } } diff --git a/tree/ntuple/test/ntuple_multi_column.cxx b/tree/ntuple/test/ntuple_multi_column.cxx index 4cc69ec5cfeff0..963c5fabd9277e 100644 --- a/tree/ntuple/test/ntuple_multi_column.cxx +++ b/tree/ntuple/test/ntuple_multi_column.cxx @@ -406,12 +406,3 @@ TEST(RNTuple, MultiColumnRepresentationVariableBitWidth) reader->LoadEntry(1); EXPECT_FLOAT_EQ(2.0, *fldPx); } - -TEST(RNTuple, MultiColumnRepresentationDedup) -{ - FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root"); - - auto fldPx = RFieldBase::Create("px", "float").Unwrap(); - fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal16}, {ROOT::ENTupleColumnType::kReal16}}); - EXPECT_EQ(fldPx->GetColumnRepresentatives().size(), 1); -}