From 13ce8b8a7a99e432d63cbfba854e430acf5ebfc1 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 28 Mar 2026 22:02:58 +0100 Subject: [PATCH 1/5] [df] simple test for RNTupleCardinality field --- tree/dataframe/test/datasource_ntuple.cxx | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tree/dataframe/test/datasource_ntuple.cxx b/tree/dataframe/test/datasource_ntuple.cxx index 9e25a83f1a060..91fb026581078 100644 --- a/tree/dataframe/test/datasource_ntuple.cxx +++ b/tree/dataframe/test/datasource_ntuple.cxx @@ -68,6 +68,8 @@ class RNTupleDSTest : public ::testing::Test { auto fldVecElectron = model->MakeField>("VecElectron"); fldVecElectron->push_back(*fldElectron); fldVecElectron->push_back(*fldElectron); + auto fldNElectron = std::make_unique>>("nElectron"); + model->AddProjectedField(std::move(fldNElectron), [](const std::string &) { return "VecElectron"; }); { auto ntuple = RNTupleWriter::Recreate(std::move(model), fNtplName, fFileName); ntuple->Fill(); @@ -84,7 +86,7 @@ TEST_F(RNTupleDSTest, ColTypeNames) RNTupleDS ds(fNtplName, fFileName); auto colNames = ds.GetColumnNames(); - ASSERT_EQ(15, colNames.size()); + ASSERT_EQ(16, colNames.size()); EXPECT_TRUE(ds.HasColumn("pt")); EXPECT_TRUE(ds.HasColumn("energy")); @@ -96,12 +98,14 @@ TEST_F(RNTupleDSTest, ColTypeNames) EXPECT_TRUE(ds.HasColumn("R_rdf_sizeof_VecElectron")); EXPECT_TRUE(ds.HasColumn("VecElectron.pt")); EXPECT_TRUE(ds.HasColumn("R_rdf_sizeof_VecElectron.pt")); + EXPECT_TRUE(ds.HasColumn("nElectron")); EXPECT_FALSE(ds.HasColumn("Address")); EXPECT_STREQ("std::string", ds.GetTypeName("tag").c_str()); EXPECT_STREQ("float", ds.GetTypeName("energy").c_str()); EXPECT_STREQ("std::size_t", ds.GetTypeName("R_rdf_sizeof_jets").c_str()); EXPECT_STREQ("ROOT::VecOps::RVec", ds.GetTypeName("rvec").c_str()); + EXPECT_STREQ("ROOT::RNTupleCardinality", ds.GetTypeName("nElectron").c_str()); try { ds.GetTypeName("Address"); @@ -142,6 +146,13 @@ TEST_F(RNTupleDSTest, CardinalityColumn) EXPECT_EQ(3, *max_rvec2); } +TEST_F(RNTupleDSTest, ProjectedCardinalityColumn) +{ + auto df = ROOT::RDF::FromRNTuple(fNtplName, fFileName); + + EXPECT_EQ(2u, *df.Filter("nElectron == 2").Max("nElectron")); +} + static void ReadTest(const std::string &name, const std::string &fname) { auto df = ROOT::RDF::FromRNTuple(name, fname); From cbbb7dded3bda13eb5a527006dd37a542d8178c3 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 28 Mar 2026 23:56:14 +0100 Subject: [PATCH 2/5] [df] make RRDFCardinalityField templated --- tree/dataframe/src/RNTupleDS.cxx | 26 ++++++++++++++--------- tree/dataframe/test/datasource_ntuple.cxx | 4 +++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tree/dataframe/src/RNTupleDS.cxx b/tree/dataframe/src/RNTupleDS.cxx index 261327e29794e..57562091e10f8 100644 --- a/tree/dataframe/src/RNTupleDS.cxx +++ b/tree/dataframe/src/RNTupleDS.cxx @@ -32,6 +32,7 @@ #include #include #include +#include #include // clang-format off @@ -53,25 +54,30 @@ namespace ROOT::Internal::RDF { /// collection sizes. It is used to provide the "number of" RDF columns for collections, e.g. /// `R_rdf_sizeof_jets` for a collection named `jets`. /// -/// This field owns the collection offset field but instead of exposing the collection offsets it exposes -/// the collection sizes (offset(N+1) - offset(N)). For the time being, we offer this functionality only in RDataFrame. -/// TODO(jblomer): consider providing a general set of useful virtual fields as part of RNTuple. +/// This is similar to the RCardinalityField but it presents itself as an integer type. +/// The template argument T must be an integral type. +template class RRDFCardinalityField final : public ROOT::RFieldBase { + static_assert(std::is_integral_v, "T must be an integral type"); + protected: std::unique_ptr CloneImpl(std::string_view newName) const final { return std::make_unique(newName); } - void ConstructValue(void *where) const final { *static_cast(where) = 0; } + void ConstructValue(void *where) const final { *static_cast(where) = 0; } // We construct these fields and know that they match the page source void ReconcileOnDiskField(const RNTupleDescriptor &) final {} public: RRDFCardinalityField(std::string_view name) - : ROOT::RFieldBase(name, "std::size_t", ROOT::ENTupleStructure::kPlain, false /* isSimple */) + : ROOT::RFieldBase(name, ROOT::Internal::GetRenormalizedTypeName(typeid(T)), ROOT::ENTupleStructure::kPlain, + false /* isSimple */) { } + RRDFCardinalityField(const RRDFCardinalityField &other) = delete; + RRDFCardinalityField &operator=(const RRDFCardinalityField &other) = delete; RRDFCardinalityField(RRDFCardinalityField &&other) = default; RRDFCardinalityField &operator=(RRDFCardinalityField &&other) = default; ~RRDFCardinalityField() override = default; @@ -92,8 +98,8 @@ class RRDFCardinalityField final : public ROOT::RFieldBase { GenerateColumnsImpl(desc); } - size_t GetValueSize() const final { return sizeof(std::size_t); } - size_t GetAlignment() const final { return alignof(std::size_t); } + std::size_t GetValueSize() const final { return sizeof(T); } + std::size_t GetAlignment() const final { return alignof(T); } /// Get the number of elements of the collection identified by globalIndex void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final @@ -101,7 +107,7 @@ class RRDFCardinalityField final : public ROOT::RFieldBase { RNTupleLocalIndex collectionStart; ROOT::NTupleSize_t size; fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &size); - *static_cast(to) = size; + *static_cast(to) = size; } /// Get the number of elements of the collection identified by clusterIndex @@ -110,7 +116,7 @@ class RRDFCardinalityField final : public ROOT::RFieldBase { RNTupleLocalIndex collectionStart; ROOT::NTupleSize_t size; fPrincipalColumn->GetCollectionInfo(localIndex, &collectionStart, &size); - *static_cast(to) = size; + *static_cast(to) = size; } }; @@ -337,7 +343,7 @@ void ROOT::RDF::RNTupleDS::AddField(const ROOT::RNTupleDescriptor &desc, std::st if (info.fNRepetitions > 0) { cardinalityField = std::make_unique(name, info.fNRepetitions); } else { - cardinalityField = std::make_unique(name); + cardinalityField = std::make_unique>(name); } cardinalityField->SetOnDiskId(info.fFieldId); } diff --git a/tree/dataframe/test/datasource_ntuple.cxx b/tree/dataframe/test/datasource_ntuple.cxx index 91fb026581078..575e8ed29464e 100644 --- a/tree/dataframe/test/datasource_ntuple.cxx +++ b/tree/dataframe/test/datasource_ntuple.cxx @@ -1,4 +1,5 @@ #include +#include #include #include @@ -14,6 +15,7 @@ #include "ClassWithArrays.h" #include +#include #include @@ -103,7 +105,7 @@ TEST_F(RNTupleDSTest, ColTypeNames) EXPECT_STREQ("std::string", ds.GetTypeName("tag").c_str()); EXPECT_STREQ("float", ds.GetTypeName("energy").c_str()); - EXPECT_STREQ("std::size_t", ds.GetTypeName("R_rdf_sizeof_jets").c_str()); + EXPECT_EQ(ROOT::Internal::GetRenormalizedTypeName(typeid(std::size_t)), ds.GetTypeName("R_rdf_sizeof_jets")); EXPECT_STREQ("ROOT::VecOps::RVec", ds.GetTypeName("rvec").c_str()); EXPECT_STREQ("ROOT::RNTupleCardinality", ds.GetTypeName("nElectron").c_str()); From 13fae1a6af3be428a2bdff451c3584e3dba96a66 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sun, 29 Mar 2026 21:58:34 +0200 Subject: [PATCH 3/5] [df] present RNTuple cardinality fields as integer --- tree/dataframe/src/RNTupleDS.cxx | 12 ++++++++++++ tree/dataframe/test/dataframe_snapshot_ntuple.cxx | 5 +---- tree/dataframe/test/datasource_ntuple.cxx | 4 +++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tree/dataframe/src/RNTupleDS.cxx b/tree/dataframe/src/RNTupleDS.cxx index 57562091e10f8..110dfde2f3791 100644 --- a/tree/dataframe/src/RNTupleDS.cxx +++ b/tree/dataframe/src/RNTupleDS.cxx @@ -331,6 +331,18 @@ void ROOT::RDF::RNTupleDS::AddField(const ROOT::RNTupleDescriptor &desc, std::st if (!fieldOrException) return; auto valueField = fieldOrException.Unwrap(); + if (const auto cardinalityField = dynamic_cast(valueField.get())) { + // Cardinality fields in RDataFrame are presented as integers + if (cardinalityField->As32Bit()) { + valueField = + std::make_unique>(fieldDesc.GetFieldName()); + } else if (cardinalityField->As64Bit()) { + valueField = + std::make_unique>(fieldDesc.GetFieldName()); + } else { + R__ASSERT(false && "cardinality field stored with an unexpected integer type"); + } + } valueField->SetOnDiskId(fieldId); for (auto &f : *valueField) { f.SetOnDiskId(desc.FindFieldId(f.GetFieldName(), f.GetParent()->GetOnDiskId())); diff --git a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx index 0d5fdd6acf7ed..6784d4f559bb4 100644 --- a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx +++ b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx @@ -563,10 +563,7 @@ TEST(RDFSnapshotRNTuple, CardinalityColumns) opts.fMode = "UPDATE"; opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple; ROOT::RDataFrame df("ntuple", fileGuard.GetPath()); - - ROOT_EXPECT_WARNING(df.Snapshot("ntuple_snap", fileGuard.GetPath(), "", opts), "Snapshot", - "Column \"nElectrons\" is a read-only \"ROOT::RNTupleCardinality\" column. It " - "will be snapshot as its inner type \"std::uint32_t\" instead."); + df.Snapshot("ntuple_snap", fileGuard.GetPath(), "", opts); ROOT::RDataFrame sdf("ntuple_snap", fileGuard.GetPath()); EXPECT_EQ("std::uint32_t", sdf.GetColumnType("nElectrons")); diff --git a/tree/dataframe/test/datasource_ntuple.cxx b/tree/dataframe/test/datasource_ntuple.cxx index 575e8ed29464e..30d7f62cc87fb 100644 --- a/tree/dataframe/test/datasource_ntuple.cxx +++ b/tree/dataframe/test/datasource_ntuple.cxx @@ -107,7 +107,7 @@ TEST_F(RNTupleDSTest, ColTypeNames) EXPECT_STREQ("float", ds.GetTypeName("energy").c_str()); EXPECT_EQ(ROOT::Internal::GetRenormalizedTypeName(typeid(std::size_t)), ds.GetTypeName("R_rdf_sizeof_jets")); EXPECT_STREQ("ROOT::VecOps::RVec", ds.GetTypeName("rvec").c_str()); - EXPECT_STREQ("ROOT::RNTupleCardinality", ds.GetTypeName("nElectron").c_str()); + EXPECT_STREQ("std::uint64_t", ds.GetTypeName("nElectron").c_str()); try { ds.GetTypeName("Address"); @@ -153,6 +153,8 @@ TEST_F(RNTupleDSTest, ProjectedCardinalityColumn) auto df = ROOT::RDF::FromRNTuple(fNtplName, fFileName); EXPECT_EQ(2u, *df.Filter("nElectron == 2").Max("nElectron")); + + EXPECT_EQ(2u, *df.Filter([](std::uint64_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); } static void ReadTest(const std::string &name, const std::string &fname) From 8ed2edf4a81801708a069dce0d16e564830df534 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sun, 29 Mar 2026 22:36:53 +0200 Subject: [PATCH 4/5] [df] allow for alternative int types for cardinality columns --- tree/dataframe/src/RNTupleDS.cxx | 103 ++++++++++++++++------ tree/dataframe/test/datasource_ntuple.cxx | 8 ++ 2 files changed, 82 insertions(+), 29 deletions(-) diff --git a/tree/dataframe/src/RNTupleDS.cxx b/tree/dataframe/src/RNTupleDS.cxx index 110dfde2f3791..edf19e2755eb1 100644 --- a/tree/dataframe/src/RNTupleDS.cxx +++ b/tree/dataframe/src/RNTupleDS.cxx @@ -50,6 +50,41 @@ // clang-format on namespace ROOT::Internal::RDF { +class RRDFCardinalityFieldBase : public ROOT::RFieldBase { +protected: + // We construct these fields and know that they match the page source + void ReconcileOnDiskField(const RNTupleDescriptor &) final {} + + RRDFCardinalityFieldBase(std::string_view name, std::string_view type) + : ROOT::RFieldBase(name, type, ROOT::ENTupleStructure::kPlain, false /* isSimple */) + { + } + + // Field is only used for reading + void GenerateColumns() final { throw RException(R__FAIL("Cardinality fields must only be used for reading")); } + void GenerateColumns(const ROOT::RNTupleDescriptor &desc) final + { + GenerateColumnsImpl(desc); + } + +public: + RRDFCardinalityFieldBase(const RRDFCardinalityFieldBase &other) = delete; + RRDFCardinalityFieldBase &operator=(const RRDFCardinalityFieldBase &other) = delete; + RRDFCardinalityFieldBase(RRDFCardinalityFieldBase &&other) = default; + RRDFCardinalityFieldBase &operator=(RRDFCardinalityFieldBase &&other) = default; + ~RRDFCardinalityFieldBase() override = default; + + const RColumnRepresentations &GetColumnRepresentations() const final + { + static RColumnRepresentations representations({{ENTupleColumnType::kSplitIndex64}, + {ENTupleColumnType::kIndex64}, + {ENTupleColumnType::kSplitIndex32}, + {ENTupleColumnType::kIndex32}}, + {}); + return representations; + } +}; + /// An artificial field that transforms an RNTuple column that contains the offset of collections into /// collection sizes. It is used to provide the "number of" RDF columns for collections, e.g. /// `R_rdf_sizeof_jets` for a collection named `jets`. @@ -57,7 +92,7 @@ namespace ROOT::Internal::RDF { /// This is similar to the RCardinalityField but it presents itself as an integer type. /// The template argument T must be an integral type. template -class RRDFCardinalityField final : public ROOT::RFieldBase { +class RRDFCardinalityField final : public RRDFCardinalityFieldBase { static_assert(std::is_integral_v, "T must be an integral type"); protected: @@ -67,13 +102,9 @@ class RRDFCardinalityField final : public ROOT::RFieldBase { } void ConstructValue(void *where) const final { *static_cast(where) = 0; } - // We construct these fields and know that they match the page source - void ReconcileOnDiskField(const RNTupleDescriptor &) final {} - public: RRDFCardinalityField(std::string_view name) - : ROOT::RFieldBase(name, ROOT::Internal::GetRenormalizedTypeName(typeid(T)), ROOT::ENTupleStructure::kPlain, - false /* isSimple */) + : RRDFCardinalityFieldBase(name, ROOT::Internal::GetRenormalizedTypeName(typeid(T))) { } RRDFCardinalityField(const RRDFCardinalityField &other) = delete; @@ -82,22 +113,6 @@ class RRDFCardinalityField final : public ROOT::RFieldBase { RRDFCardinalityField &operator=(RRDFCardinalityField &&other) = default; ~RRDFCardinalityField() override = default; - const RColumnRepresentations &GetColumnRepresentations() const final - { - static RColumnRepresentations representations({{ENTupleColumnType::kSplitIndex64}, - {ENTupleColumnType::kIndex64}, - {ENTupleColumnType::kSplitIndex32}, - {ENTupleColumnType::kIndex32}}, - {}); - return representations; - } - // Field is only used for reading - void GenerateColumns() final { throw RException(R__FAIL("Cardinality fields must only be used for reading")); } - void GenerateColumns(const ROOT::RNTupleDescriptor &desc) final - { - GenerateColumnsImpl(desc); - } - std::size_t GetValueSize() const final { return sizeof(T); } std::size_t GetAlignment() const final { return alignof(T); } @@ -150,7 +165,8 @@ class RArraySizeField final : public ROOT::RFieldBase { public: RArraySizeField(std::string_view name, std::size_t arrayLength) - : ROOT::RFieldBase(name, "std::size_t", ROOT::ENTupleStructure::kPlain, false /* isSimple */), + : ROOT::RFieldBase(name, ROOT::Internal::GetRenormalizedTypeName(typeid(std::size_t)), + ROOT::ENTupleStructure::kPlain, false /* isSimple */), fArrayLength(arrayLength) { } @@ -493,7 +509,7 @@ ROOT::RFieldBase *ROOT::RDF::RNTupleDS::GetFieldWithTypeChecks(std::string_view // If the field corresponding to the provided name is not a cardinality column and the requested type is different // from the proto field that was created when the data source was constructed, we first have to create an // alternative proto field for the column reader. Otherwise, we can directly use the existing proto field. - if (fieldName.substr(0, 13) != "R_rdf_sizeof_" && requestedType != fColumnTypes[index]) { + if (requestedType != fColumnTypes[index]) { auto &altProtoFields = fAlternativeProtoFields[index]; // If we can find the requested type in the registered alternative protofields, return the corresponding field @@ -506,12 +522,41 @@ ROOT::RFieldBase *ROOT::RDF::RNTupleDS::GetFieldWithTypeChecks(std::string_view } // Otherwise, create a new protofield and register it in the alternatives before returning - auto newAltProtoFieldOrException = ROOT::RFieldBase::Create(std::string(fieldName), requestedType); - if (!newAltProtoFieldOrException) { - throw std::runtime_error("RNTupleDS: Could not create field with type \"" + requestedType + - "\" for column \"" + std::string(fieldName) + "\""); + std::unique_ptr newAltProtoField; + const std::string strName = std::string(fieldName); + if (dynamic_cast(fProtoFields[index].get())) { + if (requestedType == "bool") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "char") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "std::int8_t") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "std::uint8_t") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "std::int16_t") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "std::uint16_t") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "std::int32_t") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "std::uint32_t") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "std::int64_t") { + newAltProtoField = std::make_unique>(strName); + } else if (requestedType == "std::uint64_t") { + newAltProtoField = std::make_unique>(strName); + } else { + throw std::runtime_error("RNTupleDS: Could not create field with type \"" + requestedType + + "\" for column \"" + std::string(fieldName) + "\""); + } + } else { + auto newAltProtoFieldOrException = ROOT::RFieldBase::Create(strName, requestedType); + if (!newAltProtoFieldOrException) { + throw std::runtime_error("RNTupleDS: Could not create field with type \"" + requestedType + + "\" for column \"" + std::string(fieldName) + "\""); + } + newAltProtoField = newAltProtoFieldOrException.Unwrap(); } - auto newAltProtoField = newAltProtoFieldOrException.Unwrap(); newAltProtoField->SetOnDiskId(fProtoFields[index]->GetOnDiskId()); auto *newField = newAltProtoField.get(); altProtoFields.emplace_back(std::move(newAltProtoField)); diff --git a/tree/dataframe/test/datasource_ntuple.cxx b/tree/dataframe/test/datasource_ntuple.cxx index 30d7f62cc87fb..1e7e6bafa7f09 100644 --- a/tree/dataframe/test/datasource_ntuple.cxx +++ b/tree/dataframe/test/datasource_ntuple.cxx @@ -155,6 +155,14 @@ TEST_F(RNTupleDSTest, ProjectedCardinalityColumn) EXPECT_EQ(2u, *df.Filter("nElectron == 2").Max("nElectron")); EXPECT_EQ(2u, *df.Filter([](std::uint64_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(2u, *df.Filter([](std::int32_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(2u, *df.Filter([](std::uint32_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(2u, *df.Filter([](std::int16_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(2u, *df.Filter([](std::uint16_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(2u, *df.Filter([](std::int8_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(2u, *df.Filter([](std::uint8_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(2u, *df.Filter([](char x) { return x == 2; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(2u, *df.Filter([](bool x) { return x; }, {"nElectron"}).Max("nElectron")); } static void ReadTest(const std::string &name, const std::string &fname) From a7fa47f1cb57942664724456a0aa4ea8cfed8425 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sun, 29 Mar 2026 23:40:35 +0200 Subject: [PATCH 5/5] [df] integer overflow check for RNTuple cardinality columns --- tree/dataframe/src/RNTupleDS.cxx | 13 +++++++++ tree/dataframe/test/datasource_ntuple.cxx | 32 +++++++++++++---------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/tree/dataframe/src/RNTupleDS.cxx b/tree/dataframe/src/RNTupleDS.cxx index edf19e2755eb1..312baa948669d 100644 --- a/tree/dataframe/src/RNTupleDS.cxx +++ b/tree/dataframe/src/RNTupleDS.cxx @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -95,6 +96,16 @@ template class RRDFCardinalityField final : public RRDFCardinalityFieldBase { static_assert(std::is_integral_v, "T must be an integral type"); + inline void CheckSize(ROOT::NTupleSize_t size) const + { + if constexpr (std::is_same_v || std::is_same_v) + return; + if (size > std::numeric_limits::max()) { + throw RException(R__FAIL(std::string("integer overflow in field ") + GetFieldName() + + ". Please read the column with a larger-sized integral type.")); + } + } + protected: std::unique_ptr CloneImpl(std::string_view newName) const final { @@ -122,6 +133,7 @@ class RRDFCardinalityField final : public RRDFCardinalityFieldBase { RNTupleLocalIndex collectionStart; ROOT::NTupleSize_t size; fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &size); + CheckSize(size); *static_cast(to) = size; } @@ -131,6 +143,7 @@ class RRDFCardinalityField final : public RRDFCardinalityFieldBase { RNTupleLocalIndex collectionStart; ROOT::NTupleSize_t size; fPrincipalColumn->GetCollectionInfo(localIndex, &collectionStart, &size); + CheckSize(size); *static_cast(to) = size; } }; diff --git a/tree/dataframe/test/datasource_ntuple.cxx b/tree/dataframe/test/datasource_ntuple.cxx index 1e7e6bafa7f09..a264614ed78b1 100644 --- a/tree/dataframe/test/datasource_ntuple.cxx +++ b/tree/dataframe/test/datasource_ntuple.cxx @@ -68,8 +68,8 @@ class RNTupleDSTest : public ::testing::Test { auto fldElectron = model->MakeField("electron"); fldElectron->pt = 137.0; auto fldVecElectron = model->MakeField>("VecElectron"); - fldVecElectron->push_back(*fldElectron); - fldVecElectron->push_back(*fldElectron); + for (int i = 0; i < 128; ++i) + fldVecElectron->push_back(*fldElectron); auto fldNElectron = std::make_unique>>("nElectron"); model->AddProjectedField(std::move(fldNElectron), [](const std::string &) { return "VecElectron"; }); { @@ -152,17 +152,21 @@ TEST_F(RNTupleDSTest, ProjectedCardinalityColumn) { auto df = ROOT::RDF::FromRNTuple(fNtplName, fFileName); - EXPECT_EQ(2u, *df.Filter("nElectron == 2").Max("nElectron")); - - EXPECT_EQ(2u, *df.Filter([](std::uint64_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); - EXPECT_EQ(2u, *df.Filter([](std::int32_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); - EXPECT_EQ(2u, *df.Filter([](std::uint32_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); - EXPECT_EQ(2u, *df.Filter([](std::int16_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); - EXPECT_EQ(2u, *df.Filter([](std::uint16_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); - EXPECT_EQ(2u, *df.Filter([](std::int8_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); - EXPECT_EQ(2u, *df.Filter([](std::uint8_t x) { return x == 2; }, {"nElectron"}).Max("nElectron")); - EXPECT_EQ(2u, *df.Filter([](char x) { return x == 2; }, {"nElectron"}).Max("nElectron")); - EXPECT_EQ(2u, *df.Filter([](bool x) { return x; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(128u, *df.Filter("nElectron == 128").Max("nElectron")); + + EXPECT_EQ(128u, *df.Filter([](std::uint64_t x) { return x == 128; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(128u, *df.Filter([](std::int32_t x) { return x == 128; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(128u, *df.Filter([](std::uint32_t x) { return x == 128; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(128u, *df.Filter([](std::int16_t x) { return x == 128; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(128u, *df.Filter([](std::uint16_t x) { return x == 128; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(128u, *df.Filter([](std::uint8_t x) { return x == 128; }, {"nElectron"}).Max("nElectron")); + EXPECT_EQ(128u, *df.Filter([](bool x) { return x; }, {"nElectron"}).Max("nElectron")); + try { + *df.Filter([](std::int8_t x) { return x == 0; }, {"nElectron"}).Count(); + FAIL() << "integer overflow should fail"; + } catch (const ROOT::RException &e) { + EXPECT_THAT(e.what(), ::testing::HasSubstr("integer overflow")); + } } static void ReadTest(const std::string &name, const std::string &fname) @@ -206,7 +210,7 @@ static void ReadTest(const std::string &name, const std::string &fname) EXPECT_TRUE(All(rvec->at(0) == ROOT::RVecI{1, 2, 3})); EXPECT_TRUE(All(vectorasrvec->at(0) == ROOT::RVecF{1.f, 2.f})); EXPECT_FLOAT_EQ(137.0, sumElectronPt.GetValue()); - EXPECT_FLOAT_EQ(2. * 137.0, sumVecElectronPt.GetValue()); + EXPECT_FLOAT_EQ(128. * 137.0, sumVecElectronPt.GetValue()); } static void ChainTest(const std::string &name, const std::string &fname)