Skip to content

Commit e659937

Browse files
committed
[ntuple] Support merging columns with same type and different metadata
1 parent a573f5f commit e659937

4 files changed

Lines changed: 91 additions & 49 deletions

File tree

tree/ntuple/src/RFieldBase.cxx

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -835,10 +835,7 @@ void ROOT::RFieldBase::SetColumnRepresentatives(const RColumnRepresentations::Se
835835
if (itRepresentative == std::end(validTypes))
836836
throw RException(R__FAIL("invalid column representative"));
837837

838-
// don't add a duplicate representation
839-
if (std::find_if(fColumnRepresentatives.begin(), fColumnRepresentatives.end(),
840-
[&r](const auto &rep) { return r == rep.get(); }) == fColumnRepresentatives.end())
841-
fColumnRepresentatives.emplace_back(*itRepresentative);
838+
fColumnRepresentatives.emplace_back(*itRepresentative);
842839
}
843840
}
844841

tree/ntuple/src/RNTupleMerger.cxx

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -588,40 +588,15 @@ CompareDescriptorStructure(const ROOT::RNTupleDescriptor &dst, const ROOT::RNTup
588588
const auto &srcCol = src.GetColumnDescriptor(srcColId);
589589
const auto dstColId = dstColumns[dstReprIdx * dstColCardinality + reprColIdx];
590590
const auto &dstCol = dst.GetColumnDescriptor(dstColId);
591-
if (srcCol.GetType() != dstCol.GetType()) {
591+
if (srcCol.GetType() != dstCol.GetType() ||
592+
srcCol.GetBitsOnStorage() != dstCol.GetBitsOnStorage() ||
593+
srcCol.GetValueRange() != dstCol.GetValueRange()) {
592594
matches = false;
593595
break;
594596
}
595597
}
596598

597599
if (matches) {
598-
// If this column representation matches by column type, we need to make sure that it also has
599-
// matching column metadata. Since we currently do not support multiple column representations
600-
// that only differ by such metadata, we forbid merging such columns (e.g. we cannot merge two
601-
// Real32Trunc columns with different bit widths). This could technically be supported, but it
602-
// would require significant effort, so we currently don't.
603-
for (auto reprColIdx = 0u; reprColIdx < srcColCardinality; ++reprColIdx) {
604-
const auto srcColId = srcColumns[srcReprIdx * srcColCardinality + reprColIdx];
605-
const auto &srcCol = src.GetColumnDescriptor(srcColId);
606-
const auto dstColId = dstColumns[dstReprIdx * dstColCardinality + reprColIdx];
607-
const auto &dstCol = dst.GetColumnDescriptor(dstColId);
608-
if (srcCol.GetBitsOnStorage() != dstCol.GetBitsOnStorage() ||
609-
srcCol.GetValueRange() != dstCol.GetValueRange()) {
610-
std::stringstream ss;
611-
ss << "Source field `" << field.fSrc->GetFieldName()
612-
<< "` has a matching column representation as its destination field, however one or "
613-
"more "
614-
"of its columns have different column metadata (bit width and/or value range). "
615-
"Merging variable-sized columns is currently only supported if all metadata is "
616-
"identical between source and destination columns."
617-
<< "\n bit width src: " << srcCol.GetBitsOnStorage()
618-
<< ", dst: " << dstCol.GetBitsOnStorage() << ""
619-
<< "\n value range src: " << srcCol.GetValueRange()
620-
<< ", dst: " << dstCol.GetValueRange();
621-
errors.push_back(ss.str());
622-
break;
623-
}
624-
}
625600
matchingRepr = dstReprIdx;
626601
break;
627602
}

tree/ntuple/test/ntuple_merger.cxx

Lines changed: 87 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4186,10 +4186,17 @@ TEST(RNTupleMerger, MergeReal32Trunc)
41864186
RNTupleMergeOptions opts;
41874187
opts.fMergingMode = mmode;
41884188
auto res = merger.Merge(sourcePtrs, opts);
4189-
// Currently we're not supporting merging columns with the same type but different metadata.
4190-
// TODO: support this.
4191-
EXPECT_FALSE(bool(res));
4192-
EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("have different column metadata"));
4189+
EXPECT_TRUE(bool(res));
4190+
}
4191+
{
4192+
auto reader = ROOT::RNTupleReader::Open("ntuple", fileGuardOut.GetPath());
4193+
EXPECT_EQ(reader->GetNEntries(), 20);
4194+
EXPECT_EQ(reader->GetDescriptor().GetNPhysicalColumns(), 2);
4195+
auto pFlt = reader->GetModel().GetDefaultEntry().GetPtr<float>("flt");
4196+
for (auto i : reader->GetEntryRange()) {
4197+
reader->LoadEntry(i);
4198+
EXPECT_NEAR(*pFlt, i, 0.01f);
4199+
}
41934200
}
41944201
}
41954202
}
@@ -4244,10 +4251,17 @@ TEST(RNTupleMerger, MergeReal32Quant)
42444251
RNTupleMergeOptions opts;
42454252
opts.fMergingMode = mmode;
42464253
auto res = merger.Merge(sourcePtrs, opts);
4247-
// Currently we're not supporting merging columns with the same type but different metadata.
4248-
// TODO: support this.
4249-
ASSERT_FALSE(bool(res));
4250-
EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("have different column metadata"));
4254+
EXPECT_TRUE(bool(res));
4255+
}
4256+
{
4257+
auto reader = ROOT::RNTupleReader::Open("ntuple", fileGuardOut.GetPath());
4258+
EXPECT_EQ(reader->GetNEntries(), 20);
4259+
EXPECT_EQ(reader->GetDescriptor().GetNPhysicalColumns(), 2);
4260+
auto pFlt = reader->GetModel().GetDefaultEntry().GetPtr<float>("flt");
4261+
for (auto i : reader->GetEntryRange()) {
4262+
reader->LoadEntry(i);
4263+
EXPECT_NEAR(*pFlt, i, 0.01f);
4264+
}
42514265
}
42524266
}
42534267
}
@@ -4317,3 +4331,68 @@ TEST(RNTupleMerger, MergeReal32TruncQuantMixed)
43174331
}
43184332
}
43194333
}
4334+
4335+
TEST(RNTupleMerger, MergeRealRegularQuantMixed)
4336+
{
4337+
// Merge two files, both containing the same field, but with the first being a SplitReal64 and the second Real32Quant
4338+
FileRaii fileGuard1("test_ntuple_merge_realregquant_in_1.root");
4339+
{
4340+
auto model = RNTupleModel::Create();
4341+
auto fieldDbl = model->MakeField<double>("dbl");
4342+
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard1.GetPath());
4343+
for (int i = 0; i < 10; ++i) {
4344+
*fieldDbl = i;
4345+
ntuple->Fill();
4346+
}
4347+
}
4348+
FileRaii fileGuard2("test_ntuple_merge_realregquant_in_2.root");
4349+
{
4350+
auto model = RNTupleModel::Create();
4351+
auto field = std::make_unique<RField<double>>("dbl");
4352+
field->SetQuantized(0., 20., 29);
4353+
model->AddField(std::move(field));
4354+
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard2.GetPath());
4355+
auto fieldDbl = ntuple->GetModel().GetDefaultEntry().GetPtr<double>("dbl");
4356+
for (int i = 0; i < 10; ++i) {
4357+
*fieldDbl = 10 + i;
4358+
ntuple->Fill();
4359+
}
4360+
}
4361+
{
4362+
// Gather the input sources
4363+
std::vector<std::unique_ptr<RPageSource>> sources;
4364+
sources.push_back(RPageSource::Create("ntuple", fileGuard1.GetPath(), RNTupleReadOptions()));
4365+
sources.push_back(RPageSource::Create("ntuple", fileGuard2.GetPath(), RNTupleReadOptions()));
4366+
std::vector<RPageSource *> sourcePtrs;
4367+
for (const auto &s : sources) {
4368+
sourcePtrs.push_back(s.get());
4369+
}
4370+
4371+
// Now merge the inputs
4372+
for (const auto mmode : {ENTupleMergingMode::kFilter, ENTupleMergingMode::kStrict, ENTupleMergingMode::kUnion}) {
4373+
SCOPED_TRACE(std::string("with merging mode = ") + ToString(mmode));
4374+
FileRaii fileGuardOut("test_ntuple_merge_realregquant_out.root");
4375+
{
4376+
auto destination = std::make_unique<RPageSinkFile>("ntuple", fileGuardOut.GetPath(), RNTupleWriteOptions());
4377+
RNTupleMerger merger{std::move(destination)};
4378+
RNTupleMergeOptions opts;
4379+
opts.fMergingMode = mmode;
4380+
auto res = merger.Merge(sourcePtrs, opts);
4381+
EXPECT_TRUE(bool(res));
4382+
}
4383+
{
4384+
auto reader = ROOT::RNTupleReader::Open("ntuple", fileGuardOut.GetPath());
4385+
EXPECT_EQ(reader->GetNEntries(), 20);
4386+
EXPECT_EQ(reader->GetDescriptor().GetNPhysicalColumns(), 2);
4387+
auto pDbl = reader->GetModel().GetDefaultEntry().GetPtr<double>("dbl");
4388+
for (auto i : reader->GetEntryRange()) {
4389+
reader->LoadEntry(i);
4390+
if (i < 10)
4391+
EXPECT_DOUBLE_EQ(*pDbl, i);
4392+
else
4393+
EXPECT_NEAR(*pDbl, i, 0.01f);
4394+
}
4395+
}
4396+
}
4397+
}
4398+
}

tree/ntuple/test/ntuple_multi_column.cxx

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -376,12 +376,3 @@ TEST(RNTuple, MultiColumnRepresentationBulk)
376376
arr = static_cast<float *>(bulk.ReadBulk(RNTupleLocalIndex(1, 0), mask.get(), 1));
377377
EXPECT_FLOAT_EQ(2.0, arr[0]);
378378
}
379-
380-
TEST(RNTuple, MultiColumnRepresentationDedup)
381-
{
382-
FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root");
383-
384-
auto fldPx = RFieldBase::Create("px", "float").Unwrap();
385-
fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal16}, {ROOT::ENTupleColumnType::kReal16}});
386-
EXPECT_EQ(fldPx->GetColumnRepresentatives().size(), 1);
387-
}

0 commit comments

Comments
 (0)