Skip to content

Commit d9f93eb

Browse files
committed
[ntuple] Support merging columns with same type and different metadata
1 parent 4bb2f69 commit d9f93eb

4 files changed

Lines changed: 96 additions & 36 deletions

File tree

tree/ntuple/src/RFieldBase.cxx

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -834,10 +834,7 @@ void ROOT::RFieldBase::SetColumnRepresentatives(const RColumnRepresentations::Se
834834
if (itRepresentative == std::end(validTypes))
835835
throw RException(R__FAIL("invalid column representative"));
836836

837-
// don't add a duplicate representation
838-
if (std::find_if(fColumnRepresentatives.begin(), fColumnRepresentatives.end(),
839-
[&r](const auto &rep) { return r == rep.get(); }) == fColumnRepresentatives.end())
840-
fColumnRepresentatives.emplace_back(*itRepresentative);
837+
fColumnRepresentatives.emplace_back(*itRepresentative);
841838
}
842839
}
843840

tree/ntuple/src/RNTupleMerger.cxx

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -595,25 +595,18 @@ CompareDescriptorStructure(const ROOT::RNTupleDescriptor &dst, const ROOT::RNTup
595595
const auto &srcCol = src.GetColumnDescriptor(srcColId);
596596
const auto dstColId = dstColumns[dstReprIdx * dstColCardinality + reprColIdx];
597597
const auto &dstCol = dst.GetColumnDescriptor(dstColId);
598-
if (srcCol.GetBitsOnStorage() != dstCol.GetBitsOnStorage() ||
598+
if (srcCol.GetType() != dstCol.GetType() ||
599+
srcCol.GetBitsOnStorage() != dstCol.GetBitsOnStorage() ||
599600
srcCol.GetValueRange() != dstCol.GetValueRange()) {
600-
std::stringstream ss;
601-
ss << "Source field `" << field.fSrc->GetFieldName()
602-
<< "` has a matching column representation as its destination field, however one or "
603-
"more "
604-
"of its columns have different column metadata (bit width and/or value range). "
605-
"Merging variable-sized columns is currently only supported if all metadata is "
606-
"identical between source and destination columns."
607-
<< "\n bit width src: " << srcCol.GetBitsOnStorage()
608-
<< ", dst: " << dstCol.GetBitsOnStorage() << ""
609-
<< "\n value range src: " << srcCol.GetValueRange()
610-
<< ", dst: " << dstCol.GetValueRange();
611-
errors.push_back(ss.str());
601+
matches = false;
612602
break;
613603
}
614604
}
615-
matchingRepr = dstReprIdx;
616-
break;
605+
606+
if (matches) {
607+
matchingRepr = dstReprIdx;
608+
break;
609+
}
617610
}
618611
}
619612

tree/ntuple/test/ntuple_merger.cxx

Lines changed: 87 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4273,10 +4273,17 @@ TEST(RNTupleMerger, MergeReal32Trunc)
42734273
RNTupleMergeOptions opts;
42744274
opts.fMergingMode = mmode;
42754275
auto res = merger.Merge(sourcePtrs, opts);
4276-
// Currently we're not supporting merging columns with the same type but different metadata.
4277-
// TODO: support this.
4278-
EXPECT_FALSE(bool(res));
4279-
EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("have different column metadata"));
4276+
EXPECT_TRUE(bool(res));
4277+
}
4278+
{
4279+
auto reader = ROOT::RNTupleReader::Open("ntuple", fileGuardOut.GetPath());
4280+
EXPECT_EQ(reader->GetNEntries(), 20);
4281+
EXPECT_EQ(reader->GetDescriptor().GetNPhysicalColumns(), 2);
4282+
auto pFlt = reader->GetModel().GetDefaultEntry().GetPtr<float>("flt");
4283+
for (auto i : reader->GetEntryRange()) {
4284+
reader->LoadEntry(i);
4285+
EXPECT_NEAR(*pFlt, i, 0.01f);
4286+
}
42804287
}
42814288
}
42824289
}
@@ -4331,10 +4338,17 @@ TEST(RNTupleMerger, MergeReal32Quant)
43314338
RNTupleMergeOptions opts;
43324339
opts.fMergingMode = mmode;
43334340
auto res = merger.Merge(sourcePtrs, opts);
4334-
// Currently we're not supporting merging columns with the same type but different metadata.
4335-
// TODO: support this.
4336-
ASSERT_FALSE(bool(res));
4337-
EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("have different column metadata"));
4341+
EXPECT_TRUE(bool(res));
4342+
}
4343+
{
4344+
auto reader = ROOT::RNTupleReader::Open("ntuple", fileGuardOut.GetPath());
4345+
EXPECT_EQ(reader->GetNEntries(), 20);
4346+
EXPECT_EQ(reader->GetDescriptor().GetNPhysicalColumns(), 2);
4347+
auto pFlt = reader->GetModel().GetDefaultEntry().GetPtr<float>("flt");
4348+
for (auto i : reader->GetEntryRange()) {
4349+
reader->LoadEntry(i);
4350+
EXPECT_NEAR(*pFlt, i, 0.01f);
4351+
}
43384352
}
43394353
}
43404354
}
@@ -4404,3 +4418,68 @@ TEST(RNTupleMerger, MergeReal32TruncQuantMixed)
44044418
}
44054419
}
44064420
}
4421+
4422+
TEST(RNTupleMerger, MergeRealRegularQuantMixed)
4423+
{
4424+
// Merge two files, both containing the same field, but with the first being a SplitReal64 and the second Real32Quant
4425+
FileRaii fileGuard1("test_ntuple_merge_realregquant_in_1.root");
4426+
{
4427+
auto model = RNTupleModel::Create();
4428+
auto fieldDbl = model->MakeField<double>("dbl");
4429+
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard1.GetPath());
4430+
for (int i = 0; i < 10; ++i) {
4431+
*fieldDbl = i;
4432+
ntuple->Fill();
4433+
}
4434+
}
4435+
FileRaii fileGuard2("test_ntuple_merge_realregquant_in_2.root");
4436+
{
4437+
auto model = RNTupleModel::Create();
4438+
auto field = std::make_unique<RField<double>>("dbl");
4439+
field->SetQuantized(29, {0., 20.});
4440+
model->AddField(std::move(field));
4441+
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard2.GetPath());
4442+
auto fieldDbl = ntuple->GetModel().GetDefaultEntry().GetPtr<double>("dbl");
4443+
for (int i = 0; i < 10; ++i) {
4444+
*fieldDbl = 10 + i;
4445+
ntuple->Fill();
4446+
}
4447+
}
4448+
{
4449+
// Gather the input sources
4450+
std::vector<std::unique_ptr<RPageSource>> sources;
4451+
sources.push_back(RPageSource::Create("ntuple", fileGuard1.GetPath(), RNTupleReadOptions()));
4452+
sources.push_back(RPageSource::Create("ntuple", fileGuard2.GetPath(), RNTupleReadOptions()));
4453+
std::vector<RPageSource *> sourcePtrs;
4454+
for (const auto &s : sources) {
4455+
sourcePtrs.push_back(s.get());
4456+
}
4457+
4458+
// Now merge the inputs
4459+
for (const auto mmode : {ENTupleMergingMode::kFilter, ENTupleMergingMode::kStrict, ENTupleMergingMode::kUnion}) {
4460+
SCOPED_TRACE(std::string("with merging mode = ") + ToString(mmode));
4461+
FileRaii fileGuardOut("test_ntuple_merge_realregquant_out.root");
4462+
{
4463+
auto destination = std::make_unique<RPageSinkFile>("ntuple", fileGuardOut.GetPath(), RNTupleWriteOptions());
4464+
RNTupleMerger merger{std::move(destination)};
4465+
RNTupleMergeOptions opts;
4466+
opts.fMergingMode = mmode;
4467+
auto res = merger.Merge(sourcePtrs, opts);
4468+
EXPECT_TRUE(bool(res));
4469+
}
4470+
{
4471+
auto reader = ROOT::RNTupleReader::Open("ntuple", fileGuardOut.GetPath());
4472+
EXPECT_EQ(reader->GetNEntries(), 20);
4473+
EXPECT_EQ(reader->GetDescriptor().GetNPhysicalColumns(), 2);
4474+
auto pDbl = reader->GetModel().GetDefaultEntry().GetPtr<double>("dbl");
4475+
for (auto i : reader->GetEntryRange()) {
4476+
reader->LoadEntry(i);
4477+
if (i < 10)
4478+
EXPECT_DOUBLE_EQ(*pDbl, i);
4479+
else
4480+
EXPECT_NEAR(*pDbl, i, 0.01f);
4481+
}
4482+
}
4483+
}
4484+
}
4485+
}

tree/ntuple/test/ntuple_multi_column.cxx

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -406,12 +406,3 @@ TEST(RNTuple, MultiColumnRepresentationVariableBitWidth)
406406
reader->LoadEntry(1);
407407
EXPECT_FLOAT_EQ(2.0, *fldPx);
408408
}
409-
410-
TEST(RNTuple, MultiColumnRepresentationDedup)
411-
{
412-
FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root");
413-
414-
auto fldPx = RFieldBase::Create("px", "float").Unwrap();
415-
fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal16}, {ROOT::ENTupleColumnType::kReal16}});
416-
EXPECT_EQ(fldPx->GetColumnRepresentatives().size(), 1);
417-
}

0 commit comments

Comments
 (0)