Skip to content

Commit 7f40a1b

Browse files
committed
[ntuple] Add test for late column extension
1 parent 369455b commit 7f40a1b

File tree

4 files changed

+327
-0
lines changed

4 files changed

+327
-0
lines changed

tree/ntuple/inc/ROOT/RNTupleFillContext.hxx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131

3232
namespace ROOT {
3333

34+
namespace Internal {
35+
// Used for testing: gives direct access to the page sink referenced by the fill context of the given writer.
// The function is declared as a friend of both RNTupleFillContext and RNTupleWriter so that it can reach
// their private members.
36+
RPageSink &GetWriterSink(ROOT::RNTupleWriter &writer);
37+
} // namespace Internal
38+
3439
// clang-format off
3540
/**
3641
\class ROOT::RNTupleFillContext
@@ -49,6 +54,7 @@ sequential writing, please refer to RNTupleWriter.
4954
class RNTupleFillContext {
5055
friend class ROOT::RNTupleWriter;
5156
friend class RNTupleParallelWriter;
57+
friend Internal::RPageSink &Internal::GetWriterSink(RNTupleWriter &);
5258

5359
private:
5460
/// The page sink's parallel page compression scheduler if IMT is on.

tree/ntuple/inc/ROOT/RNTupleWriter.hxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ class RNTupleWriter {
121121
friend std::unique_ptr<RNTupleWriter>
122122
Experimental::RNTupleWriter_Append(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntuplePath,
123123
ROOT::Experimental::RFile &file, const ROOT::RNTupleWriteOptions &options);
124+
friend Internal::RPageSink &Internal::GetWriterSink(RNTupleWriter &);
124125

125126
private:
126127
RNTupleFillContext fFillContext;

tree/ntuple/src/RNTupleWriter.cxx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,8 @@ ROOT::Experimental::RNTupleWriter_Append(std::unique_ptr<ROOT::RNTupleModel> mod
160160
auto sink = std::make_unique<ROOT::Internal::RPageSinkFile>(ntupleBasename, file, ntupleDir, options);
161161
return ROOT::RNTupleWriter::Create(std::move(model), std::move(sink), options);
162162
}
163+
164+
// Test helper: returns a reference to the page sink that the writer's fill context points to.
// No ownership is transferred; the returned reference refers to the object behind `fSink`.
ROOT::Internal::RPageSink &ROOT::Internal::GetWriterSink(ROOT::RNTupleWriter &writer)
{
   auto &fillContext = writer.fFillContext;
   return *fillContext.fSink;
}

tree/ntuple/test/ntuple_modelext.cxx

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -852,3 +852,318 @@ R"({
852852
// clang-format on
853853
EXPECT_EQ(expect, os.str());
854854
}
855+
856+
// Adds a second column representation ({kReal32}) to an already-written float field, switches the
// primary representation half-way through the second batch of entries, and then checks both the
// data read back and the resulting column/cluster metadata.
TEST(RNTuple, LateColumnExtension)
{
   FileRaii fileGuard("test_ntuple_latecolumnext.ntuple");

   auto model = RNTupleModel::Create();
   // The float field starts out with the column representation {kSplitReal32}.
   auto ptrF = model->MakeField<float>("f");

   auto writer = ROOT::RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath());
   // Entries [0, 100) are written before the schema is modified.
   for (int value = 0; value < 100; ++value) {
      *ptrF = value;
      writer->Fill();
   }

   // Temporarily unfreeze the writer's model to register an additional representation for "f".
   auto &writerModel = const_cast<RNTupleModel &>(writer->GetModel());
   writerModel.Unfreeze();

   auto &fieldF = writerModel.GetMutableField("f");
   ROOT::Internal::RNTupleModelChangeset changeset{writerModel};
   changeset.AddColumnRepr(&fieldF, {{ROOT::ENTupleColumnType::kReal32}});

   writerModel.Freeze();

   // Pass the changeset on to the writer's page sink.
   ROOT::Internal::GetWriterSink(*writer).UpdateSchema(changeset, 0);

   // Entries [100, 150) are still written with the original primary representation ...
   for (int value = 100; value < 150; ++value) {
      *ptrF = value;
      writer->Fill();
   }

   // ... then the cluster is committed and representation 1 becomes the primary one ...
   writer->CommitCluster();
   ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation(fieldF, 1);

   // ... which is used for entries [150, 200).
   for (int value = 150; value < 200; ++value) {
      *ptrF = value;
      writer->Fill();
   }

   writer.reset();

   auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
   // The data must be readable across the representation switch.
   auto viewF = reader->GetView<float>("f");
   EXPECT_EQ(reader->GetNEntries(), 200);
   for (auto entry : reader->GetEntryRange()) {
      EXPECT_FLOAT_EQ(viewF(entry), entry);
   }

   // The field must now own two columns, one per representation.
   const auto &desc = reader->GetDescriptor();
   auto fieldId = desc.FindFieldId("f");
   const auto &fieldDesc = desc.GetFieldDescriptor(fieldId);
   const auto &columnIds = fieldDesc.GetLogicalColumnIds();
   ASSERT_EQ(columnIds.size(), 2);
   EXPECT_EQ(fieldDesc.GetColumnCardinality(), 1);
   const auto &splitColDesc = desc.GetColumnDescriptor(columnIds[0]);
   const auto &plainColDesc = desc.GetColumnDescriptor(columnIds[1]);
   EXPECT_EQ(splitColDesc.GetType(), ROOT::ENTupleColumnType::kSplitReal32);
   EXPECT_EQ(plainColDesc.GetType(), ROOT::ENTupleColumnType::kReal32);
   EXPECT_EQ(splitColDesc.GetFieldId(), fieldId);
   EXPECT_EQ(plainColDesc.GetFieldId(), fieldId);
   EXPECT_EQ(splitColDesc.GetRepresentationIndex(), 0);
   EXPECT_EQ(plainColDesc.GetRepresentationIndex(), 1);
   EXPECT_EQ(splitColDesc.GetIndex(), 0);
   EXPECT_EQ(plainColDesc.GetIndex(), 0);
   EXPECT_EQ(splitColDesc.GetFirstElementIndex(), 0);
   EXPECT_EQ(plainColDesc.GetFirstElementIndex(), 0);

   // First cluster: 150 entries, column 0 carries the data and column 1 is suppressed.
   const auto firstClusterId = desc.FindClusterId(0, 0);
   const auto &firstCluster = desc.GetClusterDescriptor(firstClusterId);
   ASSERT_TRUE(firstCluster.ContainsColumn(0));
   ASSERT_TRUE(firstCluster.ContainsColumn(1));
   EXPECT_FALSE(firstCluster.GetColumnRange(0).IsSuppressed());
   EXPECT_TRUE(firstCluster.GetColumnRange(1).IsSuppressed());
   EXPECT_EQ(firstCluster.GetFirstEntryIndex(), 0);
   EXPECT_EQ(firstCluster.GetNEntries(), 150);

   // Second cluster: 50 entries, the roles of the two columns are swapped.
   const auto secondClusterId = desc.FindNextClusterId(firstClusterId);
   const auto &secondCluster = desc.GetClusterDescriptor(secondClusterId);
   ASSERT_TRUE(secondCluster.ContainsColumn(0));
   ASSERT_TRUE(secondCluster.ContainsColumn(1));
   EXPECT_TRUE(secondCluster.GetColumnRange(0).IsSuppressed());
   EXPECT_FALSE(secondCluster.GetColumnRange(1).IsSuppressed());
   EXPECT_EQ(secondCluster.GetFirstEntryIndex(), 150);
   EXPECT_EQ(secondCluster.GetNEntries(), 50);
}
936+
937+
// Same scenario as the single-column late extension test, but for a std::string field whose
// representations consist of two columns each ({kSplitIndex64, kChar} and {kIndex32, kChar}).
TEST(RNTuple, LateColumnExtension2)
{
   FileRaii fileGuard("test_ntuple_latecolumnext2.ntuple");

   auto model = RNTupleModel::Create();
   // The string field starts out with the representation {kSplitIndex64, kChar}.
   auto ptrS = model->MakeField<std::string>("s");

   auto writer = ROOT::RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath());
   // Entries [0, 100) are written before the schema is modified.
   for (int value = 0; value < 100; ++value) {
      *ptrS = std::to_string(value);
      writer->Fill();
   }

   // Temporarily unfreeze the writer's model to register an additional representation for "s".
   auto &writerModel = const_cast<RNTupleModel &>(writer->GetModel());
   writerModel.Unfreeze();

   auto &fieldS = writerModel.GetMutableField("s");
   ROOT::Internal::RNTupleModelChangeset changeset{writerModel};
   changeset.AddColumnRepr(&fieldS, {{ROOT::ENTupleColumnType::kIndex32, ROOT::ENTupleColumnType::kChar}});

   writerModel.Freeze();

   // Pass the changeset on to the writer's page sink.
   ROOT::Internal::GetWriterSink(*writer).UpdateSchema(changeset, 0);

   // Entries [100, 150) are still written with the original primary representation ...
   for (int value = 100; value < 150; ++value) {
      *ptrS = std::to_string(value);
      writer->Fill();
   }

   // ... then the cluster is committed and representation 1 becomes the primary one ...
   writer->CommitCluster();
   ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation(fieldS, 1);

   // ... which is used for entries [150, 200).
   for (int value = 150; value < 200; ++value) {
      *ptrS = std::to_string(value);
      writer->Fill();
   }

   writer.reset();

   auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
   // The data must be readable across the representation switch.
   auto viewS = reader->GetView<std::string>("s");
   EXPECT_EQ(reader->GetNEntries(), 200);
   for (auto entry : reader->GetEntryRange()) {
      EXPECT_EQ(viewS(entry), std::to_string(entry));
   }

   // The field must now own four columns: two per representation.
   const auto &desc = reader->GetDescriptor();
   auto fieldId = desc.FindFieldId("s");
   const auto &fieldDesc = desc.GetFieldDescriptor(fieldId);
   const auto &columnIds = fieldDesc.GetLogicalColumnIds();
   ASSERT_EQ(columnIds.size(), 4);
   EXPECT_EQ(fieldDesc.GetColumnCardinality(), 2);
   const auto &repr0IndexCol = desc.GetColumnDescriptor(columnIds[0]);
   const auto &repr0CharCol = desc.GetColumnDescriptor(columnIds[1]);
   const auto &repr1IndexCol = desc.GetColumnDescriptor(columnIds[2]);
   const auto &repr1CharCol = desc.GetColumnDescriptor(columnIds[3]);
   EXPECT_EQ(repr0IndexCol.GetType(), ROOT::ENTupleColumnType::kSplitIndex64);
   EXPECT_EQ(repr0CharCol.GetType(), ROOT::ENTupleColumnType::kChar);
   EXPECT_EQ(repr1IndexCol.GetType(), ROOT::ENTupleColumnType::kIndex32);
   EXPECT_EQ(repr1CharCol.GetType(), ROOT::ENTupleColumnType::kChar);
   EXPECT_EQ(repr0IndexCol.GetRepresentationIndex(), 0);
   EXPECT_EQ(repr0CharCol.GetRepresentationIndex(), 0);
   EXPECT_EQ(repr1IndexCol.GetRepresentationIndex(), 1);
   EXPECT_EQ(repr1CharCol.GetRepresentationIndex(), 1);
   EXPECT_EQ(repr0IndexCol.GetIndex(), 0);
   EXPECT_EQ(repr0CharCol.GetIndex(), 1);
   EXPECT_EQ(repr1IndexCol.GetIndex(), 0);
   EXPECT_EQ(repr1CharCol.GetIndex(), 1);
   EXPECT_EQ(repr0IndexCol.GetFirstElementIndex(), 0);
   EXPECT_EQ(repr0CharCol.GetFirstElementIndex(), 0);
   EXPECT_EQ(repr1IndexCol.GetFirstElementIndex(), 0);
   EXPECT_EQ(repr1CharCol.GetFirstElementIndex(), 0);

   // First cluster: 150 entries, columns 0 and 1 carry the data, columns 2 and 3 are suppressed.
   const auto firstClusterId = desc.FindClusterId(0, 0);
   const auto &firstCluster = desc.GetClusterDescriptor(firstClusterId);
   ASSERT_TRUE(firstCluster.ContainsColumn(0));
   ASSERT_TRUE(firstCluster.ContainsColumn(1));
   ASSERT_TRUE(firstCluster.ContainsColumn(2));
   ASSERT_TRUE(firstCluster.ContainsColumn(3));
   EXPECT_FALSE(firstCluster.GetColumnRange(0).IsSuppressed());
   EXPECT_FALSE(firstCluster.GetColumnRange(1).IsSuppressed());
   EXPECT_TRUE(firstCluster.GetColumnRange(2).IsSuppressed());
   EXPECT_TRUE(firstCluster.GetColumnRange(3).IsSuppressed());
   EXPECT_EQ(firstCluster.GetFirstEntryIndex(), 0);
   EXPECT_EQ(firstCluster.GetNEntries(), 150);

   // Second cluster: 50 entries, the roles of the two column pairs are swapped.
   const auto secondClusterId = desc.FindNextClusterId(firstClusterId);
   const auto &secondCluster = desc.GetClusterDescriptor(secondClusterId);
   ASSERT_TRUE(secondCluster.ContainsColumn(0));
   ASSERT_TRUE(secondCluster.ContainsColumn(1));
   ASSERT_TRUE(secondCluster.ContainsColumn(2));
   ASSERT_TRUE(secondCluster.ContainsColumn(3));
   EXPECT_TRUE(secondCluster.GetColumnRange(0).IsSuppressed());
   EXPECT_TRUE(secondCluster.GetColumnRange(1).IsSuppressed());
   EXPECT_FALSE(secondCluster.GetColumnRange(2).IsSuppressed());
   EXPECT_FALSE(secondCluster.GetColumnRange(3).IsSuppressed());
   EXPECT_EQ(secondCluster.GetFirstEntryIndex(), 150);
   EXPECT_EQ(secondCluster.GetNEntries(), 50);
}
1034+
1035+
// Combines a regular late model extension (a string field added through the model updater after
// 50 entries) with a late column extension on that new field, then checks the data read back and
// the resulting column/cluster metadata.
TEST(RNTuple, LateColumnExtensionDeferred)
{
   FileRaii fileGuard("test_ntuple_latecolumnext_deferred.ntuple");

   auto model = RNTupleModel::Create();
   auto ptrF = model->MakeField<float>("f");

   auto writer = ROOT::RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath());
   // Entries [0, 50): only "f" exists.
   for (int value = 0; value < 50; ++value) {
      *ptrF = value;
      writer->Fill();
   }

   // Extend the model with the string field "s" through the model updater.
   {
      auto updater = writer->CreateModelUpdater();
      updater->BeginUpdate();
      // The string field has the representation {kSplitIndex64, kChar}.
      updater->AddField(std::make_unique<ROOT::RField<std::string>>("s"));
      updater->CommitUpdate();
   }

   // Entries [50, 100): both fields are filled.
   auto ptrS = writer->GetModel().GetDefaultEntry().GetPtr<std::string>("s");
   for (int value = 50; value < 100; ++value) {
      *ptrF = value;
      *ptrS = std::to_string(value);
      writer->Fill();
   }

   // Register {kIndex32, kChar} as an additional column representation of the late-added field "s".
   auto &writerModel = const_cast<RNTupleModel &>(writer->GetModel());
   writerModel.Unfreeze();
   auto &fieldS = writerModel.GetMutableField("s");
   ROOT::Internal::RNTupleModelChangeset changeset{writerModel};
   changeset.AddColumnRepr(&fieldS, {{ROOT::ENTupleColumnType::kIndex32, ROOT::ENTupleColumnType::kChar}});
   writerModel.Freeze();
   // Pass the changeset on to the writer's page sink, using 50 as the first-entry argument.
   ROOT::Internal::GetWriterSink(*writer).UpdateSchema(changeset, 50);

   // Entries [100, 150) are still written with the original primary representation ...
   for (int value = 100; value < 150; ++value) {
      *ptrF = value;
      *ptrS = std::to_string(value);
      writer->Fill();
   }

   // ... then the cluster is committed and representation 1 of "s" becomes the primary one ...
   writer->CommitCluster();
   ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation(fieldS, 1);

   // ... which is used for entries [150, 200).
   for (int value = 150; value < 200; ++value) {
      *ptrF = value;
      *ptrS = std::to_string(value);
      writer->Fill();
   }

   writer.reset();

   auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
   EXPECT_EQ(reader->GetNEntries(), 200);

   // The data must be readable; "s" is expected to be empty for the entries written before it was added.
   auto readF = reader->GetModel().GetDefaultEntry().GetPtr<float>("f");
   auto readS = reader->GetModel().GetDefaultEntry().GetPtr<std::string>("s");
   for (auto entry : reader->GetEntryRange()) {
      reader->LoadEntry(entry);
      EXPECT_FLOAT_EQ(*readF, entry);
      EXPECT_EQ(*readS, entry >= 50 ? std::to_string(entry) : "");
   }

   // "f" keeps its single column, "s" must own four columns (two per representation).
   const auto &desc = reader->GetDescriptor();
   auto fieldFId = desc.FindFieldId("f");
   const auto &fieldFDesc = desc.GetFieldDescriptor(fieldFId);
   ASSERT_EQ(fieldFDesc.GetLogicalColumnIds().size(), 1);
   EXPECT_EQ(fieldFDesc.GetColumnCardinality(), 1);
   auto fieldSId = desc.FindFieldId("s");
   const auto &fieldSDesc = desc.GetFieldDescriptor(fieldSId);
   ASSERT_EQ(fieldSDesc.GetLogicalColumnIds().size(), 4);
   EXPECT_EQ(fieldSDesc.GetColumnCardinality(), 2);
   const auto &floatCol = desc.GetColumnDescriptor(fieldFDesc.GetLogicalColumnIds()[0]);
   const auto &repr0IndexCol = desc.GetColumnDescriptor(fieldSDesc.GetLogicalColumnIds()[0]);
   const auto &repr0CharCol = desc.GetColumnDescriptor(fieldSDesc.GetLogicalColumnIds()[1]);
   const auto &repr1IndexCol = desc.GetColumnDescriptor(fieldSDesc.GetLogicalColumnIds()[2]);
   const auto &repr1CharCol = desc.GetColumnDescriptor(fieldSDesc.GetLogicalColumnIds()[3]);
   EXPECT_EQ(floatCol.GetType(), ROOT::ENTupleColumnType::kSplitReal32);
   EXPECT_EQ(repr0IndexCol.GetType(), ROOT::ENTupleColumnType::kSplitIndex64);
   EXPECT_EQ(repr0CharCol.GetType(), ROOT::ENTupleColumnType::kChar);
   EXPECT_EQ(repr1IndexCol.GetType(), ROOT::ENTupleColumnType::kIndex32);
   EXPECT_EQ(repr1CharCol.GetType(), ROOT::ENTupleColumnType::kChar);
   EXPECT_EQ(floatCol.GetFieldId(), fieldFId);
   EXPECT_EQ(repr0IndexCol.GetFieldId(), fieldSId);
   EXPECT_EQ(repr0CharCol.GetFieldId(), fieldSId);
   EXPECT_EQ(repr1IndexCol.GetFieldId(), fieldSId);
   EXPECT_EQ(repr1CharCol.GetFieldId(), fieldSId);
   EXPECT_EQ(floatCol.GetRepresentationIndex(), 0);
   EXPECT_EQ(repr0IndexCol.GetRepresentationIndex(), 0);
   EXPECT_EQ(repr0CharCol.GetRepresentationIndex(), 0);
   EXPECT_EQ(repr1IndexCol.GetRepresentationIndex(), 1);
   EXPECT_EQ(repr1CharCol.GetRepresentationIndex(), 1);
   EXPECT_EQ(floatCol.GetIndex(), 0);
   EXPECT_EQ(repr0IndexCol.GetIndex(), 0);
   EXPECT_EQ(repr0CharCol.GetIndex(), 1);
   EXPECT_EQ(repr1IndexCol.GetIndex(), 0);
   EXPECT_EQ(repr1CharCol.GetIndex(), 1);
   EXPECT_EQ(floatCol.GetFirstElementIndex(), 0);
   EXPECT_EQ(repr0IndexCol.GetFirstElementIndex(), 50);
   EXPECT_EQ(repr0CharCol.GetFirstElementIndex(), 0); // string data column is never deferred
   EXPECT_EQ(repr1IndexCol.GetFirstElementIndex(), 50);
   EXPECT_EQ(repr1CharCol.GetFirstElementIndex(), 0); // string data column is never deferred

   // First cluster: 150 entries; columns 0-2 carry data, the late representation (3, 4) is suppressed.
   const auto firstClusterId = desc.FindClusterId(0, 0);
   const auto &firstCluster = desc.GetClusterDescriptor(firstClusterId);
   ASSERT_TRUE(firstCluster.ContainsColumn(0));
   ASSERT_TRUE(firstCluster.ContainsColumn(1));
   ASSERT_TRUE(firstCluster.ContainsColumn(2));
   ASSERT_TRUE(firstCluster.ContainsColumn(3));
   ASSERT_TRUE(firstCluster.ContainsColumn(4));
   EXPECT_FALSE(firstCluster.GetColumnRange(0).IsSuppressed());
   EXPECT_FALSE(firstCluster.GetColumnRange(1).IsSuppressed());
   EXPECT_FALSE(firstCluster.GetColumnRange(2).IsSuppressed());
   EXPECT_TRUE(firstCluster.GetColumnRange(3).IsSuppressed());
   EXPECT_TRUE(firstCluster.GetColumnRange(4).IsSuppressed());
   EXPECT_EQ(firstCluster.GetFirstEntryIndex(), 0);
   EXPECT_EQ(firstCluster.GetNEntries(), 150);

   // Second cluster: 50 entries; "f" is unaffected while the two representations of "s" swap roles.
   const auto secondClusterId = desc.FindNextClusterId(firstClusterId);
   const auto &secondCluster = desc.GetClusterDescriptor(secondClusterId);
   ASSERT_TRUE(secondCluster.ContainsColumn(0));
   ASSERT_TRUE(secondCluster.ContainsColumn(1));
   ASSERT_TRUE(secondCluster.ContainsColumn(2));
   ASSERT_TRUE(secondCluster.ContainsColumn(3));
   ASSERT_TRUE(secondCluster.ContainsColumn(4));
   EXPECT_FALSE(secondCluster.GetColumnRange(0).IsSuppressed());
   EXPECT_TRUE(secondCluster.GetColumnRange(1).IsSuppressed());
   EXPECT_TRUE(secondCluster.GetColumnRange(2).IsSuppressed());
   EXPECT_FALSE(secondCluster.GetColumnRange(3).IsSuppressed());
   EXPECT_FALSE(secondCluster.GetColumnRange(4).IsSuppressed());
   EXPECT_EQ(secondCluster.GetFirstEntryIndex(), 150);
   EXPECT_EQ(secondCluster.GetNEntries(), 50);
}

0 commit comments

Comments
 (0)