@@ -852,3 +852,318 @@ R"({
852852 // clang-format on
853853 EXPECT_EQ (expect, os.str ());
854854}
855+
856+ TEST (RNTuple, LateColumnExtension)
857+ {
858+ FileRaii fileGuard (" test_ntuple_latecolumnext.ntuple" );
859+
860+ auto model = RNTupleModel::Create ();
861+ auto pF = model->MakeField <float >(" f" ); // this has column representation {kSplitReal32}.
862+
863+ auto writer = ROOT::RNTupleWriter::Recreate (std::move (model), " ntuple" , fileGuard.GetPath ());
864+ for (int i = 0 ; i < 100 ; ++i) {
865+ *pF = i;
866+ writer->Fill ();
867+ }
868+ auto &modelRef = const_cast <RNTupleModel &>(writer->GetModel ());
869+ modelRef.Unfreeze ();
870+
871+ auto &field = modelRef.GetMutableField (" f" );
872+ ROOT::Internal::RNTupleModelChangeset changeset{modelRef};
873+ changeset.AddColumnRepr (&field, {{ROOT::ENTupleColumnType::kReal32 }});
874+
875+ modelRef.Freeze ();
876+
877+ auto &sink = ROOT::Internal::GetWriterSink (*writer);
878+ sink.UpdateSchema (changeset, 0 );
879+
880+ // Keep writing 50 entries with the old active representation, then switch it.
881+ for (int i = 0 ; i < 100 ; ++i) {
882+ if (i == 50 ) {
883+ writer->CommitCluster ();
884+ ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation (field, 1 );
885+ }
886+ *pF = 100 + i;
887+ writer->Fill ();
888+ }
889+
890+ writer.reset ();
891+
892+ auto reader = RNTupleReader::Open (" ntuple" , fileGuard.GetPath ());
893+ // check that we can read data fine
894+ auto vF = reader->GetView <float >(" f" );
895+ EXPECT_EQ (reader->GetNEntries (), 200 );
896+ for (auto idx : reader->GetEntryRange ()) {
897+ EXPECT_FLOAT_EQ (vF (idx), idx);
898+ }
899+ // check that metadata is correct
900+ const auto &desc = reader->GetDescriptor ();
901+ auto fieldId = desc.FindFieldId (" f" );
902+ const auto &fdesc = desc.GetFieldDescriptor (fieldId);
903+ ASSERT_EQ (fdesc.GetLogicalColumnIds ().size (), 2 );
904+ EXPECT_EQ (fdesc.GetColumnCardinality (), 1 );
905+ const auto &col1Desc = desc.GetColumnDescriptor (fdesc.GetLogicalColumnIds ()[0 ]);
906+ const auto &col2Desc = desc.GetColumnDescriptor (fdesc.GetLogicalColumnIds ()[1 ]);
907+ EXPECT_EQ (col1Desc.GetType (), ROOT::ENTupleColumnType::kSplitReal32 );
908+ EXPECT_EQ (col2Desc.GetType (), ROOT::ENTupleColumnType::kReal32 );
909+ EXPECT_EQ (col1Desc.GetFieldId (), fieldId);
910+ EXPECT_EQ (col2Desc.GetFieldId (), fieldId);
911+ EXPECT_EQ (col1Desc.GetRepresentationIndex (), 0 );
912+ EXPECT_EQ (col2Desc.GetRepresentationIndex (), 1 );
913+ EXPECT_EQ (col1Desc.GetIndex (), 0 );
914+ EXPECT_EQ (col2Desc.GetIndex (), 0 );
915+ EXPECT_EQ (col1Desc.GetFirstElementIndex (), 0 );
916+ EXPECT_EQ (col2Desc.GetFirstElementIndex (), 0 );
917+
918+ const auto cluster1Id = desc.FindClusterId (0 , 0 );
919+ const auto &cluster1Desc = desc.GetClusterDescriptor (cluster1Id);
920+ ASSERT_TRUE (cluster1Desc.ContainsColumn (0 ));
921+ ASSERT_TRUE (cluster1Desc.ContainsColumn (1 ));
922+ EXPECT_FALSE (cluster1Desc.GetColumnRange (0 ).IsSuppressed ());
923+ EXPECT_TRUE (cluster1Desc.GetColumnRange (1 ).IsSuppressed ());
924+ EXPECT_EQ (cluster1Desc.GetFirstEntryIndex (), 0 );
925+ EXPECT_EQ (cluster1Desc.GetNEntries (), 150 );
926+
927+ const auto cluster2Id = desc.FindNextClusterId (cluster1Id);
928+ const auto &cluster2Desc = desc.GetClusterDescriptor (cluster2Id);
929+ ASSERT_TRUE (cluster2Desc.ContainsColumn (0 ));
930+ ASSERT_TRUE (cluster2Desc.ContainsColumn (1 ));
931+ EXPECT_TRUE (cluster2Desc.GetColumnRange (0 ).IsSuppressed ());
932+ EXPECT_FALSE (cluster2Desc.GetColumnRange (1 ).IsSuppressed ());
933+ EXPECT_EQ (cluster2Desc.GetFirstEntryIndex (), 150 );
934+ EXPECT_EQ (cluster2Desc.GetNEntries (), 50 );
935+ }
936+
937+ TEST (RNTuple, LateColumnExtension2)
938+ {
939+ FileRaii fileGuard (" test_ntuple_latecolumnext2.ntuple" );
940+
941+ auto model = RNTupleModel::Create ();
942+ // this has representation {kSplitIndex64, kChar}
943+ auto pS = model->MakeField <std::string>(" s" );
944+
945+ auto writer = ROOT::RNTupleWriter::Recreate (std::move (model), " ntuple" , fileGuard.GetPath ());
946+ for (int i = 0 ; i < 100 ; ++i) {
947+ *pS = std::to_string (i);
948+ writer->Fill ();
949+ }
950+ auto &modelRef = const_cast <RNTupleModel &>(writer->GetModel ());
951+ modelRef.Unfreeze ();
952+
953+ auto &field = modelRef.GetMutableField (" s" );
954+ ROOT::Internal::RNTupleModelChangeset changeset{modelRef};
955+ changeset.AddColumnRepr (&field, {{ROOT::ENTupleColumnType::kIndex32 , ROOT::ENTupleColumnType::kChar }});
956+
957+ modelRef.Freeze ();
958+
959+ auto &sink = ROOT::Internal::GetWriterSink (*writer);
960+ sink.UpdateSchema (changeset, 0 );
961+
962+ // Keep writing 50 entries with the old active representation, then switch it.
963+ for (int i = 0 ; i < 100 ; ++i) {
964+ if (i == 50 ) {
965+ writer->CommitCluster ();
966+ ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation (field, 1 );
967+ }
968+ *pS = std::to_string (100 + i);
969+ writer->Fill ();
970+ }
971+
972+ writer.reset ();
973+
974+ auto reader = RNTupleReader::Open (" ntuple" , fileGuard.GetPath ());
975+ // check that we can read data fine
976+ auto vF = reader->GetView <std::string>(" s" );
977+ EXPECT_EQ (reader->GetNEntries (), 200 );
978+ for (auto idx : reader->GetEntryRange ()) {
979+ EXPECT_EQ (vF (idx), std::to_string (idx));
980+ }
981+ // check that metadata is correct
982+ const auto &desc = reader->GetDescriptor ();
983+ auto fieldId = desc.FindFieldId (" s" );
984+ const auto &fdesc = desc.GetFieldDescriptor (fieldId);
985+ ASSERT_EQ (fdesc.GetLogicalColumnIds ().size (), 4 );
986+ EXPECT_EQ (fdesc.GetColumnCardinality (), 2 );
987+ const auto &col1Desc = desc.GetColumnDescriptor (fdesc.GetLogicalColumnIds ()[0 ]);
988+ const auto &col2Desc = desc.GetColumnDescriptor (fdesc.GetLogicalColumnIds ()[1 ]);
989+ const auto &col3Desc = desc.GetColumnDescriptor (fdesc.GetLogicalColumnIds ()[2 ]);
990+ const auto &col4Desc = desc.GetColumnDescriptor (fdesc.GetLogicalColumnIds ()[3 ]);
991+ EXPECT_EQ (col1Desc.GetType (), ROOT::ENTupleColumnType::kSplitIndex64 );
992+ EXPECT_EQ (col2Desc.GetType (), ROOT::ENTupleColumnType::kChar );
993+ EXPECT_EQ (col3Desc.GetType (), ROOT::ENTupleColumnType::kIndex32 );
994+ EXPECT_EQ (col4Desc.GetType (), ROOT::ENTupleColumnType::kChar );
995+ EXPECT_EQ (col1Desc.GetRepresentationIndex (), 0 );
996+ EXPECT_EQ (col2Desc.GetRepresentationIndex (), 0 );
997+ EXPECT_EQ (col3Desc.GetRepresentationIndex (), 1 );
998+ EXPECT_EQ (col4Desc.GetRepresentationIndex (), 1 );
999+ EXPECT_EQ (col1Desc.GetIndex (), 0 );
1000+ EXPECT_EQ (col2Desc.GetIndex (), 1 );
1001+ EXPECT_EQ (col3Desc.GetIndex (), 0 );
1002+ EXPECT_EQ (col4Desc.GetIndex (), 1 );
1003+ EXPECT_EQ (col1Desc.GetFirstElementIndex (), 0 );
1004+ EXPECT_EQ (col2Desc.GetFirstElementIndex (), 0 );
1005+ EXPECT_EQ (col3Desc.GetFirstElementIndex (), 0 );
1006+ EXPECT_EQ (col4Desc.GetFirstElementIndex (), 0 );
1007+
1008+ const auto cluster1Id = desc.FindClusterId (0 , 0 );
1009+ const auto &cluster1Desc = desc.GetClusterDescriptor (cluster1Id);
1010+ ASSERT_TRUE (cluster1Desc.ContainsColumn (0 ));
1011+ ASSERT_TRUE (cluster1Desc.ContainsColumn (1 ));
1012+ ASSERT_TRUE (cluster1Desc.ContainsColumn (2 ));
1013+ ASSERT_TRUE (cluster1Desc.ContainsColumn (3 ));
1014+ EXPECT_FALSE (cluster1Desc.GetColumnRange (0 ).IsSuppressed ());
1015+ EXPECT_FALSE (cluster1Desc.GetColumnRange (1 ).IsSuppressed ());
1016+ EXPECT_TRUE (cluster1Desc.GetColumnRange (2 ).IsSuppressed ());
1017+ EXPECT_TRUE (cluster1Desc.GetColumnRange (3 ).IsSuppressed ());
1018+ EXPECT_EQ (cluster1Desc.GetFirstEntryIndex (), 0 );
1019+ EXPECT_EQ (cluster1Desc.GetNEntries (), 150 );
1020+
1021+ const auto cluster2Id = desc.FindNextClusterId (cluster1Id);
1022+ const auto &cluster2Desc = desc.GetClusterDescriptor (cluster2Id);
1023+ ASSERT_TRUE (cluster2Desc.ContainsColumn (0 ));
1024+ ASSERT_TRUE (cluster2Desc.ContainsColumn (1 ));
1025+ ASSERT_TRUE (cluster2Desc.ContainsColumn (2 ));
1026+ ASSERT_TRUE (cluster2Desc.ContainsColumn (3 ));
1027+ EXPECT_TRUE (cluster2Desc.GetColumnRange (0 ).IsSuppressed ());
1028+ EXPECT_TRUE (cluster2Desc.GetColumnRange (1 ).IsSuppressed ());
1029+ EXPECT_FALSE (cluster2Desc.GetColumnRange (2 ).IsSuppressed ());
1030+ EXPECT_FALSE (cluster2Desc.GetColumnRange (3 ).IsSuppressed ());
1031+ EXPECT_EQ (cluster2Desc.GetFirstEntryIndex (), 150 );
1032+ EXPECT_EQ (cluster2Desc.GetNEntries (), 50 );
1033+ }
1034+
1035+ TEST (RNTuple, LateColumnExtensionDeferred)
1036+ {
1037+ FileRaii fileGuard (" test_ntuple_latecolumnext_deferred.ntuple" );
1038+
1039+ auto model = RNTupleModel::Create ();
1040+ auto pF = model->MakeField <float >(" f" );
1041+
1042+ auto writer = ROOT::RNTupleWriter::Recreate (std::move (model), " ntuple" , fileGuard.GetPath ());
1043+ for (int i = 0 ; i < 50 ; ++i) {
1044+ *pF = i;
1045+ writer->Fill ();
1046+ }
1047+
1048+ // Add a new field (no new columns)
1049+ {
1050+ auto updater = writer->CreateModelUpdater ();
1051+ updater->BeginUpdate ();
1052+ // this has representation {kSplitIndex64, kChar}
1053+ updater->AddField (std::make_unique<ROOT::RField<std::string>>(" s" ));
1054+ updater->CommitUpdate ();
1055+ }
1056+
1057+ auto pS = writer->GetModel ().GetDefaultEntry ().GetPtr <std::string>(" s" );
1058+ for (int i = 0 ; i < 50 ; ++i) {
1059+ *pF = 50 + i;
1060+ *pS = std::to_string (50 + i);
1061+ writer->Fill ();
1062+ }
1063+
1064+ // Add a new column to the new field
1065+ auto &modelRef = const_cast <RNTupleModel &>(writer->GetModel ());
1066+ modelRef.Unfreeze ();
1067+ auto &field = modelRef.GetMutableField (" s" );
1068+ ROOT::Internal::RNTupleModelChangeset changeset{modelRef};
1069+ changeset.AddColumnRepr (&field, {{ROOT::ENTupleColumnType::kIndex32 , ROOT::ENTupleColumnType::kChar }});
1070+ modelRef.Freeze ();
1071+ auto &sink = ROOT::Internal::GetWriterSink (*writer);
1072+ sink.UpdateSchema (changeset, 50 );
1073+
1074+ // Keep writing 50 entries with the old active representation, then switch it.
1075+ for (int i = 0 ; i < 100 ; ++i) {
1076+ if (i == 50 ) {
1077+ writer->CommitCluster ();
1078+ ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation (field, 1 );
1079+ }
1080+ *pF = 100 + i;
1081+ *pS = std::to_string (100 + i);
1082+ writer->Fill ();
1083+ }
1084+
1085+ writer.reset ();
1086+
1087+ auto reader = RNTupleReader::Open (" ntuple" , fileGuard.GetPath ());
1088+ EXPECT_EQ (reader->GetNEntries (), 200 );
1089+
1090+ // check that we can read data fine
1091+ auto prF = reader->GetModel ().GetDefaultEntry ().GetPtr <float >(" f" );
1092+ auto prS = reader->GetModel ().GetDefaultEntry ().GetPtr <std::string>(" s" );
1093+ for (auto idx : reader->GetEntryRange ()) {
1094+ reader->LoadEntry (idx);
1095+ EXPECT_FLOAT_EQ (*prF, idx);
1096+ EXPECT_EQ (*prS, idx >= 50 ? std::to_string (idx) : " " );
1097+ }
1098+
1099+ // check that metadata is correct
1100+ const auto &desc = reader->GetDescriptor ();
1101+ auto fieldFId = desc.FindFieldId (" f" );
1102+ const auto &ffdesc = desc.GetFieldDescriptor (fieldFId);
1103+ ASSERT_EQ (ffdesc.GetLogicalColumnIds ().size (), 1 );
1104+ EXPECT_EQ (ffdesc.GetColumnCardinality (), 1 );
1105+ auto fieldSId = desc.FindFieldId (" s" );
1106+ const auto &fsdesc = desc.GetFieldDescriptor (fieldSId);
1107+ ASSERT_EQ (fsdesc.GetLogicalColumnIds ().size (), 4 );
1108+ EXPECT_EQ (fsdesc.GetColumnCardinality (), 2 );
1109+ const auto &col1Desc = desc.GetColumnDescriptor (ffdesc.GetLogicalColumnIds ()[0 ]);
1110+ const auto &col2Desc = desc.GetColumnDescriptor (fsdesc.GetLogicalColumnIds ()[0 ]);
1111+ const auto &col3Desc = desc.GetColumnDescriptor (fsdesc.GetLogicalColumnIds ()[1 ]);
1112+ const auto &col4Desc = desc.GetColumnDescriptor (fsdesc.GetLogicalColumnIds ()[2 ]);
1113+ const auto &col5Desc = desc.GetColumnDescriptor (fsdesc.GetLogicalColumnIds ()[3 ]);
1114+ EXPECT_EQ (col1Desc.GetType (), ROOT::ENTupleColumnType::kSplitReal32 );
1115+ EXPECT_EQ (col2Desc.GetType (), ROOT::ENTupleColumnType::kSplitIndex64 );
1116+ EXPECT_EQ (col3Desc.GetType (), ROOT::ENTupleColumnType::kChar );
1117+ EXPECT_EQ (col4Desc.GetType (), ROOT::ENTupleColumnType::kIndex32 );
1118+ EXPECT_EQ (col5Desc.GetType (), ROOT::ENTupleColumnType::kChar );
1119+ EXPECT_EQ (col1Desc.GetFieldId (), fieldFId);
1120+ EXPECT_EQ (col2Desc.GetFieldId (), fieldSId);
1121+ EXPECT_EQ (col3Desc.GetFieldId (), fieldSId);
1122+ EXPECT_EQ (col4Desc.GetFieldId (), fieldSId);
1123+ EXPECT_EQ (col5Desc.GetFieldId (), fieldSId);
1124+ EXPECT_EQ (col1Desc.GetRepresentationIndex (), 0 );
1125+ EXPECT_EQ (col2Desc.GetRepresentationIndex (), 0 );
1126+ EXPECT_EQ (col3Desc.GetRepresentationIndex (), 0 );
1127+ EXPECT_EQ (col4Desc.GetRepresentationIndex (), 1 );
1128+ EXPECT_EQ (col5Desc.GetRepresentationIndex (), 1 );
1129+ EXPECT_EQ (col1Desc.GetIndex (), 0 );
1130+ EXPECT_EQ (col2Desc.GetIndex (), 0 );
1131+ EXPECT_EQ (col3Desc.GetIndex (), 1 );
1132+ EXPECT_EQ (col4Desc.GetIndex (), 0 );
1133+ EXPECT_EQ (col5Desc.GetIndex (), 1 );
1134+ EXPECT_EQ (col1Desc.GetFirstElementIndex (), 0 );
1135+ EXPECT_EQ (col2Desc.GetFirstElementIndex (), 50 );
1136+ EXPECT_EQ (col3Desc.GetFirstElementIndex (), 0 ); // string data column is never deferred
1137+ EXPECT_EQ (col4Desc.GetFirstElementIndex (), 50 );
1138+ EXPECT_EQ (col5Desc.GetFirstElementIndex (), 0 ); // string data column is never deferred
1139+
1140+ const auto cluster1Id = desc.FindClusterId (0 , 0 );
1141+ const auto &cluster1Desc = desc.GetClusterDescriptor (cluster1Id);
1142+ ASSERT_TRUE (cluster1Desc.ContainsColumn (0 ));
1143+ ASSERT_TRUE (cluster1Desc.ContainsColumn (1 ));
1144+ ASSERT_TRUE (cluster1Desc.ContainsColumn (2 ));
1145+ ASSERT_TRUE (cluster1Desc.ContainsColumn (3 ));
1146+ ASSERT_TRUE (cluster1Desc.ContainsColumn (4 ));
1147+ EXPECT_FALSE (cluster1Desc.GetColumnRange (0 ).IsSuppressed ());
1148+ EXPECT_FALSE (cluster1Desc.GetColumnRange (1 ).IsSuppressed ());
1149+ EXPECT_FALSE (cluster1Desc.GetColumnRange (2 ).IsSuppressed ());
1150+ EXPECT_TRUE (cluster1Desc.GetColumnRange (3 ).IsSuppressed ());
1151+ EXPECT_TRUE (cluster1Desc.GetColumnRange (4 ).IsSuppressed ());
1152+ EXPECT_EQ (cluster1Desc.GetFirstEntryIndex (), 0 );
1153+ EXPECT_EQ (cluster1Desc.GetNEntries (), 150 );
1154+
1155+ const auto cluster2Id = desc.FindNextClusterId (cluster1Id);
1156+ const auto &cluster2Desc = desc.GetClusterDescriptor (cluster2Id);
1157+ ASSERT_TRUE (cluster2Desc.ContainsColumn (0 ));
1158+ ASSERT_TRUE (cluster2Desc.ContainsColumn (1 ));
1159+ ASSERT_TRUE (cluster2Desc.ContainsColumn (2 ));
1160+ ASSERT_TRUE (cluster2Desc.ContainsColumn (3 ));
1161+ ASSERT_TRUE (cluster2Desc.ContainsColumn (4 ));
1162+ EXPECT_FALSE (cluster2Desc.GetColumnRange (0 ).IsSuppressed ());
1163+ EXPECT_TRUE (cluster2Desc.GetColumnRange (1 ).IsSuppressed ());
1164+ EXPECT_TRUE (cluster2Desc.GetColumnRange (2 ).IsSuppressed ());
1165+ EXPECT_FALSE (cluster2Desc.GetColumnRange (3 ).IsSuppressed ());
1166+ EXPECT_FALSE (cluster2Desc.GetColumnRange (4 ).IsSuppressed ());
1167+ EXPECT_EQ (cluster2Desc.GetFirstEntryIndex (), 150 );
1168+ EXPECT_EQ (cluster2Desc.GetNEntries (), 50 );
1169+ }
0 commit comments