Skip to content

Commit 17ead6e

Browse files
committed
[df] Avoid relying on side-effect in test
This commit touches the RNTupleWithGlobalRanges test in the RDatasetSpec suite with RNTuple. Previously, the test checked a specific value returned after filtering the RNTuple dataset on specific values of the special 'rdfentry_' column. However, RDataFrame does not guarantee any ordering for the 'rdfentry_' column; in particular, it does not guarantee that a given 'rdfentry_' value corresponds to the true global entry index in the input dataset. The test now instead checks that, for each specified global range, RDataFrame retrieves the correct values from the dataset, and that the 'rdfentry_' column values are unique across the different thread tasks.
1 parent 758c3de commit 17ead6e

1 file changed

Lines changed: 62 additions & 33 deletions

File tree

tree/dataframe/test/dataframe_datasetspec.cxx

Lines changed: 62 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ void EXPECT_VEC_SEQ_EQ(const std::vector<ULong64_t> &vec, const ROOT::TSeq<ULong
3131
EXPECT_EQ(vec[i], seq[i]);
3232
}
3333

34+
void expect_vec_eq(const std::vector<ULong64_t> &a, const std::vector<ULong64_t> &b)
35+
{
36+
ASSERT_EQ(a.size(), b.size());
37+
for (decltype(a.size()) i{}; i < a.size(); i++)
38+
EXPECT_EQ(a[i], b[i]);
39+
}
40+
3441
struct RTestSample {
3542
std::string name;
3643
ULong64_t sampleStart;
@@ -944,50 +951,72 @@ TEST_P(RDatasetSpecTest, RNTupleWithGlobalRanges)
944951
spec.AddSample(samp);
945952
spec.AddSample(samp1);
946953
spec.AddSample(samp2);
947-
auto df1 = ROOT::RDataFrame(spec);
948954

949-
std::vector<RDatasetSpec::REntryRange> goodRanges = {{1, 4}, {2, 7}, {6, 19}, {16, 20}};
950-
951-
auto df_final = df1.Filter("x > 3").Count();
955+
auto is_vector_unique = [](std::vector<ULong64_t> &vec) {
956+
std::sort(vec.begin(), vec.end());
957+
return std::adjacent_find(vec.begin(), vec.end()) == vec.end();
958+
};
952959

960+
auto df = ROOT::RDataFrame(spec);
953961
auto definepersamp =
954-
df1.DefinePerSample("lum", [](unsigned int, const ROOT::RDF::RSampleInfo &id) { return id.GetD("lum"); });
962+
df.DefinePerSample("lum", [](unsigned int, const ROOT::RDF::RSampleInfo &id) { return id.GetD("lum"); });
955963
auto df_filtered = definepersamp.Filter("lum == 10.").Count();
964+
auto df_final = df.Filter("x > 3").Count();
965+
auto df_rdfentry = df.Define("entry", [](ULong64_t entry) { return entry; }, {"rdfentry_"}).Take<ULong64_t>("entry");
966+
EXPECT_EQ(df_filtered.GetValue(), 10);
967+
EXPECT_EQ(df_final.GetValue(), 11);
968+
// rdfentry_ should be a unique number per entry, no guarantee about ordering or alignment with dataset entries
969+
EXPECT_TRUE(is_vector_unique(*df_rdfentry));
970+
971+
std::vector<RDatasetSpec::REntryRange> goodRanges = {{1, 4}, {2, 7}, {6, 19}, {16, 20}};
956972

957-
auto df = RDataFrame(spec.WithGlobalRange(goodRanges[0]));
958-
auto filt = df.Filter("rdfentry_ == 2");
959-
auto result = filt.Take<ULong64_t>("x");
960-
auto res = result.GetValue();
961-
auto count_entries = df.Count().GetValue();
962-
EXPECT_EQ(res[0], 2);
963-
EXPECT_EQ(count_entries, 3);
973+
auto df1 = RDataFrame(spec.WithGlobalRange(goodRanges[0]));
974+
auto rptr_1 = df1.Take<ULong64_t>("x");
975+
auto count_entries_1 = df1.Count();
976+
auto df1_rdfentry =
977+
df1.Define("entry", [](ULong64_t entry) { return entry; }, {"rdfentry_"}).Take<ULong64_t>("entry");
978+
// Entries are processed unordered, sort before comparing with expected values
979+
std::sort(rptr_1->begin(), rptr_1->end());
980+
expect_vec_eq(rptr_1.GetValue(), {0, 1, 2});
981+
EXPECT_EQ(count_entries_1.GetValue(), 3);
982+
// rdfentry_ should be a unique number per entry, no guarantee about ordering or alignment with dataset entries
983+
EXPECT_TRUE(is_vector_unique(*df1_rdfentry));
964984

965985
auto df2 = RDataFrame(spec.WithGlobalRange(goodRanges[1]));
966-
auto filt2 = df2.Filter("rdfentry_ == 3");
967-
auto result2 = filt2.Take<ULong64_t>("x");
968-
auto res2 = result2.GetValue();
969-
auto count_entries_2 = df2.Count().GetValue();
970-
EXPECT_EQ(res2[0], 3);
971-
EXPECT_EQ(count_entries_2, 5);
986+
auto rptr_2 = df2.Take<ULong64_t>("x");
987+
auto count_entries_2 = df2.Count();
988+
auto df2_rdfentry =
989+
df2.Define("entry", [](ULong64_t entry) { return entry; }, {"rdfentry_"}).Take<ULong64_t>("entry");
990+
// Entries are processed unordered, sort before comparing with expected values
991+
std::sort(rptr_2->begin(), rptr_2->end());
992+
expect_vec_eq(rptr_2.GetValue(), {0, 0, 1, 2, 4});
993+
EXPECT_EQ(count_entries_2.GetValue(), 5);
994+
// rdfentry_ should be a unique number per entry, no guarantee about ordering or alignment with dataset entries
995+
EXPECT_TRUE(is_vector_unique(*df2_rdfentry));
972996

973997
auto df3 = RDataFrame(spec.WithGlobalRange(goodRanges[2]));
974-
auto filt3 = df3.Filter("rdfentry_ == 8");
975-
auto result3 = filt3.Take<ULong64_t>("x");
976-
auto res3 = result3.GetValue();
977-
auto count_entries_3 = df3.Count().GetValue();
978-
EXPECT_EQ(res3[0], 12);
979-
EXPECT_EQ(count_entries_3, 13);
998+
auto rptr_3 = df3.Take<ULong64_t>("x");
999+
auto count_entries_3 = df3.Count();
1000+
auto df3_rdfentry =
1001+
df3.Define("entry", [](ULong64_t entry) { return entry; }, {"rdfentry_"}).Take<ULong64_t>("entry");
1002+
// Entries are processed unordered, sort before comparing with expected values
1003+
std::sort(rptr_3->begin(), rptr_3->end());
1004+
expect_vec_eq(rptr_3.GetValue(), {0, 0, 0, 2, 3, 4, 4, 6, 6, 8, 8, 9, 12});
1005+
EXPECT_EQ(count_entries_3.GetValue(), 13);
1006+
// rdfentry_ should be a unique number per entry, no guarantee about ordering or alignment with dataset entries
1007+
EXPECT_TRUE(is_vector_unique(*df3_rdfentry));
9801008

9811009
auto df4 = RDataFrame(spec.WithGlobalRange(goodRanges[3]));
982-
auto filt4 = df4.Filter("rdfentry_ == 19");
983-
auto result4 = filt4.Take<ULong64_t>("x");
984-
auto res4 = result4.GetValue();
985-
auto count_entries_4 = df4.Count().GetValue();
986-
EXPECT_EQ(res4[0], 12);
987-
EXPECT_EQ(count_entries_4, 4);
988-
989-
EXPECT_EQ(df_final.GetValue(), 11);
990-
EXPECT_EQ(df_filtered.GetValue(), 10);
1010+
auto rptr_4 = df4.Take<ULong64_t>("x");
1011+
auto count_entries_4 = df4.Count();
1012+
auto df4_rdfentry =
1013+
df4.Define("entry", [](ULong64_t entry) { return entry; }, {"rdfentry_"}).Take<ULong64_t>("entry");
1014+
// Entries are processed unordered, sort before comparing with expected values
1015+
std::sort(rptr_4->begin(), rptr_4->end());
1016+
expect_vec_eq(rptr_4.GetValue(), {0, 3, 6, 9});
1017+
EXPECT_EQ(count_entries_4.GetValue(), 4);
1018+
// rdfentry_ should be a unique number per entry, no guarantee about ordering or alignment with dataset entries
1019+
EXPECT_TRUE(is_vector_unique(*df4_rdfentry));
9911020
}
9921021

9931022
TEST_P(RDatasetSpecTest, FromSpecRNTuple)

0 commit comments

Comments
 (0)