Skip to content

Commit 0c52e4e

Browse files
committed
[ntuple] add GetAttributes methods to RNTupleAttrSetReader
1 parent 1408538 commit 0c52e4e

3 files changed

Lines changed: 292 additions & 2 deletions

File tree

tree/ntuple/inc/ROOT/RNTupleAttrReading.hxx

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ class RNTupleModel;
2525

2626
namespace Experimental {
2727

28+
class RNTupleAttrEntryIterable;
29+
2830
// clang-format off
2931
/**
3032
\class ROOT::Experimental::RNTupleAttrRange
@@ -111,6 +113,7 @@ for (auto idx : attrSet->GetAttributes(10)) {
111113
// clang-format on
112114
class RNTupleAttrSetReader final {
113115
friend class ROOT::RNTupleReader;
116+
friend class RNTupleAttrEntryIterable;
114117

115118
/// List containing pairs { entryRange, entryIndex }, used to quickly find out which entries in the Attribute
116119
/// RNTuple contain entries that overlap a given range. The list is sorted by range start, i.e.
@@ -121,6 +124,8 @@ class RNTupleAttrSetReader final {
121124
/// The reconstructed user model
122125
std::unique_ptr<ROOT::RNTupleModel> fUserModel;
123126

127+
static bool EntryRangesAreSorted(const decltype(fEntryRanges) &ranges);
128+
124129
explicit RNTupleAttrSetReader(std::unique_ptr<RNTupleReader> reader);
125130

126131
public:
@@ -148,6 +153,91 @@ public:
148153

149154
/// Returns the number of all attribute entries in this attribute set.
150155
std::size_t GetNEntries() const { return fEntryRanges.size(); }
156+
157+
/// Returns all the attributes in this Set. The returned attributes are sorted by entry range start.
158+
RNTupleAttrEntryIterable GetAttributes();
159+
/// Returns all the attributes whose range contains index `entryIndex`.
160+
RNTupleAttrEntryIterable GetAttributes(NTupleSize_t entryIndex);
161+
/// Returns all the attributes whose range fully contains `[startEntry, endEntry)`
162+
RNTupleAttrEntryIterable GetAttributesContainingRange(NTupleSize_t startEntry, NTupleSize_t endEntry);
163+
/// Returns all the attributes whose range is fully contained in `[startEntry, endEntry)`
164+
RNTupleAttrEntryIterable GetAttributesInRange(NTupleSize_t startEntry, NTupleSize_t endEntry);
165+
};
166+
167+
// clang-format off
168+
/**
169+
\class ROOT::Experimental::RNTupleAttrEntryIterable
170+
\ingroup NTuple
171+
\brief Iterable class used to loop over attribute entries.
172+
173+
This class allows range-for iteration over a set of attributes, typically returned by the
174+
RNTupleAttrSetReader::GetAttributes family of methods.
175+
176+
See the documentation of RNTupleAttrSetReader for example usage.
177+
*/
178+
// clang-format on
179+
class RNTupleAttrEntryIterable final {
180+
public:
181+
struct RFilter {
182+
RNTupleAttrRange fRange;
183+
bool fIsContained;
184+
};
185+
186+
private:
187+
RNTupleAttrSetReader *fReader = nullptr;
188+
std::optional<RFilter> fFilter;
189+
190+
public:
191+
class RIterator final {
192+
private:
193+
using Iter_t = decltype(std::declval<RNTupleAttrSetReader>().fEntryRanges.begin());
194+
Iter_t fCur, fEnd;
195+
std::optional<RFilter> fFilter;
196+
197+
Iter_t SkipFiltered() const;
198+
bool FullyContained(RNTupleAttrRange range) const;
199+
200+
public:
201+
using iterator_category = std::forward_iterator_tag;
202+
using iterator = RIterator;
203+
using value_type = NTupleSize_t;
204+
using difference_type = std::ptrdiff_t;
205+
using pointer = const value_type *;
206+
using reference = const value_type &;
207+
208+
RIterator(Iter_t iter, Iter_t end, std::optional<RFilter> filter) : fCur(iter), fEnd(end), fFilter(filter)
209+
{
210+
if (fFilter) {
211+
if (fFilter->fRange.GetLength() == 0)
212+
fCur = end;
213+
else
214+
fCur = SkipFiltered();
215+
}
216+
}
217+
iterator operator++()
218+
{
219+
++fCur;
220+
fCur = SkipFiltered();
221+
return *this;
222+
}
223+
iterator operator++(int)
224+
{
225+
iterator it = *this;
226+
operator++();
227+
return it;
228+
}
229+
reference operator*() { return fCur->second; }
230+
bool operator!=(const iterator &rh) const { return !operator==(rh); }
231+
bool operator==(const iterator &rh) const { return fCur == rh.fCur; }
232+
};
233+
234+
explicit RNTupleAttrEntryIterable(RNTupleAttrSetReader &reader, std::optional<RFilter> filter = {})
235+
: fReader(&reader), fFilter(filter)
236+
{
237+
}
238+
239+
RIterator begin() { return RIterator{fReader->fEntryRanges.begin(), fReader->fEntryRanges.end(), fFilter}; }
240+
RIterator end() { return RIterator{fReader->fEntryRanges.end(), fReader->fEntryRanges.end(), fFilter}; }
151241
};
152242

153243
} // namespace Experimental

tree/ntuple/src/RNTupleAttrReading.cxx

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,104 @@ std::unique_ptr<ROOT::REntry> ROOT::Experimental::RNTupleAttrSetReader::CreateEn
102102
{
103103
return fUserModel->CreateEntry();
104104
}
105+
106+
// Entry ranges should be sorted with respect to GetStart by construction.
107+
bool ROOT::Experimental::RNTupleAttrSetReader::EntryRangesAreSorted(const decltype(fEntryRanges) &ranges)
108+
{
109+
ROOT::NTupleSize_t prevStart = 0;
110+
for (const auto &[range, _] : ranges) {
111+
if (range.GetStart() < prevStart)
112+
return false;
113+
prevStart = range.GetStart();
114+
}
115+
return true;
116+
};
117+
118+
/// Builds an iterable over the attribute entries whose range fully contains `[startEntry, endEntry)`.
/// If `endEntry <= startEntry`, a warning is logged and a zero-length filter range is used instead.
ROOT::Experimental::RNTupleAttrEntryIterable
ROOT::Experimental::RNTupleAttrSetReader::GetAttributesContainingRange(NTupleSize_t startEntry, NTupleSize_t endEntry)
{
   const bool isEmptyRange = endEntry <= startEntry;
   if (isEmptyRange) {
      R__LOG_WARNING(ROOT::Internal::NTupleLog())
         << "empty range given when getting attributes from Attribute Set '" << GetDescriptor().GetName()
         << "' (range given: [" << startEntry << ", " << endEntry << ")).";
   }

   // A zero-length filter range makes sure we find 0 entries.
   const RNTupleAttrRange filterRange = isEmptyRange ? RNTupleAttrRange::FromStartLength(startEntry, 0)
                                                     : RNTupleAttrRange::FromStartEnd(startEntry, endEntry);

   // fIsContained == false: select the attributes whose range contains the filter range.
   return RNTupleAttrEntryIterable{*this, RNTupleAttrEntryIterable::RFilter{filterRange, false}};
}
134+
135+
/// Builds an iterable over the attribute entries whose range is fully contained in `[startEntry, endEntry)`.
/// If `endEntry <= startEntry`, a warning is logged and a zero-length filter range is used instead.
ROOT::Experimental::RNTupleAttrEntryIterable
ROOT::Experimental::RNTupleAttrSetReader::GetAttributesInRange(NTupleSize_t startEntry, NTupleSize_t endEntry)
{
   const bool isEmptyRange = endEntry <= startEntry;
   if (isEmptyRange) {
      R__LOG_WARNING(ROOT::Internal::NTupleLog())
         << "empty range given when getting attributes from Attribute Set '" << GetDescriptor().GetName()
         << "' (range given: [" << startEntry << ", " << endEntry << ")).";
   }

   // A zero-length filter range makes sure we find 0 entries.
   const RNTupleAttrRange filterRange = isEmptyRange ? RNTupleAttrRange::FromStartLength(startEntry, 0)
                                                     : RNTupleAttrRange::FromStartEnd(startEntry, endEntry);

   // fIsContained == true: select the attributes whose range lies inside the filter range.
   return RNTupleAttrEntryIterable{*this, RNTupleAttrEntryIterable::RFilter{filterRange, true}};
}
151+
152+
/// Builds an iterable over the attribute entries whose range contains the single index `entryIndex`.
ROOT::Experimental::RNTupleAttrEntryIterable
ROOT::Experimental::RNTupleAttrSetReader::GetAttributes(NTupleSize_t entryIndex)
{
   // A single index is expressed as the one-entry range [entryIndex, entryIndex + 1).
   const auto singleEntryRange = RNTupleAttrRange::FromStartEnd(entryIndex, entryIndex + 1);
   // fIsContained == false: select the attributes whose range contains that one-entry range.
   return RNTupleAttrEntryIterable{*this, RNTupleAttrEntryIterable::RFilter{singleEntryRange, false}};
}
158+
159+
/// Builds an unfiltered iterable over this attribute set: no filter is passed, so every entry is visited.
ROOT::Experimental::RNTupleAttrEntryIterable ROOT::Experimental::RNTupleAttrSetReader::GetAttributes()
{
   return RNTupleAttrEntryIterable(*this);
}
163+
164+
//
165+
// RNTupleAttrEntryIterable
166+
//
167+
/// Compares `range` against the filter range. A filter must be set (asserted).
/// With `fIsContained` set, returns whether `range` lies inside the filter range; otherwise returns
/// whether `range` encloses the filter range.
bool ROOT::Experimental::RNTupleAttrEntryIterable::RIterator::FullyContained(RNTupleAttrRange range) const
{
   assert(fFilter);
   const RNTupleAttrRange &filterRange = fFilter->fRange;
   const RNTupleAttrRange &candidate = range;

   // Decide which of the two ranges has to enclose the other, then run a single containment check.
   const bool filterIsOuter = fFilter->fIsContained;
   const RNTupleAttrRange &outer = filterIsOuter ? filterRange : candidate;
   const RNTupleAttrRange &inner = filterIsOuter ? candidate : filterRange;

   return outer.GetStart() <= inner.GetStart() && inner.GetEnd() <= outer.GetEnd();
}
176+
177+
/// Starting at the current position, finds the first entry accepted by the filter.
/// Returns the current position unchanged when there is no filter, and `fEnd` when no entry qualifies.
ROOT::Experimental::RNTupleAttrEntryIterable::RIterator::Iter_t
ROOT::Experimental::RNTupleAttrEntryIterable::RIterator::SkipFiltered() const
{
   // Without a filter every entry is valid, so there is nothing to skip.
   if (!fFilter.has_value())
      return fCur;

   // TODO: consider using binary search, since fEntryRanges is sorted
   // (maybe it should be done only if the size of the list is bigger than a threshold).
   Iter_t candidate = fCur;
   while (candidate != fEnd) {
      const auto &firstLast = candidate->first.GetFirstLast();
      // A nullopt here means a zero-length entry: those are always skipped when a filter is set
      // (only the "catch-all" GetAttributes(), which has no filter, visits them).
      if (firstLast) {
         const auto &[first, last] = *firstLast;
         if (first >= fFilter->fRange.GetEnd()) {
            // Since fEntryRanges is sorted, no later entry can match: the iteration is over.
            // TODO: tweak fEnd to directly pass the last entry?
            break;
         }

         if (FullyContained(RNTupleAttrRange::FromStartEnd(first, last + 1)))
            return candidate;
      }
      ++candidate;
   }
   return fEnd;
}

tree/ntuple/test/ntuple_attributes.cxx

Lines changed: 101 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,61 @@ TEST(RNTupleAttributes, BasicReadingWriting)
102102
EXPECT_EQ(ntuple, nullptr);
103103
}
104104

105+
/// Reading
105106
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
106107
EXPECT_EQ(reader->GetDescriptor().GetNAttributeSets(), 1);
107108
for (const auto &attrSetIt : reader->GetDescriptor().GetAttrSetIterable()) {
108109
EXPECT_EQ(attrSetIt.GetName(), "AttrSet1");
109110
}
111+
112+
auto attrSetReader = reader->OpenAttributeSet("AttrSet1");
113+
EXPECT_EQ(attrSetReader->GetNEntries(), 1);
114+
auto pAttr = attrSetReader->GetModel().GetDefaultEntry().GetPtr<std::string>("attr");
115+
{
116+
int nAttrs = 0;
117+
// iterate all attributes
118+
for (auto idx : attrSetReader->GetAttributes()) {
119+
attrSetReader->LoadEntry(idx);
120+
EXPECT_EQ(*pAttr, "My Attribute");
121+
nAttrs += 1;
122+
}
123+
EXPECT_EQ(nAttrs, 1);
124+
}
125+
{
126+
int nAttrs = 0;
127+
// attributes containing entry 99
128+
for (auto idx : attrSetReader->GetAttributes(99)) {
129+
attrSetReader->LoadEntry(idx);
130+
EXPECT_EQ(*pAttr, "My Attribute");
131+
nAttrs += 1;
132+
}
133+
EXPECT_EQ(nAttrs, 1);
134+
}
135+
{
136+
// attributes containing entry 100 (no entry)
137+
auto iter = attrSetReader->GetAttributes(100);
138+
EXPECT_EQ(iter.begin(), iter.end());
139+
}
140+
{
141+
// attributes contained in entry range 50-200 (no entry)
142+
auto iter = attrSetReader->GetAttributesInRange(50, 200);
143+
EXPECT_EQ(iter.begin(), iter.end());
144+
}
145+
{
146+
int nAttrs = 0;
147+
// attributes contained in entry range 0-1000
148+
for (auto idx : attrSetReader->GetAttributesInRange(0, 1000)) {
149+
attrSetReader->LoadEntry(idx);
150+
EXPECT_EQ(*pAttr, "My Attribute");
151+
nAttrs += 1;
152+
}
153+
EXPECT_EQ(nAttrs, 1);
154+
}
155+
{
156+
// attributes containing entry range 200-300 (no entry)
157+
auto iter = attrSetReader->GetAttributesContainingRange(200, 300);
158+
EXPECT_EQ(iter.begin(), iter.end());
159+
}
110160
}
111161

112162
TEST(RNTupleAttributes, BasicWritingWithExplicitEntry)
@@ -201,11 +251,11 @@ TEST(RNTupleAttributes, MultipleSets)
201251

202252
auto attrModel1 = RNTupleModel::Create();
203253
auto pInt1 = attrModel1->MakeField<int>("int");
204-
auto attrSet1 = writer->CreateAttributeSet(attrModel1->Clone(), "MyAttrSet1");
254+
auto attrSet1 = writer->CreateAttributeSet(std::move(attrModel1), "MyAttrSet1");
205255

206256
auto attrModel2 = RNTupleModel::Create();
207257
auto pString2 = attrModel2->MakeField<std::string>("string");
208-
auto attrSet2 = writer->CreateAttributeSet(attrModel2->Clone(), "MyAttrSet2");
258+
auto attrSet2 = writer->CreateAttributeSet(std::move(attrModel2), "MyAttrSet2");
209259

210260
auto attrRange2 = attrSet2->BeginRange();
211261
for (int i = 0; i < 100; ++i) {
@@ -231,6 +281,55 @@ TEST(RNTupleAttributes, MultipleSets)
231281
EXPECT_EQ(attrSetReader1->GetNEntries(), 100);
232282
auto attrSetReader2 = reader->OpenAttributeSet("MyAttrSet2");
233283
EXPECT_EQ(attrSetReader2->GetNEntries(), 1);
284+
285+
auto attrEntry1 = attrSetReader1->CreateEntry();
286+
auto pAttrInt = attrEntry1->GetPtr<int>("int");
287+
auto attrEntry2 = attrSetReader2->CreateEntry();
288+
auto pAttrString = attrEntry2->GetPtr<std::string>("string");
289+
{
290+
int nAttrs = 0;
291+
for (auto idx : attrSetReader1->GetAttributesInRange(0, 1000)) {
292+
auto range = attrSetReader1->LoadEntry(idx, *attrEntry1);
293+
EXPECT_EQ(*pAttrInt, idx);
294+
EXPECT_EQ(range.GetStart(), idx);
295+
EXPECT_EQ(range.GetLength(), 1);
296+
nAttrs += 1;
297+
}
298+
EXPECT_EQ(nAttrs, 100);
299+
}
300+
{
301+
int nAttrs = 0;
302+
for (auto idx : attrSetReader1->GetAttributes(42)) {
303+
auto range = attrSetReader1->LoadEntry(idx, *attrEntry1);
304+
EXPECT_EQ(*pAttrInt, 42);
305+
EXPECT_EQ(range.GetStart(), 42);
306+
EXPECT_EQ(range.GetLength(), 1);
307+
nAttrs += 1;
308+
}
309+
EXPECT_EQ(nAttrs, 1);
310+
}
311+
{
312+
int nAttrs = 0;
313+
for (auto idx : attrSetReader2->GetAttributes()) {
314+
auto range = attrSetReader2->LoadEntry(idx, *attrEntry2);
315+
EXPECT_EQ(*pAttrString, "Run 1");
316+
EXPECT_EQ(range.GetStart(), 0);
317+
EXPECT_EQ(range.GetLength(), 100);
318+
nAttrs += 1;
319+
}
320+
EXPECT_EQ(nAttrs, 1);
321+
}
322+
{
323+
for (auto idx : attrSetReader2->GetAttributes()) {
324+
// Reading into the wrong entry
325+
try {
326+
attrSetReader2->LoadEntry(idx, *attrEntry1);
327+
FAIL() << "reading into an unrelated entry should fail";
328+
} catch (const ROOT::RException &ex) {
329+
EXPECT_THAT(ex.what(), testing::HasSubstr("mismatch between entry and model"));
330+
}
331+
}
332+
}
234333
}
235334

236335
TEST(RNTupleAttributes, AttributeInvalidModel)

0 commit comments

Comments
 (0)