Skip to content

Commit 28e4497

Browse files
committed
[ntuple] add GetAttributes methods to RNTupleAttrSetReader
1 parent 3f5e682 commit 28e4497

File tree

3 files changed

+349
-5
lines changed

3 files changed

+349
-5
lines changed

tree/ntuple/inc/ROOT/RNTupleAttrReading.hxx

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ class RNTupleModel;
2222

2323
namespace Experimental {
2424

25+
class RNTupleAttrEntryIterable;
26+
2527
// clang-format off
2628
/**
2729
\class ROOT::Experimental::RNTupleAttrRange
@@ -110,6 +112,7 @@ for (auto idx : attrSet->GetAttributes(10)) {
110112
// clang-format on
111113
class RNTupleAttrSetReader final {
112114
friend class ROOT::RNTupleReader;
115+
friend class RNTupleAttrEntryIterable;
113116

114117
/// List containing pairs { entryRange, entryIndex }, used to quickly find out which entries in the Attribute
115118
/// RNTuple contain entries that overlap a given range. The list is sorted by range start, i.e.
@@ -120,8 +123,13 @@ class RNTupleAttrSetReader final {
120123
/// The reconstructed user model
121124
std::unique_ptr<ROOT::RNTupleModel> fUserModel;
122125

126+
static bool EntryRangesAreSorted(const decltype(fEntryRanges) &ranges);
127+
123128
explicit RNTupleAttrSetReader(std::unique_ptr<RNTupleReader> reader);
124129

130+
std::vector<ROOT::NTupleSize_t>
131+
GetAttributesRangeInternal(NTupleSize_t startEntry, NTupleSize_t endEntry, bool rangeIsContained);
132+
125133
public:
126134
RNTupleAttrSetReader(const RNTupleAttrSetReader &) = delete;
127135
RNTupleAttrSetReader &operator=(const RNTupleAttrSetReader &) = delete;
@@ -147,6 +155,92 @@ public:
147155

148156
/// Returns the number of all attribute entries in this attribute set.
149157
std::size_t GetNEntries() const { return fEntryRanges.size(); }
158+
159+
/// Returns all the attributes in this Set. The returned attributes are sorted by entry range start.
160+
RNTupleAttrEntryIterable GetAttributes();
161+
/// Returns all the attributes whose range contains index `entryIndex`.
162+
RNTupleAttrEntryIterable GetAttributes(NTupleSize_t entryIndex);
163+
/// Returns all the attributes whose range fully contains `[startEntry, endEntry)`
164+
RNTupleAttrEntryIterable GetAttributesContainingRange(NTupleSize_t startEntry, NTupleSize_t endEntry);
165+
/// Returns all the attributes whose range is fully contained in `[startEntry, endEntry)`
166+
RNTupleAttrEntryIterable GetAttributesInRange(NTupleSize_t startEntry, NTupleSize_t endEntry);
167+
};
168+
169+
// clang-format off
170+
/**
171+
\class ROOT::Experimental::RNTupleAttrEntryIterable
172+
\ingroup NTuple
173+
\brief Iterable class used to loop over attribute entries.
174+
175+
This class allows to perform range-for iteration on some set of attributes, typically returned by the
176+
RNTupleAttrSetReader::GetAttributes family of methods.
177+
178+
See the documentation of RNTupleAttrSetReader for example usage.
179+
*/
180+
// clang-format on
181+
class RNTupleAttrEntryIterable final {
182+
public:
183+
struct RFilter {
184+
RNTupleAttrRange fRange;
185+
bool fIsContained;
186+
};
187+
188+
private:
189+
RNTupleAttrSetReader &fReader;
190+
std::optional<RFilter> fFilter;
191+
192+
public:
193+
class RIterator final {
194+
private:
195+
using Iter_t = decltype(std::declval<RNTupleAttrSetReader>().fEntryRanges.begin());
196+
Iter_t fCur, fEnd;
197+
std::optional<RFilter> fFilter;
198+
199+
Iter_t Next() const;
200+
bool FullyContained(RNTupleAttrRange range) const;
201+
202+
public:
203+
using iterator_category = std::forward_iterator_tag;
204+
using iterator = RIterator;
205+
using value_type = NTupleSize_t;
206+
using difference_type = std::ptrdiff_t;
207+
using pointer = const value_type *;
208+
using reference = const value_type &;
209+
210+
RIterator(Iter_t iter, Iter_t end, std::optional<RFilter> filter) : fCur(iter), fEnd(end), fFilter(filter)
211+
{
212+
if (fFilter) {
213+
if (fFilter->fRange.GetLength() == 0)
214+
fCur = end;
215+
else
216+
fCur = Next();
217+
}
218+
}
219+
iterator operator++()
220+
{
221+
++fCur;
222+
fCur = Next();
223+
return *this;
224+
}
225+
iterator operator++(int)
226+
{
227+
iterator it = *this;
228+
++fCur;
229+
fCur = Next();
230+
return it;
231+
}
232+
reference operator*() { return fCur->second; }
233+
bool operator!=(const iterator &rh) const { return !operator==(rh); }
234+
bool operator==(const iterator &rh) const { return fCur == rh.fCur; }
235+
};
236+
237+
explicit RNTupleAttrEntryIterable(RNTupleAttrSetReader &reader, std::optional<RFilter> filter = {})
238+
: fReader(reader), fFilter(filter)
239+
{
240+
}
241+
242+
RIterator begin() { return RIterator{fReader.fEntryRanges.begin(), fReader.fEntryRanges.end(), fFilter}; }
243+
RIterator end() { return RIterator{fReader.fEntryRanges.end(), fReader.fEntryRanges.end(), fFilter}; }
150244
};
151245

152246
} // namespace Experimental

tree/ntuple/src/RNTupleAttrReading.cxx

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,145 @@ std::unique_ptr<ROOT::REntry> ROOT::Experimental::RNTupleAttrSetReader::CreateEn
8383
return fUserModel->CreateEntry();
8484
}
8585

86+
// Entry ranges should be sorted with respect to GetStart by construction.
87+
bool ROOT::Experimental::RNTupleAttrSetReader::EntryRangesAreSorted(const decltype(fEntryRanges) &ranges)
88+
{
89+
ROOT::NTupleSize_t prevStart = 0;
90+
for (const auto &[range, _] : ranges) {
91+
if (range.GetStart() < prevStart)
92+
return false;
93+
prevStart = range.GetStart();
94+
}
95+
return true;
96+
};
97+
98+
std::vector<ROOT::NTupleSize_t>
99+
ROOT::Experimental::RNTupleAttrSetReader::GetAttributesRangeInternal(NTupleSize_t startEntry, NTupleSize_t endEntry,
100+
bool rangeIsContained)
101+
{
102+
std::vector<ROOT::NTupleSize_t> result;
103+
104+
if (endEntry < startEntry) {
105+
R__LOG_WARNING(ROOT::Internal::NTupleLog())
106+
<< "end < start when getting attributes from Attribute Set '" << GetDescriptor().GetName()
107+
<< "' (range given: [" << startEntry << ", " << endEntry << "].";
108+
return result;
109+
}
110+
111+
assert(EntryRangesAreSorted(fEntryRanges));
112+
113+
const auto FullyContained = [rangeIsContained](auto startInner, auto endInner, auto startOuter, auto endOuter) {
114+
if (rangeIsContained) {
115+
std::swap(startOuter, startInner);
116+
std::swap(endOuter, endInner);
117+
}
118+
return startOuter <= startInner && endInner <= endOuter;
119+
};
120+
121+
// TODO: consider using binary search, since fEntryRanges is sorted
122+
// (maybe it should be done only if the size of the list is bigger than a threshold).
123+
for (const auto &[range, index] : fEntryRanges) {
124+
const auto &firstLast = range.GetFirstLast();
125+
if (!firstLast)
126+
continue;
127+
128+
const auto &[first, last] = *firstLast;
129+
if (first >= endEntry)
130+
break; // We can break here because fEntryRanges is sorted.
131+
132+
if (FullyContained(startEntry, endEntry, first, last + 1)) {
133+
result.push_back(index);
134+
}
135+
}
136+
137+
return result;
138+
}
139+
140+
/// Returns an iterable over the attribute entries whose range fully covers `[startEntry, endEntry)`.
/// An empty or inverted input range produces a warning and an iterable that yields no entries.
ROOT::Experimental::RNTupleAttrEntryIterable
ROOT::Experimental::RNTupleAttrSetReader::GetAttributesContainingRange(NTupleSize_t startEntry, NTupleSize_t endEntry)
{
   using RFilter = RNTupleAttrEntryIterable::RFilter;

   if (endEntry <= startEntry) {
      R__LOG_WARNING(ROOT::Internal::NTupleLog())
         << "empty range given when getting attributes from Attribute Set '" << GetDescriptor().GetName()
         << "' (range given: [" << startEntry << ", " << endEntry << ")).";
      // A zero-length filter range makes the iteration come up empty.
      return RNTupleAttrEntryIterable{*this, RFilter{RNTupleAttrRange::FromStartLength(startEntry, 0), false}};
   }

   return RNTupleAttrEntryIterable{*this, RFilter{RNTupleAttrRange::FromStartEnd(startEntry, endEntry), false}};
}
156+
157+
/// Returns an iterable over the attribute entries whose range lies entirely within `[startEntry, endEntry)`.
/// An empty or inverted input range produces a warning and an iterable that yields no entries.
ROOT::Experimental::RNTupleAttrEntryIterable
ROOT::Experimental::RNTupleAttrSetReader::GetAttributesInRange(NTupleSize_t startEntry, NTupleSize_t endEntry)
{
   using RFilter = RNTupleAttrEntryIterable::RFilter;

   if (endEntry <= startEntry) {
      R__LOG_WARNING(ROOT::Internal::NTupleLog())
         << "empty range given when getting attributes from Attribute Set '" << GetDescriptor().GetName()
         << "' (range given: [" << startEntry << ", " << endEntry << ")).";
      // A zero-length filter range makes the iteration come up empty.
      return RNTupleAttrEntryIterable{*this, RFilter{RNTupleAttrRange::FromStartLength(startEntry, 0), true}};
   }

   return RNTupleAttrEntryIterable{*this, RFilter{RNTupleAttrRange::FromStartEnd(startEntry, endEntry), true}};
}
173+
174+
/// Returns an iterable over the attribute entries whose range contains the single entry `entryIndex`,
/// i.e. whose range fully covers `[entryIndex, entryIndex + 1)`.
ROOT::Experimental::RNTupleAttrEntryIterable
ROOT::Experimental::RNTupleAttrSetReader::GetAttributes(NTupleSize_t entryIndex)
{
   const auto singleEntryRange = RNTupleAttrRange::FromStartEnd(entryIndex, entryIndex + 1);
   return RNTupleAttrEntryIterable{*this, RNTupleAttrEntryIterable::RFilter{singleEntryRange, false}};
}
180+
181+
/// Returns an iterable over every attribute entry of this set; no filter is applied.
ROOT::Experimental::RNTupleAttrEntryIterable ROOT::Experimental::RNTupleAttrSetReader::GetAttributes()
{
   RNTupleAttrEntryIterable allEntries{*this};
   return allEntries;
}
185+
186+
//
187+
// RNTupleAttrEntryIterable
188+
//
189+
/// Applies the filter's containment criterion to `range`. A filter must be set.
/// With `fIsContained` set, `range` has to lie within the filter range; otherwise `range` has to
/// cover the whole filter range.
bool ROOT::Experimental::RNTupleAttrEntryIterable::RIterator::FullyContained(RNTupleAttrRange range) const
{
   assert(fFilter);
   const auto &filterRange = fFilter->fRange;
   // Decide which of the two ranges plays the enclosing role, then do a single containment check.
   const auto &outer = fFilter->fIsContained ? filterRange : range;
   const auto &inner = fFilter->fIsContained ? range : filterRange;
   return outer.GetStart() <= inner.GetStart() && inner.GetEnd() <= outer.GetEnd();
}
198+
199+
/// Finds the first position in [fCur, fEnd) whose entry passes the filter and returns it;
/// returns fEnd when no such position exists. The iterator itself is not modified.
ROOT::Experimental::RNTupleAttrEntryIterable::RIterator::Iter_t
ROOT::Experimental::RNTupleAttrEntryIterable::RIterator::Next() const
{
   // Without a filter every entry is valid, so the current position is already the answer
   // (this is fEnd itself when the range is exhausted).
   if (!fFilter)
      return fCur;

   // TODO: consider using binary search, since fEntryRanges is sorted
   // (maybe it should be done only if the size of the list is bigger than a threshold).
   auto candidate = fCur;
   while (candidate != fEnd) {
      const auto &bounds = candidate->first.GetFirstLast();
      // A missing first/last pair denotes a zero-length entry. Those are only ever reported by the
      // unfiltered "catch-all" iteration, so skip them here.
      if (bounds) {
         const auto &[firstEntry, lastEntry] = *bounds;

         // Since fEntryRanges is sorted, an entry starting at or past the filter's end means that
         // nothing further can match.
         // TODO: tweak fEnd to directly pass the last entry?
         if (firstEntry >= fFilter->fRange.GetEnd())
            return fEnd;

         if (FullyContained(RNTupleAttrRange::FromStartEnd(firstEntry, lastEntry + 1)))
            return candidate;
      }
      ++candidate;
   }
   return fEnd;
}

0 commit comments

Comments
 (0)