Skip to content

Commit b651605

Browse files
committed
[ntuple] add RPageSource::LoadPageListImpl()
A new callback that loads a particular page list from the backend storage. As a result, the logic to load the page lists and to populate the cluster group descriptor has been moved from the concrete page source implementations to the RPageSource base class. Moving on, this will allow to decouple loading the page lists from attaching the storage.
1 parent 3088d07 commit b651605

9 files changed

Lines changed: 49 additions & 47 deletions

tree/ntuple/inc/ROOT/RPageStorage.hxx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ public:
625625
fDescriptor.IncGeneration();
626626
fLock.unlock();
627627
}
628+
ROOT::RNTupleDescriptor &operator*() const { return fDescriptor; }
628629
ROOT::RNTupleDescriptor *operator->() const { return &fDescriptor; }
629630
void MoveIn(ROOT::RNTupleDescriptor desc) { fDescriptor = std::move(desc); }
630631
};
@@ -747,11 +748,14 @@ protected:
747748
/// Fills fStructureBuffer with the compressed header and footer
748749
virtual void LoadStructureImpl() = 0;
749750
/// `LoadStructureImpl()` has been called before `AttachImpl()` is called
750-
virtual ROOT::RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode) = 0;
751+
virtual ROOT::RNTupleDescriptor AttachImpl() = 0;
751752
/// Returns a new, unattached page source for the same data set
752753
virtual std::unique_ptr<RPageSource> CloneImpl() const = 0;
753754
// Only called if a task scheduler is set. No-op be default.
754755
virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster);
756+
// Loads a page list into the provided buffer. The buffer parameter needs to point to a memory region large enough
757+
// to hold the compressed page list.
758+
virtual void LoadPageListImpl(const RNTupleLocator &locator, void *buffer) = 0;
755759
// Returns a sealed page from storage without adding it to the page pool. The sealed pages buffer and buffer size
756760
// is already initialized.
757761
virtual void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) = 0;

tree/ntuple/inc/ROOT/RPageStorageDaos.hxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,12 @@ private:
158158
RDaosNTupleAnchor fAnchor;
159159
ROOT::Internal::RNTupleDescriptorBuilder fDescriptorBuilder;
160160

161+
void LoadPageListImpl(const RNTupleLocator &locator, void *buffer) final;
161162
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;
162163

163164
protected:
164165
void LoadStructureImpl() final;
165-
ROOT::RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode) final;
166+
ROOT::RNTupleDescriptor AttachImpl() final;
166167
/// The cloned page source creates a new connection to the pool/container.
167168
std::unique_ptr<RPageSource> CloneImpl() const final;
168169

tree/ntuple/inc/ROOT/RPageStorageFile.hxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,11 @@ private:
160160

161161
protected:
162162
void LoadStructureImpl() final;
163-
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
163+
ROOT::RNTupleDescriptor AttachImpl() final;
164164
/// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
165165
std::unique_ptr<RPageSource> CloneImpl() const final;
166166

167+
void LoadPageListImpl(const RNTupleLocator &locator, void *buffer) final;
167168
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;
168169

169170
public:

tree/ntuple/src/RPageStorage.cxx

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,27 @@ void ROOT::Internal::RPageSource::LoadStructure()
226226

227227
void ROOT::Internal::RPageSource::Attach(RNTupleSerializer::EDescriptorDeserializeMode mode)
228228
{
229+
if (fIsAttached)
230+
return;
231+
229232
LoadStructure();
230-
if (!fIsAttached) {
231-
GetExclDescriptorGuard().MoveIn(AttachImpl(mode));
232-
fStructureBuffer.Reset();
233+
234+
auto descGuard = GetExclDescriptorGuard();
235+
descGuard.MoveIn(AttachImpl());
236+
fStructureBuffer.Reset();
237+
238+
std::vector<unsigned char> buffer;
239+
for (const auto &cgDesc : descGuard->GetClusterGroupIterable()) {
240+
buffer.resize(cgDesc.GetPageListLength() + cgDesc.GetPageListLocator().GetNBytesOnStorage());
241+
auto zipBuffer = buffer.data() + cgDesc.GetPageListLength();
242+
243+
LoadPageListImpl(cgDesc.GetPageListLocator(), zipBuffer);
244+
RNTupleDecompressor::Unzip(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(),
245+
cgDesc.GetPageListLength(), buffer.data());
246+
RNTupleSerializer::DeserializePageList(buffer.data(), cgDesc.GetPageListLength(), cgDesc.GetId(), *descGuard,
247+
mode);
233248
}
249+
234250
fIsAttached = true;
235251
}
236252

tree/ntuple/src/RPageStorageDaos.cxx

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -473,8 +473,7 @@ void ROOT::Experimental::Internal::RPageSourceDaos::LoadStructureImpl()
473473
}
474474
}
475475

476-
ROOT::RNTupleDescriptor
477-
ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode)
476+
ROOT::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl()
478477
{
479478
auto unzipBuf = reinterpret_cast<unsigned char *>(fStructureBuffer.fPtrFooter) + fAnchor.fNBytesFooter;
480479

@@ -489,23 +488,14 @@ ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl(RNTupleSerializer::EDe
489488
throw ROOT::RException(R__FAIL("LocateNTuple: ntuple name '" + fNTupleName + "' unavailable in this container."));
490489
}
491490

492-
auto desc = fDescriptorBuilder.MoveDescriptor();
491+
return fDescriptorBuilder.MoveDescriptor();
492+
}
493493

494-
std::unique_ptr<unsigned char[]> buffer, zipBuffer;
494+
void ROOT::Experimental::Internal::RPageSourceDaos::LoadPageListImpl(const RNTupleLocator &locator, void *buffer)
495+
{
495496
daos_obj_id_t oidPageList{kOidLowPageList, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)};
496-
for (const auto &cgDesc : desc.GetClusterGroupIterable()) {
497-
buffer = MakeUninitArray<unsigned char>(cgDesc.GetPageListLength());
498-
zipBuffer = MakeUninitArray<unsigned char>(cgDesc.GetPageListLocator().GetNBytesOnStorage());
499-
fDaosContainer->ReadSingleAkey(
500-
zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(), oidPageList, kDistributionKeyDefault,
501-
cgDesc.GetPageListLocator().GetPosition<RNTupleLocatorObject64>().GetLocation(), kCidMetadata);
502-
RNTupleDecompressor::Unzip(zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(),
503-
cgDesc.GetPageListLength(), buffer.get());
504-
505-
RNTupleSerializer::DeserializePageList(buffer.get(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode);
506-
}
507-
508-
return desc;
497+
fDaosContainer->ReadSingleAkey(buffer, locator.GetNBytesOnStorage(), oidPageList, kDistributionKeyDefault,
498+
locator.GetPosition<RNTupleLocatorObject64>().GetLocation(), kCidMetadata);
509499
}
510500

511501
std::string ROOT::Experimental::Internal::RPageSourceDaos::GetObjectClass() const

tree/ntuple/src/RPageStorageFile.cxx

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ void ROOT::Internal::RPageSourceFile::LoadStructureImpl()
477477
}
478478
}
479479

480-
ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode)
480+
ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl()
481481
{
482482
auto unzipBuf = reinterpret_cast<unsigned char *>(fStructureBuffer.fPtrFooter) + fAnchor->GetNBytesFooter();
483483

@@ -489,33 +489,23 @@ ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSeria
489489
unzipBuf);
490490
RNTupleSerializer::DeserializeFooter(unzipBuf, fAnchor->GetLenFooter(), fDescriptorBuilder);
491491

492-
auto desc = fDescriptorBuilder.MoveDescriptor();
493-
494492
// fNTupleName is empty if and only if we created this source via CreateFromAnchor. If that's the case, this is the
495493
// earliest we can set the name.
496494
if (fNTupleName.empty())
497-
fNTupleName = desc.GetName();
498-
499-
std::vector<unsigned char> buffer;
500-
for (const auto &cgDesc : desc.GetClusterGroupIterable()) {
501-
buffer.resize(std::max<size_t>(buffer.size(),
502-
cgDesc.GetPageListLength() + cgDesc.GetPageListLocator().GetNBytesOnStorage()));
503-
auto *zipBuffer = buffer.data() + cgDesc.GetPageListLength();
504-
fReader.ReadBuffer(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(),
505-
cgDesc.GetPageListLocator().GetPosition<std::uint64_t>());
506-
RNTupleDecompressor::Unzip(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(),
507-
cgDesc.GetPageListLength(), buffer.data());
508-
509-
RNTupleSerializer::DeserializePageList(buffer.data(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode);
510-
}
495+
fNTupleName = fDescriptorBuilder.GetDescriptor().GetName();
511496

512497
// For the page reads, we rely on the I/O scheduler to define the read requests
513498
fFile->SetBuffering(false);
514499

515500
// Set file size once after buffering is turned off
516501
fFileSize = fFile->GetSize();
517502

518-
return desc;
503+
return fDescriptorBuilder.MoveDescriptor();
504+
}
505+
506+
void ROOT::Internal::RPageSourceFile::LoadPageListImpl(const RNTupleLocator &locator, void *buffer)
507+
{
508+
fReader.ReadBuffer(buffer, locator.GetNBytesOnStorage(), locator.GetPosition<std::uint64_t>());
519509
}
520510

521511
void ROOT::Internal::RPageSourceFile::LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage)

tree/ntuple/test/ntuple_cluster.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ namespace {
3939
class RPageSourceMock : public RPageSource {
4040
protected:
4141
void LoadStructureImpl() final {}
42-
RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); }
42+
RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); }
4343
std::unique_ptr<RPageSource> CloneImpl() const final { return nullptr; }
44+
void LoadPageListImpl(const ROOT::RNTupleLocator &, void *) final {}
4445
void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {}
4546
void LoadStreamerInfo() final {}
4647
std::unique_ptr<ROOT::Internal::RPageSource>

tree/ntuple/test/ntuple_endian.cxx

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,9 @@ class RPageSourceMock : public RPageSource {
8989

9090
protected:
9191
void LoadStructureImpl() final {}
92-
RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode) final
93-
{
94-
return RNTupleDescriptor();
95-
}
92+
RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); }
9693
std::unique_ptr<RPageSource> CloneImpl() const final { return nullptr; }
94+
void LoadPageListImpl(const ROOT::RNTupleLocator &, void *) final {}
9795
void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {}
9896
void LoadStreamerInfo() final {}
9997

tree/ntuple/test/ntuple_pages.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ using ROOT::Internal::RPageRef;
99
class RPageSourceMock : public RPageSource {
1010
protected:
1111
void LoadStructureImpl() final {}
12-
RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); }
12+
RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); }
1313
std::unique_ptr<RPageSource> CloneImpl() const final { return nullptr; }
14+
void LoadPageListImpl(const ROOT::RNTupleLocator &, void *) final {}
1415
void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {}
1516
void LoadStreamerInfo() final {}
1617
std::unique_ptr<ROOT::Internal::RPageSource>

0 commit comments

Comments
 (0)