Skip to content

Commit bbb4d02

Browse files
committed
[ntuple] add RPageSource::LoadPageListImpl()
A new callback that loads and decompressed a particular page list from the backend storage. As a result, the logic to load the page lists and to populate the cluster group descriptor has been moved from the concrete page source implementations to the RPageSource base class. Moving on, this will allow to decouple loading the page lists from attaching the storage.
1 parent 5146032 commit bbb4d02

9 files changed

Lines changed: 56 additions & 46 deletions

tree/ntuple/inc/ROOT/RPageStorage.hxx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ public:
625625
fDescriptor.IncGeneration();
626626
fLock.unlock();
627627
}
628+
ROOT::RNTupleDescriptor &operator*() const { return fDescriptor; }
628629
ROOT::RNTupleDescriptor *operator->() const { return &fDescriptor; }
629630
void MoveIn(ROOT::RNTupleDescriptor desc) { fDescriptor = std::move(desc); }
630631
};
@@ -747,11 +748,15 @@ protected:
747748
/// Fills fStructureBuffer with the compressed header and footer
748749
virtual void LoadStructureImpl() = 0;
749750
/// `LoadStructureImpl()` has been called before `AttachImpl()` is called
750-
virtual ROOT::RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode) = 0;
751+
virtual ROOT::RNTupleDescriptor AttachImpl() = 0;
751752
/// Returns a new, unattached page source for the same data set
752753
virtual std::unique_ptr<RPageSource> CloneImpl() const = 0;
753754
// Only called if a task scheduler is set. No-op be default.
754755
virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster);
756+
// Loads a page list into the provided buffer. The buffer parameter needs to point to a memory region large enough
757+
// to hold the compressed _and_ the uncompressed page list. The pointer returned points to the uncompressed page list
758+
// which will reside in the memory region pointed to by buffer.
759+
virtual void *LoadPageListImpl(const RNTupleLocator &locator, std::uint64_t length, unsigned char *buffer) = 0;
755760
// Returns a sealed page from storage without adding it to the page pool. The sealed pages buffer and buffer size
756761
// is already initialized.
757762
virtual void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) = 0;

tree/ntuple/inc/ROOT/RPageStorageDaos.hxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,12 @@ private:
158158
RDaosNTupleAnchor fAnchor;
159159
ROOT::Internal::RNTupleDescriptorBuilder fDescriptorBuilder;
160160

161+
void *LoadPageListImpl(const RNTupleLocator &locator, std::uint64_t length, unsigned char *buffer) final;
161162
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;
162163

163164
protected:
164165
void LoadStructureImpl() final;
165-
ROOT::RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode) final;
166+
ROOT::RNTupleDescriptor AttachImpl() final;
166167
/// The cloned page source creates a new connection to the pool/container.
167168
std::unique_ptr<RPageSource> CloneImpl() const final;
168169

tree/ntuple/inc/ROOT/RPageStorageFile.hxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,11 @@ private:
160160

161161
protected:
162162
void LoadStructureImpl() final;
163-
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
163+
ROOT::RNTupleDescriptor AttachImpl() final;
164164
/// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
165165
std::unique_ptr<RPageSource> CloneImpl() const final;
166166

167+
void *LoadPageListImpl(const RNTupleLocator &locator, std::uint64_t length, unsigned char *buffer) final;
167168
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;
168169

169170
public:

tree/ntuple/src/RPageStorage.cxx

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,23 @@ void ROOT::Internal::RPageSource::LoadStructure()
226226

227227
void ROOT::Internal::RPageSource::Attach(RNTupleSerializer::EDescriptorDeserializeMode mode)
228228
{
229+
if (fIsAttached)
230+
return;
231+
229232
LoadStructure();
230-
if (!fIsAttached) {
231-
GetExclDescriptorGuard().MoveIn(AttachImpl(mode));
232-
fStructureBuffer.Reset();
233+
234+
auto descGuard = GetExclDescriptorGuard();
235+
descGuard.MoveIn(AttachImpl());
236+
fStructureBuffer.Reset();
237+
238+
std::vector<unsigned char> buffer;
239+
for (const auto &cgDesc : descGuard->GetClusterGroupIterable()) {
240+
buffer.resize(cgDesc.GetPageListLength() + cgDesc.GetPageListLocator().GetNBytesOnStorage());
241+
242+
auto ptrPageList = LoadPageListImpl(cgDesc.GetPageListLocator(), cgDesc.GetPageListLength(), buffer.data());
243+
RNTupleSerializer::DeserializePageList(ptrPageList, cgDesc.GetPageListLength(), cgDesc.GetId(), *descGuard, mode);
233244
}
245+
234246
fIsAttached = true;
235247
}
236248

tree/ntuple/src/RPageStorageDaos.cxx

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ void ROOT::Experimental::Internal::RPageSourceDaos::LoadStructureImpl()
473473
}
474474

475475
ROOT::RNTupleDescriptor
476-
ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode)
476+
ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl()
477477
{
478478
auto unzipBuf = reinterpret_cast<unsigned char *>(fStructureBuffer.fPtrFooter) + fAnchor.fNBytesFooter;
479479

@@ -488,23 +488,19 @@ ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl(RNTupleSerializer::EDe
488488
throw ROOT::RException(R__FAIL("LocateNTuple: ntuple name '" + fNTupleName + "' unavailable in this container."));
489489
}
490490

491-
auto desc = fDescriptorBuilder.MoveDescriptor();
491+
return fDescriptorBuilder.MoveDescriptor();
492+
}
492493

493-
std::unique_ptr<unsigned char[]> buffer, zipBuffer;
494+
void *
495+
ROOT::Experimental::Internal::RPageSourceDaos::LoadPageListImpl(const RNTupleLocator &locator, std::uint64_t length,
496+
unsigned char *buffer)
497+
{
494498
daos_obj_id_t oidPageList{kOidLowPageList, static_cast<decltype(daos_obj_id_t::hi)>(fNTupleIndex)};
495-
for (const auto &cgDesc : desc.GetClusterGroupIterable()) {
496-
buffer = MakeUninitArray<unsigned char>(cgDesc.GetPageListLength());
497-
zipBuffer = MakeUninitArray<unsigned char>(cgDesc.GetPageListLocator().GetNBytesOnStorage());
498-
fDaosContainer->ReadSingleAkey(
499-
zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(), oidPageList, kDistributionKeyDefault,
500-
cgDesc.GetPageListLocator().GetPosition<RNTupleLocatorObject64>().GetLocation(), kCidMetadata);
501-
RNTupleDecompressor::Unzip(zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(),
502-
cgDesc.GetPageListLength(), buffer.get());
503-
504-
RNTupleSerializer::DeserializePageList(buffer.get(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode);
505-
}
506-
507-
return desc;
499+
auto *zipBuffer = buffer + length;
500+
fDaosContainer->ReadSingleAkey(zipBuffer, locator.GetNBytesOnStorage(), oidPageList, kDistributionKeyDefault,
501+
locator.GetPosition<RNTupleLocatorObject64>().GetLocation(), kCidMetadata);
502+
RNTupleDecompressor::Unzip(zipBuffer, locator.GetNBytesOnStorage(), length, buffer);
503+
return buffer;
508504
}
509505

510506
std::string ROOT::Experimental::Internal::RPageSourceDaos::GetObjectClass() const

tree/ntuple/src/RPageStorageFile.cxx

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ void ROOT::Internal::RPageSourceFile::LoadStructureImpl()
477477
}
478478
}
479479

480-
ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode)
480+
ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl()
481481
{
482482
auto unzipBuf = reinterpret_cast<unsigned char *>(fStructureBuffer.fPtrFooter) + fAnchor->GetNBytesFooter();
483483

@@ -489,33 +489,28 @@ ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSeria
489489
unzipBuf);
490490
RNTupleSerializer::DeserializeFooter(unzipBuf, fAnchor->GetLenFooter(), fDescriptorBuilder);
491491

492-
auto desc = fDescriptorBuilder.MoveDescriptor();
493-
494492
// fNTupleName is empty if and only if we created this source via CreateFromAnchor. If that's the case, this is the
495493
// earliest we can set the name.
496494
if (fNTupleName.empty())
497-
fNTupleName = desc.GetName();
498-
499-
std::vector<unsigned char> buffer;
500-
for (const auto &cgDesc : desc.GetClusterGroupIterable()) {
501-
buffer.resize(std::max<size_t>(buffer.size(),
502-
cgDesc.GetPageListLength() + cgDesc.GetPageListLocator().GetNBytesOnStorage()));
503-
auto *zipBuffer = buffer.data() + cgDesc.GetPageListLength();
504-
fReader.ReadBuffer(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(),
505-
cgDesc.GetPageListLocator().GetPosition<std::uint64_t>());
506-
RNTupleDecompressor::Unzip(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(),
507-
cgDesc.GetPageListLength(), buffer.data());
508-
509-
RNTupleSerializer::DeserializePageList(buffer.data(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode);
510-
}
495+
fNTupleName = fDescriptorBuilder.GetDescriptor().GetName();
511496

512497
// For the page reads, we rely on the I/O scheduler to define the read requests
513498
fFile->SetBuffering(false);
514499

515500
// Set file size once after buffering is turned off
516501
fFileSize = fFile->GetSize();
517502

518-
return desc;
503+
return fDescriptorBuilder.MoveDescriptor();
504+
}
505+
506+
void *
507+
ROOT::Internal::RPageSourceFile::LoadPageListImpl(const RNTupleLocator &locator, std::uint64_t length,
508+
unsigned char *buffer)
509+
{
510+
auto *zipBuffer = buffer + length;
511+
fReader.ReadBuffer(zipBuffer, locator.GetNBytesOnStorage(), locator.GetPosition<std::uint64_t>());
512+
RNTupleDecompressor::Unzip(zipBuffer, locator.GetNBytesOnStorage(), length, buffer);
513+
return buffer;
519514
}
520515

521516
void ROOT::Internal::RPageSourceFile::LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage)

tree/ntuple/test/ntuple_cluster.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ namespace {
3939
class RPageSourceMock : public RPageSource {
4040
protected:
4141
void LoadStructureImpl() final {}
42-
RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); }
42+
RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); }
4343
std::unique_ptr<RPageSource> CloneImpl() const final { return nullptr; }
44+
void *LoadPageListImpl(const ROOT::RNTupleLocator &, std::uint64_t, unsigned char *) final { return nullptr; }
4445
void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {}
4546
void LoadStreamerInfo() final {}
4647
std::unique_ptr<ROOT::Internal::RPageSource>

tree/ntuple/test/ntuple_endian.cxx

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,9 @@ class RPageSourceMock : public RPageSource {
8989

9090
protected:
9191
void LoadStructureImpl() final {}
92-
RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode) final
93-
{
94-
return RNTupleDescriptor();
95-
}
92+
RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); }
9693
std::unique_ptr<RPageSource> CloneImpl() const final { return nullptr; }
94+
void *LoadPageListImpl(const ROOT::RNTupleLocator &, std::uint64_t, unsigned char *) final { return nullptr; }
9795
void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {}
9896
void LoadStreamerInfo() final {}
9997

tree/ntuple/test/ntuple_pages.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ using ROOT::Internal::RPageRef;
99
class RPageSourceMock : public RPageSource {
1010
protected:
1111
void LoadStructureImpl() final {}
12-
RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); }
12+
RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); }
1313
std::unique_ptr<RPageSource> CloneImpl() const final { return nullptr; }
14+
void *LoadPageListImpl(const ROOT::RNTupleLocator &, std::uint64_t, unsigned char *) final { return nullptr; }
1415
void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {}
1516
void LoadStreamerInfo() final {}
1617
std::unique_ptr<ROOT::Internal::RPageSource>

0 commit comments

Comments
 (0)