diff --git a/tree/ntuple/inc/ROOT/RPageStorage.hxx b/tree/ntuple/inc/ROOT/RPageStorage.hxx index 5a0723fd0248d..8170d7ef16a10 100644 --- a/tree/ntuple/inc/ROOT/RPageStorage.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorage.hxx @@ -567,7 +567,6 @@ The page source also gives access to the ntuple's metadata. */ // clang-format on class RPageSource : public RPageStorage { -protected: /// Summarizes meta-data necessary to load a certain page. Used by LoadPageFromSummary(). struct RPageSummary { ROOT::DescriptorId_t fClusterId = 0; @@ -626,6 +625,7 @@ public: fDescriptor.IncGeneration(); fLock.unlock(); } + ROOT::RNTupleDescriptor &operator*() const { return fDescriptor; } ROOT::RNTupleDescriptor *operator->() const { return &fDescriptor; } void MoveIn(ROOT::RNTupleDescriptor desc) { fDescriptor = std::move(desc); } }; @@ -705,6 +705,20 @@ private: ROOT::Internal::RPageRef LoadPageFromSummary(ColumnHandle_t columnHandle, const RPageSummary &pageSummary); protected: + /// Holds the uncompressed header and footer + struct RStructureBuffer { + std::unique_ptr fBuffer; ///< single buffer for both header and footer + void *fPtrHeader = nullptr; ///< either nullptr or points into fBuffer + void *fPtrFooter = nullptr; ///< either nullptr or points into fBuffer + + /// Called at the end of Attach(), i.e. when the header and footer are processed + void Reset() + { + RStructureBuffer empty; + std::swap(empty, *this); + } + }; + /// Default I/O performance counters that get registered in `fMetrics` struct RCounters { ROOT::Experimental::Detail::RNTupleAtomicCounter &fNReadV; @@ -727,16 +741,21 @@ protected: }; std::unique_ptr fCounters; + RStructureBuffer fStructureBuffer; ///< Populated by LoadStructureImpl(), reset at the end of Attach() ROOT::RNTupleReadOptions fOptions; + /// Fills fStructureBuffer with the compressed header and footer virtual void LoadStructureImpl() = 0; /// `LoadStructureImpl()` has been called before `AttachImpl()` is called - virtual ROOT::RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode) = 0; + virtual ROOT::RNTupleDescriptor AttachImpl() = 0; /// Returns a new, unattached page source for the same data set virtual std::unique_ptr CloneImpl() const = 0; // Only called if a task scheduler is set. No-op be default. virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster); + // Loads a page list into the provided buffer. The buffer parameter needs to point to a memory region large enough + // to hold the compressed page list. + virtual void LoadPageListImpl(const RNTupleLocator &locator, void *buffer) = 0; // Returns a sealed page from storage without adding it to the page pool. The sealed pages buffer and buffer size // is already initialized. virtual void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) = 0; diff --git a/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx b/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx index 4f2d525bbb7ad..2b6e7fdc868e0 100644 --- a/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx @@ -155,13 +155,15 @@ private: /// A URI to a DAOS pool of the form 'daos://pool-label/container-label' std::string fURI; + RDaosNTupleAnchor fAnchor; ROOT::Internal::RNTupleDescriptorBuilder fDescriptorBuilder; + void LoadPageListImpl(const RNTupleLocator &locator, void *buffer) final; void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final; protected: - void LoadStructureImpl() final {} - ROOT::RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode) final; + void LoadStructureImpl() final; + ROOT::RNTupleDescriptor AttachImpl() final; /// The cloned page source creates a new connection to the pool/container. std::unique_ptr CloneImpl() const final; diff --git a/tree/ntuple/inc/ROOT/RPageStorageFile.hxx b/tree/ntuple/inc/ROOT/RPageStorageFile.hxx index abc0c8bf2333f..7bbf33efdbe9b 100644 --- a/tree/ntuple/inc/ROOT/RPageStorageFile.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorageFile.hxx @@ -125,20 +125,6 @@ class RPageSourceFile : public RPageSource { friend class ROOT::RNTuple; private: - /// Holds the uncompressed header and footer - struct RStructureBuffer { - std::unique_ptr fBuffer; ///< single buffer for both header and footer - void *fPtrHeader = nullptr; ///< either nullptr or points into fBuffer - void *fPtrFooter = nullptr; ///< either nullptr or points into fBuffer - - /// Called at the end of Attach(), i.e. when the header and footer are processed - void Reset() - { - RStructureBuffer empty; - std::swap(empty, *this); - } - }; - /// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name std::optional fAnchor; /// The last cluster from which a page got loaded. Points into fClusterPool->fPool @@ -149,8 +135,6 @@ private: ROOT::Internal::RMiniFileReader fReader; /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor RNTupleDescriptorBuilder fDescriptorBuilder; - /// Populated by LoadStructureImpl(), reset at the end of Attach() - RStructureBuffer fStructureBuffer; /// Tracks the last read offset for seek distance calculation std::uint64_t fLastOffset = 0; @@ -176,10 +160,11 @@ private: protected: void LoadStructureImpl() final; - ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final; + ROOT::RNTupleDescriptor AttachImpl() final; /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data. std::unique_ptr CloneImpl() const final; + void LoadPageListImpl(const RNTupleLocator &locator, void *buffer) final; void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final; public: diff --git a/tree/ntuple/src/RPageStorage.cxx b/tree/ntuple/src/RPageStorage.cxx index ee3269b2be9a6..fbe5109d5caf1 100644 --- a/tree/ntuple/src/RPageStorage.cxx +++ b/tree/ntuple/src/RPageStorage.cxx @@ -226,9 +226,27 @@ void ROOT::Internal::RPageSource::LoadStructure() void ROOT::Internal::RPageSource::Attach(RNTupleSerializer::EDescriptorDeserializeMode mode) { + if (fIsAttached) + return; + LoadStructure(); - if (!fIsAttached) - GetExclDescriptorGuard().MoveIn(AttachImpl(mode)); + + auto descGuard = GetExclDescriptorGuard(); + descGuard.MoveIn(AttachImpl()); + fStructureBuffer.Reset(); + + std::vector buffer; + for (const auto &cgDesc : descGuard->GetClusterGroupIterable()) { + buffer.resize(cgDesc.GetPageListLength() + cgDesc.GetPageListLocator().GetNBytesOnStorage()); + auto zipBuffer = buffer.data() + cgDesc.GetPageListLength(); + + LoadPageListImpl(cgDesc.GetPageListLocator(), zipBuffer); + RNTupleDecompressor::Unzip(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(), + cgDesc.GetPageListLength(), buffer.data()); + RNTupleSerializer::DeserializePageList(buffer.data(), cgDesc.GetPageListLength(), cgDesc.GetId(), *descGuard, + mode); + } + fIsAttached = true; } diff --git a/tree/ntuple/src/RPageStorageDaos.cxx b/tree/ntuple/src/RPageStorageDaos.cxx index 84ac2972c53c2..52f20c9669c45 100644 --- a/tree/ntuple/src/RPageStorageDaos.cxx +++ b/tree/ntuple/src/RPageStorageDaos.cxx @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -129,7 +130,7 @@ struct RDaosContainerNTupleLocator { int InitNTupleDescriptorBuilder(ROOT::Experimental::Internal::RDaosContainer &cont, ROOT::Internal::RNTupleDescriptorBuilder &builder) { - std::unique_ptr buffer, zipBuffer; + std::unique_ptr buffer; auto &anchor = fAnchor.emplace(); int err; @@ -146,22 +147,7 @@ struct RDaosContainerNTupleLocator { builder.SetVersion(anchor.fVersionEpoch, anchor.fVersionMajor, anchor.fVersionMinor, anchor.fVersionPatch); builder.SetOnDiskHeaderSize(anchor.fNBytesHeader); - buffer = MakeUninitArray(anchor.fLenHeader); - zipBuffer = MakeUninitArray(anchor.fNBytesHeader); - if ((err = cont.ReadSingleAkey(zipBuffer.get(), anchor.fNBytesHeader, oidMetadata, kDistributionKeyDefault, - kAttributeKeyHeader, kCidMetadata))) - return err; - RNTupleDecompressor::Unzip(zipBuffer.get(), anchor.fNBytesHeader, anchor.fLenHeader, buffer.get()); - RNTupleSerializer::DeserializeHeader(buffer.get(), anchor.fLenHeader, builder); - builder.AddToOnDiskFooterSize(anchor.fNBytesFooter); - buffer = MakeUninitArray(anchor.fLenFooter); - zipBuffer = MakeUninitArray(anchor.fNBytesFooter); - if ((err = cont.ReadSingleAkey(zipBuffer.get(), anchor.fNBytesFooter, oidMetadata, kDistributionKeyDefault, - kAttributeKeyFooter, kCidMetadata))) - return err; - RNTupleDecompressor::Unzip(zipBuffer.get(), anchor.fNBytesFooter, anchor.fLenFooter, buffer.get()); - RNTupleSerializer::DeserializeFooter(buffer.get(), anchor.fLenFooter, builder); return 0; } @@ -174,13 +160,7 @@ struct RDaosContainerNTupleLocator { auto &loc = result.first; auto &builder = result.second; - if (int err = loc.InitNTupleDescriptorBuilder(cont, builder); !err) { - if (ntupleName.empty() || ntupleName != builder.GetDescriptor().GetName()) { - // Hash already taken by a differently-named ntuple. - throw ROOT::RException( - R__FAIL("LocateNTuple: ntuple name '" + ntupleName + "' unavailable in this container.")); - } - } + loc.InitNTupleDescriptorBuilder(cont, builder); return result; } }; @@ -455,40 +435,67 @@ ROOT::Experimental::Internal::RPageSourceDaos::~RPageSourceDaos() StopClusterPoolBackgroundThread(); } -ROOT::RNTupleDescriptor -ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) +void ROOT::Experimental::Internal::RPageSourceDaos::LoadStructureImpl() { - ROOT::RNTupleDescriptor ntplDesc; - std::unique_ptr buffer, zipBuffer; - - auto [locator, descBuilder] = RDaosContainerNTupleLocator::LocateNTuple(*fDaosContainer, fNTupleName); - if (!locator.IsValid()) + RDaosContainerNTupleLocator ntupleLocator; + std::tie(ntupleLocator, fDescriptorBuilder) = + RDaosContainerNTupleLocator::LocateNTuple(*fDaosContainer, fNTupleName); + if (!ntupleLocator.IsValid()) { throw ROOT::RException( - R__FAIL("Attach: requested ntuple '" + fNTupleName + "' is not present in DAOS container.")); + R__FAIL("LoadStructureImpl: requested ntuple '" + fNTupleName + "' is not present in DAOS container.")); + } + fAnchor = *ntupleLocator.fAnchor; + fNTupleIndex = ntupleLocator.GetIndex(); - auto oclass = RDaosObject::ObjClassId(locator.fAnchor->fObjClass); + auto oclass = RDaosObject::ObjClassId(fAnchor.fObjClass); if (oclass.IsUnknown()) - throw ROOT::RException(R__FAIL("Attach: unknown object class " + locator.fAnchor->fObjClass)); - + throw ROOT::RException(R__FAIL("LoadStructureImpl: unknown object class " + fAnchor.fObjClass)); fDaosContainer->SetDefaultObjectClass(oclass); - fNTupleIndex = locator.GetIndex(); - daos_obj_id_t oidPageList{kOidLowPageList, static_cast(fNTupleIndex)}; - auto desc = descBuilder.MoveDescriptor(); + // Reserve enough space for the compressed and the uncompressed header/footer (see AttachImpl) + const auto bufSize = + fAnchor.fNBytesHeader + fAnchor.fNBytesFooter + std::max(fAnchor.fLenHeader, fAnchor.fLenFooter); + fStructureBuffer.fBuffer = MakeUninitArray(bufSize); + fStructureBuffer.fPtrHeader = fStructureBuffer.fBuffer.get(); + fStructureBuffer.fPtrFooter = fStructureBuffer.fBuffer.get() + fAnchor.fNBytesHeader; + + int err; + daos_obj_id_t oidMetadata{kOidLowMetadata, static_cast(fNTupleIndex)}; + + if ((err = fDaosContainer->ReadSingleAkey(fStructureBuffer.fPtrHeader, fAnchor.fNBytesHeader, oidMetadata, + kDistributionKeyDefault, kAttributeKeyHeader, kCidMetadata))) { + throw ROOT::RException(R__FAIL("LoadStructureImpl: cannot load header: " + std::to_string(err))); + } + + if ((err = fDaosContainer->ReadSingleAkey(fStructureBuffer.fPtrFooter, fAnchor.fNBytesFooter, oidMetadata, + kDistributionKeyDefault, kAttributeKeyFooter, kCidMetadata))) { + throw ROOT::RException(R__FAIL("LoadStructureImpl: cannot load footer: " + std::to_string(err))); + } +} + +ROOT::RNTupleDescriptor ROOT::Experimental::Internal::RPageSourceDaos::AttachImpl() +{ + auto unzipBuf = reinterpret_cast(fStructureBuffer.fPtrFooter) + fAnchor.fNBytesFooter; - for (const auto &cgDesc : desc.GetClusterGroupIterable()) { - buffer = MakeUninitArray(cgDesc.GetPageListLength()); - zipBuffer = MakeUninitArray(cgDesc.GetPageListLocator().GetNBytesOnStorage()); - fDaosContainer->ReadSingleAkey( - zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(), oidPageList, kDistributionKeyDefault, - cgDesc.GetPageListLocator().GetPosition().GetLocation(), kCidMetadata); - RNTupleDecompressor::Unzip(zipBuffer.get(), cgDesc.GetPageListLocator().GetNBytesOnStorage(), - cgDesc.GetPageListLength(), buffer.get()); + RNTupleDecompressor::Unzip(fStructureBuffer.fPtrHeader, fAnchor.fNBytesHeader, fAnchor.fLenHeader, unzipBuf); + RNTupleSerializer::DeserializeHeader(unzipBuf, fAnchor.fLenHeader, fDescriptorBuilder); - RNTupleSerializer::DeserializePageList(buffer.get(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode); + RNTupleDecompressor::Unzip(fStructureBuffer.fPtrFooter, fAnchor.fNBytesFooter, fAnchor.fLenFooter, unzipBuf); + RNTupleSerializer::DeserializeFooter(unzipBuf, fAnchor.fLenFooter, fDescriptorBuilder); + + if (fDescriptorBuilder.GetDescriptor().GetName() != fNTupleName) { + // Hash already taken by a differently-named ntuple. + throw ROOT::RException(R__FAIL("LocateNTuple: ntuple name '" + fNTupleName + "' unavailable in this container.")); } - return desc; + return fDescriptorBuilder.MoveDescriptor(); +} + +void ROOT::Experimental::Internal::RPageSourceDaos::LoadPageListImpl(const RNTupleLocator &locator, void *buffer) +{ + daos_obj_id_t oidPageList{kOidLowPageList, static_cast(fNTupleIndex)}; + fDaosContainer->ReadSingleAkey(buffer, locator.GetNBytesOnStorage(), oidPageList, kDistributionKeyDefault, + locator.GetPosition().GetLocation(), kCidMetadata); } std::string ROOT::Experimental::Internal::RPageSourceDaos::GetObjectClass() const diff --git a/tree/ntuple/src/RPageStorageFile.cxx b/tree/ntuple/src/RPageStorageFile.cxx index aad35a36cea88..550657ebe157c 100644 --- a/tree/ntuple/src/RPageStorageFile.cxx +++ b/tree/ntuple/src/RPageStorageFile.cxx @@ -477,7 +477,7 @@ void ROOT::Internal::RPageSourceFile::LoadStructureImpl() } } -ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) +ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl() { auto unzipBuf = reinterpret_cast(fStructureBuffer.fPtrFooter) + fAnchor->GetNBytesFooter(); @@ -489,25 +489,10 @@ ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSeria unzipBuf); RNTupleSerializer::DeserializeFooter(unzipBuf, fAnchor->GetLenFooter(), fDescriptorBuilder); - auto desc = fDescriptorBuilder.MoveDescriptor(); - // fNTupleName is empty if and only if we created this source via CreateFromAnchor. If that's the case, this is the // earliest we can set the name. if (fNTupleName.empty()) - fNTupleName = desc.GetName(); - - std::vector buffer; - for (const auto &cgDesc : desc.GetClusterGroupIterable()) { - buffer.resize(std::max(buffer.size(), - cgDesc.GetPageListLength() + cgDesc.GetPageListLocator().GetNBytesOnStorage())); - auto *zipBuffer = buffer.data() + cgDesc.GetPageListLength(); - fReader.ReadBuffer(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(), - cgDesc.GetPageListLocator().GetPosition()); - RNTupleDecompressor::Unzip(zipBuffer, cgDesc.GetPageListLocator().GetNBytesOnStorage(), - cgDesc.GetPageListLength(), buffer.data()); - - RNTupleSerializer::DeserializePageList(buffer.data(), cgDesc.GetPageListLength(), cgDesc.GetId(), desc, mode); - } + fNTupleName = fDescriptorBuilder.GetDescriptor().GetName(); // For the page reads, we rely on the I/O scheduler to define the read requests fFile->SetBuffering(false); @@ -515,7 +500,12 @@ ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSeria // Set file size once after buffering is turned off fFileSize = fFile->GetSize(); - return desc; + return fDescriptorBuilder.MoveDescriptor(); +} + +void ROOT::Internal::RPageSourceFile::LoadPageListImpl(const RNTupleLocator &locator, void *buffer) +{ + fReader.ReadBuffer(buffer, locator.GetNBytesOnStorage(), locator.GetPosition()); } void ROOT::Internal::RPageSourceFile::LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) diff --git a/tree/ntuple/test/ntuple_cluster.cxx b/tree/ntuple/test/ntuple_cluster.cxx index 9b321c28318f5..13c3418b598b4 100644 --- a/tree/ntuple/test/ntuple_cluster.cxx +++ b/tree/ntuple/test/ntuple_cluster.cxx @@ -39,8 +39,9 @@ namespace { class RPageSourceMock : public RPageSource { protected: void LoadStructureImpl() final {} - RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); } + RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); } std::unique_ptr CloneImpl() const final { return nullptr; } + void LoadPageListImpl(const ROOT::RNTupleLocator &, void *) final {} void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {} void LoadStreamerInfo() final {} std::unique_ptr diff --git a/tree/ntuple/test/ntuple_endian.cxx b/tree/ntuple/test/ntuple_endian.cxx index 90fe9d8aaa90d..59b9fecdb1966 100644 --- a/tree/ntuple/test/ntuple_endian.cxx +++ b/tree/ntuple/test/ntuple_endian.cxx @@ -89,11 +89,9 @@ class RPageSourceMock : public RPageSource { protected: void LoadStructureImpl() final {} - RNTupleDescriptor AttachImpl(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode) final - { - return RNTupleDescriptor(); - } + RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); } std::unique_ptr CloneImpl() const final { return nullptr; } + void LoadPageListImpl(const ROOT::RNTupleLocator &, void *) final {} void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {} void LoadStreamerInfo() final {} diff --git a/tree/ntuple/test/ntuple_pages.cxx b/tree/ntuple/test/ntuple_pages.cxx index aef3c5b4a0708..4ef3f1bd5cecc 100644 --- a/tree/ntuple/test/ntuple_pages.cxx +++ b/tree/ntuple/test/ntuple_pages.cxx @@ -9,8 +9,9 @@ using ROOT::Internal::RPageRef; class RPageSourceMock : public RPageSource { protected: void LoadStructureImpl() final {} - RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode) final { return RNTupleDescriptor(); } + RNTupleDescriptor AttachImpl() final { return RNTupleDescriptor(); } std::unique_ptr CloneImpl() const final { return nullptr; } + void LoadPageListImpl(const ROOT::RNTupleLocator &, void *) final {} void LoadSealedPageImpl(const ROOT::RNTupleLocator &, RSealedPage &) final {} void LoadStreamerInfo() final {} std::unique_ptr