Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions tree/ntuple/inc/ROOT/RPageStorage.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,16 @@ The page source also gives access to the ntuple's metadata.
*/
// clang-format on
class RPageSource : public RPageStorage {
protected:
/// Bundles the meta-data that is required to load one particular page. Consumed by LoadPageImpl().
struct RPageSummary {
   /// Identifier of the cluster that contains the page
   ROOT::DescriptorId_t fClusterId{0};
   /// Element number of the first element of the page's column in the given cluster
   std::uint64_t fColumnOffset{0};
   /// Describes where the page is located on disk
   ROOT::RClusterDescriptor::RPageInfoExtended fPageInfo;
};

public:
/// Used in SetEntryRange / GetEntryRange
struct REntryRange {
Expand Down Expand Up @@ -640,6 +650,9 @@ private:
/// Must not be called when the descriptor guard is taken.
void UpdateLastUsedCluster(ROOT::DescriptorId_t clusterId);

// Common treatment of zero pages that would otherwise need to be handled in LoadPageImpl()
ROOT::Internal::RPageRef LoadZeroPage(ColumnHandle_t columnHandle, const RPageSummary &pageSummary);

protected:
/// Default I/O performance counters that get registered in `fMetrics`
struct RCounters {
Expand Down Expand Up @@ -693,16 +706,6 @@ protected:
}
};

/// Bundles the cluster-level information that is necessary to load a certain page.
/// Consumed by LoadPageImpl().
struct RClusterInfo {
   /// Identifier of the cluster that contains the page
   ROOT::DescriptorId_t fClusterId{0};
   /// Describes where the page is located on disk
   ROOT::RClusterDescriptor::RPageInfoExtended fPageInfo;
   /// Element number of the first element of the page's column in the given cluster
   std::uint64_t fColumnOffset{0};
};

std::unique_ptr<RCounters> fCounters;

ROOT::RNTupleReadOptions fOptions;
Expand All @@ -726,9 +729,11 @@ protected:
virtual std::unique_ptr<RPageSource> CloneImpl() const = 0;
// Only called if a task scheduler is set. No-op by default.
virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster);
// Returns a page from storage if not found in the page pool. Should be able to handle zero page locators.
virtual ROOT::Internal::RPageRef
LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) = 0;
// Returns a page from storage if not found in the page pool. Will never receive requests for zero pages.
virtual ROOT::Internal::RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) = 0;
// Returns a sealed page from storage without adding it to the page pool. The sealed page's buffer and buffer size
// are already initialized.
virtual void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) = 0;

/// Prepare a page range read for the column set in `clusterKey`. Specifically, pages referencing the
/// `kTypePageZero` locator are filled in `pageZeroMap`; otherwise, `perPageFunc` is called for each page. This is
Expand Down Expand Up @@ -816,8 +821,7 @@ public:
/// The `fSize` and `fNElements` member of the sealedPage parameters are always set. If `sealedPage.fBuffer` is
/// `nullptr`, no data will be copied but the returned size information can be used by the caller to allocate a large
/// enough buffer and call `LoadSealedPage` again.
virtual void
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) = 0;
void LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage);

/// Populates all the pages of the given cluster ids and columns; it is possible that some columns do not
/// contain any pages. The page source may load more columns than the minimal necessary set from `columns`.
Expand Down
7 changes: 2 additions & 5 deletions tree/ntuple/inc/ROOT/RPageStorageDaos.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ private:

ROOT::Internal::RNTupleDescriptorBuilder fDescriptorBuilder;

ROOT::Internal::RPageRef
LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final;
ROOT::Internal::RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) final;
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;

protected:
void LoadStructureImpl() final {}
Expand All @@ -172,9 +172,6 @@ public:
RPageSourceDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleReadOptions &options);
~RPageSourceDaos() override;

void
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;

std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;

Expand Down
7 changes: 2 additions & 5 deletions tree/ntuple/inc/ROOT/RPageStorageFile.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ protected:
/// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
std::unique_ptr<RPageSource> CloneImpl() const final;

RPageRef
LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final;
RPageRef LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) final;
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;

public:
RPageSourceFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleReadOptions &options);
Expand All @@ -201,9 +201,6 @@ public:
std::unique_ptr<RPageSource> OpenWithDifferentAnchor(const ROOT::Internal::RNTupleLink &anchorLink,
const ROOT::RNTupleReadOptions &options = {}) final;

void
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;

std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;

Expand Down
91 changes: 71 additions & 20 deletions tree/ntuple/src/RPageStorage.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,54 @@ void ROOT::Internal::RPageSource::UpdateLastUsedCluster(ROOT::DescriptorId_t clu
fLastUsedCluster = clusterId;
}

void ROOT::Internal::RPageSource::LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex,
RSealedPage &sealedPage)
{
const auto clusterId = localIndex.GetClusterId();

ROOT::RClusterDescriptor::RPageInfo pageInfo;
{
auto descriptorGuard = GetSharedDescriptorGuard();
const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
pageInfo = clusterDescriptor.GetPageRange(physicalColumnId).Find(localIndex.GetIndexInCluster());
}

sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum);
sealedPage.SetNElements(pageInfo.GetNElements());
sealedPage.SetHasChecksum(pageInfo.HasChecksum());

if (!sealedPage.GetBuffer())
return;

if (pageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
assert(!pageInfo.HasChecksum());
memcpy(const_cast<void *>(sealedPage.GetBuffer()), ROOT::Internal::RPage::GetPageZeroBuffer(),
sealedPage.GetBufferSize());
return;
}
Comment on lines +414 to +419

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would we ever need to load a sealed zero page? According to the coverage information, this code is never executed...

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 3f630db


LoadSealedPageImpl(pageInfo.GetLocator(), sealedPage);
sealedPage.VerifyChecksumIfEnabled().ThrowOnError();
}

/// Materializes a page whose locator is of type `kTypePageZero`: a fresh page is obtained from the page
/// allocator, its buffer is set to all-zero bytes, and the page is registered in the page pool.
///
/// \param columnHandle Column the page belongs to; supplies the column element and the physical column id
/// \param pageSummary Cluster id, column offset and page info of the page to create
/// \return The page reference handed back by the page pool on registration
ROOT::Internal::RPageRef
ROOT::Internal::RPageSource::LoadZeroPage(ColumnHandle_t columnHandle, const RPageSummary &pageSummary)
{
   // This helper must only be reached for zero-page locators.
   assert(pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero);

   const auto columnElement = columnHandle.fColumn->GetElement();
   const auto nElements = pageSummary.fPageInfo.GetNElements();

   // Get a page sized for all elements, grow it to its full element count and blank the whole buffer.
   auto zeroedPage = fPageAllocator->NewPage(columnElement->GetSize(), nElements);
   zeroedPage.GrowUnchecked(nElements);
   std::memset(zeroedPage.GetBuffer(), 0, zeroedPage.GetNBytes());

   // The window starts at the column offset plus the page's first element index.
   const auto windowStart = pageSummary.fColumnOffset + pageSummary.fPageInfo.GetFirstElementIndex();
   zeroedPage.SetWindow(windowStart, RPage::RClusterInfo(pageSummary.fClusterId, pageSummary.fColumnOffset));

   // The pool key combines the physical column id with the element's in-memory type.
   return fPagePool.RegisterPage(
      std::move(zeroedPage), RPagePool::RKey{columnHandle.fPhysicalId, columnElement->GetIdentifier().fInMemoryType});
}

ROOT::Internal::RPageRef
ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, ROOT::NTupleSize_t globalIndex)
{
Expand All @@ -404,38 +452,38 @@ ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, ROOT::NTupleS
return cachedPageRef;
}

std::uint64_t idxInCluster;
RClusterInfo clusterInfo;
RPageSummary pageSummary;
{
auto descriptorGuard = GetSharedDescriptorGuard();
clusterInfo.fClusterId = descriptorGuard->FindClusterId(columnId, globalIndex);
pageSummary.fClusterId = descriptorGuard->FindClusterId(columnId, globalIndex);

if (clusterInfo.fClusterId == ROOT::kInvalidDescriptorId)
if (pageSummary.fClusterId == ROOT::kInvalidDescriptorId)
throw RException(R__FAIL("entry with index " + std::to_string(globalIndex) + " out of bounds"));

const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterInfo.fClusterId);
const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(pageSummary.fClusterId);
const auto &columnRange = clusterDescriptor.GetColumnRange(columnId);
if (columnRange.IsSuppressed())
return ROOT::Internal::RPageRef();

clusterInfo.fColumnOffset = columnRange.GetFirstElementIndex();
R__ASSERT(clusterInfo.fColumnOffset <= globalIndex);
idxInCluster = globalIndex - clusterInfo.fColumnOffset;
clusterInfo.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(idxInCluster);
pageSummary.fColumnOffset = columnRange.GetFirstElementIndex();
R__ASSERT(pageSummary.fColumnOffset <= globalIndex);
pageSummary.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(globalIndex - pageSummary.fColumnOffset);
}

if (clusterInfo.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown)
if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown) {
throw RException(R__FAIL("tried to read a page with an unknown locator"));
} else if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
return LoadZeroPage(columnHandle, pageSummary);
}

UpdateLastUsedCluster(clusterInfo.fClusterId);
return LoadPageImpl(columnHandle, clusterInfo, idxInCluster);
UpdateLastUsedCluster(pageSummary.fClusterId);
return LoadPageImpl(columnHandle, pageSummary);
}

ROOT::Internal::RPageRef
ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, RNTupleLocalIndex localIndex)
{
const auto clusterId = localIndex.GetClusterId();
const auto idxInCluster = localIndex.GetIndexInCluster();
const auto columnId = columnHandle.fPhysicalId;
const auto columnElementId = columnHandle.fColumn->GetElement()->GetIdentifier();
auto cachedPageRef =
Expand All @@ -448,24 +496,27 @@ ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, RNTupleLocalI
if (clusterId == kInvalidDescriptorId)
throw RException(R__FAIL("entry out of bounds"));

RClusterInfo clusterInfo;
RPageSummary pageSummary;
{
auto descriptorGuard = GetSharedDescriptorGuard();
const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
const auto &columnRange = clusterDescriptor.GetColumnRange(columnId);
if (columnRange.IsSuppressed())
return ROOT::Internal::RPageRef();

clusterInfo.fClusterId = clusterId;
clusterInfo.fColumnOffset = columnRange.GetFirstElementIndex();
clusterInfo.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(idxInCluster);
pageSummary.fClusterId = clusterId;
pageSummary.fColumnOffset = columnRange.GetFirstElementIndex();
pageSummary.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(localIndex.GetIndexInCluster());
}

if (clusterInfo.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown)
if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown) {
throw RException(R__FAIL("tried to read a page with an unknown locator"));
} else if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
return LoadZeroPage(columnHandle, pageSummary);
}

UpdateLastUsedCluster(clusterInfo.fClusterId);
return LoadPageImpl(columnHandle, clusterInfo, idxInCluster);
UpdateLastUsedCluster(clusterId);
return LoadPageImpl(columnHandle, pageSummary);
}

void ROOT::Internal::RPageSource::EnableDefaultMetrics(const std::string &prefix)
Expand Down
Loading
Loading