Skip to content

Commit 6d9ddbe

Browse files
JasMehta08 authored and
jblomer committed
Add computed metrics (randomness, sparseness) to RNTuple
1 parent 5f6891e commit 6d9ddbe

3 files changed

Lines changed: 133 additions & 2 deletions

File tree

tree/ntuple/inc/ROOT/RPageStorageFile.hxx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,19 @@ private:
152152
RNTupleDescriptorBuilder fDescriptorBuilder;
153153
/// Populated by LoadStructureImpl(), reset at the end of Attach()
154154
RStructureBuffer fStructureBuffer;
155+
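/// End position (offset + size) of the most recent page or cluster read request, used for the
/// seek distance calculation; 0 means that no such read has been recorded yet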
156+
std::uint64_t fLastOffset = 0;
157+
158+
/// File-specific I/O performance counters.
/// Holds references to the counters registered through fMetrics.MakeCounter() in the
/// RPageSourceFile constructor. The struct is aggregate-initialized there, so the member
/// order below must match the order in which the counters are created.
159+
struct RFileCounters {
160+
/// Counter "szSkip" (unit B): cumulative seek distance (excluding header/footer reads)
ROOT::Experimental::Detail::RNTupleAtomicCounter &fSzSkip;
161+
/// Calculated metric "szFile" (unit B): total file size; yields {false, -1.} while fFileSize is not positive
ROOT::Experimental::Detail::RNTupleCalcPerf &fSzFile;
162+
/// Calculated metric "randomness": szSkip / (szReadPayload + szReadOverhead);
/// yields {false, -1.} if one of these counters is missing or nothing has been read
ROOT::Experimental::Detail::RNTupleCalcPerf &fRandomness;
163+
/// Calculated metric "sparseness": (szReadPayload + szReadOverhead) / fFileSize;
/// yields {false, -1.} while fFileSize is not positive or a read counter is missing
ROOT::Experimental::Detail::RNTupleCalcPerf &fSparseness;
164+
};
165+
std::unique_ptr<RFileCounters> fFileCounters;
166+
/// Total file size, set once in AttachImpl()
167+
std::int64_t fFileSize = 0;
155168

156169
RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options);
157170

tree/ntuple/src/RPageStorageFile.cxx

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,20 @@
4242
#include <functional>
4343
#include <mutex>
4444

45+
using ROOT::Experimental::Detail::RNTupleAtomicCounter;
4546
using ROOT::Experimental::Detail::RNTupleAtomicTimer;
47+
using ROOT::Experimental::Detail::RNTupleCalcPerf;
48+
using ROOT::Experimental::Detail::RNTupleMetrics;
49+
using ROOT::Internal::MakeUninitArray;
50+
using ROOT::Internal::RCluster;
51+
using ROOT::Internal::RClusterPool;
52+
using ROOT::Internal::RNTupleCompressor;
53+
using ROOT::Internal::RNTupleDecompressor;
54+
using ROOT::Internal::RNTupleFileWriter;
55+
using ROOT::Internal::RNTupleSerializer;
56+
using ROOT::Internal::ROnDiskPage;
57+
using ROOT::Internal::ROnDiskPageMap;
58+
using ROOT::Internal::RPagePool;
4659

4760
ROOT::Internal::RPageSinkFile::RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options)
4861
: RPagePersistentSink(ntupleName, options)
@@ -315,6 +328,46 @@ ROOT::Internal::RPageSourceFile::RPageSourceFile(std::string_view ntupleName, co
315328
: RPageSource(ntupleName, opts)
316329
{
317330
EnableDefaultMetrics("RPageSourceFile");
331+
fFileCounters = std::make_unique<RFileCounters>(RFileCounters{
332+
*fMetrics.MakeCounter<RNTupleAtomicCounter *>("szSkip", "B",
333+
"cumulative seek distance (excluding header/footer reads)"),
334+
*fMetrics.MakeCounter<RNTupleCalcPerf *>(
335+
"szFile", "B", "total file size", fMetrics,
336+
[this](const RNTupleMetrics &) -> std::pair<bool, double> {
337+
if (fFileSize > 0)
338+
return {true, static_cast<double>(fFileSize)};
339+
return {false, -1.};
340+
}),
341+
*fMetrics.MakeCounter<RNTupleCalcPerf *>(
342+
"randomness", "",
343+
"ratio of seek distance to bytes read (excluding file structure reads)", fMetrics,
344+
[](const RNTupleMetrics &metrics) -> std::pair<bool, double> {
345+
if (const auto szSkip = metrics.GetLocalCounter("szSkip")) {
346+
if (const auto szReadPayload = metrics.GetLocalCounter("szReadPayload")) {
347+
if (const auto szReadOverhead = metrics.GetLocalCounter("szReadOverhead")) {
348+
auto totalRead = szReadPayload->GetValueAsInt() + szReadOverhead->GetValueAsInt();
349+
if (totalRead > 0) {
350+
return {true, (1. * szSkip->GetValueAsInt()) / totalRead};
351+
}
352+
}
353+
}
354+
}
355+
return {false, -1.};
356+
}),
357+
*fMetrics.MakeCounter<RNTupleCalcPerf *>(
358+
"sparseness", "",
359+
"ratio of bytes read to total file size (excluding file structure reads)", fMetrics,
360+
[this](const RNTupleMetrics &metrics) -> std::pair<bool, double> {
361+
if (fFileSize > 0) {
362+
if (const auto szReadPayload = metrics.GetLocalCounter("szReadPayload")) {
363+
if (const auto szReadOverhead = metrics.GetLocalCounter("szReadOverhead")) {
364+
auto totalRead = szReadPayload->GetValueAsInt() + szReadOverhead->GetValueAsInt();
365+
return {true, (1. * totalRead) / fFileSize};
366+
}
367+
}
368+
}
369+
return {false, -1.};
370+
})});
318371
}
319372

320373
ROOT::Internal::RPageSourceFile::RPageSourceFile(std::string_view ntupleName,
@@ -460,6 +513,9 @@ ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSeria
460513
// For the page reads, we rely on the I/O scheduler to define the read requests
461514
fFile->SetBuffering(false);
462515

516+
// Set file size once after buffering is turned off
517+
fFileSize = fFile->GetSize();
518+
463519
return desc;
464520
}
465521

@@ -523,8 +579,16 @@ ROOT::Internal::RPageRef ROOT::Internal::RPageSourceFile::LoadPageImpl(ColumnHan
523579
directReadBuffer = MakeUninitArray<unsigned char>(sealedPage.GetBufferSize());
524580
{
525581
RNTupleAtomicTimer timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);
526-
fReader.ReadBuffer(directReadBuffer.get(), sealedPage.GetBufferSize(),
527-
pageInfo.GetLocator().GetPosition<std::uint64_t>());
582+
const auto offset = pageInfo.GetLocator().GetPosition<std::uint64_t>();
583+
// Track seek distance (excluding file structure reads)
584+
R__ASSERT(fFileCounters);
585+
if (fLastOffset != 0) {
586+
const auto distance = static_cast<std::uint64_t>(std::abs(
587+
static_cast<std::int64_t>(offset) - static_cast<std::int64_t>(fLastOffset)));
588+
fFileCounters->fSzSkip.Add(distance);
589+
}
590+
fReader.ReadBuffer(directReadBuffer.get(), sealedPage.GetBufferSize(), offset);
591+
fLastOffset = offset + sealedPage.GetBufferSize();
528592
}
529593
fCounters->fNPageRead.Inc();
530594
fCounters->fNRead.Inc();
@@ -730,6 +794,18 @@ ROOT::Internal::RPageSourceFile::LoadClusters(std::span<RCluster::RKey> clusterK
730794
}
731795
}
732796

797+
// Track seek distance for each read request (excluding file structure reads)
798+
R__ASSERT(fFileCounters);
799+
for (std::size_t i = 0; i < nBatch; ++i) {
800+
const auto offset = readRequests[iReq + i].fOffset;
801+
if (fLastOffset != 0) {
802+
const auto distance = static_cast<std::uint64_t>(std::abs(
803+
static_cast<std::int64_t>(offset) - static_cast<std::int64_t>(fLastOffset)));
804+
fFileCounters->fSzSkip.Add(distance);
805+
}
806+
fLastOffset = offset + readRequests[iReq + i].fSize;
807+
}
808+
733809
if (nBatch <= 1) {
734810
nBatch = 1;
735811
RNTupleAtomicTimer timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead);

tree/ntuple/test/ntuple_metrics.cxx

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,45 @@ TEST(Metrics, RNTupleWriter)
112112
// one page for the int field, one for the float field
113113
EXPECT_EQ(2, page_counter->GetValueAsInt());
114114
}
115+
116+
/// Verifies that a file-backed reader exposes the I/O pattern counters
/// (randomness, sparseness, szSkip, szFile) once metrics have been enabled.
TEST(Metrics, IOMetrics)
{
   FileRaii fileGuard("test_ntuple_io_metrics.root");

   // Write phase: one int column, 1000 entries, followed by an explicit CommitCluster().
   // The writer goes out of scope at the end of this block.
   {
      auto writeModel = RNTupleModel::Create();
      auto ptrInts = writeModel->MakeField<int>("ints");
      auto writer = RNTupleWriter::Recreate(std::move(writeModel), "ntuple", fileGuard.GetPath());
      for (int value = 0; value < 1000; ++value) {
         *ptrInts = value;
         writer->Fill();
      }
      writer->CommitCluster();
   }

   // Read phase: metrics start out disabled, get switched on, and then every entry is read once.
   {
      auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
      EXPECT_FALSE(reader->GetMetrics().IsEnabled());
      reader->EnableMetrics();
      EXPECT_TRUE(reader->GetMetrics().IsEnabled());

      auto viewInts = reader->GetView<int>("ints");
      for (auto entry : *reader) {
         // The value itself is irrelevant; the access only serves to trigger the page reads.
         (void)viewInts(entry);
      }

      const auto &readerMetrics = reader->GetMetrics();

      // Each counter must be registered under the reader's page source prefix.
      auto *randomness = readerMetrics.GetCounter("RNTupleReader.RPageSourceFile.randomness");
      ASSERT_NE(randomness, nullptr);

      auto *sparseness = readerMetrics.GetCounter("RNTupleReader.RPageSourceFile.sparseness");
      ASSERT_NE(sparseness, nullptr);

      auto *szSkip = readerMetrics.GetCounter("RNTupleReader.RPageSourceFile.szSkip");
      ASSERT_NE(szSkip, nullptr);

      auto *szFile = readerMetrics.GetCounter("RNTupleReader.RPageSourceFile.szFile");
      ASSERT_NE(szFile, nullptr);

      // Only sanity bounds are checked: the seek distance is non-negative and the file size is positive.
      EXPECT_GE(szSkip->GetValueAsInt(), 0);
      EXPECT_GT(szFile->GetValueAsInt(), 0);
   }
}

0 commit comments

Comments
 (0)