|
42 | 42 | #include <functional> |
43 | 43 | #include <mutex> |
44 | 44 |
|
| 45 | +using ROOT::Experimental::Detail::RNTupleAtomicCounter; |
45 | 46 | using ROOT::Experimental::Detail::RNTupleAtomicTimer; |
| 47 | +using ROOT::Experimental::Detail::RNTupleCalcPerf; |
| 48 | +using ROOT::Experimental::Detail::RNTupleMetrics; |
| 49 | +using ROOT::Internal::MakeUninitArray; |
| 50 | +using ROOT::Internal::RCluster; |
| 51 | +using ROOT::Internal::RClusterPool; |
| 52 | +using ROOT::Internal::RNTupleCompressor; |
| 53 | +using ROOT::Internal::RNTupleDecompressor; |
| 54 | +using ROOT::Internal::RNTupleFileWriter; |
| 55 | +using ROOT::Internal::RNTupleSerializer; |
| 56 | +using ROOT::Internal::ROnDiskPage; |
| 57 | +using ROOT::Internal::ROnDiskPageMap; |
| 58 | +using ROOT::Internal::RPagePool; |
46 | 59 |
|
47 | 60 | ROOT::Internal::RPageSinkFile::RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options) |
48 | 61 | : RPagePersistentSink(ntupleName, options) |
@@ -315,6 +328,46 @@ ROOT::Internal::RPageSourceFile::RPageSourceFile(std::string_view ntupleName, co |
315 | 328 | : RPageSource(ntupleName, opts) |
316 | 329 | { |
317 | 330 | EnableDefaultMetrics("RPageSourceFile"); |
| 331 | + fFileCounters = std::make_unique<RFileCounters>(RFileCounters{ |
| 332 | + *fMetrics.MakeCounter<RNTupleAtomicCounter *>("szSkip", "B", |
| 333 | + "cumulative seek distance (excluding header/footer reads)"), |
| 334 | + *fMetrics.MakeCounter<RNTupleCalcPerf *>( |
| 335 | + "szFile", "B", "total file size", fMetrics, |
| 336 | + [this](const RNTupleMetrics &) -> std::pair<bool, double> { |
| 337 | + if (fFileSize > 0) |
| 338 | + return {true, static_cast<double>(fFileSize)}; |
| 339 | + return {false, -1.}; |
| 340 | + }), |
| 341 | + *fMetrics.MakeCounter<RNTupleCalcPerf *>( |
| 342 | + "randomness", "", |
| 343 | + "ratio of seek distance to bytes read (excluding file structure reads)", fMetrics, |
| 344 | + [](const RNTupleMetrics &metrics) -> std::pair<bool, double> { |
| 345 | + if (const auto szSkip = metrics.GetLocalCounter("szSkip")) { |
| 346 | + if (const auto szReadPayload = metrics.GetLocalCounter("szReadPayload")) { |
| 347 | + if (const auto szReadOverhead = metrics.GetLocalCounter("szReadOverhead")) { |
| 348 | + auto totalRead = szReadPayload->GetValueAsInt() + szReadOverhead->GetValueAsInt(); |
| 349 | + if (totalRead > 0) { |
| 350 | + return {true, (1. * szSkip->GetValueAsInt()) / totalRead}; |
| 351 | + } |
| 352 | + } |
| 353 | + } |
| 354 | + } |
| 355 | + return {false, -1.}; |
| 356 | + }), |
| 357 | + *fMetrics.MakeCounter<RNTupleCalcPerf *>( |
| 358 | + "sparseness", "", |
| 359 | + "ratio of bytes read to total file size (excluding file structure reads)", fMetrics, |
| 360 | + [this](const RNTupleMetrics &metrics) -> std::pair<bool, double> { |
| 361 | + if (fFileSize > 0) { |
| 362 | + if (const auto szReadPayload = metrics.GetLocalCounter("szReadPayload")) { |
| 363 | + if (const auto szReadOverhead = metrics.GetLocalCounter("szReadOverhead")) { |
| 364 | + auto totalRead = szReadPayload->GetValueAsInt() + szReadOverhead->GetValueAsInt(); |
| 365 | + return {true, (1. * totalRead) / fFileSize}; |
| 366 | + } |
| 367 | + } |
| 368 | + } |
| 369 | + return {false, -1.}; |
| 370 | + })}); |
318 | 371 | } |
319 | 372 |
|
320 | 373 | ROOT::Internal::RPageSourceFile::RPageSourceFile(std::string_view ntupleName, |
@@ -460,6 +513,9 @@ ROOT::RNTupleDescriptor ROOT::Internal::RPageSourceFile::AttachImpl(RNTupleSeria |
460 | 513 | // For the page reads, we rely on the I/O scheduler to define the read requests |
461 | 514 | fFile->SetBuffering(false); |
462 | 515 |
|
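| 516 | + // Cache the file size once, after buffering is turned off; the "szFile" and "sparseness" metric callbacks read fFileSize and return {false, -1.} unless it is > 0 |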
| 517 | + fFileSize = fFile->GetSize(); |
| 518 | + |
463 | 519 | return desc; |
464 | 520 | } |
465 | 521 |
|
@@ -523,8 +579,16 @@ ROOT::Internal::RPageRef ROOT::Internal::RPageSourceFile::LoadPageImpl(ColumnHan |
523 | 579 | directReadBuffer = MakeUninitArray<unsigned char>(sealedPage.GetBufferSize()); |
524 | 580 | { |
525 | 581 | RNTupleAtomicTimer timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead); |
526 | | - fReader.ReadBuffer(directReadBuffer.get(), sealedPage.GetBufferSize(), |
527 | | - pageInfo.GetLocator().GetPosition<std::uint64_t>()); |
| 582 | + const auto offset = pageInfo.GetLocator().GetPosition<std::uint64_t>(); |
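| 583 | + // Track seek distance (excluding file structure reads): the absolute gap between this page's offset and fLastOffset, the end of the previously tracked read. Nothing is added while fLastOffset is 0 (presumably "no read tracked yet" -- confirm against its initial value). |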
| 584 | + R__ASSERT(fFileCounters); |
| 585 | + if (fLastOffset != 0) { |
| 586 | + const auto distance = static_cast<std::uint64_t>(std::abs( |
| 587 | + static_cast<std::int64_t>(offset) - static_cast<std::int64_t>(fLastOffset))); |
| 588 | + fFileCounters->fSzSkip.Add(distance); |
| 589 | + } |
| 590 | + fReader.ReadBuffer(directReadBuffer.get(), sealedPage.GetBufferSize(), offset); |
| 591 | + fLastOffset = offset + sealedPage.GetBufferSize(); |
528 | 592 | } |
529 | 593 | fCounters->fNPageRead.Inc(); |
530 | 594 | fCounters->fNRead.Inc(); |
@@ -730,6 +794,18 @@ ROOT::Internal::RPageSourceFile::LoadClusters(std::span<RCluster::RKey> clusterK |
730 | 794 | } |
731 | 795 | } |
732 | 796 |
|
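| 797 | + // Track seek distance for each read request (excluding file structure reads). NOTE(review): this loop runs before nBatch is clamped to 1 just below, so if nBatch can be 0 here the request handled by that single-read branch is neither added to szSkip nor reflected in fLastOffset -- confirm. |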
| 798 | + R__ASSERT(fFileCounters); |
| 799 | + for (std::size_t i = 0; i < nBatch; ++i) { |
| 800 | + const auto offset = readRequests[iReq + i].fOffset; |
| 801 | + if (fLastOffset != 0) { |
| 802 | + const auto distance = static_cast<std::uint64_t>(std::abs( |
| 803 | + static_cast<std::int64_t>(offset) - static_cast<std::int64_t>(fLastOffset))); |
| 804 | + fFileCounters->fSzSkip.Add(distance); |
| 805 | + } |
| 806 | + fLastOffset = offset + readRequests[iReq + i].fSize; |
| 807 | + } |
| 808 | + |
733 | 809 | if (nBatch <= 1) { |
734 | 810 | nBatch = 1; |
735 | 811 | RNTupleAtomicTimer timer(fCounters->fTimeWallRead, fCounters->fTimeCpuRead); |
|
0 commit comments