feat(stats): time-windowed hashrate meter

yuzi-co · yuzi-co · commit 2b65ff339f84 · 2026-06-12T02:23:49.000+03:00
Replace the count-gated, per-job-reset hashrate with a free-running nonce
accumulator sampled over a wall-clock window (EWMA-smoothed) -- the model used
by ethminer/XMRig. Fixes the dashboard reporting 0 H/s for slow / memory-hard
kernels (XelisHash V3, Octopus, large-DAG) under fast pool job streams, where
kernelExecuted was zeroed on every mining.notify before it could reach the
100-launch publish threshold. The displayed value is also smoother than before.

- Statistical: nonceAccumulator (std::atomic<uint64_t>, relaxed) counts the
  actual per-launch nonces, correct across occupancy changes; the mining thread
  fetch_adds and the stats thread exchange()s it to 0, so the two threads never
  race. sampleWindow() folds one sample into the EWMA per window: a working
  window contributes its measured rate, an empty/stalled window contributes
  0 H/s so the value decays toward 0 instead of holding a stale rate (a hung GPU
  stays visible). A slow but working kernel still completes >= 1 launch per
  window, so it never produces an empty window and never decays. Pure
  computeHashrate/smoothHashrate helpers.
- Device: getHashrate() is a pure const read; new sampleHashrate() (stats thread)
  owns the window; updateBatchNonce() no longer resets the window per job;
  loopDoWork() opens the first window before the loop; updateJob() skips the
  redundant pre-rebuild reset when memory is being rebuilt.
- Stats thread samples via sampleHashrate(); REST API and benchmark unchanged.
- --internal_kernel_count no longer gates the display (kept for compatibility).
- Unit tests for the meter (sources/statistical/tests).
diff --git a/documentation/ARCHITECTURE.md b/documentation/ARCHITECTURE.md
@@ -458,7 +458,7 @@ Loaded from CLI by `common::Cli`, supports per-device pool overrides:
 
 **Statistics** (`statistical/statistical.hpp`):
 - Tracks kernel executions, valid/invalid shares, elapsed time
-- `getHashrate()` computes MH/s from kernel count × batch nonce / time
+- `sampleHashrate()` (stats thread) computes H/s from accumulated nonces over a wall-clock window, EWMA-smoothed; `getHashrate()` is a pure read of the last sample
 - Printed to console every ~10 seconds by the stats thread
 
 **REST API** (`api/api.hpp`):
diff --git a/documentation/PARAMETERS.md b/documentation/PARAMETERS.md
@@ -97,7 +97,7 @@ N/A : No default value is set.
 | `--blocks` | ✅ | N/A | Set occupancy blocks. | `--blocks=128` |
 | `--occupancy` | ✅ | false | System will define the best occupancy for kernel. | `--occupancy=<true\|false>` |
 | `--internal_loop` | ✅ | 1 | Set internal loop for kernel. | `--internal_loop=1` |
-| `--internal_kernel_count` | ✅ | 1 | Set internal loop for kernel. This defines the minimum number of times the kernel must be called to display statistics | `--internal_kernel_count=1` |
+| `--internal_kernel_count` | ✅ | 1 | Deprecated: no longer affects the hashrate display (the dashboard now uses a time-windowed meter). Still accepted for backwards compatibility. | `--internal_kernel_count=1` |
 | `--cuda_context` | ✅ | auto | Set CUDA context. | `--cuda_context=<auto\|blocking\|yield\|spin>` |
 
 ## Smart Mining
diff --git a/sources/device/device.cpp b/sources/device/device.cpp
@@ -507,18 +507,20 @@ void device::Device::increaseShare(bool const isValid)
 }
 
 
-double device::Device::getHashrate()
+double device::Device::getHashrate() const
 {
-    uint32_t const  executeCount{ miningStats.getKernelExecutedCount() };
-    common::Config& config{ common::Config::instance() };
+    // Pure read of the last sampled value. The stats thread drives the actual
+    // measurement through sampleHashrate(); other consumers (REST API,
+    // benchmark display) must not reset the window.
+    return miningStats.getHashrate();
+}
 
-    if (config.occupancy.kernelMinimunExecuteNeeded <= executeCount)
-    {
-        miningStats.stop();
-        miningStats.updateHashrate();
-        miningStats.reset();
-    }
 
+double device::Device::sampleHashrate()
+{
+    // Owned by the stats thread: close the current wall-clock window, fold it
+    // into the smoothed value, and open the next one.
+    miningStats.sampleWindow();
     return miningStats.getHashrate();
 }
 
@@ -590,7 +592,12 @@ bool device::Device::updateJob()
     uint64_t const currentAtomicMemory{ synchronizer.memory.get() };
 
     ////////////////////////////////////////////////////////////////////////////
-    if (nextjobInfo.epoch != currentJobInfo.epoch || nextjobInfo.period != currentJobInfo.period)
+    // A period/epoch change restarts the hashrate window. When this change also
+    // rebuilds memory, the reset below (after the rebuild) is the meaningful one
+    // -- it excludes DAG-build time from the window -- so skip the redundant
+    // pre-rebuild reset here in that case.
+    if ((nextjobInfo.epoch != currentJobInfo.epoch || nextjobInfo.period != currentJobInfo.period)
+        && false == needUpdateMemory)
     {
         miningStats.reset();
     }
@@ -655,9 +662,12 @@ void device::Device::updateBatchNonce()
     }
 
     ////////////////////////////////////////////////////////////////////////////
+    // updateJob() calls this on every pool job. Only the per-launch nonce stride
+    // is refreshed here -- the hashrate window (accumulator + chrono) must survive
+    // job updates, otherwise a slow kernel under a fast job stream never measures a
+    // full window. The window is reset only on a memory rebuild or a period change;
+    // the stats thread rolls it forward (sampleWindow) at each sample.
     miningStats.setBatchNonce(resolver->getBlocks() * resolver->getThreads() * internalLoop);
-    miningStats.resetHashrate();
-    miningStats.reset();
 }
 
 
@@ -691,6 +701,11 @@ void device::Device::loopDoWork()
     ////////////////////////////////////////////////////////////////////////////
     computing.store(true, boost::memory_order::seq_cst);
 
+    ////////////////////////////////////////////////////////////////////////////
+    // Open the first hashrate window now that a job is in hand. From here the
+    // window is owned by the stats thread (sampleHashrate) and never reset per job.
+    miningStats.reset();
+
     ////////////////////////////////////////////////////////////////////////////
     deviceDebug() << "Start working!";
     while (true == isAlive() && nullptr != resolver)
diff --git a/sources/device/device.hpp b/sources/device/device.hpp
@@ -61,7 +61,8 @@ namespace device
         bool              isComputing() const;
         void              update(bool const memory, bool const constants, stratum::StratumJobInfo const& newJobInfo);
         void              increaseShare(bool const isValid);
-        double            getHashrate();
+        double            getHashrate() const;
+        double            sampleHashrate();
         stratum::Stratum* getStratum();
         stratum::StratumSmartMining*        getStratumSmartMining();
         statistical::Statistical::ShareInfo getShare();
diff --git a/sources/device/device_manager_loop_statistical.cpp b/sources/device/device_manager_loop_statistical.cpp
@@ -95,7 +95,7 @@ void device::DeviceManager::loopStatistical()
             }
 
             ///////////////////////////////////////////////////////////////////
-            auto const                          hashrate{ device->getHashrate() };
+            auto const                          hashrate{ device->sampleHashrate() };
             statistical::Statistical::ShareInfo shareInfo{ device->getShare() };
 
             ///////////////////////////////////////////////////////////////////
diff --git a/sources/statistical/CMakeLists.txt b/sources/statistical/CMakeLists.txt
@@ -22,4 +22,7 @@ if (BUILD_EXE_UNIT_TEST)
     )
 endif()
 
+add_subdirectory(tests)
+
 set(SOURCES_STRATISTICAL ${HEADERS} ${SOURCES} PARENT_SCOPE)
+set(SOURCES_STATISTICAL_TESTS ${SOURCES_STATISTICAL_TESTS} PARENT_SCOPE)
diff --git a/sources/statistical/statistical.cpp b/sources/statistical/statistical.cpp
@@ -1,5 +1,4 @@
 #include <common/cast.hpp>
-#include <common/log/log.hpp>
 #include <statistical/statistical.hpp>
 
 
@@ -10,7 +9,7 @@ void statistical::Statistical::setChronoUnit(common::CHRONO_UNIT newUnit)
     {
         case common::CHRONO_UNIT::SEC:
         {
-            chronoTime = 1;
+            chronoTime = 1.0;
             break;
         }
         case common::CHRONO_UNIT::MS:
@@ -47,13 +46,15 @@ void statistical::Statistical::stop()
 void statistical::Statistical::reset()
 {
     kernelExecuted = 0u;
+    nonceAccumulator.store(0ull, std::memory_order_relaxed);
     start();
 }
 
 
 void statistical::Statistical::increaseKernelExecuted()
 {
     ++kernelExecuted;
+    nonceAccumulator.fetch_add(batchNonce, std::memory_order_relaxed);
 }
 
 
@@ -75,13 +76,17 @@ uint64_t statistical::Statistical::getBatchNonce() const
 }
 
 
+uint64_t statistical::Statistical::getNonceAccumulator() const
+{
+    return nonceAccumulator.load(std::memory_order_relaxed);
+}
+
+
 void statistical::Statistical::updateHashrate()
 {
     ///////////////////////////////////////////////////////////////////////////
     elapsed = chrono.elapsed(chronoUnit);
-    double const   diffTime{ chronoTime / elapsed };
-    uint64_t const totalNonce{ batchNonce * kernelExecuted };
-    double const   values{ totalNonce * diffTime };
+    double const values{ computeHashrate(batchNonce * kernelExecuted, elapsed, chronoTime) };
 
     ///////////////////////////////////////////////////////////////////////////
     if (values > 0.0)
@@ -91,10 +96,57 @@ void statistical::Statistical::updateHashrate()
 }
 
 
-void statistical::Statistical::resetHashrate()
+double statistical::Statistical::computeHashrate(
+    uint64_t const totalNonce,
+    uint64_t const elapsedTicks,
+    double const   ticksPerSecond)
 {
-    kernelExecuted = 0u;
-    hashrates = 0.0;
+    if (0ull == elapsedTicks)
+    {
+        return 0.0;
+    }
+    return (castDouble(totalNonce) * ticksPerSecond) / castDouble(elapsedTicks);
+}
+
+
+double statistical::Statistical::smoothHashrate(double const previous, double const sample, double const factor)
+{
+    ///////////////////////////////////////////////////////////////////////////
+    // Seed directly from the first sample so the meter does not slowly ramp up
+    // from 0; afterwards blend to keep the displayed value stable.
+    if (0.0 >= previous)
+    {
+        return sample;
+    }
+    return (factor * sample) + ((1.0 - factor) * previous);
+}
+
+
+void statistical::Statistical::sampleWindow()
+{
+    ///////////////////////////////////////////////////////////////////////////
+    stop();
+    elapsed = chrono.elapsed(chronoUnit);
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Fold one sample into the EWMA per window. A working window contributes its
+    // measured rate; an empty window (a stall, or a kernel slower than the whole
+    // sampling interval) contributes 0 H/s, so the displayed value decays toward
+    // 0 instead of holding a stale value -- a dead GPU must stay visible. A
+    // slow-but-working kernel still completes >= 1 launch per window, so it never
+    // produces an empty window and never decays. Zero-length windows carry no
+    // information and are skipped.
+    uint64_t const windowNonce{ nonceAccumulator.exchange(0ull, std::memory_order_relaxed) };
+    if (0ull < elapsed)
+    {
+        double const sample{ computeHashrate(windowNonce, elapsed, chronoTime) };
+        hashrates = smoothHashrate(hashrates, sample, HASHRATE_SMOOTHING_FACTOR);
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Open the next window. kernelExecuted is owned by the mining (device) thread
+    // and is not touched here, so the stats thread never races it.
+    start();
 }
 
 
diff --git a/sources/statistical/statistical.hpp b/sources/statistical/statistical.hpp
@@ -1,11 +1,15 @@
 #pragma once
 
 
+#include <atomic>
+
 #include <common/chrono.hpp>
 
 
 namespace statistical
 {
+    constexpr double HASHRATE_SMOOTHING_FACTOR{ 0.5 };
+
     struct Statistical
     {
       public:
@@ -24,22 +28,28 @@ namespace statistical
         uint32_t            getKernelExecutedCount() const;
         void                setBatchNonce(uint64_t const newBatchNonce);
         uint64_t            getBatchNonce() const;
+        uint64_t            getNonceAccumulator() const;
         void                updateHashrate();
-        void                resetHashrate();
+        void                sampleWindow();
         double              getHashrate() const;
         ShareInfo&          getShares();
         ShareInfo           getShares() const;
         uint64_t            getElapsed() const;
         common::CHRONO_UNIT getChronoUnit() const;
 
+        static double
+        computeHashrate(uint64_t const totalNonce, uint64_t const elapsedTicks, double const ticksPerSecond);
+        static double smoothHashrate(double const previous, double const sample, double const factor);
+
       private:
-        common::CHRONO_UNIT chronoUnit{ common::CHRONO_UNIT::US };
-        common::Chrono      chrono{};
-        double              chronoTime{ common::SEC_TO_US };
-        ShareInfo           shares{};
-        uint64_t            batchNonce{ 0ull };
-        uint64_t            elapsed{ 0ull };
-        double              hashrates{ 0.0 };
-        uint32_t            kernelExecuted{ 0u };
+        common::CHRONO_UNIT   chronoUnit{ common::CHRONO_UNIT::US };
+        common::Chrono        chrono{};
+        double                chronoTime{ common::SEC_TO_US };
+        ShareInfo             shares{};
+        uint64_t              batchNonce{ 0ull };
+        std::atomic<uint64_t> nonceAccumulator{ 0ull };
+        uint64_t              elapsed{ 0ull };
+        double                hashrates{ 0.0 };
+        uint32_t              kernelExecuted{ 0u };
     };
 }
diff --git a/sources/statistical/tests/CMakeLists.txt b/sources/statistical/tests/CMakeLists.txt
@@ -0,0 +1,11 @@
+file(GLOB HEADERS "*.hpp")
+file(GLOB SOURCES "*.cpp")
+
+if (BUILD_EXE_UNIT_TEST)
+    target_sources(${UNIT_TEST_EXE} PUBLIC
+        ${HEADERS}
+        ${SOURCES}
+    )
+endif()
+
+set(SOURCES_STATISTICAL_TESTS ${HEADERS} ${SOURCES} PARENT_SCOPE)
diff --git a/sources/statistical/tests/statistical.cpp b/sources/statistical/tests/statistical.cpp

diff --git a/sources/device/device_manager_loop_statistical.cpp b/sources/device/device_manager_loop_statistical.cpp
@@ -95,7 +95,7 @@ void device::DeviceManager::loopStatistical()
             }
 
             ///////////////////////////////////////////////////////////////////
-            auto const                          hashrate{ device->getHashrate() };
+            auto const                          hashrate{ device->sampleHashrate() };
             statistical::Statistical::ShareInfo shareInfo{ device->getShare() };
 
             ///////////////////////////////////////////////////////////////////
diff --git a/sources/statistical/CMakeLists.txt b/sources/statistical/CMakeLists.txt
@@ -22,4 +22,7 @@ if (BUILD_EXE_UNIT_TEST)
     )
 endif()
 
+add_subdirectory(tests)
+
 set(SOURCES_STRATISTICAL ${HEADERS} ${SOURCES} PARENT_SCOPE)
+set(SOURCES_STATISTICAL_TESTS ${SOURCES_STATISTICAL_TESTS} PARENT_SCOPE)