Skip to content

Commit 0ec3117

Browse files
authored
Cherry-pick two recent SimpleCounters related PRs to release-7.4 (apple#12941)
* Net2::run: add counters for callbacks executed and duration of time spent executing them (apple#12824) It was observed that compute management platforms do provide CPU usage graphs, but retention is not necessarily as much as we'd like. CPU scheduling is directly controlled by FDB and is very, very easy to instrument to count how busy we are, so just do it. Testing: ../build_output4/bin/fdbserver -r test -f tests/noSim/RandomUnitTests.toml ../build_output4/bin/fdbserver -r unittests -f 'noSim' then reviewed trace files. * Address issues noticed by retroactive AI code review (apple#12930) * count Arenas created * remove a deleted method signature * Remove out of date guidance in header file * Fix up merge * formatting; delete a blank line
1 parent 86ab72e commit 0ec3117

4 files changed

Lines changed: 30 additions & 16 deletions

File tree

flow/Arena.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,18 @@ void makeUndefined(void*, size_t) {}
107107
#endif
108108
} // namespace
109109

110-
Arena::Arena() : impl(nullptr) {}
110+
static SimpleCounter<int64_t>* arenasCreated(void) {
111+
static SimpleCounter<int64_t>* p = SimpleCounter<int64_t>::makeCounter("/flow/arena/arenasCreated");
112+
return p;
113+
}
114+
115+
Arena::Arena() : impl(nullptr) {
116+
arenasCreated()->increment(1);
117+
}
118+
111119
Arena::Arena(size_t reservedSize) : impl(0) {
112120
UNSTOPPABLE_ASSERT(reservedSize < std::numeric_limits<int>::max());
113-
static SimpleCounter<int64_t>* created = SimpleCounter<int64_t>::makeCounter("/flow/arena/arenasCreated");
114-
created->increment(1);
121+
arenasCreated()->increment(1);
115122
if (reservedSize) {
116123
allowAccess(impl.getPtr());
117124
ArenaBlock::create((int)reservedSize, impl);

flow/Net2.actor.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "flow/Arena.h"
2525
#include "flow/Knobs.h"
2626
#include "flow/Platform.h"
27+
#include "flow/SimpleCounter.h"
2728
#include "flow/Trace.h"
2829
#include "flow/swift.h"
2930
#include "flow/swift_concurrency_hooks.h"
@@ -1670,6 +1671,7 @@ void Net2::run() {
16701671
[[maybe_unused]] int queueSize = taskQueue.getNumReadyTasks();
16711672

16721673
FDB_TRACE_PROBE(run_loop_tasks_start, queueSize);
1674+
int tasksExecuted = 0;
16731675
while (taskQueue.hasReadyTask()) {
16741676
++countTasks;
16751677
currentTaskID = taskQueue.getReadyTaskID();
@@ -1678,6 +1680,7 @@ void Net2::run() {
16781680
taskQueue.popReadyTask();
16791681

16801682
try {
1683+
++tasksExecuted;
16811684
++tasksSinceReact;
16821685
(*task)();
16831686
} catch (Error& e) {
@@ -1713,6 +1716,9 @@ void Net2::run() {
17131716
taskBegin = newTaskBegin;
17141717
tscBegin = tscNow;
17151718
}
1719+
static SimpleCounter<int64_t>* callbacksExecuted =
1720+
SimpleCounter<int64_t>::makeCounter("/Net2/callbacksExecuted");
1721+
callbacksExecuted->increment(tasksExecuted);
17161722

17171723
trackAtPriority(TaskPriority::RunLoop, taskBegin);
17181724

@@ -1764,17 +1770,24 @@ void Net2::run() {
17641770
}
17651771
#endif
17661772
nnow = timer_monotonic();
1773+
auto time_delta = nnow - now;
17671774

1768-
if ((nnow - now) > FLOW_KNOBS->SLOW_LOOP_CUTOFF &&
1769-
nondeterministicRandom()->random01() < (nnow - now) * FLOW_KNOBS->SLOW_LOOP_SAMPLING_RATE)
1770-
TraceEvent("SomewhatSlowRunLoopBottom")
1771-
.detail("Elapsed", nnow - now); // This includes the time spent running tasks
1775+
static SimpleCounter<double>* exec_time = SimpleCounter<double>::makeCounter("/Net2/mainThreadExecutionTime");
1776+
exec_time->increment(time_delta);
1777+
1778+
if (time_delta > FLOW_KNOBS->SLOW_LOOP_CUTOFF &&
1779+
nondeterministicRandom()->random01() < time_delta * FLOW_KNOBS->SLOW_LOOP_SAMPLING_RATE) {
1780+
TraceEvent("SomewhatSlowRunLoopBottom").detail("Elapsed", time_delta);
1781+
}
17721782
}
17731783

17741784
for (auto& fn : stopCallbacks) {
17751785
fn();
17761786
}
17771787

1788+
// Emit at least one batch of counters, for manual inspection.
1789+
simpleCounterReport();
1790+
17781791
#ifdef WIN32
17791792
timeEndPeriod(1);
17801793
#endif

flow/include/flow/SimpleCounter.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,10 @@
4444
// synchronous work in side threads, but is intended to generally be very
4545
// light weight. `makeCounter` can be called in constructors of global objects.
4646
//
47-
// If you want to use hierarchical metric names (e.g., '/'-separated
48-
// components), please use ALL LOWER CASE METRIC NAMES AS PER THE EXAMPLE ABOVE.
49-
// This enables the implementation to smuggle path component
50-
// separators into the trace output by replacing path separaters like '/' with
51-
// carefully chosen capital letters. This obtains compatibility with current FDB
52-
// "field name" naming conventions.
53-
//
47+
// Hierarchical counter names with '/' separaters are encouraged.
5448
// In the future we might replace '/' with '_' to obtain Prometheus-compatible
55-
// metric names that don't actually look terrible.
49+
// metric names that don't actually look terrible. For now software does this
50+
// conversion on back-end TraceEvent generation.
5651
//
5752
// If you don't want to use hierarchical metric names, then your counter
5853
// names should be ReallyVerboseConcatenatedNamesWithCaps and must be globally

flow/include/flow/Trace.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@ class TraceEventFields {
136136
Field& mutate(int index);
137137

138138
std::string toString() const;
139-
void validateFormat() const;
140139
template <class Archiver>
141140
void serialize(Archiver& ar) {
142141
static_assert(is_fb_function<Archiver>, "Streaming serializer has to use load/save");

0 commit comments

Comments
 (0)