Skip to content

Commit b2c4da0

Browse files
psalz authored and PeterTh committed
Measure and report executor starvation time
The executor is considered to be "starving" when it is out of instructions to process, and the scheduler is currently busy. This means that phases of the user program that do not interact with the Celerity API do not count as starvation periods. If the total starvation time exceeds a percentage threshold of the time spent doing actual work (i.e., processing instructions), we print a warning indicating that the application might be scheduler-bound.
1 parent 1badf92 commit b2c4da0

15 files changed

+377
-62
lines changed

include/double_buffered_queue.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ class double_buffered_queue {
3939
return m_read.queue;
4040
}
4141

42+
/// Returns true if a call to `pop_all` would have returned an empty vector.
43+
bool empty() const {
	// Relaxed load: only the current value of the `queue_nonempty` flag is inspected here.
	const bool has_pending = m_write.queue_nonempty.load(std::memory_order_relaxed);
	return !has_pending;
}
44+
4245
/// After this function returns, the result of `pop_all` is non-empty as long as there is only a single reader thread.
4346
void wait_while_empty() {
4447
if(!m_write.queue_nonempty.load(std::memory_order_relaxed) /* opportunistic */) {

include/dry_run_executor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ class dry_run_executor final : public executor {
3535

3636
void submit(std::vector<const instruction*> instructions, std::vector<outbound_pilot> pilots) override;
3737

38+
/// Receives scheduler idle-state changes as required by the `executor` interface; the dry-run implementation ignores the value.
/// The parameter is spelled `const bool` to match the base-class declaration and the out-of-line definition.
void notify_scheduler_idle(const bool is_idle) override;
39+
40+
std::chrono::nanoseconds get_starvation_time() const override;
41+
std::chrono::nanoseconds get_active_time() const override;
42+
3843
private:
3944
using host_object_transfer = std::pair<host_object_id, std::unique_ptr<host_object_instance>>;
4045
using submission = std::variant<std::vector<const instruction*>, host_object_transfer>;

include/executor.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "types.h"
44

5+
#include <chrono>
56
#include <memory>
67
#include <vector>
78

@@ -61,6 +62,15 @@ class executor {
6162
/// recipients as soon as possible. Instructions must be in topological order of dependencies, as must be the concatenation of all vectors passed to
6263
/// subsequent invocations of this function.
6364
virtual void submit(std::vector<const instruction*> instructions, std::vector<outbound_pilot> pilots) = 0;
65+
66+
/// Informs the executor about a change of the scheduler idle state. Required for tracking starvation time.
67+
virtual void notify_scheduler_idle(const bool is_idle) = 0;
68+
69+
/// Returns the total time the executor has spent idle waiting for instructions while the scheduler was busy.
70+
virtual std::chrono::nanoseconds get_starvation_time() const = 0;
71+
72+
/// Returns the total time the executor has spent processing instructions.
73+
virtual std::chrono::nanoseconds get_active_time() const = 0;
6474
};
6575

6676
} // namespace celerity::detail

include/live_executor.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,16 @@ struct reducer_transfer {
3030
reduction_id rid = 0;
3131
std::unique_ptr<reducer> reduction;
3232
};
33-
using submission = std::variant<instruction_pilot_batch, user_allocation_transfer, host_object_transfer, reducer_transfer>;
33+
/// Alternative of the `submission` variant that carries a scheduler idle-state change.
struct scheduler_idle_state_change {
34+
/// NOTE(review): presumably the new idle state, mirroring the `is_idle` argument of `notify_scheduler_idle` - confirm against the producer.
bool is_idle = false;
35+
};
36+
using submission = std::variant<instruction_pilot_batch, user_allocation_transfer, host_object_transfer, reducer_transfer, scheduler_idle_state_change>;
3437

3538
} // namespace celerity::detail::live_executor_detail
3639

3740
namespace celerity::detail {
3841

3942
class communicator;
40-
struct system_info;
4143
class backend;
4244

4345
/// Executor implementation for a normal (non-dry) run of a Celerity application. Internal instruction dependencies are resolved by means of an
@@ -66,14 +68,23 @@ class live_executor final : public executor {
6668

6769
void submit(std::vector<const instruction*> instructions, std::vector<outbound_pilot> pilots) override;
6870

71+
void notify_scheduler_idle(const bool is_idle) override;
72+
73+
std::chrono::nanoseconds get_starvation_time() const override;
74+
75+
std::chrono::nanoseconds get_active_time() const override;
76+
6977
private:
7078
friend struct executor_testspy;
7179

80+
struct impl;
81+
7282
std::unique_ptr<communicator> m_root_comm; // created and destroyed outside of executor thread
7383
double_buffered_queue<live_executor_detail::submission> m_submission_queue;
84+
std::unique_ptr<impl> m_impl;
7485
std::thread m_thread;
7586

76-
void thread_main(std::unique_ptr<backend> backend, executor::delegate* dlg, const policy_set& policy);
87+
void thread_main();
7788

7889
/// Default-constructs a `policy_set` - this must be a function because we can't use the implicit default constructor of `policy_set`, which has member
7990
/// initializers, within its surrounding class (Clang diagnostic).

include/scheduler.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,19 @@ class scheduler {
2828
friend struct scheduler_testspy;
2929

3030
public:
31-
using delegate = instruction_graph_generator::delegate;
31+
/// Scheduler delegate interface: extends `instruction_graph_generator::delegate` with two additional pure-virtual
/// callbacks for scheduler idle / busy transitions. All special member functions are protected, so a delegate can
/// only be constructed, copied, moved or destroyed from within a deriving class.
class delegate : public instruction_graph_generator::delegate {
32+
protected:
33+
delegate() = default;
34+
delegate(const delegate&) = default;
35+
delegate(delegate&&) = default;
36+
delegate& operator=(const delegate&) = default;
37+
delegate& operator=(delegate&&) = default;
38+
~delegate() = default; // do not allow destruction through base pointer
39+
40+
public:
41+
/// NOTE(review): presumably invoked when the scheduler becomes idle - call site is not visible here, confirm.
virtual void on_scheduler_idle() = 0;
42+
/// NOTE(review): presumably invoked when the scheduler becomes busy again - call site is not visible here, confirm.
virtual void on_scheduler_busy() = 0;
43+
};
3244

3345
struct policy_set {
3446
detail::command_graph_generator::policy_set command_graph_generator;

include/tracy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ enum class trace_color : std::underlying_type_t<tracy::Color::ColorType> {
110110
distr_queue_submit = tracy::Color::Orange3,
111111

112112
executor_fetch = tracy::Color::Gray,
113+
executor_idle = tracy::Color::SlateGray,
113114
executor_issue = tracy::Color::Blue,
114115
executor_issue_copy = tracy::Color::Green4,
115116
executor_issue_device_kernel = tracy::Color::Yellow2,

src/dry_run_executor.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ void dry_run_executor::submit(std::vector<const instruction*> instructions, std:
3131
(void)pilots; // ignore;
3232
}
3333

34+
/// Dry-run implementation of the scheduler idle-state notification: the reported state is deliberately discarded.
void dry_run_executor::notify_scheduler_idle(const bool is_idle) {
35+
(void)is_idle; // ignore: the cast to void marks the parameter as intentionally unused
36+
}
37+
38+
/// The dry-run executor always reports a starvation time of zero.
std::chrono::nanoseconds dry_run_executor::get_starvation_time() const {
	return std::chrono::nanoseconds::zero();
}
39+
40+
/// The dry-run executor always reports an active (instruction-processing) time of zero.
std::chrono::nanoseconds dry_run_executor::get_active_time() const {
	return std::chrono::nanoseconds::zero();
}
41+
3442
void dry_run_executor::thread_main(executor::delegate* const dlg) {
3543
name_and_pin_and_order_this_thread(named_threads::thread_type::executor);
3644
// For simplicity we keep all executor state within this function.

0 commit comments

Comments
 (0)