Skip to content

Commit c71e720

Browse files
committed
in_ebpf: Implement sched eBPF trace
Signed-off-by: Hiroshi Hatake <hiroshi@chronosphere.io>
1 parent 2cc2b16 commit c71e720

6 files changed

Lines changed: 309 additions & 0 deletions

File tree

plugins/in_ebpf/traces/includes/common/encoder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ static inline char *event_type_to_string(enum event_type type) {
2727
return "connect";
2828
case EVENT_TYPE_DNS:
2929
return "dns";
30+
case EVENT_TYPE_SCHED:
31+
return "sched";
3032
default:
3133
return "unknown";
3234
}

plugins/in_ebpf/traces/includes/common/events.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ enum event_type {
2121
EVENT_TYPE_ACCEPT,
2222
EVENT_TYPE_CONNECT,
2323
EVENT_TYPE_DNS,
24+
EVENT_TYPE_SCHED,
2425
};
2526

2627
enum vfs_op {
@@ -145,6 +146,17 @@ struct dns_event {
145146
__u8 query_raw[DNS_QUERY_RAW_MAX];
146147
};
147148

149+
struct sched_event {
150+
__u32 prev_pid;
151+
int prev_prio;
152+
long prev_state;
153+
__u32 next_pid;
154+
int next_prio;
155+
__u32 cpu;
156+
__u64 runq_latency_ns;
157+
__u8 wakeup_tracked;
158+
};
159+
148160
struct event {
149161
enum event_type type; // Type of event (execve, signal, mem, bind)
150162
struct event_common common; // Common fields for all events
@@ -158,6 +170,7 @@ struct event {
158170
struct accept_event accept;
159171
struct connect_event connect;
160172
struct dns_event dns;
173+
struct sched_event sched;
161174
} details;
162175
};
163176

plugins/in_ebpf/traces/sched/bpf.c

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2+
#define __TARGET_ARCH_x86_64
3+
4+
#include <vmlinux.h>
5+
6+
#define _LINUX_TYPES_H
7+
#define _LINUX_POSIX_TYPES_H
8+
9+
#include <bpf/bpf_helpers.h>
10+
#include <bpf/bpf_core_read.h>
11+
12+
#include <gadget/buffer.h>
13+
#include <gadget/mntns_filter.h>
14+
15+
#include "common/events.h"
16+
17+
struct wakeup_info {
18+
__u64 wakeup_ns;
19+
};
20+
21+
struct {
22+
__uint(type, BPF_MAP_TYPE_HASH);
23+
__uint(max_entries, 65536);
24+
__type(key, __u32);
25+
__type(value, struct wakeup_info);
26+
} wakeup_by_pid SEC(".maps");
27+
28+
struct {
29+
__uint(type, BPF_MAP_TYPE_RINGBUF);
30+
__uint(max_entries, 1 << 20);
31+
} events SEC(".maps");
32+
33+
/*
 * Remember "now" as the wakeup time of `pid` in wakeup_by_pid, replacing any
 * previous entry for the same PID. PID 0 is ignored.
 *
 * The result of the map update is intentionally not checked (best effort).
 * Always returns 0 so it can be used directly as a tracepoint return value.
 */
static __always_inline int track_wakeup(__u32 pid)
{
    struct wakeup_info stamp = {0};

    if (pid != 0) {
        stamp.wakeup_ns = bpf_ktime_get_ns();
        bpf_map_update_elem(&wakeup_by_pid, &pid, &stamp, BPF_ANY);
    }

    return 0;
}
46+
47+
/* sched_wakeup tracepoint: record the wakeup time of the woken task. */
SEC("tracepoint/sched/sched_wakeup")
int trace_sched_wakeup(struct trace_event_raw_sched_wakeup_template *ctx)
{
    __u32 woken_pid = (__u32) ctx->pid;

    return track_wakeup(woken_pid);
}
52+
53+
/* sched_wakeup_new tracepoint: same bookkeeping as sched_wakeup. */
SEC("tracepoint/sched/sched_wakeup_new")
int trace_sched_wakeup_new(struct trace_event_raw_sched_wakeup_template *ctx)
{
    __u32 woken_pid = (__u32) ctx->pid;

    return track_wakeup(woken_pid);
}
58+
59+
/*
 * sched_switch tracepoint: emit one EVENT_TYPE_SCHED record per context
 * switch to a non-idle task (next_pid != 0), including the run-queue latency
 * when a wakeup timestamp was recorded for the incoming task.
 *
 * The wakeup entry for next_pid is consumed before any filtering or ring
 * buffer reservation. If it were consumed only on the success path, an
 * early return (filtered mount namespace, ring buffer full) would leave a
 * stale timestamp in wakeup_by_pid, and a later switch-in of the same task
 * would report a latency measured from that old wakeup.
 *
 * Always returns 0.
 */
SEC("tracepoint/sched/sched_switch")
int trace_sched_switch(struct trace_event_raw_sched_switch *ctx)
{
    struct event *event;
    struct wakeup_info *wakeup;
    __u64 now_ns;
    __u64 runq_latency_ns = 0;
    __u64 uid_gid;
    __u64 mntns_id;
    __u32 next_pid;
    __u8 wakeup_tracked = 0;

    next_pid = (__u32) ctx->next_pid;
    if (next_pid == 0) {
        /* PID 0 is never tracked by track_wakeup() and is not reported. */
        return 0;
    }

    /* Consume the pending wakeup timestamp, if any, for the incoming task. */
    now_ns = bpf_ktime_get_ns();
    wakeup = bpf_map_lookup_elem(&wakeup_by_pid, &next_pid);
    if (wakeup) {
        wakeup_tracked = 1;
        /* Guard against a non-positive delta; latency stays 0 then. */
        if (now_ns > wakeup->wakeup_ns) {
            runq_latency_ns = now_ns - wakeup->wakeup_ns;
        }
        bpf_map_delete_elem(&wakeup_by_pid, &next_pid);
    }

    mntns_id = gadget_get_mntns_id();
    if (gadget_should_discard_mntns_id(mntns_id)) {
        return 0;
    }

    event = gadget_reserve_buf(&events, sizeof(*event));
    if (!event) {
        return 0;
    }

    /*
     * NOTE(review): mntns_id, uid and gid are obtained from helpers that
     * describe the task that is current when the tracepoint fires, while
     * pid/tid/comm below describe the incoming task. Presumably the current
     * task is the outgoing one at this point - confirm whether these fields
     * should be attributed to next_pid.
     */
    uid_gid = bpf_get_current_uid_gid();

    event->type = EVENT_TYPE_SCHED;
    event->common.timestamp_raw = bpf_ktime_get_boot_ns();
    /* The tracepoint exposes a single id for the incoming task; it is used
     * for both pid and tid. */
    event->common.pid = next_pid;
    event->common.tid = next_pid;
    event->common.uid = (u32) uid_gid;
    event->common.gid = (u32) (uid_gid >> 32);
    event->common.mntns_id = mntns_id;

    __builtin_memcpy(event->common.comm, ctx->next_comm, sizeof(event->common.comm));

    event->details.sched.prev_pid = (__u32) ctx->prev_pid;
    event->details.sched.prev_prio = ctx->prev_prio;
    event->details.sched.prev_state = ctx->prev_state;
    event->details.sched.next_pid = next_pid;
    event->details.sched.next_prio = ctx->next_prio;
    event->details.sched.cpu = bpf_get_smp_processor_id();
    event->details.sched.wakeup_tracked = wakeup_tracked;
    event->details.sched.runq_latency_ns = runq_latency_ns;

    gadget_submit_buf(ctx, &events, event, sizeof(*event));

    return 0;
}
121+
122+
/*
 * License string placed in the "license" section of the BPF object.
 * NOTE(review): the SPDX tag at the top of this file reads
 * "LGPL-2.1 OR BSD-2-Clause" while this string reads "Dual BSD/GPL" -
 * confirm the two are meant to differ.
 */
char LICENSE[] SEC("license") = "Dual BSD/GPL";
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#include <fluent-bit/flb_input_plugin.h>
2+
#include <fluent-bit/flb_log_event_encoder.h>
3+
4+
#include "common/events.h"
5+
#include "common/event_context.h"
6+
#include "common/encoder.h"
7+
8+
#include "handler.h"
9+
10+
int encode_sched_event(struct flb_log_event_encoder *log_encoder,
11+
const struct event *e)
12+
{
13+
int ret;
14+
15+
ret = flb_log_event_encoder_begin_record(log_encoder);
16+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
17+
return -1;
18+
}
19+
20+
ret = encode_common_fields(log_encoder, e);
21+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
22+
flb_log_event_encoder_rollback_record(log_encoder);
23+
return -1;
24+
}
25+
26+
ret = flb_log_event_encoder_append_body_cstring(log_encoder, "cpu");
27+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
28+
flb_log_event_encoder_rollback_record(log_encoder);
29+
return -1;
30+
}
31+
ret = flb_log_event_encoder_append_body_uint32(log_encoder, e->details.sched.cpu);
32+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
33+
flb_log_event_encoder_rollback_record(log_encoder);
34+
return -1;
35+
}
36+
37+
ret = flb_log_event_encoder_append_body_cstring(log_encoder, "prev_pid");
38+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
39+
flb_log_event_encoder_rollback_record(log_encoder);
40+
return -1;
41+
}
42+
ret = flb_log_event_encoder_append_body_uint32(log_encoder, e->details.sched.prev_pid);
43+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
44+
flb_log_event_encoder_rollback_record(log_encoder);
45+
return -1;
46+
}
47+
48+
ret = flb_log_event_encoder_append_body_cstring(log_encoder, "prev_prio");
49+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
50+
flb_log_event_encoder_rollback_record(log_encoder);
51+
return -1;
52+
}
53+
ret = flb_log_event_encoder_append_body_int32(log_encoder, e->details.sched.prev_prio);
54+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
55+
flb_log_event_encoder_rollback_record(log_encoder);
56+
return -1;
57+
}
58+
59+
ret = flb_log_event_encoder_append_body_cstring(log_encoder, "prev_state");
60+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
61+
flb_log_event_encoder_rollback_record(log_encoder);
62+
return -1;
63+
}
64+
ret = flb_log_event_encoder_append_body_int64(log_encoder, e->details.sched.prev_state);
65+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
66+
flb_log_event_encoder_rollback_record(log_encoder);
67+
return -1;
68+
}
69+
70+
ret = flb_log_event_encoder_append_body_cstring(log_encoder, "next_pid");
71+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
72+
flb_log_event_encoder_rollback_record(log_encoder);
73+
return -1;
74+
}
75+
ret = flb_log_event_encoder_append_body_uint32(log_encoder, e->details.sched.next_pid);
76+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
77+
flb_log_event_encoder_rollback_record(log_encoder);
78+
return -1;
79+
}
80+
81+
ret = flb_log_event_encoder_append_body_cstring(log_encoder, "next_prio");
82+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
83+
flb_log_event_encoder_rollback_record(log_encoder);
84+
return -1;
85+
}
86+
ret = flb_log_event_encoder_append_body_int32(log_encoder, e->details.sched.next_prio);
87+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
88+
flb_log_event_encoder_rollback_record(log_encoder);
89+
return -1;
90+
}
91+
92+
ret = flb_log_event_encoder_append_body_cstring(log_encoder, "runq_latency_ns");
93+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
94+
flb_log_event_encoder_rollback_record(log_encoder);
95+
return -1;
96+
}
97+
ret = flb_log_event_encoder_append_body_uint64(log_encoder, e->details.sched.runq_latency_ns);
98+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
99+
flb_log_event_encoder_rollback_record(log_encoder);
100+
return -1;
101+
}
102+
103+
ret = flb_log_event_encoder_append_body_cstring(log_encoder, "wakeup_tracked");
104+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
105+
flb_log_event_encoder_rollback_record(log_encoder);
106+
return -1;
107+
}
108+
ret = flb_log_event_encoder_append_body_boolean(log_encoder, e->details.sched.wakeup_tracked);
109+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
110+
flb_log_event_encoder_rollback_record(log_encoder);
111+
return -1;
112+
}
113+
114+
ret = flb_log_event_encoder_commit_record(log_encoder);
115+
if (ret != FLB_EVENT_ENCODER_SUCCESS) {
116+
return -1;
117+
}
118+
119+
return 0;
120+
}
121+
122+
int trace_sched_handler(void *ctx, void *data, size_t data_sz)
123+
{
124+
struct trace_event_context *event_ctx;
125+
struct flb_log_event_encoder *encoder;
126+
struct event *e;
127+
int ret;
128+
129+
event_ctx = (struct trace_event_context *) ctx;
130+
e = (struct event *) data;
131+
132+
if (data_sz < sizeof(struct event) || e->type != EVENT_TYPE_SCHED) {
133+
return -1;
134+
}
135+
136+
encoder = event_ctx->log_encoder;
137+
138+
ret = encode_sched_event(encoder, e);
139+
if (ret != 0) {
140+
return -1;
141+
}
142+
143+
ret = flb_input_log_append(event_ctx->ins,
144+
NULL,
145+
0,
146+
encoder->output_buffer,
147+
encoder->output_length);
148+
if (ret == -1) {
149+
return -1;
150+
}
151+
152+
flb_log_event_encoder_reset(encoder);
153+
154+
return 0;
155+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#ifndef SCHED_HANDLER_H
2+
#define SCHED_HANDLER_H
3+
4+
#include <fluent-bit/flb_log_event_encoder.h>
5+
#include <stddef.h>
6+
7+
#include "common/events.h"
8+
9+
/*
 * Encode the scheduler details of `e` (plus the common event fields) as one
 * record in `log_encoder`. Returns 0 on success, -1 on failure.
 */
int encode_sched_event(struct flb_log_event_encoder *log_encoder,
                       const struct event *e);

/*
 * Ring buffer callback for EVENT_TYPE_SCHED samples. `ctx` is a
 * struct trace_event_context, `data` points to a struct event of `data_sz`
 * bytes. Returns 0 on success, -1 on failure.
 */
int trace_sched_handler(void *ctx, void *data, size_t data_sz);
12+
13+
#endif

plugins/in_ebpf/traces/traces.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "generated/trace_tcp.skel.h"
1111
#include "generated/trace_exec.skel.h"
1212
#include "generated/trace_dns.skel.h"
13+
#include "generated/trace_sched.skel.h"
1314

1415
#include "bind/handler.h"
1516
#include "signal/handler.h" // Include signal handler
@@ -18,6 +19,7 @@
1819
#include "tcp/handler.h"
1920
#include "exec/handler.h"
2021
#include "dns/handler.h"
22+
#include "sched/handler.h"
2123

2224
/* Skeleton function pointer types */
2325
typedef void *(*trace_skel_open_func_t)(void);
@@ -72,6 +74,7 @@ DEFINE_GET_BPF_OBJECT(trace_vfs)
7274
DEFINE_GET_BPF_OBJECT(trace_tcp)
7375
DEFINE_GET_BPF_OBJECT(trace_exec)
7476
DEFINE_GET_BPF_OBJECT(trace_dns)
77+
DEFINE_GET_BPF_OBJECT(trace_sched)
7578

7679
static struct trace_registration trace_table[] = {
7780
REGISTER_TRACE(trace_signal, trace_signal_handler),
@@ -81,6 +84,7 @@ static struct trace_registration trace_table[] = {
8184
REGISTER_TRACE(trace_tcp, trace_tcp_handler),
8285
REGISTER_TRACE(trace_exec, trace_exec_handler),
8386
REGISTER_TRACE(trace_dns, trace_dns_handler),
87+
REGISTER_TRACE(trace_sched, trace_sched_handler),
8488
};
8589

8690
#endif // TRACE_TRACES_H

0 commit comments

Comments
 (0)