Skip to content

Commit 027a46b

Browse files
committed
ROX-30714: track cgroup ID to container ID mapping
This change greatly reduces the amount of data sent in the ringbuffer and the effort the BPF hooks need to retrieve the cgroup of the current process. In exchange for these benefits, we now need to lookup and keep track of the cgroups and container IDs that exist on the system ourselves by iterating over the cgroupsfs. TODO: Add integration tests with containers.
1 parent 32a36de commit 027a46b

8 files changed

Lines changed: 306 additions & 215 deletions

File tree

fact-ebpf/src/bpf/maps.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
*/
1313
struct helper_t {
1414
char buf[PATH_MAX * 2];
15-
const unsigned char* array[16];
1615
};
1716

1817
struct {

fact-ebpf/src/bpf/process.h

Lines changed: 1 addition & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -11,71 +11,6 @@
1111
#include <bpf/bpf_core_read.h>
1212
// clang-format on
1313

14-
__always_inline static const char* get_memory_cgroup(struct helper_t* helper) {
15-
if (!bpf_core_enum_value_exists(enum cgroup_subsys_id, memory_cgrp_id)) {
16-
return NULL;
17-
}
18-
19-
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
20-
21-
// We're guessing which cgroup controllers are enabled for this task. The
22-
// assumption is that memory controller is present more often than
23-
// cpu & cpuacct.
24-
struct kernfs_node* kn = BPF_CORE_READ(task, cgroups, subsys[memory_cgrp_id], cgroup, kn);
25-
if (kn == NULL) {
26-
return NULL;
27-
}
28-
29-
int i = 0;
30-
for (; i < 16; i++) {
31-
helper->array[i] = (const unsigned char*)BPF_CORE_READ(kn, name);
32-
if (bpf_core_field_exists(kn->__parent)) {
33-
kn = BPF_CORE_READ(kn, __parent);
34-
} else {
35-
struct {
36-
struct kernfs_node* parent;
37-
}* kn_old = (void*)kn;
38-
kn = BPF_CORE_READ(kn_old, parent);
39-
}
40-
if (kn == NULL) {
41-
break;
42-
}
43-
}
44-
45-
if (i == 16) {
46-
i--;
47-
}
48-
49-
int offset = 0;
50-
for (; i >= 0 && offset < PATH_MAX; i--) {
51-
// Skip empty directories
52-
if (helper->array[i] == NULL) {
53-
continue;
54-
}
55-
56-
helper->buf[offset & (PATH_MAX - 1)] = '/';
57-
if (++offset >= PATH_MAX) {
58-
return NULL;
59-
}
60-
61-
int len = bpf_probe_read_kernel_str(&helper->buf[offset & (PATH_MAX - 1)], PATH_MAX, helper->array[i]);
62-
if (len < 0) {
63-
// We should have skipped all empty entries, any other error is a genuine
64-
// problem, stop processing.
65-
return NULL;
66-
}
67-
68-
if (len == 1) {
69-
offset--;
70-
continue;
71-
}
72-
73-
offset += len - 1;
74-
}
75-
76-
return helper->buf;
77-
}
78-
7914
__always_inline static void process_fill_lineage(process_t* p, struct helper_t* helper) {
8015
struct task_struct* task = (struct task_struct*)bpf_get_current_task();
8116
struct path path;
@@ -112,6 +47,7 @@ __always_inline static int64_t process_fill(process_t* p) {
11247
p->gid = (uid_gid >> 32) & 0xFFFFFFFF;
11348
p->login_uid = BPF_CORE_READ(task, loginuid.val);
11449
p->pid = (bpf_get_current_pid_tgid() >> 32) & 0xFFFFFFFF;
50+
p->cgroup_id = bpf_get_current_cgroup_id();
11551
u_int64_t err = bpf_get_current_comm(p->comm, TASK_COMM_LEN);
11652
if (err != 0) {
11753
bpf_printk("Failed to fill task comm");
@@ -144,11 +80,6 @@ __always_inline static int64_t process_fill(process_t* p) {
14480
}
14581
bpf_probe_read_str(p->exe_path, PATH_MAX, exe_path);
14682

147-
const char* cg = get_memory_cgroup(helper);
148-
if (cg != NULL) {
149-
bpf_probe_read_str(p->memory_cgroup, PATH_MAX, cg);
150-
}
151-
15283
p->in_root_mount_ns = get_mount_ns() == host_mount_ns;
15384

15485
process_fill_lineage(p, helper);

fact-ebpf/src/bpf/types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ typedef struct process_t {
2222
char args[4096];
2323
unsigned int args_len;
2424
char exe_path[PATH_MAX];
25-
char memory_cgroup[PATH_MAX];
25+
unsigned long long cgroup_id;
2626
unsigned int uid;
2727
unsigned int gid;
2828
unsigned int login_uid;

fact/src/bpf.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ use tokio::{
1414
task::JoinHandle,
1515
};
1616

17-
use crate::{config::FactConfig, event::Event, host_info, metrics::EventCounter};
17+
use crate::{
18+
cgroup::ContainerIdCache, config::FactConfig, event::Event, host_info, metrics::EventCounter,
19+
};
1820

1921
use fact_ebpf::{event_t, metrics_t, path_prefix_t, LPM_SIZE_MAX};
2022

@@ -98,6 +100,7 @@ impl Bpf {
98100
mut fd: AsyncFd<RingBuf<MapData>>,
99101
mut running: Receiver<bool>,
100102
event_counter: EventCounter,
103+
cid_cache: ContainerIdCache,
101104
) -> JoinHandle<()> {
102105
info!("Starting BPF worker...");
103106
tokio::spawn(async move {
@@ -108,7 +111,7 @@ impl Bpf {
108111
let ringbuf = guard.get_inner_mut();
109112
while let Some(event) = ringbuf.next() {
110113
let event: &event_t = unsafe { &*(event.as_ptr() as *const _) };
111-
let event = match Event::try_from(event) {
114+
let event = match Event::new(event, &cid_cache).await {
112115
Ok(event) => Arc::new(event),
113116
Err(e) => {
114117
error!("Failed to parse event: '{e}'");
@@ -173,15 +176,22 @@ mod bpf_tests {
173176
let (run_tx, run_rx) = watch::channel(true);
174177
// Create a metrics exporter, but don't start it
175178
let exporter = Exporter::new(bpf.get_metrics().unwrap());
176-
177-
Bpf::start_worker(tx, bpf.fd, run_rx, exporter.metrics.bpf_worker.clone());
179+
let cid_cache = ContainerIdCache::new();
180+
181+
Bpf::start_worker(
182+
tx,
183+
bpf.fd,
184+
run_rx,
185+
exporter.metrics.bpf_worker.clone(),
186+
cid_cache,
187+
);
178188

179189
// Create a file
180190
let file =
181191
NamedTempFile::new_in(monitored_path).expect("Failed to create temporary file");
182192
println!("Created {file:?}");
183193

184-
let expected = Event::new(
194+
let expected = Event::from_raw_parts(
185195
file_activity_type_t::FILE_ACTIVITY_CREATION,
186196
host_info::get_hostname(),
187197
file.path().to_path_buf(),

fact/src/cgroup.rs

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
use std::{
2+
collections::HashMap,
3+
os::unix::fs::DirEntryExt,
4+
path::PathBuf,
5+
sync::Arc,
6+
time::{Duration, SystemTime},
7+
};
8+
9+
use log::warn;
10+
use tokio::{
11+
sync::{watch::Receiver, Mutex},
12+
task::JoinHandle,
13+
time,
14+
};
15+
16+
use crate::host_info::get_cgroup_paths;
17+
18+
#[derive(Debug)]
19+
struct ContainerIdEntry {
20+
container_id: Option<String>,
21+
pub last_seen: SystemTime,
22+
}
23+
24+
type ContainerIdMap = HashMap<u64, ContainerIdEntry>;
25+
26+
#[derive(Debug, Clone, Default)]
27+
pub struct ContainerIdCache(Arc<Mutex<ContainerIdMap>>);
28+
29+
impl ContainerIdCache {
30+
pub fn new() -> Self {
31+
let mut map = HashMap::new();
32+
ContainerIdCache::update_unlocked(&mut map);
33+
ContainerIdCache(Arc::new(Mutex::new(map)))
34+
}
35+
36+
fn update_unlocked(map: &mut ContainerIdMap) {
37+
for root in get_cgroup_paths() {
38+
ContainerIdCache::walk_cgroupfs(&root, map, None);
39+
}
40+
}
41+
42+
async fn update(&mut self) {
43+
let mut map = self.0.lock().await;
44+
ContainerIdCache::update_unlocked(&mut map);
45+
}
46+
47+
async fn prune(&mut self) {
48+
let now = SystemTime::now();
49+
self.0.lock().await.retain(|_, value| {
50+
now.duration_since(value.last_seen).unwrap() < Duration::from_secs(30)
51+
})
52+
}
53+
54+
pub async fn get_container_id(&self, cgroup_id: u64) -> Option<String> {
55+
let mut map = self.0.lock().await;
56+
match map.get(&cgroup_id) {
57+
Some(entry) => entry.container_id.clone(),
58+
None => {
59+
// Update the container ID cache and try again
60+
ContainerIdCache::update_unlocked(&mut map);
61+
map.get(&cgroup_id).map(|s| s.container_id.clone())?
62+
}
63+
}
64+
}
65+
66+
pub fn start_worker(mut self, mut running: Receiver<bool>) -> JoinHandle<()> {
67+
tokio::spawn(async move {
68+
let mut update_interval = time::interval(time::Duration::from_secs(30));
69+
loop {
70+
tokio::select! {
71+
_ = update_interval.tick() => {
72+
self.update().await;
73+
self.prune().await;
74+
},
75+
_ = running.changed() => {
76+
if !*running.borrow() {
77+
return;
78+
}
79+
}
80+
}
81+
}
82+
})
83+
}
84+
85+
fn walk_cgroupfs(path: &PathBuf, map: &mut ContainerIdMap, parent_id: Option<&str>) {
86+
for entry in std::fs::read_dir(path).unwrap() {
87+
let entry = match entry {
88+
Ok(entry) => entry,
89+
Err(e) => {
90+
warn!("Failed to read {}: {e}", path.display());
91+
continue;
92+
}
93+
};
94+
95+
let p = entry.path();
96+
if !p.is_dir() {
97+
continue;
98+
}
99+
100+
let container_id = match map.get_mut(&entry.ino()) {
101+
Some(e) => {
102+
e.last_seen = SystemTime::now();
103+
e.container_id.clone()
104+
}
105+
None => {
106+
let last_component = p
107+
.file_name()
108+
.map(|f| f.to_str().unwrap_or(""))
109+
.unwrap_or("");
110+
let container_id = match ContainerIdCache::extract_container_id(last_component)
111+
{
112+
Some(cid) => Some(cid),
113+
None => parent_id.map(|f| f.to_owned()),
114+
};
115+
let last_seen = SystemTime::now();
116+
map.insert(
117+
entry.ino(),
118+
ContainerIdEntry {
119+
container_id: container_id.clone(),
120+
last_seen,
121+
},
122+
);
123+
container_id
124+
}
125+
};
126+
ContainerIdCache::walk_cgroupfs(&p, map, container_id.as_deref());
127+
}
128+
}
129+
130+
pub fn extract_container_id(cgroup: &str) -> Option<String> {
131+
if cgroup.is_empty() {
132+
return None;
133+
}
134+
135+
let cgroup = cgroup.strip_suffix(".scope").unwrap_or(cgroup);
136+
if cgroup.len() < 64 {
137+
return None;
138+
}
139+
140+
let (prefix, id) = cgroup.split_at(cgroup.len() - 64);
141+
142+
if !prefix.is_empty() && !prefix.ends_with('-') {
143+
return None;
144+
}
145+
146+
if id.chars().all(|c| c.is_ascii_hexdigit()) {
147+
Some(id.split_at(12).0.to_owned())
148+
} else {
149+
None
150+
}
151+
}
152+
}
153+
154+
#[cfg(test)]
155+
mod tests {
156+
use super::*;
157+
158+
#[test]
159+
fn extract_container_id() {
160+
let tests = [
161+
("e73c55f3e7f5b6a9cfc32a89bf13e44d348bcc4fa7b079f804d61fb1532ddbe5", Some("e73c55f3e7f5")),
162+
("cri-containerd-219d7afb8e7450929eaeb06f2d27cbf7183bfa5b55b7275696f3df4154a979af.scope", Some("219d7afb8e74")),
163+
("kubelet-kubepods-burstable-pod469726a5_079d_4d15_a259_1f654b534b44.slice", None),
164+
("libpod-conmon-a2d2a36121868d946af912b931fc5f6b42bf84c700cef67784422b1e2c8585ee.scope", Some("a2d2a3612186")),
165+
("init.scope", None),
166+
("app-flatpak-com.github.IsmaelMartinez.teams_for_linux-384393947.scope", None),
167+
];
168+
169+
for (cgroup, expected) in tests {
170+
let cid = ContainerIdCache::extract_container_id(cgroup);
171+
assert_eq!(cid.as_deref(), expected);
172+
}
173+
}
174+
}

0 commit comments

Comments
 (0)