Skip to content

Commit 4488111

Browse files
fix(agent): retry block event delivery and backfill best effort
1 parent 23c7057 commit 4488111

3 files changed

Lines changed: 249 additions & 0 deletions

File tree

agent/src/common/proc_event/linux.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,11 +319,21 @@ pub struct ProcLifecycleExecInfo<'a> {
319319
pub parent_pid: u32,
320320
pub uid: u32,
321321
pub gid: u32,
322+
pub timestamp: u64,
322323
pub comm: &'a [u8],
323324
pub cmdline: &'a [u8],
324325
pub exec_path: &'a [u8],
325326
}
326327

328+
/// Borrowed, read-only view over selected fields of a process block event.
///
/// Produced by `ProcEvent::proc_block_info`; `rule_id` and `exec_path`
/// borrow from the originating event for the lifetime `'a`, so the view is
/// cheap to copy and never owns any data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ProcBlockInfo<'a> {
    /// PID stored in the block event.
    pub pid: u32,
    /// Raw enforcement action code stored in the block event.
    pub action: u8,
    /// Timestamp stored in the block event.
    /// NOTE(review): presumably nanoseconds, since callers compare it against
    /// a `*_NS` window — confirm against the producer.
    pub timestamp: u64,
    /// Identifier of the rule that produced the block event.
    pub rule_id: &'a str,
    /// Executable path bytes recorded with the block event (not guaranteed UTF-8).
    pub exec_path: &'a [u8],
}
336+
327337
impl TryFrom<&[u8]> for ProcLifecycleEventData {
328338
type Error = Error;
329339

@@ -406,7 +416,9 @@ const PROC_BLOCK_CMDLINE_LEN: usize = 256;
406416
const PROC_BLOCK_EXEC_PATH_LEN: usize = 256;
407417
const ENFORCEMENT_TARGET_EXEC: u8 = 1;
408418
const ENFORCEMENT_ACTION_AUDIT: u8 = 1;
419+
const ENFORCEMENT_ACTION_DENY: u8 = 2;
409420
const ENFORCEMENT_MECHANISM_USER_SPACE_AUDIT: u8 = 5;
421+
const ENFORCEMENT_GUARANTEE_BEST_EFFORT: u8 = 2;
410422
const ENFORCEMENT_GUARANTEE_AUDIT_ONLY: u8 = 3;
411423

412424
struct ProcBlockEventData {
@@ -725,6 +737,7 @@ impl ProcEvent {
725737
parent_pid: data.parent_pid,
726738
uid: data.uid,
727739
gid: data.gid,
740+
timestamp: data.timestamp,
728741
comm: &data.comm,
729742
cmdline: &data.cmdline,
730743
exec_path: &data.exec_path,
@@ -734,6 +747,19 @@ impl ProcEvent {
734747
}
735748
}
736749

750+
pub fn proc_block_info(&self) -> Option<ProcBlockInfo<'_>> {
751+
match &self.event_data {
752+
EventData::ProcBlockEvent(data) => Some(ProcBlockInfo {
753+
pid: data.pid,
754+
action: data.action,
755+
timestamp: data.timestamp,
756+
rule_id: data.rule_id.as_str(),
757+
exec_path: &data.exec_path,
758+
}),
759+
_ => None,
760+
}
761+
}
762+
737763
pub fn new_proc_block_event_for_audit(
738764
&self,
739765
rule_id: &str,
@@ -786,6 +812,59 @@ impl ProcEvent {
786812
event_data: EventData::ProcBlockEvent(block_event),
787813
})))
788814
}
815+
816+
pub fn new_proc_block_event_for_best_effort(
817+
&self,
818+
rule_id: &str,
819+
policy_epoch: u64,
820+
) -> Option<BoxedProcEvents> {
821+
let data = match &self.event_data {
822+
EventData::ProcLifecycleEvent(data) if data.lifecycle_type == PROC_LIFECYCLE_EXEC => {
823+
data
824+
}
825+
_ => return None,
826+
};
827+
let process_kname = if data.comm.is_empty() {
828+
self.process_kname.clone()
829+
} else {
830+
data.comm.clone()
831+
};
832+
let root_pid = self.ai_agent_root_pid;
833+
let block_event = ProcBlockEventData {
834+
rule_id: rule_id.chars().take(PROC_BLOCK_RULE_ID_LEN).collect(),
835+
target_type: ENFORCEMENT_TARGET_EXEC,
836+
action: ENFORCEMENT_ACTION_DENY,
837+
mechanism: enforcement_mechanism_name(ENFORCEMENT_MECHANISM_USER_SPACE_AUDIT)
838+
.to_string(),
839+
guarantee: enforcement_guarantee_name(ENFORCEMENT_GUARANTEE_BEST_EFFORT).to_string(),
840+
errno: 1,
841+
pid: data.pid,
842+
parent_pid: data.parent_pid,
843+
ai_agent_root_pid: root_pid,
844+
uid: data.uid,
845+
gid: data.gid,
846+
comm: process_kname.clone(),
847+
cmdline: truncate_bytes(&data.cmdline, PROC_BLOCK_CMDLINE_LEN),
848+
exec_path: truncate_bytes(&data.exec_path, PROC_BLOCK_EXEC_PATH_LEN),
849+
syscall_name: String::new(),
850+
syscall_id: 0,
851+
timestamp: data.timestamp,
852+
policy_epoch,
853+
};
854+
855+
Some(BoxedProcEvents(Box::new(ProcEvent {
856+
pid: data.pid,
857+
pod_id: self.pod_id,
858+
ai_agent_root_pid: root_pid,
859+
thread_id: self.thread_id,
860+
coroutine_id: self.coroutine_id,
861+
process_kname,
862+
start_time: self.start_time,
863+
end_time: self.end_time,
864+
event_type: EventType::ProcBlockEvent,
865+
event_data: EventData::ProcBlockEvent(block_event),
866+
})))
867+
}
789868
}
790869

791870
fn truncate_bytes(bytes: &[u8], limit: usize) -> Vec<u8> {
@@ -1025,6 +1104,49 @@ mod tests {
10251104
assert_eq!(block.exec_path, b"/sbin/reboot");
10261105
}
10271106

1107+
/// A best-effort block event built from an exec lifecycle event must survive
/// a protobuf encode/decode round trip with the expected enforcement fields.
#[test]
fn test_new_proc_block_event_for_best_effort_encodes_proc_block_event() {
    // Source: an exec lifecycle event for `/sbin/reboot` under agent root 100.
    let lifecycle = ProcLifecycleEventData {
        lifecycle_type: PROC_LIFECYCLE_EXEC,
        pid: 13,
        parent_pid: 10,
        uid: 1000,
        gid: 1000,
        timestamp: 42,
        comm: b"reboot".to_vec(),
        cmdline: b"reboot now".to_vec(),
        exec_path: b"/sbin/reboot".to_vec(),
    };
    let source = ProcEvent {
        pid: 13,
        pod_id: 7,
        ai_agent_root_pid: 100,
        thread_id: 13,
        coroutine_id: 0,
        process_kname: b"reboot".to_vec(),
        start_time: 42,
        end_time: 43,
        event_type: EventType::ProcLifecycleEvent,
        event_data: EventData::ProcLifecycleEvent(lifecycle),
    };

    let block_event = source
        .new_proc_block_event_for_best_effort("block-reboot", 99)
        .unwrap();

    // Round-trip through the wire format.
    let mut encoded = Vec::new();
    block_event.encode(&mut encoded).unwrap();
    let decoded = metric::ProcEvent::decode(encoded.as_slice()).unwrap();
    let block = decoded.proc_block_event_data.unwrap();

    assert_eq!(decoded.event_type, metric::EventType::ProcBlockEvent as i32);
    assert_eq!(block.rule_id, "block-reboot");
    assert_eq!(block.action, metric::EnforcementAction::Deny as i32);
    assert_eq!(block.mechanism, "user_space_audit");
    assert_eq!(block.guarantee, "best_effort");
    assert_eq!(block.errno, 1);
    assert_eq!(block.ai_agent_root_pid, 100);
    assert_eq!(block.exec_path, b"/sbin/reboot");
}
1149+
10281150
fn make_proc_block_raw(
10291151
target_type: u8,
10301152
action: u8,

agent/src/ebpf/test/test_ai_agent_source_contracts.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,20 @@ def read_source(path: Path) -> str:
405405
"buf->path,\n\t\t\t\t AI_AGENT_EXEC_PATTERN_LEN" in exec_override_text,
406406
"AI Agent exec override must report exec_path as cmdline placeholder instead of a partial argv slot",
407407
)
408+
file_io_bpf = ENTERPRISE_BPF / "ai_agent_file_io.bpf.c"
409+
file_io_text = read_source(file_io_bpf)
410+
require(
411+
"for (__u32 attempt = 0; attempt < 3; attempt++)" in exec_override_standalone_text
412+
and "ret = bpf_perf_event_output(" in exec_override_standalone_text
413+
and "if (ret >= 0)" in exec_override_standalone_text,
414+
"standalone exec override helper must retry perf event output up to 3 attempts",
415+
)
416+
require(
417+
"for (__u32 attempt = 0; attempt < 3; attempt++)" in file_io_text
418+
and "ret = bpf_perf_event_output(" in file_io_text
419+
and "if (ret >= 0)" in file_io_text,
420+
"shared AI Agent event helper must retry perf event output up to 3 attempts",
421+
)
408422
syscall_override_bpf = ENTERPRISE_BPF / "ai_agent_syscall_override.bpf.c"
409423
require(
410424
syscall_override_bpf.exists(),

agent/src/ebpf_dispatcher.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,17 @@ fn main() {
4040
#[cfg(feature = "extended_observability")]
4141
pub mod memory_profile;
4242

43+
#[cfg(feature = "enterprise")]
44+
use std::collections::HashMap;
4345
use std::ffi::{CStr, CString};
4446
use std::ptr::{self, null_mut};
4547
use std::slice;
4648
#[cfg(feature = "enterprise")]
4749
use std::sync::atomic::AtomicI32;
4850
use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering};
4951
use std::sync::Arc;
52+
#[cfg(feature = "enterprise")]
53+
use std::sync::{Mutex, OnceLock};
5054
use std::thread::{self, JoinHandle};
5155
use std::time::Duration;
5256

@@ -241,6 +245,98 @@ fn emit_ai_agent_enforcement_audit_event(event: &BoxedProcEvents) {
241245
}
242246
}
243247

248+
/// Returns the process-wide cache of recently seen block events,
/// creating an empty map on first use.
#[cfg(feature = "enterprise")]
fn kernel_block_event_cache() -> &'static Mutex<HashMap<KernelBlockMarkerKey, u64>> {
    RECENT_KERNEL_BLOCK_EVENTS.get_or_init(Mutex::default)
}
252+
253+
/// Drops every cache entry older than `KERNEL_BLOCK_EVENT_CACHE_WINDOW_NS`
/// relative to `now`.
///
/// The age is computed with a saturating subtraction, so an entry stamped
/// later than `now` has age zero and is kept.
#[cfg(feature = "enterprise")]
fn prune_kernel_block_event_cache(cache: &mut HashMap<KernelBlockMarkerKey, u64>, now: u64) {
    cache.retain(|_, recorded_at| {
        let age = now.saturating_sub(*recorded_at);
        age <= KERNEL_BLOCK_EVENT_CACHE_WINDOW_NS
    });
}
257+
258+
/// Records a deny block event in the recent-block cache.
///
/// The entry is keyed by (pid, rule id, exec path) and stamped with the
/// event's timestamp, so that a later exec lifecycle event for the same key
/// can be recognised as already reported and not backfilled again.
///
/// Ignored when `event` is not a block event, when its action is not
/// `Deny`, or when it carries no exec path. Stale entries are pruned
/// (relative to this event's timestamp) before inserting.
#[cfg(feature = "enterprise")]
fn record_kernel_block_event(event: &BoxedProcEvents) {
    let Some(info) = event.0.proc_block_info() else {
        return;
    };
    if info.action != metric::EnforcementAction::Deny as u8 || info.exec_path.is_empty() {
        return;
    }
    // A poisoned lock only means another thread panicked while holding the
    // guard. The map is a best-effort dedup cache, so keep using it rather
    // than panicking inside the event-dispatch path.
    let mut cache = kernel_block_event_cache()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    prune_kernel_block_event_cache(&mut cache, info.timestamp);
    cache.insert(
        KernelBlockMarkerKey {
            pid: info.pid,
            rule_id: info.rule_id.to_string(),
            exec_path: info.exec_path.to_vec(),
        },
        info.timestamp,
    );
}
277+
278+
/// Removes and reports a recent block-event marker for the given key.
///
/// Returns `true` when the cache held an entry for (`pid`, `rule_id`,
/// `exec_path`) whose timestamp is within
/// `KERNEL_BLOCK_EVENT_CACHE_WINDOW_NS` of `now`. The entry is removed
/// either way, so each recorded block event can suppress at most one
/// backfill. Stale entries are pruned relative to `now` before the lookup.
#[cfg(feature = "enterprise")]
fn consume_recent_kernel_block_event(pid: u32, rule_id: &str, exec_path: &[u8], now: u64) -> bool {
    // A poisoned lock only means another thread panicked while holding the
    // guard. The map is a best-effort dedup cache, so keep using it rather
    // than panicking inside the event-dispatch path.
    let mut cache = kernel_block_event_cache()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    prune_kernel_block_event_cache(&mut cache, now);
    let key = KernelBlockMarkerKey {
        pid,
        rule_id: rule_id.to_string(),
        exec_path: exec_path.to_vec(),
    };
    cache
        .remove(&key)
        .is_some_and(|ts| now.saturating_sub(ts) <= KERNEL_BLOCK_EVENT_CACHE_WINDOW_NS)
}
291+
292+
/// Backfills a best-effort deny block event for an exec that matched a
/// `Block` rule but has no recently recorded block event.
///
/// The function returns without emitting anything unless all of these hold,
/// checked in this order:
/// 1. the event has a non-zero `ai_agent_root_pid`;
/// 2. it exposes exec lifecycle info with a non-empty `exec_path`;
/// 3. a global exec policy is available;
/// 4. the policy matches the (lossily UTF-8 decoded) exec path / cmdline
///    with mode `Block`;
/// 5. the recent-block cache holds no entry for the same pid, rule and exec
///    path (a matching entry is consumed by this lookup).
///
/// A failed send is logged with `warn!` and otherwise ignored.
#[cfg(feature = "enterprise")]
#[allow(static_mut_refs)]
fn emit_ai_agent_enforcement_best_effort_event(event: &BoxedProcEvents) {
    use enterprise_utils::ai_agent_enforcement::EnforcementMode;

    let source = &event.0;
    if source.ai_agent_root_pid == 0 {
        return;
    }
    let exec = match source.proc_lifecycle_exec_info() {
        Some(exec) if !exec.exec_path.is_empty() => exec,
        _ => return,
    };
    let Some(policy) = enterprise_utils::ai_agent_enforcement::global_exec_policy() else {
        return;
    };

    let path_text = String::from_utf8_lossy(exec.exec_path);
    let cmdline_text = String::from_utf8_lossy(exec.cmdline);
    let hit = match policy.match_exec(&path_text, &cmdline_text) {
        Some(hit) if hit.mode == EnforcementMode::Block => hit,
        _ => return,
    };

    // A block event was already recorded for this exec: nothing to backfill.
    let already_reported =
        consume_recent_kernel_block_event(exec.pid, &hit.rule_id, exec.exec_path, exec.timestamp);
    if already_reported {
        return;
    }

    let Some(backfill) = source.new_proc_block_event_for_best_effort(&hit.rule_id, policy.epoch)
    else {
        return;
    };
    // NOTE(review): accesses the `PROC_EVENT_SENDER` static mutably; soundness
    // presumably relies on this only running on the dispatcher's callback
    // thread — confirm.
    unsafe {
        if let Some(sender) = PROC_EVENT_SENDER.as_mut() {
            if let Err(e) = sender.send(backfill) {
                warn!("ai agent enforcement best_effort event send error: {:?}", e);
            }
        }
    }
}
339+
244340
impl OwnedCountable for SyncEbpfCounter {
245341
fn get_counters(&self) -> Vec<Counter> {
246342
let rx = self.counter.rx.swap(0, Ordering::Relaxed);
@@ -705,9 +801,22 @@ static AI_AGENT_EXEC_LSM_EVENTS_ACTIVE: AtomicBool = AtomicBool::new(false);
705801
#[cfg(feature = "enterprise")]
706802
static AI_AGENT_EXEC_KPROBE_EVENTS_ACTIVE: AtomicBool = AtomicBool::new(false);
707803
#[cfg(feature = "enterprise")]
804+
static RECENT_KERNEL_BLOCK_EVENTS: OnceLock<Mutex<HashMap<KernelBlockMarkerKey, u64>>> =
805+
OnceLock::new();
806+
#[cfg(feature = "enterprise")]
708807
const AI_AGENT_EXEC_RULES_BPF_MAX: usize = 256;
709808
#[cfg(feature = "enterprise")]
710809
const AI_AGENT_SYSCALL_RULES_BPF_MAX: usize = 32;
810+
#[cfg(feature = "enterprise")]
811+
const KERNEL_BLOCK_EVENT_CACHE_WINDOW_NS: u64 = 5_000_000_000;
812+
813+
/// Key of the recent-block cache: identifies one block event by process,
/// rule and executable path. All three fields must match for a lookup to hit.
#[cfg(feature = "enterprise")]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct KernelBlockMarkerKey {
    /// PID taken from the block event (or from the exec event on lookup).
    pid: u32,
    /// Identifier of the rule that produced or matched the event.
    rule_id: String,
    /// Raw executable path bytes, compared byte-for-byte.
    exec_path: Vec<u8>,
}
711820

712821
#[cfg(feature = "enterprise")]
713822
fn ai_agent_enforcement_mode_eq(value: &str, expected: &str) -> bool {
@@ -922,7 +1031,11 @@ impl EbpfCollector {
9221031
#[cfg(feature = "enterprise")]
9231032
fill_ai_agent_root_pid(&mut event);
9241033
#[cfg(feature = "enterprise")]
1034+
record_kernel_block_event(&event);
1035+
#[cfg(feature = "enterprise")]
9251036
emit_ai_agent_enforcement_audit_event(&event);
1037+
#[cfg(feature = "enterprise")]
1038+
emit_ai_agent_enforcement_best_effort_event(&event);
9261039
if let Err(e) = PROC_EVENT_SENDER.as_mut().unwrap().send(event) {
9271040
warn!("event send ebpf error: {:?}", e);
9281041
}

0 commit comments

Comments
 (0)