Skip to content

Commit 8fce81b

Browse files
committed
Add crash show registers and per-frame args
1 parent 658bbe6 commit 8fce81b

File tree

3 files changed

+763
-25
lines changed

3 files changed

+763
-25
lines changed

agent/src/ebpf/user/crash_monitor.c

Lines changed: 225 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171

7272
#define CRASH_ALTSTACK_SIZE (64 * 1024)
7373
#define CRASH_SNAPSHOT_VERSION_V2 2
74+
#define CRASH_SNAPSHOT_VERSION_V3 3
7475

7576
struct crash_snapshot_record_header {
7677
uint32_t magic;
@@ -79,6 +80,13 @@ struct crash_snapshot_record_header {
7980
uint32_t size;
8081
};
8182

83+
struct crash_snapshot_frame_v3 {
84+
uint64_t absolute_pc;
85+
uint64_t rel_pc;
86+
uint32_t module_index;
87+
uint32_t reserved;
88+
};
89+
8290
struct crash_snapshot_record_v2 {
8391
uint32_t magic;
8492
uint16_t version;
@@ -98,7 +106,30 @@ struct crash_snapshot_record_v2 {
98106
uint32_t modules_count;
99107
uint32_t frames_count;
100108
struct crash_snapshot_module modules[CRASH_SNAPSHOT_MAX_MODULES];
101-
struct crash_snapshot_frame frames[CRASH_SNAPSHOT_MAX_FRAMES];
109+
struct crash_snapshot_frame_v3 frames[CRASH_SNAPSHOT_MAX_FRAMES];
110+
};
111+
112+
struct crash_snapshot_record_v3 {
113+
uint32_t magic;
114+
uint16_t version;
115+
uint16_t arch;
116+
uint32_t size;
117+
uint32_t signal;
118+
int32_t si_code;
119+
uint32_t pid;
120+
uint32_t tid;
121+
uint64_t fault_addr;
122+
uint64_t ip;
123+
uint64_t sp;
124+
uint64_t fp;
125+
uint64_t lr;
126+
uint64_t args[CRASH_SNAPSHOT_ARG_REGS];
127+
char executable_path[CRASH_SNAPSHOT_MODULE_PATH_LEN];
128+
char thread_name[CRASH_SNAPSHOT_TASK_NAME_LEN];
129+
uint32_t modules_count;
130+
uint32_t frames_count;
131+
struct crash_snapshot_module modules[CRASH_SNAPSHOT_MAX_MODULES];
132+
struct crash_snapshot_frame_v3 frames[CRASH_SNAPSHOT_MAX_FRAMES];
102133
};
103134

104135
/*
@@ -505,27 +536,117 @@ static int crash_open_snapshot_file(void)
505536
*/
506537
static void crash_fill_frame(struct crash_snapshot_record *record,
507538
struct crash_snapshot_frame *frame,
508-
uint64_t absolute_pc)
539+
uint64_t absolute_pc, uint64_t frame_fp,
540+
uint32_t frame_flags)
509541
{
510542
frame->absolute_pc = absolute_pc;
511543
frame->rel_pc = 0;
544+
frame->frame_fp = frame_fp;
512545
frame->module_index = CRASH_SNAPSHOT_INVALID_MODULE;
513-
frame->reserved = 0;
546+
frame->frame_flags = frame_flags;
514547
if (record != NULL)
515548
crash_fill_frame_module(record, frame);
516549
}
517550

518551
static int crash_append_frame(struct crash_snapshot_record *record,
519-
uint64_t absolute_pc)
552+
uint64_t absolute_pc, uint64_t frame_fp,
553+
uint32_t frame_flags)
520554
{
521555
if (absolute_pc == 0 || record->frames_count >= CRASH_SNAPSHOT_MAX_FRAMES)
522556
return 0;
523557

524-
crash_fill_frame(record, &record->frames[record->frames_count], absolute_pc);
558+
crash_fill_frame(record, &record->frames[record->frames_count], absolute_pc,
559+
frame_fp, frame_flags);
525560
record->frames_count++;
526561
return 1;
527562
}
528563

564+
static void crash_capture_stack_window(struct crash_snapshot_record *record,
565+
uintptr_t stack_floor,
566+
uintptr_t stack_ceil)
567+
{
568+
uintptr_t start = 0;
569+
size_t copy_size;
570+
571+
if (record == NULL)
572+
return;
573+
if (record->sp >= stack_floor && record->sp < stack_ceil)
574+
start = (uintptr_t)record->sp;
575+
if (record->fp >= stack_floor && record->fp < stack_ceil &&
576+
(start == 0 || record->fp < start))
577+
start = (uintptr_t)record->fp;
578+
if (start == 0 || start >= stack_ceil)
579+
return;
580+
581+
copy_size = (size_t)(stack_ceil - start);
582+
if (copy_size > CRASH_SNAPSHOT_STACK_WINDOW_SIZE)
583+
copy_size = CRASH_SNAPSHOT_STACK_WINDOW_SIZE;
584+
if (copy_size == 0)
585+
return;
586+
587+
crash_copy_bytes(record->stack_window, sizeof(record->stack_window),
588+
(const void *)start, copy_size);
589+
record->stack_window_start = (uint64_t)start;
590+
record->stack_window_size = (uint32_t)copy_size;
591+
record->capture_flags |= CRASH_SNAPSHOT_FLAG_STACK_WINDOW;
592+
}
593+
594+
static void crash_capture_registers_from_ucontext(struct crash_snapshot_record *record,
595+
ucontext_t *ctx)
596+
{
597+
if (record == NULL || ctx == NULL)
598+
return;
599+
600+
#if defined(__x86_64__)
601+
record->registers.x86_64.rax =
602+
(uint64_t)ctx->uc_mcontext.gregs[REG_RAX];
603+
record->registers.x86_64.rbx =
604+
(uint64_t)ctx->uc_mcontext.gregs[REG_RBX];
605+
record->registers.x86_64.rcx =
606+
(uint64_t)ctx->uc_mcontext.gregs[REG_RCX];
607+
record->registers.x86_64.rdx =
608+
(uint64_t)ctx->uc_mcontext.gregs[REG_RDX];
609+
record->registers.x86_64.rsi =
610+
(uint64_t)ctx->uc_mcontext.gregs[REG_RSI];
611+
record->registers.x86_64.rdi =
612+
(uint64_t)ctx->uc_mcontext.gregs[REG_RDI];
613+
record->registers.x86_64.rbp =
614+
(uint64_t)ctx->uc_mcontext.gregs[REG_RBP];
615+
record->registers.x86_64.rsp =
616+
(uint64_t)ctx->uc_mcontext.gregs[REG_RSP];
617+
record->registers.x86_64.r8 =
618+
(uint64_t)ctx->uc_mcontext.gregs[REG_R8];
619+
record->registers.x86_64.r9 =
620+
(uint64_t)ctx->uc_mcontext.gregs[REG_R9];
621+
record->registers.x86_64.r10 =
622+
(uint64_t)ctx->uc_mcontext.gregs[REG_R10];
623+
record->registers.x86_64.r11 =
624+
(uint64_t)ctx->uc_mcontext.gregs[REG_R11];
625+
record->registers.x86_64.r12 =
626+
(uint64_t)ctx->uc_mcontext.gregs[REG_R12];
627+
record->registers.x86_64.r13 =
628+
(uint64_t)ctx->uc_mcontext.gregs[REG_R13];
629+
record->registers.x86_64.r14 =
630+
(uint64_t)ctx->uc_mcontext.gregs[REG_R14];
631+
record->registers.x86_64.r15 =
632+
(uint64_t)ctx->uc_mcontext.gregs[REG_R15];
633+
record->registers.x86_64.rip =
634+
(uint64_t)ctx->uc_mcontext.gregs[REG_RIP];
635+
record->registers.x86_64.eflags =
636+
(uint64_t)ctx->uc_mcontext.gregs[REG_EFL];
637+
record->capture_flags |= CRASH_SNAPSHOT_FLAG_FULL_REGS;
638+
#elif defined(__aarch64__)
639+
for (size_t i = 0; i < 31; i++)
640+
record->registers.aarch64.x[i] = (uint64_t)ctx->uc_mcontext.regs[i];
641+
record->registers.aarch64.sp = (uint64_t)ctx->uc_mcontext.sp;
642+
record->registers.aarch64.pc = (uint64_t)ctx->uc_mcontext.pc;
643+
record->registers.aarch64.pstate = (uint64_t)ctx->uc_mcontext.pstate;
644+
record->capture_flags |= CRASH_SNAPSHOT_FLAG_FULL_REGS;
645+
#else
646+
(void)ctx;
647+
#endif
648+
}
649+
529650
static void crash_cache_thread_stack_bounds(void)
530651
{
531652
pthread_attr_t attr;
@@ -654,6 +775,7 @@ static uint32_t crash_collect_frames(struct crash_snapshot_frame *frames,
654775
uintptr_t *frame;
655776
uintptr_t next_fp;
656777
uintptr_t return_addr;
778+
uint64_t frame_hint = 0;
657779

658780
if ((current_fp & (sizeof(uintptr_t) - 1)) != 0)
659781
break;
@@ -668,13 +790,18 @@ static uint32_t crash_collect_frames(struct crash_snapshot_frame *frames,
668790
return_addr = frame[1];
669791
if (return_addr == 0)
670792
break;
671-
crash_fill_frame(NULL, &frames[count++], (uint64_t)return_addr);
672-
if (!crash_is_frame_pointer_valid(current_fp, next_fp, stack_floor,
793+
if (crash_is_frame_pointer_valid(current_fp, next_fp, stack_floor,
673794
stack_ceil))
795+
frame_hint = (uint64_t)next_fp;
796+
crash_fill_frame(NULL, &frames[count++], (uint64_t)return_addr,
797+
frame_hint, CRASH_SNAPSHOT_FRAME_FP_WALK);
798+
if (frame_hint == 0)
674799
break;
675800
current_fp = next_fp;
676801
}
677802

803+
if (count == max_frames && current_fp != 0)
804+
frames[count - 1].frame_flags |= CRASH_SNAPSHOT_FRAME_TRUNCATED;
678805
return count;
679806
}
680807

@@ -687,6 +814,8 @@ static uint32_t crash_collect_frames(struct crash_snapshot_frame *frames,
687814
* - architecture id,
688815
* - top-frame control registers (IP/SP/FP/LR),
689816
* - a best-effort set of ABI argument registers,
817+
* - the full general-purpose register block for v4 snapshots,
818+
* - a bounded stack window anchored in the crashing thread's normal stack,
690819
* - and a bounded stack trace candidate built from the frame pointer.
691820
*
692821
* The argument capture is intentionally limited to the crashing frame's raw ABI
@@ -722,7 +851,10 @@ static void crash_fill_record_from_ucontext(struct crash_snapshot_record *record
722851
record->args[3] = (uint64_t)ctx->uc_mcontext.gregs[REG_RCX];
723852
record->args[4] = (uint64_t)ctx->uc_mcontext.gregs[REG_R8];
724853
record->args[5] = (uint64_t)ctx->uc_mcontext.gregs[REG_R9];
725-
(void)crash_append_frame(record, record->ip);
854+
crash_capture_registers_from_ucontext(record, ctx);
855+
crash_capture_stack_window(record, stack_floor, stack_ceil);
856+
(void)crash_append_frame(record, record->ip, record->fp,
857+
CRASH_SNAPSHOT_FRAME_TOP);
726858
record->frames_count +=
727859
crash_collect_frames(record->frames + record->frames_count,
728860
CRASH_SNAPSHOT_MAX_FRAMES -
@@ -743,13 +875,17 @@ static void crash_fill_record_from_ucontext(struct crash_snapshot_record *record
743875
record->args[5] = (uint64_t)ctx->uc_mcontext.regs[5];
744876
record->args[6] = (uint64_t)ctx->uc_mcontext.regs[6];
745877
record->args[7] = (uint64_t)ctx->uc_mcontext.regs[7];
746-
(void)crash_append_frame(record, record->ip);
878+
crash_capture_registers_from_ucontext(record, ctx);
879+
crash_capture_stack_window(record, stack_floor, stack_ceil);
880+
(void)crash_append_frame(record, record->ip, record->fp,
881+
CRASH_SNAPSHOT_FRAME_TOP);
747882
/*
748883
* On AArch64 the link register often contains a useful caller hint even when
749884
* the full frame walk is short. Record it as an extra top-level clue before
750885
* continuing along the frame-pointer chain.
751886
*/
752-
(void)crash_append_frame(record, record->lr);
887+
(void)crash_append_frame(record, record->lr, record->fp,
888+
CRASH_SNAPSHOT_FRAME_LR_HINT);
753889
record->frames_count +=
754890
crash_collect_frames(record->frames + record->frames_count,
755891
CRASH_SNAPSHOT_MAX_FRAMES -
@@ -871,6 +1007,9 @@ static int crash_install_signal_handlers(void)
8711007
static void crash_upgrade_v2_record(struct crash_snapshot_record *dst,
8721008
const struct crash_snapshot_record_v2 *src)
8731009
{
1010+
uint32_t frames;
1011+
uint32_t i;
1012+
8741013
if (dst == NULL || src == NULL)
8751014
return;
8761015

@@ -896,8 +1035,63 @@ static void crash_upgrade_v2_record(struct crash_snapshot_record *dst,
8961035
dst->frames_count = src->frames_count;
8971036
crash_copy_bytes(dst->modules, sizeof(dst->modules), src->modules,
8981037
sizeof(src->modules));
899-
crash_copy_bytes(dst->frames, sizeof(dst->frames), src->frames,
900-
sizeof(src->frames));
1038+
frames = dst->frames_count;
1039+
if (frames > CRASH_SNAPSHOT_MAX_FRAMES)
1040+
frames = CRASH_SNAPSHOT_MAX_FRAMES;
1041+
for (i = 0; i < frames; i++) {
1042+
dst->frames[i].absolute_pc = src->frames[i].absolute_pc;
1043+
dst->frames[i].rel_pc = src->frames[i].rel_pc;
1044+
dst->frames[i].frame_fp = 0;
1045+
dst->frames[i].module_index = src->frames[i].module_index;
1046+
dst->frames[i].frame_flags =
1047+
i == 0 ? CRASH_SNAPSHOT_FRAME_TOP : CRASH_SNAPSHOT_FRAME_FP_WALK;
1048+
}
1049+
}
1050+
1051+
static void crash_upgrade_v3_record(struct crash_snapshot_record *dst,
1052+
const struct crash_snapshot_record_v3 *src)
1053+
{
1054+
uint32_t frames;
1055+
uint32_t i;
1056+
1057+
if (dst == NULL || src == NULL)
1058+
return;
1059+
1060+
memset(dst, 0, sizeof(*dst));
1061+
dst->magic = src->magic;
1062+
dst->version = CRASH_SNAPSHOT_VERSION;
1063+
dst->arch = src->arch;
1064+
dst->size = sizeof(*dst);
1065+
dst->signal = src->signal;
1066+
dst->si_code = src->si_code;
1067+
dst->pid = src->pid;
1068+
dst->tid = src->tid;
1069+
dst->fault_addr = src->fault_addr;
1070+
dst->ip = src->ip;
1071+
dst->sp = src->sp;
1072+
dst->fp = src->fp;
1073+
dst->lr = src->lr;
1074+
crash_copy_bytes(dst->args, sizeof(dst->args), src->args,
1075+
sizeof(src->args));
1076+
crash_copy_cstr(dst->executable_path, sizeof(dst->executable_path),
1077+
src->executable_path);
1078+
crash_copy_cstr(dst->thread_name, sizeof(dst->thread_name),
1079+
src->thread_name);
1080+
dst->modules_count = src->modules_count;
1081+
dst->frames_count = src->frames_count;
1082+
crash_copy_bytes(dst->modules, sizeof(dst->modules), src->modules,
1083+
sizeof(src->modules));
1084+
frames = dst->frames_count;
1085+
if (frames > CRASH_SNAPSHOT_MAX_FRAMES)
1086+
frames = CRASH_SNAPSHOT_MAX_FRAMES;
1087+
for (i = 0; i < frames; i++) {
1088+
dst->frames[i].absolute_pc = src->frames[i].absolute_pc;
1089+
dst->frames[i].rel_pc = src->frames[i].rel_pc;
1090+
dst->frames[i].frame_fp = 0;
1091+
dst->frames[i].module_index = src->frames[i].module_index;
1092+
dst->frames[i].frame_flags =
1093+
i == 0 ? CRASH_SNAPSHOT_FRAME_TOP : CRASH_SNAPSHOT_FRAME_FP_WALK;
1094+
}
9011095
}
9021096

9031097
static int crash_read_next_pending_record(int fd,
@@ -963,6 +1157,25 @@ static int crash_read_next_pending_record(int fd,
9631157
return ETR_OK;
9641158
}
9651159

1160+
if (header.version == CRASH_SNAPSHOT_VERSION_V3 &&
1161+
header.size == sizeof(struct crash_snapshot_record_v3)) {
1162+
struct crash_snapshot_record_v3 old_record;
1163+
1164+
memset(&old_record, 0, sizeof(old_record));
1165+
old_record.magic = header.magic;
1166+
old_record.version = header.version;
1167+
old_record.arch = header.arch;
1168+
old_record.size = header.size;
1169+
remain = (ssize_t)sizeof(old_record) - (ssize_t)sizeof(header);
1170+
nread = read(fd, (char *)&old_record + sizeof(header),
1171+
(size_t)remain);
1172+
*nread_out = sizeof(header) + nread;
1173+
if (nread != remain)
1174+
return ETR_INVAL;
1175+
crash_upgrade_v3_record(record, &old_record);
1176+
return ETR_OK;
1177+
}
1178+
9661179
*nread_out = sizeof(header);
9671180
return ETR_NOTEXIST;
9681181
}

0 commit comments

Comments
 (0)