diff --git a/interpreter/customlabels/customlabels.go b/interpreter/customlabels/customlabels.go index 5eaa107d0..1de827a80 100644 --- a/interpreter/customlabels/customlabels.go +++ b/interpreter/customlabels/customlabels.go @@ -4,7 +4,6 @@ package customlabels // import "go.opentelemetry.io/ebpf-profiler/interpreter/cu // #include "../../support/ebpf/types.h" import "C" import ( - "debug/elf" "errors" "fmt" "regexp" @@ -31,13 +30,6 @@ type data struct { var _ interpreter.Data = &data{} -func roundUp(multiple, value uint64) uint64 { - if multiple == 0 { - return value - } - return (value + multiple - 1) / multiple * multiple -} - func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) { ef, err := info.GetELF() if err != nil { @@ -75,39 +67,11 @@ func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interprete return nil, errors.New("failed to locate TLS descriptor for custom labels") } } else { - tlsSym, err := ef.LookupSymbol(tlsExport) + offset, err := ef.LookupTLSSymbolOffset(tlsExport) if err != nil { - return nil, err - } - if ef.Machine == elf.EM_AARCH64 { - tlsAddr = libpf.Address(tlsSym.Address) - } else if ef.Machine == elf.EM_X86_64 { - // Symbol addresses are relative to the start of the - // thread-local storage image, but the thread pointer points to the _end_ - // of the image. So we need to find the size of the image in order to know where the - // beginning is. - // - // The image is just .tdata followed by .tbss, - // but we also have to respect the alignment. 
- tbss, err := ef.Tbss() - if err != nil { - return nil, err - } - tdata, err := ef.Tdata() - var tdataSize uint64 - if err != nil { - // No Tdata is ok, it's the same as size 0 - if err != pfelf.ErrNoTdata { - return nil, err - } - } else { - tdataSize = tdata.Size - } - imageSize := roundUp(tbss.Addralign, tdataSize) + tbss.Size - tlsAddr = libpf.Address(int64(tlsSym.Address) - int64(imageSize)) - } else { - return nil, fmt.Errorf("unrecognized machine: %s", ef.Machine.String()) + return nil, fmt.Errorf("failed to get tls symbol offset: %w", err) } + tlsAddr = libpf.Address(offset) } d := data{ diff --git a/interpreter/luajit/extractor_x86.go b/interpreter/luajit/extractor_x86.go index 1c5ff8051..2933c62cc 100644 --- a/interpreter/luajit/extractor_x86.go +++ b/interpreter/luajit/extractor_x86.go @@ -13,9 +13,9 @@ package luajit // import "go.opentelemetry.io/ebpf-profiler/interpreter/luajit" import ( "errors" - "slices" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" + xh "go.opentelemetry.io/ebpf-profiler/x86helpers" "golang.org/x/arch/x86/x86asm" ) @@ -50,7 +50,7 @@ which is a dynamic public symbol that should be in all binaries of LuaJIT includ */ //nolint:nonamedreturns func (x *x86Extractor) findOffsetsFromLuaClose(b []byte) (glref, curL uint64, err error) { - b, _ = skipEndBranch(b) + b, _ = xh.SkipEndBranch(b) var greg x86asm.Reg for len(b) > 0 { var i x86asm.Inst @@ -101,7 +101,7 @@ func (x *x86Extractor) findOffsetsFromLuaClose(b []byte) (glref, curL uint64, er // 0xfa8 is the g to dispatch offset. 
// https://github.com/openresty/luajit2/blob/7952882d/src/lj_dispatch.c#L122 func (x *x86Extractor) findG2DispatchOffsetFromLjDispatchUpdate(b []byte) (uint64, error) { - b, _ = skipEndBranch(b) + b, _ = xh.SkipEndBranch(b) var greg x86asm.Reg for len(b) > 0 { i, err := x86asm.Decode(b, 64) @@ -159,7 +159,7 @@ func (x *x86Extractor) findG2DispatchOffsetFromLjDispatchUpdate(b []byte) (uint6 // //nolint:lll func (x *x86Extractor) findLjDispatchUpdateAddr(b []byte, addr uint64) (uint64, error) { - b, ip := skipEndBranch(b) + b, ip := xh.SkipEndBranch(b) var Lreg x86asm.Reg rdiHasG := false for len(b) > 0 { @@ -212,7 +212,7 @@ func (x *x86Extractor) findLjDispatchUpdateAddr(b []byte, addr uint64) (uint64, // ----------- 0x430 is the G to J->traces offset // libluajit-5.1.so[0x637a1] <+33>: movq 0x430(%rdx), %rdx func (x *x86Extractor) findG2TracesOffsetFromChecktrace(b []byte) (uint64, error) { - b, _ = skipEndBranch(b) + b, _ = xh.SkipEndBranch(b) var Greg x86asm.Reg for len(b) > 0 { i, err := x86asm.Decode(b, 64) @@ -236,7 +236,7 @@ func (x *x86Extractor) findG2TracesOffsetFromChecktrace(b []byte) (uint64, error } func (x *x86Extractor) findFirstCall(b []byte, baseAddr int64) (uint64, error) { - b, ip := skipEndBranch(b) + b, ip := xh.SkipEndBranch(b) for len(b) > 0 { i, err := x86asm.Decode(b, 64) if err != nil { @@ -258,7 +258,7 @@ func (x *x86Extractor) findFirstCall(b []byte, baseAddr int64) (uint64, error) { // Return true if the code in b calls targetCall. 
func (x *x86Extractor) callExists(b []byte, baseAddr, targetCall int64) (bool, error) { - b, ip := skipEndBranch(b) + b, ip := xh.SkipEndBranch(b) for len(b) > 0 { i, err := x86asm.Decode(b, 64) if err != nil { @@ -289,7 +289,7 @@ func (x *x86Extractor) callExists(b []byte, baseAddr, targetCall int64) (bool, e func findRipRelativeLea2ndArgTo2ndCall(b []byte, baseAddr, targetCall int64) (uint64, error) { var leaRsi int64 calls := 2 - b, ip := skipEndBranch(b) + b, ip := xh.SkipEndBranch(b) for len(b) > 0 { i, err := x86asm.Decode(b, 64) if err != nil { @@ -364,7 +364,7 @@ func skipCallsAABA(b []byte, ip, baseAddr int64) ([]byte, int64, error) { func (x *x86Extractor) find3rdArgToLibPreregCall(b []byte, baseAddr int64) (uint64, error) { var rdxAddr int64 calls := 3 - b, ip := skipEndBranch(b) + b, ip := xh.SkipEndBranch(b) // Skip the lua_push* call sequence (and all the preceding calls which varies depending on // inlining). // libluajit-5.1.so[0x700a5] <+133>: movq %rbx, %rdi @@ -432,7 +432,7 @@ func (x *x86Extractor) find3rdArgToLibPreregCall(b []byte, baseAddr int64) (uint // bbc2: c3 ret func (x *x86Extractor) find4thArgToLibRegCall(b []byte, baseAddr int64) (int64, error) { var ip int64 - b, ip = skipEndBranch(b) + b, ip = xh.SkipEndBranch(b) for len(b) > 0 { i, err := x86asm.Decode(b, 64) if err != nil { @@ -468,24 +468,6 @@ func calcRipRelativeAddr(a1 x86asm.Mem, baseAddr, ip int64) int64 { return baseAddr + ip + int64(disp) } -var endbr64 = [4]byte{0xf3, 0x0f, 0x1e, 0xfa} - -// On some binaries the function starts like this: -// -// 0x0000000000012860 <+0>: f3 0f 1e fa endbr64 -// 0x0000000000012864 <+4>: 41 55 push %r13 -// -// This is some kind of stack smashing indirect jump protection, treat it as a nop, -// x86asm doesn't know how to handle it. 
-// -//nolint:gocritic -func skipEndBranch(b []byte) ([]byte, int64) { - if slices.Equal(b[0:4], endbr64[:]) { - return b[4:], 4 - } - return b, 0 -} - // If we're dealing with 32bit values compilers will use R or E prefix // interchangeably (E refs are just zero padded). func sameReg(r1, r2 x86asm.Reg) bool { diff --git a/libpf/pfelf/file.go b/libpf/pfelf/file.go index 6c03703af..426659eca 100644 --- a/libpf/pfelf/file.go +++ b/libpf/pfelf/file.go @@ -62,6 +62,8 @@ var ErrNoTbss = errors.New("no thread-local uninitialized data section (tbss)") // ErrNoTdata is returned when the tdata section cannot be found var ErrNoTdata = errors.New("no thread-local initialized data section (tdata)") +var ErrNoTLS = errors.New("no TLS program header") + // File represents an open ELF file type File struct { // closer is called internally when resources for this File are to be released @@ -456,6 +458,16 @@ func (f *File) Tdata() (*Section, error) { return nil, ErrNoTdata } +// TLS gets the TLS segment (program header) +func (f *File) TLS() (*Prog, error) { + for _, seg := range f.Progs { + if seg.Type == elf.PT_TLS { + return &seg, nil + } + } + return nil, ErrNoTLS +} + // ReadVirtualMemory reads bytes from given virtual address func (f *File) ReadVirtualMemory(p []byte, addr int64) (int, error) { if len(p) == 0 { @@ -834,6 +846,57 @@ func calcSysvHash(s libpf.SymbolName) uint32 { return h & 0xfffffff } +// roundUp rounds `value` up to the nearest multiple of `multiple`. +func roundUp(value, multiple uint64) uint64 { + if multiple == 0 { + return value + } + return (value + multiple - 1) / multiple * multiple +} + +// LookupTLSSymbolOffset computes the offset of a symbol +// in thread-local storage of the main binary. +// +// On x86-64, this is the offset from the fs-base internal register (and should be negative). +// On aarch64, this is the offset from the tpidr_el0 register (and should be positive). +// +// Note that this only works _in the main binary of the executable_. 
+// Looking up a thread-local variable in a shared library requires a more complex +// procedure. +func (f *File) LookupTLSSymbolOffset(symbol libpf.SymbolName) (int64, error) { + tlsSym, err := f.LookupSymbol(symbol) + if err != nil { + return 0, err + } + if f.Machine == elf.EM_AARCH64 { + return int64(tlsSym.Address), nil + } + if f.Machine == elf.EM_X86_64 { + // Symbol addresses are relative to the start of the + // thread-local storage image, but the thread pointer points to the _end_ + // of the image. So we need to find the size of the image in order to know where the + // beginning is. + // + // Furthermore, the thread pointer (fs-base) respects the TLS segment's alignment + // (which is a bit weird given that offsets are negative, but it is in fact true). + // + // So if the segment is 32-byte aligned (and of size <= 32), and some object is at + // byte 4 in the segment, + // it will be at offset -28 from fs-base. + // + // See "ELF Handling For Thread-Local Storage" (https://www.uclibc.org/docs/tls.pdf), + // pp. 8 ("Variant II"), 11 ("IA-32 Specific"), 14 ("x86-64 Specific"). 
+ tls, err := f.TLS() + if err != nil { + return 0, err + } + offset := int64(tlsSym.Address) - int64(roundUp(tls.Memsz, tls.Align)) + + return offset, nil + } + return 0, fmt.Errorf("unrecognized machine: %s", f.Machine.String()) +} + // LookupSymbol searches for a given symbol in the ELF func (f *File) LookupSymbol(symbol libpf.SymbolName) (*libpf.Symbol, error) { if f.gnuHash.addr != 0 { diff --git a/libpf/pfelf/file_test.go b/libpf/pfelf/file_test.go index 9e8f4a95f..d83609954 100644 --- a/libpf/pfelf/file_test.go +++ b/libpf/pfelf/file_test.go @@ -14,6 +14,8 @@ import ( "go.opentelemetry.io/ebpf-profiler/testsupport" "go.opentelemetry.io/ebpf-profiler/libpf" + xh "go.opentelemetry.io/ebpf-profiler/x86helpers" + xx "golang.org/x/arch/x86/x86asm" ) func getPFELF(path string, t *testing.T) *File { @@ -95,3 +97,89 @@ func TestGoVersion(t *testing.T) { require.NoError(t, err) assert.Equal(t, runtime.Version(), testVersion) } + +func symbolOffsetFromCodeX86(code []byte) (int64, error) { + // e.g. mov eax,DWORD PTR fs:0xfffffffffffffffc + code, _ = xh.SkipEndBranch(code) + offset := 0 + for { + insn, err := xx.Decode(code[offset:], 64) + if err != nil { + return 0, err + } + offset += insn.Len + if insn.Op != xx.MOV { + continue + } + switch a := insn.Args[1].(type) { + case xx.Mem: + if a.Segment != xx.FS { + continue + } + // for some reason the Go disassembler + // reports the displacement as a 32-bit value + // embedded in a 64-bit one; e.g., it represents -16 as 0x00000000fffffff0 . + // So this double cast is necessary. 
+ return int64(int32(a.Disp)), nil + default: + continue + } + } +} + +func TestLookupTlsSymbolOffset(t *testing.T) { + for _, test := range []struct { + exe string + hasTbss bool + hasTdata bool + }{ + {"tls-tbss", true, false}, + {"tls-aligned-tbss", true, false}, + {"tls-tdata", false, true}, + {"tls-aligned-tdata", false, true}, + {"tls-tbss-tdata", true, true}, + {"tls-aligned-tbss-tdata", true, true}, + {"tls-tbss-aligned-tdata", true, true}, + {"tls-aligned-tbss-aligned-tdata", true, true}, + } { + // Testing this on arm is nontrivial, because we need to actually follow some + // pointers in-process to get the address of the tls block. So let's + // ignore it and just test x86. + if runtime.GOARCH != "amd64" { + t.Skip("this test is only supported on x86") + } + ef, err := Open("testdata/" + test.exe) + require.NoError(t, err) + + if test.hasTbss { + sym, err := ef.LookupSymbol("get_tbss") + require.NoError(t, err) + code := make([]byte, sym.Size) + _, err = ef.ReadVirtualMemory(code, int64(sym.Address)) + require.NoError(t, err) + + offset, err := symbolOffsetFromCodeX86(code) + require.NoError(t, err) + + offset2, err := ef.LookupTLSSymbolOffset("tbss") + require.NoError(t, err) + + require.Equal(t, offset, offset2) + } + if test.hasTdata { + sym, err := ef.LookupSymbol("get_tdata") + require.NoError(t, err) + code := make([]byte, sym.Size) + _, err = ef.ReadVirtualMemory(code, int64(sym.Address)) + require.NoError(t, err) + + offset, err := symbolOffsetFromCodeX86(code) + require.NoError(t, err) + + offset2, err := ef.LookupTLSSymbolOffset("tdata") + require.NoError(t, err) + + require.Equal(t, offset, offset2) + } + } +} diff --git a/libpf/pfelf/testdata/.gitignore b/libpf/pfelf/testdata/.gitignore index 5025847f1..0b10263bf 100644 --- a/libpf/pfelf/testdata/.gitignore +++ b/libpf/pfelf/testdata/.gitignore @@ -5,3 +5,5 @@ kernel-image ubuntu-kernel-image go-binary separate-debug-file +tls-* +!tls-lookup.c diff --git a/libpf/pfelf/testdata/Makefile 
b/libpf/pfelf/testdata/Makefile index 757c2801a..d16be64a3 100644 --- a/libpf/pfelf/testdata/Makefile +++ b/libpf/pfelf/testdata/Makefile @@ -10,7 +10,15 @@ BINARIES=fixed-address \ the_notorious_build_id \ ubuntu-kernel-image \ with-debug-syms \ - without-debug-syms + without-debug-syms \ + tls-tbss \ + tls-aligned-tbss \ + tls-tdata \ + tls-aligned-tdata \ + tls-tbss-tdata \ + tls-aligned-tbss-tdata \ + tls-tbss-aligned-tdata \ + tls-aligned-tbss-aligned-tdata all: $(BINARIES) @@ -47,3 +55,28 @@ ubuntu-kernel-image: test.c go-binary: gotest.go go build -o go-binary -ldflags "-w -s" gotest.go +# -Wl,-E makes all the symbols dynamic. + +tls-tbss: tls-lookup.c + $(CC) -Wl,-E -DHAS_TBSS=1 $< -o $@ + +tls-aligned-tbss: tls-lookup.c + $(CC) -Wl,-E -DHAS_TBSS=1 -DTBSS_ALIGN=1 $< -o $@ + +tls-tdata: tls-lookup.c + $(CC) -Wl,-E -DHAS_TDATA=1 $< -o $@ + +tls-aligned-tdata: tls-lookup.c + $(CC) -Wl,-E -DHAS_TDATA=1 -DTDATA_ALIGN=1 $< -o $@ + +tls-tbss-tdata: tls-lookup.c + $(CC) -Wl,-E -DHAS_TBSS=1 -DHAS_TDATA=1 $< -o $@ + +tls-aligned-tbss-tdata: tls-lookup.c + $(CC) -Wl,-E -DHAS_TBSS=1 -DTBSS_ALIGN=1 -DHAS_TDATA=1 $< -o $@ + +tls-tbss-aligned-tdata: tls-lookup.c + $(CC) -Wl,-E -DHAS_TBSS=1 -DHAS_TDATA=1 -DTDATA_ALIGN=1 $< -o $@ + +tls-aligned-tbss-aligned-tdata: tls-lookup.c + $(CC) -Wl,-E -DHAS_TBSS=1 -DTBSS_ALIGN=1 -DHAS_TDATA=1 -DTDATA_ALIGN=1 $< -o $@ diff --git a/libpf/pfelf/testdata/tls-lookup.c b/libpf/pfelf/testdata/tls-lookup.c new file mode 100644 index 000000000..46a543962 --- /dev/null +++ b/libpf/pfelf/testdata/tls-lookup.c @@ -0,0 +1,39 @@ +#ifdef HAS_TBSS +#ifdef TBSS_ALIGN +#define ALIGNMENT __attribute__((aligned(32))) +#else +#define ALIGNMENT +#endif +int __thread ALIGNMENT tbss = 0; + +int get_tbss() +{ + return tbss; +} +#undef ALIGNMENT +#endif + +#ifdef HAS_TDATA +#ifdef TDATA_ALIGN +#define ALIGNMENT __attribute__((aligned(32))) +#else +#define ALIGNMENT +#endif +int __thread ALIGNMENT tdata = 42; + +int get_tdata() +{ + return tdata; +} + +#undef 
ALIGNMENT +#endif + + +#include <unistd.h> + +int main() +{ + for (;;) + sleep(1); +} diff --git a/x86helpers/x86_helpers.go b/x86helpers/x86_helpers.go new file mode 100644 index 000000000..a322d9566 --- /dev/null +++ b/x86helpers/x86_helpers.go @@ -0,0 +1,33 @@ +// Copyright 2024 The Parca Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +// Package x86helpers contains helper functions that are useful for x86 disassembly. +package x86helpers // import "go.opentelemetry.io/ebpf-profiler/x86helpers" + +import "slices" + +var endbr64 = [4]byte{0xf3, 0x0f, 0x1e, 0xfa} + +// SkipEndBranch skips a leading endbr64. On some binaries the function starts like this: +// +// 0x0000000000012860 <+0>: f3 0f 1e fa endbr64 +// 0x0000000000012864 <+4>: 41 55 push %r13 +// +// This is some kind of stack smashing indirect jump protection, treat it as a nop, +// x86asm doesn't know how to handle it. Returns the remaining bytes and the skip count. +// +//nolint:gocritic +func SkipEndBranch(b []byte) ([]byte, int64) { + if len(b) >= 4 && slices.Equal(b[0:4], endbr64[:]) { + return b[4:], 4 + } + return b, 0 +}