Skip to content

Commit f20d100

Browse files
authored
Fix X86 tls handling (for custom labels) (#63)
See the comment for more details -- we were not accounting for the fact that the pointer to the end of the TLS segment (the thread pointer, fs-base) might be rounded up to the segment's alignment. This is strange, because alignment normally affects the beginning of things, not the end of things, but apparently this is how things work. Also, greatly simplify how we calculate the TLS offset on x86: we don't need to look at the individual sections and deduce how they'll be combined into a segment at runtime; we can just get the information about the segment from the program header directly.
1 parent 77fe4ce commit f20d100

8 files changed

Lines changed: 272 additions & 68 deletions

File tree

interpreter/customlabels/customlabels.go

Lines changed: 3 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ package customlabels // import "go.opentelemetry.io/ebpf-profiler/interpreter/cu
44
// #include "../../support/ebpf/types.h"
55
import "C"
66
import (
7-
"debug/elf"
87
"errors"
98
"fmt"
109
"regexp"
@@ -31,13 +30,6 @@ type data struct {
3130

3231
var _ interpreter.Data = &data{}
3332

34-
func roundUp(multiple, value uint64) uint64 {
35-
if multiple == 0 {
36-
return value
37-
}
38-
return (value + multiple - 1) / multiple * multiple
39-
}
40-
4133
func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) {
4234
ef, err := info.GetELF()
4335
if err != nil {
@@ -75,39 +67,11 @@ func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interprete
7567
return nil, errors.New("failed to locate TLS descriptor for custom labels")
7668
}
7769
} else {
78-
tlsSym, err := ef.LookupSymbol(tlsExport)
70+
offset, err := ef.LookupTLSSymbolOffset(tlsExport)
7971
if err != nil {
80-
return nil, err
81-
}
82-
if ef.Machine == elf.EM_AARCH64 {
83-
tlsAddr = libpf.Address(tlsSym.Address)
84-
} else if ef.Machine == elf.EM_X86_64 {
85-
// Symbol addresses are relative to the start of the
86-
// thread-local storage image, but the thread pointer points to the _end_
87-
// of the image. So we need to find the size of the image in order to know where the
88-
// beginning is.
89-
//
90-
// The image is just .tdata followed by .tbss,
91-
// but we also have to respect the alignment.
92-
tbss, err := ef.Tbss()
93-
if err != nil {
94-
return nil, err
95-
}
96-
tdata, err := ef.Tdata()
97-
var tdataSize uint64
98-
if err != nil {
99-
// No Tdata is ok, it's the same as size 0
100-
if err != pfelf.ErrNoTdata {
101-
return nil, err
102-
}
103-
} else {
104-
tdataSize = tdata.Size
105-
}
106-
imageSize := roundUp(tbss.Addralign, tdataSize) + tbss.Size
107-
tlsAddr = libpf.Address(int64(tlsSym.Address) - int64(imageSize))
108-
} else {
109-
return nil, fmt.Errorf("unrecognized machine: %s", ef.Machine.String())
72+
return nil, fmt.Errorf("failed to get tls symbol offset: %w", err)
11073
}
74+
tlsAddr = libpf.Address(offset)
11175
}
11276

11377
d := data{

interpreter/luajit/extractor_x86.go

Lines changed: 10 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ package luajit // import "go.opentelemetry.io/ebpf-profiler/interpreter/luajit"
1313

1414
import (
1515
"errors"
16-
"slices"
1716

1817
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
18+
xh "go.opentelemetry.io/ebpf-profiler/x86helpers"
1919
"golang.org/x/arch/x86/x86asm"
2020
)
2121

@@ -50,7 +50,7 @@ which is a dynamic public symbol that should be in all binaries of LuaJIT includ
5050
*/
5151
//nolint:nonamedreturns
5252
func (x *x86Extractor) findOffsetsFromLuaClose(b []byte) (glref, curL uint64, err error) {
53-
b, _ = skipEndBranch(b)
53+
b, _ = xh.SkipEndBranch(b)
5454
var greg x86asm.Reg
5555
for len(b) > 0 {
5656
var i x86asm.Inst
@@ -101,7 +101,7 @@ func (x *x86Extractor) findOffsetsFromLuaClose(b []byte) (glref, curL uint64, er
101101
// 0xfa8 is the g to dispatch offset.
102102
// https://github.com/openresty/luajit2/blob/7952882d/src/lj_dispatch.c#L122
103103
func (x *x86Extractor) findG2DispatchOffsetFromLjDispatchUpdate(b []byte) (uint64, error) {
104-
b, _ = skipEndBranch(b)
104+
b, _ = xh.SkipEndBranch(b)
105105
var greg x86asm.Reg
106106
for len(b) > 0 {
107107
i, err := x86asm.Decode(b, 64)
@@ -159,7 +159,7 @@ func (x *x86Extractor) findG2DispatchOffsetFromLjDispatchUpdate(b []byte) (uint6
159159
//
160160
//nolint:lll
161161
func (x *x86Extractor) findLjDispatchUpdateAddr(b []byte, addr uint64) (uint64, error) {
162-
b, ip := skipEndBranch(b)
162+
b, ip := xh.SkipEndBranch(b)
163163
var Lreg x86asm.Reg
164164
rdiHasG := false
165165
for len(b) > 0 {
@@ -212,7 +212,7 @@ func (x *x86Extractor) findLjDispatchUpdateAddr(b []byte, addr uint64) (uint64,
212212
// ----------- 0x430 is the G to J->traces offset
213213
// libluajit-5.1.so[0x637a1] <+33>: movq 0x430(%rdx), %rdx
214214
func (x *x86Extractor) findG2TracesOffsetFromChecktrace(b []byte) (uint64, error) {
215-
b, _ = skipEndBranch(b)
215+
b, _ = xh.SkipEndBranch(b)
216216
var Greg x86asm.Reg
217217
for len(b) > 0 {
218218
i, err := x86asm.Decode(b, 64)
@@ -236,7 +236,7 @@ func (x *x86Extractor) findG2TracesOffsetFromChecktrace(b []byte) (uint64, error
236236
}
237237

238238
func (x *x86Extractor) findFirstCall(b []byte, baseAddr int64) (uint64, error) {
239-
b, ip := skipEndBranch(b)
239+
b, ip := xh.SkipEndBranch(b)
240240
for len(b) > 0 {
241241
i, err := x86asm.Decode(b, 64)
242242
if err != nil {
@@ -258,7 +258,7 @@ func (x *x86Extractor) findFirstCall(b []byte, baseAddr int64) (uint64, error) {
258258

259259
// Return true if the code in b calls targetCall.
260260
func (x *x86Extractor) callExists(b []byte, baseAddr, targetCall int64) (bool, error) {
261-
b, ip := skipEndBranch(b)
261+
b, ip := xh.SkipEndBranch(b)
262262
for len(b) > 0 {
263263
i, err := x86asm.Decode(b, 64)
264264
if err != nil {
@@ -289,7 +289,7 @@ func (x *x86Extractor) callExists(b []byte, baseAddr, targetCall int64) (bool, e
289289
func findRipRelativeLea2ndArgTo2ndCall(b []byte, baseAddr, targetCall int64) (uint64, error) {
290290
var leaRsi int64
291291
calls := 2
292-
b, ip := skipEndBranch(b)
292+
b, ip := xh.SkipEndBranch(b)
293293
for len(b) > 0 {
294294
i, err := x86asm.Decode(b, 64)
295295
if err != nil {
@@ -364,7 +364,7 @@ func skipCallsAABA(b []byte, ip, baseAddr int64) ([]byte, int64, error) {
364364
func (x *x86Extractor) find3rdArgToLibPreregCall(b []byte, baseAddr int64) (uint64, error) {
365365
var rdxAddr int64
366366
calls := 3
367-
b, ip := skipEndBranch(b)
367+
b, ip := xh.SkipEndBranch(b)
368368
// Skip the lua_push* call sequence (and all the preceding calls which varies depending on
369369
// inlining).
370370
// libluajit-5.1.so[0x700a5] <+133>: movq %rbx, %rdi
@@ -432,7 +432,7 @@ func (x *x86Extractor) find3rdArgToLibPreregCall(b []byte, baseAddr int64) (uint
432432
// bbc2: c3 ret
433433
func (x *x86Extractor) find4thArgToLibRegCall(b []byte, baseAddr int64) (int64, error) {
434434
var ip int64
435-
b, ip = skipEndBranch(b)
435+
b, ip = xh.SkipEndBranch(b)
436436
for len(b) > 0 {
437437
i, err := x86asm.Decode(b, 64)
438438
if err != nil {
@@ -468,24 +468,6 @@ func calcRipRelativeAddr(a1 x86asm.Mem, baseAddr, ip int64) int64 {
468468
return baseAddr + ip + int64(disp)
469469
}
470470

471-
var endbr64 = [4]byte{0xf3, 0x0f, 0x1e, 0xfa}
472-
473-
// On some binaries the function starts like this:
474-
//
475-
// 0x0000000000012860 <+0>: f3 0f 1e fa endbr64
476-
// 0x0000000000012864 <+4>: 41 55 push %r13
477-
//
478-
// This is some kind of stack smashing indirect jump protection, treat it as a nop,
479-
// x86asm doesn't know how to handle it.
480-
//
481-
//nolint:gocritic
482-
func skipEndBranch(b []byte) ([]byte, int64) {
483-
if slices.Equal(b[0:4], endbr64[:]) {
484-
return b[4:], 4
485-
}
486-
return b, 0
487-
}
488-
489471
// If we're dealing with 32bit values compilers will use R or E prefix
490472
// interchangeably (E refs are just zero padded).
491473
func sameReg(r1, r2 x86asm.Reg) bool {

libpf/pfelf/file.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ var ErrNoTbss = errors.New("no thread-local uninitialized data section (tbss)")
6262
// ErrNoTdata is returned when the tdata section cannot be found
6363
var ErrNoTdata = errors.New("no thread-local initialized data section (tdata)")
6464

65+
var ErrNoTLS = errors.New("no TLS program header")
66+
6567
// File represents an open ELF file
6668
type File struct {
6769
// closer is called internally when resources for this File are to be released
@@ -456,6 +458,16 @@ func (f *File) Tdata() (*Section, error) {
456458
return nil, ErrNoTdata
457459
}
458460

461+
// TLS gets the TLS segment (program header)
462+
func (f *File) TLS() (*Prog, error) {
463+
for _, seg := range f.Progs {
464+
if seg.Type == elf.PT_TLS {
465+
return &seg, nil
466+
}
467+
}
468+
return nil, ErrNoTLS
469+
}
470+
459471
// ReadVirtualMemory reads bytes from given virtual address
460472
func (f *File) ReadVirtualMemory(p []byte, addr int64) (int, error) {
461473
if len(p) == 0 {
@@ -834,6 +846,57 @@ func calcSysvHash(s libpf.SymbolName) uint32 {
834846
return h & 0xfffffff
835847
}
836848

849+
// roundUp rounds `value` up to the nearest multiple of `multiple`.
850+
func roundUp(value, multiple uint64) uint64 {
851+
if multiple == 0 {
852+
return value
853+
}
854+
return (value + multiple - 1) / multiple * multiple
855+
}
856+
857+
// LookupTLSSymbolOffset computes the offset of a symbol
858+
// in thread-local storage of the main binary.
859+
//
860+
// On x86-64, this is the offset from the fs-base internal register (and should be negative).
861+
// On aarch64, this is the offset from the tpidr_el0 register (and should be positive).
862+
//
863+
// Note that this only works _in the main binary of the executable_.
864+
// Looking up a thread-local variable in a shared library requires a more complex
865+
// procedure.
866+
func (f *File) LookupTLSSymbolOffset(symbol libpf.SymbolName) (int64, error) {
867+
tlsSym, err := f.LookupSymbol(symbol)
868+
if err != nil {
869+
return 0, err
870+
}
871+
if f.Machine == elf.EM_AARCH64 {
872+
return int64(tlsSym.Address), nil
873+
}
874+
if f.Machine == elf.EM_X86_64 {
875+
// Symbol addresses are relative to the start of the
876+
// thread-local storage image, but the thread pointer points to the _end_
877+
// of the image. So we need to find the size of the image in order to know where the
878+
// beginning is.
879+
//
880+
// Furthermore, the thread pointer (fs-base) respects the TLS segment's alignment
881+
// (which is a bit weird given that offsets are negative, but it is in fact true).
882+
//
883+
// So if the segment is 32-byte aligned (and of size <= 32), and some object is at
884+
// byte 4 in the segment,
885+
// it will be at offset -28 from fs-base.
886+
//
887+
// See "ELF Handling For Thread-Local Storage" (https://www.uclibc.org/docs/tls.pdf),
888+
// pp. 8 ("Variant II"), 11 ("IA-32 Specific"), 14 ("x86-64 Specific").
889+
tls, err := f.TLS()
890+
if err != nil {
891+
return 0, err
892+
}
893+
offset := int64(tlsSym.Address) - int64(roundUp(tls.Memsz, tls.Align))
894+
895+
return offset, nil
896+
}
897+
return 0, fmt.Errorf("unrecognized machine: %s", f.Machine.String())
898+
}
899+
837900
// LookupSymbol searches for a given symbol in the ELF
838901
func (f *File) LookupSymbol(symbol libpf.SymbolName) (*libpf.Symbol, error) {
839902
if f.gnuHash.addr != 0 {

libpf/pfelf/file_test.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"go.opentelemetry.io/ebpf-profiler/testsupport"
1515

1616
"go.opentelemetry.io/ebpf-profiler/libpf"
17+
xh "go.opentelemetry.io/ebpf-profiler/x86helpers"
18+
xx "golang.org/x/arch/x86/x86asm"
1719
)
1820

1921
func getPFELF(path string, t *testing.T) *File {
@@ -95,3 +97,89 @@ func TestGoVersion(t *testing.T) {
9597
require.NoError(t, err)
9698
assert.Equal(t, runtime.Version(), testVersion)
9799
}
100+
101+
func symbolOffsetFromCodeX86(code []byte) (int64, error) {
102+
// e.g. mov eax,DWORD PTR fs:0xfffffffffffffffc
103+
code, _ = xh.SkipEndBranch(code)
104+
offset := 0
105+
for {
106+
insn, err := xx.Decode(code[offset:], 64)
107+
if err != nil {
108+
return 0, err
109+
}
110+
offset += insn.Len
111+
if insn.Op != xx.MOV {
112+
continue
113+
}
114+
switch a := insn.Args[1].(type) {
115+
case xx.Mem:
116+
if a.Segment != xx.FS {
117+
continue
118+
}
119+
// for some reason the Go disassembler
120+
// reports the displacement as a 32-bit value
121+
// embedded in a 64-bit one; e.g., it represents -16 as 0x00000000fffffff0.
122+
// So this double cast is necessary.
123+
return int64(int32(a.Disp)), nil
124+
default:
125+
continue
126+
}
127+
}
128+
}
129+
130+
func TestLookupTlsSymbolOffset(t *testing.T) {
131+
for _, test := range []struct {
132+
exe string
133+
hasTbss bool
134+
hasTdata bool
135+
}{
136+
{"tls-tbss", true, false},
137+
{"tls-aligned-tbss", true, false},
138+
{"tls-tdata", false, true},
139+
{"tls-aligned-tdata", false, true},
140+
{"tls-tbss-tdata", true, true},
141+
{"tls-aligned-tbss-tdata", true, true},
142+
{"tls-tbss-aligned-tdata", true, true},
143+
{"tls-aligned-tbss-aligned-tdata", true, true},
144+
} {
145+
// Testing this on arm is nontrivial, because we need to actually follow some
146+
// pointers in-process to get the address of the tls block. So let's
147+
// ignore it and just test x86.
148+
if runtime.GOARCH != "amd64" {
149+
t.Skip("this test is only supported on x86")
150+
}
151+
ef, err := Open("testdata/" + test.exe)
152+
require.NoError(t, err)
153+
154+
if test.hasTbss {
155+
sym, err := ef.LookupSymbol("get_tbss")
156+
require.NoError(t, err)
157+
code := make([]byte, sym.Size)
158+
_, err = ef.ReadVirtualMemory(code, int64(sym.Address))
159+
require.NoError(t, err)
160+
161+
offset, err := symbolOffsetFromCodeX86(code)
162+
require.NoError(t, err)
163+
164+
offset2, err := ef.LookupTLSSymbolOffset("tbss")
165+
require.NoError(t, err)
166+
167+
require.Equal(t, offset, offset2)
168+
}
169+
if test.hasTdata {
170+
sym, err := ef.LookupSymbol("get_tdata")
171+
require.NoError(t, err)
172+
code := make([]byte, sym.Size)
173+
_, err = ef.ReadVirtualMemory(code, int64(sym.Address))
174+
require.NoError(t, err)
175+
176+
offset, err := symbolOffsetFromCodeX86(code)
177+
require.NoError(t, err)
178+
179+
offset2, err := ef.LookupTLSSymbolOffset("tdata")
180+
require.NoError(t, err)
181+
182+
require.Equal(t, offset, offset2)
183+
}
184+
}
185+
}

libpf/pfelf/testdata/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ kernel-image
55
ubuntu-kernel-image
66
go-binary
77
separate-debug-file
8+
tls-*
9+
!tls-lookup.c

0 commit comments

Comments
 (0)