Skip to content

Commit db29d41

Browse files
authored
Use libunwind for jemalloc memory profiling stack traces (#5602)
## Motivation

The default libgcc DWARF unwinder (`_Unwind_Find_FDE`) used by jemalloc for profiling stack traces has a known livelock ([jemalloc/jemalloc#2282](https://github.com/jemalloc/jemalloc/issues/2282)) — one thread hangs in `_Unwind_Find_FDE` and all others block on futex_wait. We've hit this in production on validator shards.

## Proposal

Switch jemalloc's stack unwinder to libunwind by building with `--enable-prof-libunwind`.

- Point `tikv-jemallocator` at [linera-io/jemallocator](https://github.com/linera-io/jemallocator) fork which adds a `profiling_libunwind` feature (upstream doesn't support it yet — [tikv/jemallocator#146](https://github.com/tikv/jemallocator/issues/146))
- Enable `profiling_libunwind` in the `jemalloc` feature
- Add `libunwind-dev` (builder) and `libunwind8` (runtime) to `docker/Dockerfile`

## Test Plan

- CI
- Verify memory profiling works end-to-end in a temporary network deployment

## Release Plan

- These changes should be backported to `testnet_conway`.

## Links

- Fork PR: linera-io/jemallocator#1
- Upstream issue: [tikv/jemallocator#146](https://github.com/tikv/jemallocator/issues/146)
- Jemalloc livelock: [jemalloc/jemalloc#2282](https://github.com/jemalloc/jemalloc/issues/2282)
1 parent 82548b6 commit db29d41

9 files changed

Lines changed: 154 additions & 113 deletions

File tree

Cargo.lock

Lines changed: 36 additions & 50 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@ indexed-db = "0.4.2"
152152
insta = "1.36.1"
153153
is-terminal = "0.4.12"
154154
itertools = "0.14.0"
155-
jemalloc_pprof = "0.8.1"
156155
js-sys = "0.3.70"
157156
k256 = { version = "0.13.4", default-features = false, features = [
158157
"ecdsa",
@@ -163,10 +162,13 @@ k256 = { version = "0.13.4", default-features = false, features = [
163162
] }
164163
k8s-openapi = { version = "0.21.1", features = ["v1_28"] }
165164
kube = "0.88.1"
165+
linera-jemalloc-ctl = { version = "0.6.1", features = ["use_std"] }
166+
linera-jemallocator = "0.6.1"
166167
linera-kywasmtime = "0.1.0"
167168
linked-hash-map = "0.5.6"
168169
log = "0.4.21"
169170
lru = "0.15.0"
171+
mappings = "0.7.1"
170172
mini-moka = "0.10.3"
171173
nonzero_lit = "0.1.2"
172174
num-bigint = "0.4.3"
@@ -186,6 +188,7 @@ opentelemetry_sdk = { version = "0.30.0", features = ["trace", "rt-tokio"] }
186188
papaya = "0.2.3"
187189
pathdiff = "0.2.1"
188190
port-selector = "0.1.6"
191+
pprof_util = { version = "0.8.0", features = ["flamegraph", "symbolize"] }
189192
prettyplease = "0.2.16"
190193
proc-macro-error = "1.0.4"
191194
proc-macro2 = "1.0"
@@ -267,7 +270,6 @@ test-log = { version = "0.2.15", default-features = false, features = [
267270
test-strategy = "0.3.1"
268271
thiserror = "1.0.65"
269272
thiserror-context = "0.1.1"
270-
tikv-jemallocator = "0.6.0"
271273
tokio = "1.36.0"
272274
tokio-stream = "0.1.14"
273275
tokio-test = "0.4.3"

docker/Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ ARG copy=${binaries:+_copy}
1717
ARG build_flag=--release
1818
ARG build_folder=release
1919
ARG build_features=scylladb,metrics,jemalloc
20-
ARG rustflags="-C force-frame-pointers=yes"
20+
ARG rustflags="-C force-frame-pointers=yes -L /usr/lib/x86_64-linux-gnu"
2121

2222
FROM rust:1.86-slim-bookworm AS builder
2323
ARG git_commit
@@ -30,7 +30,8 @@ RUN apt-get update && apt-get install -y \
3030
pkg-config \
3131
protobuf-compiler \
3232
clang \
33-
make
33+
make \
34+
libunwind-dev
3435

3536
COPY . .
3637

@@ -71,7 +72,8 @@ LABEL build_date=$build_date
7172

7273
RUN apt-get update && apt-get install -y \
7374
ca-certificates \
74-
openssl
75+
openssl \
76+
libunwind8
7577
RUN update-ca-certificates
7678

7779
COPY --from=binaries \

linera-metrics/Cargo.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ version.workspace = true
1515
workspace = true
1616

1717
[features]
18-
jemalloc = ["jemalloc_pprof"]
18+
jemalloc = ["linera-jemalloc-ctl", "mappings", "pprof_util", "tempfile"]
1919

2020
[dependencies]
2121
anyhow.workspace = true
2222
axum.workspace = true
23-
jemalloc_pprof = { workspace = true, features = [
24-
"symbolize",
25-
"flamegraph",
26-
], optional = true }
23+
linera-jemalloc-ctl = { workspace = true, optional = true }
24+
mappings = { workspace = true, optional = true }
25+
pprof_util = { workspace = true, optional = true }
2726
prometheus.workspace = true
27+
tempfile = { workspace = true, optional = true }
2828
thiserror.workspace = true
2929
tokio = { workspace = true, features = ["full"] }
3030
tokio-util.workspace = true

0 commit comments

Comments
 (0)