diff --git a/.claude/ci/appsec-gradle-integration.md b/.claude/ci/appsec-gradle-integration.md new file mode 100644 index 00000000000..949b5748a4a --- /dev/null +++ b/.claude/ci/appsec-gradle-integration.md @@ -0,0 +1,288 @@ +# AppSec Gradle Integration Tests + +## CI Jobs + +**Source:** `.gitlab/generate-appsec.php` — generates the appsec-trigger +child pipeline; all job `script:` sections are defined inline in this +file. + +| CI Job | Image | What it does | +|--------|-------|-------------| +| `appsec integration tests: [{target}]` | `docker:24.0.4-gbi-focal` | Gradle integration tests with C++ helper (release/zts variants) | +| `appsec integration tests (helper-rust): [{target}]` | same | Same tests with Rust helper (`-PuseHelperRust`); includes `debug` variant | +| `appsec integration tests (ssi): [{target}]` | same | SSI mode (PHP 8.3 only) | +| `helper-rust build and test` | same | `cargo fmt --check` + build + unit tests | +| `helper-rust code coverage` | same | Unit test coverage via `cargo-llvm-cov` | +| `helper-rust integration coverage` | same | Integration coverage collection (not needed locally) | + +Runner: `docker-in-docker:amd64` +Matrix: PHP 7.0+ × release/debug/zts/musl/ssi (varies by job group) + +**Important:** `testX.Y-debug` are not gradle targets that are run on CI. They +may, however, be useful for debugging. + +CI passes `TERM=dumb` and `--scan -Pbuildscan` to Gradle. `TERM=dumb` +suppresses progress animations in CI logs; both flags are optional +locally. + +## Prerequisites + +- JDK 17+. If not available via your system package manager, use SDKMAN + (`sdk install java 17`) or download from + https://download.oracle.com/java/17/archive/. 
+- Docker daemon running + +## Working Directory + +All `./gradlew` commands run from: + +``` +appsec/tests/integration/ +``` + +## Running Tests + +### Full suite for one PHP target + +```bash +./gradlew test8.3-debug --info +``` + +### Single test (fastest feedback loop) + +```bash +./gradlew test8.3-debug --info \ + --tests "com.datadog.appsec.php.integration.Apache2FpmTests.Pool environment" +``` + +The `--tests` filter accepts: +- Full method: `"com.datadog.appsec.php.integration.Apache2FpmTests.Pool environment"` +- Class only: `"*Apache2FpmTests*"` or `"com.datadog.appsec.php.integration.Apache2FpmTests"` +- Wildcard: `"*FpmTests*"` + +### With helper-rust (instead of C++ helper) + +```bash +./gradlew test8.3-debug -PuseHelperRust --info \ + --tests "com.datadog.appsec.php.integration.Apache2FpmTests.Pool environment" +``` + +This builds the Rust helper via the `buildHelperRust` task (musl build, works on both glibc and musl targets) and stores the binary in the `php-helper-rust` Docker volume. + +### C++ helper (default) + +Omit `-PuseHelperRust` and `-PhelperBinary`. The C++ helper is built via the `buildAppsec-*` task. + +## Image Tags + +By default, Gradle resolves Docker images via pinned SHA256 digests in `gradle/tag_mappings.gradle`. To use floating tags (locally-built images or latest from Docker Hub): + +```bash +./gradlew test8.3-debug -PfloatingImageTags --info +``` + +## Available Gradle Tasks + +### Test tasks + +Pattern: `test{version}-{variant}` + +| Variant | Notes | +|---|---| +| `release` | Standard build | +| `debug` | Debug build (assertions enabled) | +| `release-zts` | Thread-safe build | +| `release-musl` | Alpine/musl (only `8.5-release-musl`) | +| `release-ssi` / `debug-ssi` | SSI mode (only PHP 8.3) | + +Full list: `./gradlew tasks --group=Verification` + +### Helper-rust tasks + +| Task | Description | +|---|---| +| `buildHelperRust` | Build helper-rust with musl (universal binary). Output in `php-helper-rust` volume. 
| +| `testHelperRust` | `cargo fmt --check` + `cargo build --release` + `cargo test --release` (runs inside `php-deps` image) | +| `coverageHelperRust` | Unit test coverage via `cargo-llvm-cov`. Output: `php-helper-rust-coverage` volume. | +| `buildHelperRustWithCoverage` | Build with `-C instrument-coverage` for integration coverage collection. | +| `generateHelperRustIntegrationCoverage` | Merge `.profraw` files into lcov after integration run. | + +### Build tasks + +| Task | Description | +|---|---| +| `buildTracer-{v}-{var}` | Build ddtrace.so for given PHP version/variant | +| `buildAppsec-{v}-{var}` | Build ddappsec.so (C++ extension + helper) | +| `buildHelperRust` | Build Rust helper (musl, universal) | +| `buildLibddwaf` | Build libddwaf shared library | + +### Other tasks + +| Task | Description | +|---|---| +| `loadCaches` | Restore Docker volume caches from tarball | +| `saveCaches` | Save Docker volume caches to tarball | +| `clean` | Delete build directory and clean Docker volumes | +| `check` | Run all test tasks | + +All tasks: `./gradlew tasks --all` + +## Interactive Container (runMain) + +Start a test container without running tests (for manual debugging): + +```bash +./gradlew runMain8.3-release -PtestClass=com.datadog.appsec.php.integration.Apache2FpmTests +``` + +The `-PtestClass` property is required (the task is not created without it). Add `-PuseHelperRust` or `-PhelperBinary=...` as needed. 
After a test run, logs are in:

```
build/test-logs/<test-class>-<version>-<variant>/
```
Key volumes: + +| Volume | Contents | +|---|---| +| `php-helper-rust` | `libddappsec-helper.so` (Rust helper binary) | +| `php-tracer-{v}-{var}` | Built `ddtrace.so` | +| `php-appsec-{v}-{var}` | Built `ddappsec.so` + C++ helper | +| `php-tracer-cargo-cache` | Cargo registry cache | +| `php-tracer-cargo-cache-git` | Cargo git cache | +| `php-appsec-boost-cache` | Boost build cache | +| `php-helper-rust-coverage` | Coverage-instrumented binary + profraw files | + +To force a rebuild, remove the relevant volume: + +```bash +docker volume rm php-helper-rust +``` + +To clean everything: + +```bash +./gradlew clean +``` + +## Debugging + +Attach a Java debugger to the test runner: + +```bash +./gradlew test8.3-debug --tests "*Apache2FpmTests*" --debug-jvm +``` + +Enable PHP Xdebug in the test container: + +```bash +./gradlew test8.3-debug --tests "*Apache2FpmTests*" -PXDEBUG=1 +``` + +## Expected skips + +A significant number of tests are skipped on any given target — this is +normal. Skip conditions are `@EnabledIf` guards in the test classes: + +| Test class | Skips on | Reason | +|---|---|---| +| `FrankenphpClassicTests`, `FrankenphpWorkerTests` | anything except `8.4-zts` | Requires ZTS + PHP 8.4 | +| `Laravel8xTests` | anything except `7.4` (NTS) | Requires PHP 7.4 non-ZTS | +| `Symfony62Tests` | anything except `8.1` (NTS) | Requires PHP 8.1 non-ZTS | +| `RaspSqliTests` | no MySQL service | Requires a running MySQL | +| `SsiStableConfigTests` | non-SSI variants | Requires `-DSSI=true` | + +On `test8.3-debug` expect ~67 skips out of ~300 tests; all are expected. + +## Test report + +After a run, the HTML report is at: + +``` +appsec/tests/integration/build/reports/tests/test8.3-debug/index.html +``` + +(Replace `test8.3-debug` with your target.) Open in a browser for a +structured pass/fail/skip breakdown. + +# Debugging + +Run gradle with `--debug-jvm`. This will stop for the debugger, indicating so in the output. 
* Inside the test container (determine its id first), attach gdb to the sidecar
  (`pgrep -f dd-ipc-helper`) or to PHP (usually an apache or an FPM worker -- if
  you're investigating code run during request processing it will not be the
  master process). For the sidecar, the first gdb command must be
  `file /proc/<pid>/exe`.
diff --git a/.claude/ci/appsec-native-tests.md b/.claude/ci/appsec-native-tests.md new file mode 100644 index 00000000000..21521e3b249 --- /dev/null +++ b/.claude/ci/appsec-native-tests.md @@ -0,0 +1,263 @@ +# Appsec Native Linux Tests + +## CI Jobs + +**Source:** `.gitlab/generate-appsec.php` — generates the appsec-trigger +child pipeline; all job `script:` sections are defined inline in this +file. + +| CI Job | Image | What it does | +|--------|-------|-------------| +| `test appsec extension: [{ver}, {arch}, debug]` | `datadog/dd-trace-ci:php-{ver}_bookworm-6` | Builds appsec PHP extension + runs phpunit `.phpt` tests | +| `test appsec extension: [{ver}, {arch}, debug-zts]` | same | ZTS variant | +| `test appsec extension: [{ver}, {arch}, debug-zts-asan]` | same | ASAN variant (PHP 7.4+) | +| `test appsec helper asan` | `datadog/dd-trace-ci:bookworm-6` | Builds C++ helper with ASAN, runs gtest suite | +| `appsec lint` | `datadog/dd-trace-ci:php-8.3_bookworm-6` | clang-format + clang-tidy | +| `appsec code coverage` | `datadog/dd-trace-ci:php-8.3_bookworm-6` | Coverage instrumented build (not needed locally) | + +Runner: `arch:amd64` + `arch:arm64` +Matrix: PHP 7.0+ × {debug, debug-zts, debug-zts-asan (7.4+)} + +The `{arch}` dimension only controls the GitLab runner tag. It has no +effect on the Docker image or commands run. On macOS (Apple Silicon), +prefer the `arm64` variant using `--platform linux/arm64` — pass it as a +Docker option between the image name and `--`: + +```bash +.claude/ci/dockerh --cache appsec-ext-8.3-debug-arm64 --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 --platform linux/arm64 -- bash -c '...' +``` + +## Why `--overlayfs` is needed + +These builds need a writable source tree because cmake writes generated +headers (`src/extension/version.h`, `src/version.hpp`) back into the +source directory. See [index.md](index.md) for how `--overlayfs` works. 
The `appsec/build` and `tmp/` overlays persist between runs, so
subsequent invocations skip the Boost compile and incrementally rebuild
only what changed. To start from scratch: add `--clean-cache`.
Paths are relative to `appsec/` +(the cmake source directory). Drop `clang-tidy-17` from the apt install +to save ~10 seconds: + +```bash +.claude/ci/dockerh --cache appsec-ext-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +sudo apt-get update -qq && sudo apt-get install -y -qq \ + libc++-17-dev libc++abi-17-dev > /dev/null 2>&1 +cd appsec/build +TESTS=tests/extension/waf_timeout_default.phpt ASAN_OPTIONS=malloc_context_size=0 make xtest +' +``` + +Multiple test files can be passed space-separated in `TESTS`. Glob +patterns also work (e.g., `TESTS="tests/extension/user_req_*.phpt"`). + +## Extension tests via Gradle + +`appsec/tests/integration/build.gradle` exposes `xtest` tasks that run +the phpt suite inside Docker, reusing the same volumes as the integration +tests. This is an alternative that avoids writing into the host tree +entirely. + +Working directory: `appsec/tests/integration/` + +### Full suite for one target + +```bash +./gradlew xtest8.3-debug --info +``` + +Replace version and variant to match the integration test matrix +(`release`, `debug`, `release-zts`, …). Musl targets have no `xtest` +task. + +### Single test file + +Pass the `tests` property (path relative to `appsec/`, space-separated, +globs work): + +```bash +./gradlew xtest8.3-debug --info \ + -Ptests="tests/extension/waf_timeout_default.phpt" + +./gradlew xtest8.3-debug --info \ + -Ptests="tests/extension/user_req_*.phpt" +``` + +### Build caching + +Gradle stores artifacts in Docker volumes (`php-tracer-*`, +`php-appsec-*`). First run compiles Boost from source; subsequent runs +reuse it. To force a full rebuild: + +```bash +docker volume rm php-appsec-8.3-debug php-tracer-8.3-debug +``` + +## Helper tests (C++ ASAN) + +The C++ helper tests use the `bookworm-6` image (no PHP needed). The +binary is a gtest executable. 
With `--overlayfs --root`, all writes +(including `appsec/build-helper`) persist in the Docker volume +automatically — no manual bind mount needed. + +### Full suite + +```bash +.claude/ci/dockerh --cache appsec-helper --overlayfs --root \ + datadog/dd-trace-ci:bookworm-6 -- bash -c ' +set -e +apt-get update -qq && apt-get install -y -qq \ + libc++-17-dev libc++abi-17-dev > /dev/null 2>&1 +# Required: libddwaf submodule dir may trigger git's safe.directory check. +git config --global --add safe.directory \ + /project/dd-trace-php/appsec/third_party/libddwaf +mkdir -p appsec/build-helper +cd appsec/build-helper +cmake .. -DCMAKE_BUILD_TYPE=Debug -DDD_APPSEC_BUILD_EXTENSION=OFF \ + -DDD_APPSEC_ENABLE_COVERAGE=OFF -DDD_APPSEC_TESTING=ON \ + -DCMAKE_CXX_FLAGS="-stdlib=libc++ -fsanitize=address -fsanitize=leak -DASAN_BUILD" \ + -DCMAKE_C_FLAGS="-fsanitize=address -fsanitize=leak -DASAN_BUILD" \ + -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address -fsanitize=leak" \ + -DCMAKE_MODULE_LINKER_FLAGS="-fsanitize=address -fsanitize=leak" +make -j$(nproc) ddappsec_helper_test +cd /project/dd-trace-php +./appsec/build-helper/tests/helper/ddappsec_helper_test +' +``` + +`clang-tidy-17` is not needed here — omitting it saves ~5–8 seconds of +apt time. CI installs it via a shared `before_script` template. + +CI also passes `-DBOOST_CACHE_PREFIX=$CI_PROJECT_DIR/boost-cache` and +`-DCLANG_TIDY=/usr/bin/run-clang-tidy-17` to cmake. The first only +affects where the Boost cache lands (not a correctness issue locally). +The second means CI silently runs clang-tidy checks during the helper +build that are skipped locally. 
- The first build takes several minutes because Boost is compiled from
  source. Subsequent builds reuse the cached Boost in
  `~/.cache/dd-ci/<cache-name>/appsec/build/boost_cache/` (extension tests) or
  in the `php-appsec-boost-cache` Docker volume (Gradle).
Files are owned by root because helper + tests run with `--user root`; clean up with + `docker run --rm -v ~/.cache/dd-ci/appsec-helper/appsec:/w alpine rm -rf /w/build-helper`. + +- The `libc++-17-dev` and `libc++abi-17-dev` packages must be installed + in every new container — the cmake cache references libc++ headers and + will fail to compile without them. + +- When switching PHP versions or variants, use a distinct `--cache` name + per version/variant (e.g. `appsec-ext-8.3-debug` vs + `appsec-ext-7.4-debug`). + +- Avoid `--clean-cache` unless absolutely necessary — it destroys the + Boost build cache, which takes 10+ minutes to rebuild. To force only a + cmake reconfigure, delete `CMakeCache.txt` directly: + ```bash + rm ~/.cache/dd-ci/appsec-ext-8.3-debug/appsec/build/CMakeCache.txt + ``` + Boost stays intact in `boost_cache/`. diff --git a/.claude/ci/benchmarks.md b/.claude/ci/benchmarks.md new file mode 100644 index 00000000000..06e35f40fba --- /dev/null +++ b/.claude/ci/benchmarks.md @@ -0,0 +1,218 @@ +# Benchmarks + +## CI Jobs + +**Source:** +- `.gitlab/generate-package.php` — declares `include: .gitlab/benchmarks.yml` and the pipeline stages +- `.gitlab/benchmarks.yml` — all benchmark job definitions (if present; may be in the `DataDog/benchmarking-platform` repo instead) + +| CI Job | Image | What it does | +|--------|-------|--------------| +| `benchmarks-tracer` | `486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-php-82-dev` | Runs PHP tracer microbenchmarks via the `benchmarking-platform` framework; skips automatically if no tracer-relevant files changed | +| `benchmarks-appsec` | same | Runs appsec microbenchmarks; produces `candidate.tar.gz` and `baseline.tar.gz` artifacts | +| `benchmarks-profiler` | same | Runs profiler microbenchmarks | +| `macrobenchmarks: [{PHP_VERSION}]` | `486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:php_laravel-realworld` | Runs a Laravel Realworld application under k6 
load at three traffic levels; PHP 7.4 and 8.1 | +| `check-big-regressions` | `registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:latest` | Post-benchmark gate: fails if `benchmarks-tracer` results contain a regression above threshold | +| `check-slo-breaches` | (benchmarking-platform-tools template) | Post-macrobenchmark gate: evaluates SLO breaches | +| `notify-slo-breaches` | (benchmarking-platform-tools template) | Posts SLO breach notifications to `#guild-dd-php` Slack channel | + +Runner: `runner:apm-k8s-tweaked-metal` (microbenchmarks); `runner:apm-k8s-same-cpu` +(macrobenchmarks); `arch:amd64` (gate jobs). + +**Trigger rules:** +- `benchmarks-tracer` — runs on every push; has an early-exit guard: if none of + `ext/`, `src/`, `components/`, `components-rs/`, + `zend_abstract_interface/`, `tests/Benchmarks/`, `benchmark/`, `tea/` + changed relative to `master`, exits 0 and `check-big-regressions` + also skips. +- `benchmarks-appsec` — auto-runs when `appsec/src/**/*` changed; + available as a manual job otherwise. +- `benchmarks-profiler` — auto-runs when `profiling/**/*` changed; + available as a manual job otherwise. +- `macrobenchmarks` — automatic on `master` and release branches; manual otherwise. + +## What It Tests + +Microbenchmarks measure overhead of the tracer, appsec, and profiler components +using the `DataDog/benchmarking-platform` framework (`dd-trace-php` branch). The +scenario name is passed as `BP_SCENARIO` to `bp-runner`. + +Macrobenchmarks run a realistic PHP application (Laravel Realworld) under k6 load +at three traffic levels and upload results to S3. They depend on compiled extension +and `datadog-setup.php` artifacts. + +## Local Reproduction + +Benchmark jobs run on dedicated performance hardware that is not accessible +outside CI. **Local runs produce numbers incomparable to CI results**, but +they are useful for before/after comparisons on the same machine and for +verifying that benchmark code runs without errors. 
This approach requires manual `cp` because
`compile_extension.sh` uses `--enable-ddtrace-rust-library-split`
(see the caveat in the Gotchas section below).
+ +Build step (skip if `.so` already in overlay or downloaded from CI): + +```bash +.claude/ci/dockerh --cache bench-82-split --overlayfs --root \ + datadog/dd-trace-ci:php-8.2_bookworm-6 \ + -e SHARED=1 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash .gitlab/compile_extension.sh +``` + +Benchmark step (every run — the ext dir is outside the overlay): + +```bash +.claude/ci/dockerh --cache bench-82-split --overlayfs --root \ + datadog/dd-trace-ci:php-8.2_bookworm-6 \ + -e SHARED=1 \ + -e DD_TRACE_AUTOLOAD_NO_COMPILE=true \ + -- bash -c ' +git config --global --add safe.directory /project/dd-trace-php +cp tmp/build_extension/modules/ddtrace.so \ + $(php-config --extension-dir)/ddtrace.so +make install_ini +make -j$(nproc) composer_tests_update +make benchmarks_run_dependencies ASSUME_COMPILED=1 +make call_benchmarks FILTER=SpanBench +' +``` + +On subsequent runs (deps already in overlay) you can drop the +`composer_tests_update` and `benchmarks_run_dependencies` lines. + +**Approach 3 — Download `.so` from CI** + +Fastest when you don't need to modify the extension itself. See +[compile-artifacts.md](compile-artifacts.md) and the +`download-artifacts` script. + +```bash +# Download the "compile extension: debug" artifact for PHP 8.2 +tooling/bin/download-artifacts \ + --job-name "compile extension: debug [8.2, amd64]" \ + -o /tmp/bench-ext + +# Place it in the overlay via bind-mount +.claude/ci/dockerh --cache bench-82-dl --overlayfs --root \ + datadog/dd-trace-ci:php-8.2_bookworm-6 \ + -v /tmp/bench-ext:/tmp/bench-ext:ro \ + -- bash -c ' +mkdir -p tmp/build_extension/modules +cp /tmp/bench-ext/tmp/build_extension/modules/ddtrace.so \ + tmp/build_extension/modules/ddtrace.so +' +``` + +Then run benchmarks as in Approach 2 (using `--cache bench-82-dl`). + +Replace `FILTER=SpanBench` with any PHPBench filter, or remove it +to run all suites. For OPcache benchmarks use `call_benchmarks_opcache`. 
curl -s -H "PRIVATE-TOKEN: $GITLAB_PERSONAL_ACCESS_TOKEN" \
  "https://gitlab.ddbuild.io/api/v4/projects/355/jobs/<job-id>/trace"
- Without overlayfs, place it in one of the bind-mounted cache directories:
  `-e CARGO_HOME=/project/dd-trace-php/tmp/cargo_home`.
+ +This affects `build-sidecar.sh`, profiler builds, and any other Rust build that +does not use `--root`. First local run downloads all crates from scratch. + +### Submodule initialisation + +Before any build, ensure the relevant submodules are initialised +(see also [../general.md](../general.md) section 4): + +```bash +# Tracer extension (ddtrace.so) — needs libdatadog +git submodule update --init libdatadog + +# Appsec extension or helper — needs these additionally +git submodule update --init \ + appsec/third_party/libddwaf \ + appsec/third_party/msgpack-c \ + appsec/third_party/cpp-base64 + +# Appsec helper rust — needs libddwaf-rust +git submodule update --init --recursive \ + appsec/third_party/libddwaf-rust +``` + +### switch-php naming differs between images + +On **centos-7** images, PHP variants are version-prefixed: `8.3`, +`8.3-debug`, `8.3-zts`. On **bookworm** images, variants are bare +names: `nts`, `debug`, `zts`, `nts-asan`, `debug-zts-asan`. + +Build scripts that call `switch-php` internally (e.g. +`compile_extension.sh`, `build-tracing.sh`, `build-appsec.sh`, +`build-profiler.sh`) handle this themselves and need `--root` (not +`--php`) so they can modify `/usr/local/bin/` symlinks. + +### devtoolset-7 on centos-7 + +The centos-7 base ships GCC 4.8, which is too old for C++17 code +(appsec extension). On centos-7 images, activate GCC 7 first: + +```bash +source /opt/rh/devtoolset-7/enable +``` + +This is needed for `build-appsec.sh` on centos-7 but not on bookworm +(which has a modern GCC). + +### make vs make static + +`make` links Rust inline and produces a self-contained `ddtrace.so`. +`make static` splits the Rust library out into `.a` archives (used by +the package pipeline's two-phase build). For local testing, always use +`make` unless you specifically need the split build. 
+ +## Tracer Extension (ddtrace.so) + +### For test jobs (bookworm, debug build) + +Used before running tracer unit tests, .phpt tests, etc.: + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +git submodule update --init libdatadog +make -j$(nproc) all +make install +' +``` + +`make install` (not `make install_all`) suffices for `test_c` and +`test_opcache`. PHPUnit jobs need these additional steps after +`make all`: + +```bash +make install_all +composer update --no-interaction +make generate +``` + +See [tracer-unit-tests.md](tracer-unit-tests.md#phpunit-unit-tests) +for full PHPUnit run commands. + +### For system tests (centos-7, release-like build) + +Used when building packages for system-tests. Targets GLIBC 2.17 for +maximum compatibility: + +```bash +.claude/ci/dockerh --cache systest-82 --php 8.2 \ + datadog/dd-trace-ci:php-8.2_centos-7 -- \ + bash -c 'export CARGO_HOME=$PWD/tmp/cargo_home; make -j$(nproc)' +``` + +First build: ~20 min (Rust sidecar). Incremental (C-only): ~1 min. + +For `-O0` debugging (fewer `` in gdb): + +```bash +CFLAGS="-std=gnu11 -O0 -g" make -j$(nproc) +``` + +### Via CI compile script (exact CI reproduction) + +Reproduces the `compile extension: debug` CI job exactly: + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --root \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -e SHARED=1 \ + -- bash .gitlab/compile_extension.sh +``` + +See [compile-artifacts.md](compile-artifacts.md) for all CI compile +job variants (ASAN, ZTS, package pipeline, etc.). + +### ASAN build + +Use a **separate cache** from the normal debug build. `COMPILE_ASAN=1` +enables `-fsanitize=address` in the Rust sidecar. 
+ +```bash +.claude/ci/dockerh --cache tracer-8.3-asan --overlayfs \ + --php debug-zts-asan \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +export COMPILE_ASAN=1 +make -j$(nproc) all +' +``` + +## Appsec Extension + +### For release / system tests (centos-7) + +Needs `devtoolset-7` for C++17 support: + +```bash +.claude/ci/dockerh --cache compile-appsec-8.3-gnu --overlayfs --root \ + datadog/dd-trace-ci:php-8.3_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'PHP_VERSION=8.3 bash .gitlab/build-appsec.sh' +``` + +(`build-appsec.sh` sources `devtoolset-7` internally on centos-7.) + +### For native tests (bookworm, with test targets) + +Uses cmake directly with test flags. This is a **different build** +from the CI release build above (builds test targets, uses libc++): + +```bash +mkdir -p appsec/build && cd appsec/build +cmake .. -DCMAKE_BUILD_TYPE=Debug -DDD_APPSEC_BUILD_HELPER=OFF \ + -DCMAKE_CXX_FLAGS="-stdlib=libc++" \ + -DCMAKE_CXX_LINK_FLAGS="-stdlib=libc++" \ + -DDD_APPSEC_TESTING=ON +make -j$(nproc) xtest +``` + +For ASAN, add `-DENABLE_ASAN=ON` to cmake. See +[appsec-native-tests.md](appsec-native-tests.md) for full details. + +## Appsec Helpers + +The tarball needs two helper binaries in `appsec_$(uname -m)/`: +`libddappsec-helper.so` (C++) and `libddappsec-helper-rust.so` +(Rust), plus `appsec/recommended.json`. + +### Rust helper + +Image is on Docker Hub. Output: `appsec_$(uname -m)/libddappsec-helper-rust.so`. 
+ +```bash +git submodule update --init --recursive \ + appsec/third_party/libddwaf-rust + +.claude/ci/dockerh --cache compile-appsec-helper-rust --overlayfs \ + datadog/dd-appsec-php-ci:nginx-fpm-php-8.5-release-musl \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash .gitlab/build-appsec-helper-rust.sh +``` + +### C++ helper + +The CI image (`nginx_musl_toolchain`) lives at +`registry.ddbuild.io/images/mirror/b1o7r7e0/nginx_musl_toolchain` +(not on Docker Hub). Transfer it via +`docker save ... | ssh HOST docker load` if needed. Output: +`appsec_$(uname -m)/libddappsec-helper.so` + `recommended.json`. + +```bash +git submodule update --init \ + appsec/third_party/libddwaf \ + appsec/third_party/msgpack-c \ + appsec/third_party/cpp-base64 + +.claude/ci/dockerh --cache compile-appsec-helper-cpp --overlayfs \ + registry.ddbuild.io/images/mirror/b1o7r7e0/nginx_musl_toolchain \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash .gitlab/build-appsec-helper.sh +``` + +## Profiler Extension + +### For correctness tests (bookworm) + +`CARGO_TARGET_DIR` **must** be set explicitly (see +[github-actions-profiler.md](github-actions-profiler.md) for why): + +```bash +dockerh --cache profiler-8.3-nts --php nts \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +export CARGO_TARGET_DIR=/project/dd-trace-php/target +cd profiling && cargo rustc --features=trigger_time_sample \ + --profile profiler-release --crate-type=cdylib +' +``` + +### For release / packaging / system tests (centos-7) + +Bookworm is too recent for binary compatibility purposes. + +`build-profiler.sh` takes two arguments: the output directory prefix +and the thread safety mode (`nts` or `zts`). It calls `switch-php` +internally, so use `--root` (not `--php`). 
The output prefix must +match the directory layout expected by `generate-final-artifact.sh`: +`datadog-profiling/{triplet}/lib/php/{PHP_API}/`. + +Build one PHP version at a time (each centos-7 image ships one +version). For a single version (e.g. 8.2, ABI `20220829`): + +```bash +.claude/ci/dockerh --cache compile-profiler-8.2-gnu --overlayfs \ + --root \ + datadog/dd-trace-ci:php-8.2_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'PHP_VERSION=8.2 bash .gitlab/build-profiler.sh \ + datadog-profiling/x86_64-unknown-linux-gnu/lib/php/20220829 nts' +``` + +## Sidecar (Rust) + +```bash +.claude/ci/dockerh --cache compile-sidecar-gnu --overlayfs \ + datadog/dd-trace-ci:php-8.1_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -e CARGO_HOME=/project/dd-trace-php/.cache/cargo \ + -- bash -c 'HOST_OS=linux-gnu bash .gitlab/build-sidecar.sh' +``` + +## SSI Loader + +```bash +# linux-gnu +.claude/ci/dockerh --cache compile-loader-gnu --overlayfs \ + datadog/dd-trace-ci:php-8.3_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'HOST_OS=linux-gnu bash .gitlab/build-loader.sh' + +# linux-musl (requires --root for apk add) +.claude/ci/dockerh --cache compile-loader-musl --overlayfs --root \ + datadog/dd-trace-ci:php-compile-extension-alpine-8.3 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'HOST_OS=linux-musl bash .gitlab/build-loader.sh' +``` + +## Release Package Assembly + +`generate-final-artifact.sh` assembles a release tarball from +compiled artifacts. It takes three arguments: + +``` +generate-final-artifact.sh VERSION OUTPUT_DIR PROJECT_ROOT +``` + +- `VERSION` — version string (from the `VERSION` file) +- `OUTPUT_DIR` — where to write the tarball (e.g. 
`build/packages`) +- `PROJECT_ROOT` — repo root (for PHP stub files in `src/`, `ext/`) + +Set `TRIPLET` to limit assembly to one platform (e.g. +`x86_64-unknown-linux-gnu`). Without it, the script tries all +platforms and fails if artifacts are missing. + +**Prerequisites:** the script expects these directories to contain +compiled `.so` files: +- `extensions_$(uname -m)/` — ddtrace extensions + (`ddtrace-{API}[-zts|-debug|-debug-zts].so`) +- `appsec_$(uname -m)/` — appsec extensions (`ddappsec-{API}[-zts].so`) + + helpers (`libddappsec-helper.so` and `libddappsec-helper-rust.so`) + + `recommended.json` +- `datadog-profiling/{triplet}/lib/php/{API}/` — profiler + extensions + +Missing files cause hard `cp` failures. This means that we need to build (or +download from CI) all these individual artifacts. This is rarely desirable when +testing locally. See the section "Slim package with debug binaries" for a more +practical alternative when locally producing artifacts from some jobs, like +system tests. + +**Naming conventions differ by platform.** GNU/glibc extensions use +bare names (`ddtrace-{API}.so`, `ddtrace-{API}-zts.so`) plus +`-debug` and `-debug-zts` variants (4 total). Alpine/musl extensions +use the `-alpine` suffix (`ddtrace-{API}-alpine.so`, +`ddtrace-{API}-alpine-zts.so`) and have **no `-debug` variants** +(2 total). Appsec follows the same pattern (no `-debug` for either +platform). + +The script only needs basic shell tools (`cp`, `tar`, `mkdir`). +The `php_fpm_packaging` image is used in CI because the same job +also runs nfpm for .deb/.rpm/.apk, but any image with bash works +for tarball assembly alone. + +### Build datadog-setup.php + +```bash +docker run --rm -v "$(pwd)":/work -w /work php:8.2-cli \ + bash -c 'make build/packages/datadog-setup.php VERSION=$(cat VERSION)' +``` + +### Assemble the tarball (glibc amd64) + +The `php_fpm_packaging` image has entrypoint `["bash"]`, so pass +`-c '...'` directly (not `bash -c '....'`). 
+ +```bash +.claude/ci/dockerh --cache pkg-amd64-gnu --overlayfs \ + datadog/dd-trace-ci:php_fpm_packaging -- -c ' +set -e +TRIPLET=x86_64-unknown-linux-gnu \ + ./tooling/bin/generate-final-artifact.sh \ + $(-x86_64-linux-gnu.tar.gz` +- `datadog-setup.php` + +To also build `.deb`/`.rpm` packages (full CI equivalent), add the +fpm targets before the tarball assembly in the same dockerh session: + +```bash +.claude/ci/dockerh --cache pkg-amd64-gnu --overlayfs \ + datadog/dd-trace-ci:php_fpm_packaging -- -c ' +set -e +make -j 4 .rpm.x86_64 .deb.x86_64 .tar.gz.x86_64 +TRIPLET=x86_64-unknown-linux-gnu \ + ./tooling/bin/generate-final-artifact.sh \ + $( str: + result = subprocess.run(["git", "rev-parse", ref], capture_output=True, text=True) + if result.returncode != 0: + print(f"Error: git rev-parse {ref} failed: {result.stderr.strip()}", file=sys.stderr) + sys.exit(1) + return result.stdout.strip() + + +async def api_get(session: aiohttp.ClientSession, path: str, params: dict | None = None) -> tuple[int, any]: + """Make a GET request. Returns (status_code, json_or_text).""" + url = f"{API_BASE}{path}" + try: + async with session.get(url, params=params) as resp: + if resp.status in (401, 403): + text = await resp.text() + print(f"Error: authentication failed ({resp.status}): {text}", file=sys.stderr) + sys.exit(1) + if resp.content_type and "json" in resp.content_type: + return resp.status, await resp.json() + return resp.status, await resp.text() + except aiohttp.ClientError as e: + return 0, str(e) + + +async def api_get_json(session: aiohttp.ClientSession, path: str, params: dict | None = None) -> any: + """GET request expecting JSON. 
Returns None on network error.""" + status, data = await api_get(session, path, params) + if status == 0: + print(f"Warning: network error fetching {path}: {data}", file=sys.stderr) + return None + if status >= 400: + print(f"Warning: HTTP {status} fetching {path}", file=sys.stderr) + return None + return data + + +async def api_get_text(session: aiohttp.ClientSession, path: str) -> str | None: + """GET request expecting text. Returns None on error.""" + status, data = await api_get(session, path) + if status == 0 or status >= 400: + return None + return data + + +async def paginated_get(session: aiohttp.ClientSession, path: str, params: dict | None = None) -> list: + """Fetch all pages of a paginated endpoint.""" + all_items = [] + p = dict(params or {}) + p["per_page"] = 100 + page = 1 + while True: + p["page"] = page + data = await api_get_json(session, path, p) + if data is None or not isinstance(data, list) or len(data) == 0: + break + all_items.extend(data) + if len(data) < 100: + break + page += 1 + return all_items + + +async def discover_pipeline(session: aiohttp.ClientSession, sha: str, timeout: int) -> int: + """Poll GitLab until a pipeline is found for the given SHA.""" + deadline = time.monotonic() + timeout + interval = 5 + while True: + status, data = await api_get(session, f"/projects/{PROJECT_ID}/pipelines", {"sha": sha, "per_page": 20}) + if status == 0: + print(f"Error: network error during pipeline discovery: {data}", file=sys.stderr) + sys.exit(1) + if status in (401, 403): + # already handled in api_get + sys.exit(1) + if status >= 400: + print(f"Error: HTTP {status} during pipeline discovery", file=sys.stderr) + sys.exit(1) + if isinstance(data, list) and len(data) > 0: + # Pick the most recently updated pipeline + best = max(data, key=lambda p: p.get("updated_at", "")) + print(f"Found pipeline {best['id']} (status: {best.get('status', 'unknown')})") + return best["id"] + if time.monotonic() >= deadline: + print(f"Error: no pipeline found for 
SHA {sha} within {timeout}s", file=sys.stderr) + sys.exit(1) + remaining = deadline - time.monotonic() + wait = min(interval, remaining) + if wait > 0: + print(f"No pipeline found yet for {sha[:12]}... retrying in {int(wait)}s") + await asyncio.sleep(wait) + + +async def get_child_pipelines(session: aiohttp.ClientSession, pipeline_id: int, depth: int = 0) -> list[int]: + """Recursively find all descendant pipeline IDs within this project (up to 3 levels).""" + if depth >= 3: + return [] + bridges = await paginated_get(session, f"/projects/{PROJECT_ID}/pipelines/{pipeline_id}/bridges") + child_ids = [] + for bridge in bridges: + downstream = bridge.get("downstream_pipeline") + if not downstream or not downstream.get("id"): + continue + if downstream.get("project_id") != PROJECT_ID: + continue # cross-project trigger — different project, skip + cid = downstream["id"] + child_ids.append(cid) + grandchildren = await get_child_pipelines(session, cid, depth + 1) + child_ids.extend(grandchildren) + return child_ids + + +async def get_all_pipeline_ids(session: aiohttp.ClientSession, root_id: int) -> list[int]: + """Get root + all descendant pipeline IDs.""" + children = await get_child_pipelines(session, root_id) + return [root_id] + children + + +async def fetch_all_jobs(session: aiohttp.ClientSession, pipeline_ids: list[int]) -> list[dict]: + """Fetch jobs for all pipelines in parallel.""" + tasks = [paginated_get(session, f"/projects/{PROJECT_ID}/pipelines/{pid}/jobs") for pid in pipeline_ids] + results = await asyncio.gather(*tasks) + all_jobs = [] + for job_list in results: + if job_list: + all_jobs.extend(job_list) + return all_jobs + + +async def fetch_pipeline_statuses(session: aiohttp.ClientSession, pipeline_ids: list[int]) -> dict[int, str]: + """Fetch status of each pipeline in parallel.""" + async def fetch_one(pid): + data = await api_get_json(session, f"/projects/{PROJECT_ID}/pipelines/{pid}") + if data and isinstance(data, dict): + return pid, 
data.get("status", "unknown") + return pid, "unknown" + + results = await asyncio.gather(*[fetch_one(pid) for pid in pipeline_ids]) + return dict(results) + + +def compute_duration(job: dict) -> int | None: + """Compute duration in seconds from created_at to finished_at.""" + created = job.get("created_at") + finished = job.get("finished_at") + if not created or not finished: + return None + try: + t_created = datetime.fromisoformat(created.replace("Z", "+00:00")) + t_finished = datetime.fromisoformat(finished.replace("Z", "+00:00")) + return round((t_finished - t_created).total_seconds()) + except (ValueError, TypeError): + return None + + +async def download_job_log(session: aiohttp.ClientSession, job_id: int, log_dir: Path): + """Download a job's trace log to a file.""" + log_path = log_dir / f"{job_id}.log" + text = await api_get_text(session, f"/projects/{PROJECT_ID}/jobs/{job_id}/trace") + if text is None: + msg = f"Failed to download log for job {job_id}" + print(f"Warning: {msg}", file=sys.stderr) + log_path.write_text(msg) + else: + log_path.write_text(text) + + +async def list_jobs(session: aiohttp.ClientSession, root_id: int): + """List all jobs grouped by pipeline, then exit.""" + pipeline_ids = await get_all_pipeline_ids(session, root_id) + pipeline_statuses = await fetch_pipeline_statuses(session, pipeline_ids) + + # Fetch jobs for all pipelines individually so we can group them + jobs_by_pipeline: dict[int, list[dict]] = {} + tasks = [] + for pid in pipeline_ids: + tasks.append(paginated_get(session, f"/projects/{PROJECT_ID}/pipelines/{pid}/jobs")) + results = await asyncio.gather(*tasks) + for pid, job_list in zip(pipeline_ids, results): + jobs_by_pipeline[pid] = sorted(job_list or [], key=lambda j: j.get("name", "")) + + for pid in pipeline_ids: + status = pipeline_statuses.get(pid, "unknown") + jobs = jobs_by_pipeline.get(pid, []) + print(f"\nPipeline {pid} (status: {status}):") + for job in jobs: + job_status = job.get("status", "unknown") + 
job_name = job.get("name", "") + print(f" {job_status:<10}{job_name}") + + +async def run(args): + token = os.environ.get("GITLAB_PERSONAL_ACCESS_TOKEN", "").strip() + if not token: + print("Error: GITLAB_PERSONAL_ACCESS_TOKEN environment variable is not set or empty", file=sys.stderr) + sys.exit(1) + + connector = aiohttp.TCPConnector(limit=20) + headers = {"PRIVATE-TOKEN": token} + async with aiohttp.ClientSession(headers=headers, connector=connector) as session: + # Determine root pipeline ID + if args.pipeline is not None: + root_id = args.pipeline + print(f"Using pipeline {root_id}") + else: + ref = args.commit if args.commit else "HEAD" + sha = resolve_sha(ref) + print(f"Resolved {ref} to {sha}") + root_id = await discover_pipeline(session, sha, args.discovery_timeout) + + if args.list_jobs: + await list_jobs(session, root_id) + return + + # Set up working directory + wdir = Path(f"/tmp/gitlab_{root_id}") + wdir.mkdir(parents=True, exist_ok=True) + fail_log_dir = wdir / "fail_logs" + fail_log_dir.mkdir(exist_ok=True) + print(f"Working directory: {wdir}") + + seen_success: set[int] = set() + seen_failure: set[int] = set() + failure_count = 0 + deadline = time.monotonic() + args.timeout + + while True: + # Discover all pipeline IDs + pipeline_ids = await get_all_pipeline_ids(session, root_id) + + # Fetch all jobs in parallel + all_jobs = await fetch_all_jobs(session, pipeline_ids) + + # Process new successes + success_path = wdir / "success.txt" + for job in all_jobs: + jid = job["id"] + if job.get("status") == "success" and jid not in seen_success: + with open(success_path, "a") as f: + f.write(f"{jid}\t{job.get('name', '')}\n") + seen_success.add(jid) + + # Process new failures + failure_path = wdir / "failure.txt" + new_failure_jobs = [] + for job in all_jobs: + jid = job["id"] + if job.get("status") == "failed" and jid not in seen_failure: + new_failure_jobs.append(job) + + # Download logs in parallel for new failures + if new_failure_jobs: + await 
asyncio.gather(*[download_job_log(session, job["id"], fail_log_dir) for job in new_failure_jobs]) + + for job in new_failure_jobs: + jid = job["id"] + name = job.get("name", "") + with open(failure_path, "a") as f: + f.write(f"{jid}\t{name}\n") + duration = compute_duration(job) + if duration is not None: + print(f'FAILED: ({jid}) "{name}" in {duration} secs') + else: + print(f'FAILED: ({jid}) "{name}"') + seen_failure.add(jid) + failure_count += 1 + + # Check failure threshold + if failure_count >= args.max_failures: + print(f"Stopping script after maximum number of failures ({failure_count}) was encountered") + sys.exit(1) + + # Status line + now = datetime.now().strftime("%H:%M:%S") + total_jobs = len(all_jobs) + running = sum(1 for j in all_jobs if j.get("status") in ACTIVE_STATUSES) + passed = sum(1 for j in all_jobs if j.get("status") == "success") + failed = sum(1 for j in all_jobs if j.get("status") == "failed") + print(f"[{now}] pipelines={len(pipeline_ids)} jobs={total_jobs} running={running} passed={passed} failed={failed}") + + # Check if all pipelines are done + pipeline_statuses = await fetch_pipeline_statuses(session, pipeline_ids) + all_pipelines_done = all(s in DONE_STATUSES for s in pipeline_statuses.values()) + no_active_jobs = all(j.get("status") not in ACTIVE_STATUSES for j in all_jobs) + + if all_pipelines_done and no_active_jobs: + if failure_count == 0: + print("All pipelines completed successfully.") + sys.exit(0) + else: + print(f"All pipelines completed with {failure_count} failure(s).") + sys.exit(1) + + if time.monotonic() >= deadline: + print(f"Timeout after {args.timeout}s — pipelines still running.", file=sys.stderr) + sys.exit(2) + + await asyncio.sleep(args.poll_interval) + + +def main(): + args = parse_args() + asyncio.run(run(args)) + + +if __name__ == "__main__": + main() diff --git a/.claude/ci/ci-watch b/.claude/ci/ci-watch new file mode 100755 index 00000000000..3873e459f88 --- /dev/null +++ b/.claude/ci/ci-watch @@ -0,0 
+1,139 @@ +#!/usr/bin/env -S uv run --script +"""Watch a check-ci output file and exit when actionable. + +Usage: ci-watch OUTPUT_FILE [--start-offset N] [--stale-timeout SECS] [--poll-interval SECS] + +Exits when: + - FAILED: line(s) detected (exit 1) + - All pipelines completed (exit 0) + - check-ci timed out (exit 3) + - No new output for N secs (stale) (exit 2) + +Always prints "RESUME_OFFSET: " before exiting so the caller can +restart with --start-offset to skip already-processed content. + +Streams new lines as progress while waiting. +""" + +import argparse +import os +import re +import sys +import time + +FAILURE_PATTERN = re.compile(r"^FAILED:", re.MULTILINE) +SUCCESS_PATTERN = re.compile( + r"^(All pipelines completed|Stopping script after maximum)", + re.MULTILINE, +) +TIMEOUT_PATTERN = re.compile(r"^Timeout after \d+s", re.MULTILINE) + + +def parse_args(): + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("output_file", help="Path to check-ci output file") + p.add_argument( + "--start-offset", + type=int, + default=0, + help="Start reading from this byte offset (default: 0)", + ) + p.add_argument( + "--stale-timeout", + type=int, + default=300, + help="Exit after this many seconds without new output (default: 300)", + ) + p.add_argument( + "--poll-interval", + type=int, + default=5, + help="Seconds between polls (default: 5)", + ) + return p.parse_args() + + +def read_new_content(path, offset): + """Read bytes from offset to EOF. 
Returns (new_text, new_offset).""" + try: + size = os.path.getsize(path) + except OSError: + return "", offset + if size <= offset: + return "", offset + with open(path, "r", errors="replace") as f: + f.seek(offset) + text = f.read() + return text, size + + +def find_matching_lines(text, pattern): + return [line for line in text.splitlines() if pattern.search(line)] + + +def watch(output_file, start_offset, stale_timeout, poll_interval): + offset = start_offset + last_activity = time.monotonic() + full_text = "" + + while True: + new_content, offset = read_new_content(output_file, offset) + + if new_content: + last_activity = time.monotonic() + sys.stdout.write(new_content) + if not new_content.endswith("\n"): + sys.stdout.write("\n") + sys.stdout.flush() + full_text += new_content + + failures = find_matching_lines(full_text, FAILURE_PATTERN) + if failures: + print("\n=== FAILURES DETECTED ===") + print("\n".join(failures)) + completions = ( + find_matching_lines(full_text, SUCCESS_PATTERN) + + find_matching_lines(full_text, TIMEOUT_PATTERN) + ) + if completions: + print("=== FINAL STATUS ===") + print("\n".join(completions)) + else: + print("=== check-ci still running ===") + print(f"RESUME_OFFSET: {offset}") + return 1 + + timeouts = find_matching_lines(full_text, TIMEOUT_PATTERN) + if timeouts: + print("\n=== FINAL STATUS (check-ci timed out) ===") + print("\n".join(timeouts)) + print(f"RESUME_OFFSET: {offset}") + return 3 + + completions = find_matching_lines(full_text, SUCCESS_PATTERN) + if completions: + print("\n=== FINAL STATUS ===") + print("\n".join(completions)) + print(f"RESUME_OFFSET: {offset}") + return 0 + + elapsed = time.monotonic() - last_activity + if elapsed >= stale_timeout: + print(f"\n=== STALE: no new output for {stale_timeout}s ===") + tail = full_text.rstrip("\n").splitlines()[-5:] + if tail: + print("Last lines:") + print("\n".join(tail)) + print(f"RESUME_OFFSET: {offset}") + return 2 + + time.sleep(poll_interval) + + +def main(): + args 
= parse_args() + sys.exit(watch(args.output_file, args.start_offset, args.stale_timeout, args.poll_interval)) + + +if __name__ == "__main__": + main() diff --git a/.claude/ci/compile-artifacts.md b/.claude/ci/compile-artifacts.md new file mode 100644 index 00000000000..e39b9fb7d9b --- /dev/null +++ b/.claude/ci/compile-artifacts.md @@ -0,0 +1,426 @@ +# Compile / Build Artifact Jobs + +These jobs produce the compiled `.so`, `.a`, `.dll`, and sidecar +binaries consumed by test jobs (Groups B, C, E, F) and packaging +jobs (Group I). They run in the `compile` stage (tracer pipeline) +and the `prepare` / `profiler` / `appsec` / `tracing` stages +(package pipeline). + +## Build Conventions + +Most build scripts post-process output `.so` files with +`objcopy --compress-debug-sections` (exceptions: `compile_extension.sh`, +`build-appsec-helper-rust.sh`, and `build-loader.sh`). Debug symbols are retained but compressed. +If you need to run this step outside a build script and your host lacks `binutils`: + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- \ + objcopy --compress-debug-sections /project/dd-trace-php/tmp/build_extension/modules/ddtrace.so +``` + +## CI Jobs + +**Source:** +- `.gitlab/generate-tracer.php` -- generates the tracer-trigger child pipeline; + defines `compile extension: debug` and `compile extension: debug-zts-asan` +- `.gitlab/generate-package.php` -- generates the package-trigger child pipeline; + defines all other compile/link/aggregate jobs listed below +- `.gitlab/generate-common.php` -- shared PHP-version and arch matrices +- `.gitlab/compile_extension.sh` -- build script for tracer-pipeline `compile extension` jobs +- `compile_rust.sh` -- shared Rust build wrapper invoked by `compile_extension.sh` + and `build-sidecar.sh`; sets `RUSTFLAGS`, `RUSTC_BOOTSTRAP=1`, and `SIDECAR_VERSION` +- `.gitlab/build-tracing.sh` -- builds NTS + ZTS `.a` (static archives) for the package pipeline +- 
`.gitlab/build-sidecar.sh` -- builds `libddtrace_php.{a,so}` (Rust sidecar) +- `.gitlab/link-tracing-extension.sh` -- links `.a` archives with the sidecar into final `.so` files +- `.gitlab/build-appsec.sh` -- builds `ddappsec-{ABI}.so` (NTS + ZTS) +- `.gitlab/build-appsec-helper.sh` -- builds `libddappsec-helper.so` (C++ helper, musl toolchain) +- `.gitlab/build-appsec-helper-rust.sh` -- builds `libddappsec-helper-rust.so` (Rust helper, musl nightly) +- `.gitlab/build-loader.sh` -- builds `dd_library_loader.so` (SSI loader) +- `.gitlab/build-profiler.sh` -- builds profiler extension (NTS + ZTS) + +### Tracer pipeline (generate-tracer.php) + +| CI Job | Image | What it does | +|--------|-------|--------------| +| `compile extension: debug` | `dd-trace-ci:php-{ver}_bookworm-6` | Runs `append-build-id.sh` to stamp VERSION; compiles Rust (`compile_rust.sh`, debug profile) and C (`make -j static`) in parallel; `make static` also builds `php_sidecar_mockgen` (a secondary Rust build generating `mock_php.c` stubs); rewrites ldflags via `sed -i`; links `ddtrace.a` + `libddtrace_php.a` → `ddtrace.so` with `-soname ddtrace.so`. Sets `SHARED=1` (adds `--cfg php_shared_build` to `RUSTFLAGS`). | +| `compile extension: debug-zts-asan` | `dd-trace-ci:php-{ver}_bookworm-6` | Same as `compile extension: debug` (inherits `SHARED=1` via `extends:`) but with `WITH_ASAN=1` (sets `ASAN=1`+`COMPILE_ASAN=1`) and `SWITCH_PHP_VERSION=debug-zts-asan`; produces `ddtrace.so` instrumented with AddressSanitizer for ASAN test jobs | +| `Prepare code` | `php:8.2-cli` | Runs `composer update` + `make generate` to produce `src/bridge/_generated_*.php` | + +Runner: `arch:{amd64,arm64}` +Matrix (`compile extension: debug`): PHP 7.0+ x {amd64, arm64} +Matrix (`compile extension: debug-zts-asan`): PHP 7.4+ x {amd64, arm64} + +**Note on `Prepare code` vs `prepare code`:** These are two distinct jobs. 
The tracer +pipeline `Prepare code` uses `php:8.2-cli` (which has no Composer), installs Composer +from scratch, runs `composer update` + `make generate`, and lives in the `compile` +stage. The package pipeline `prepare code` uses `composer:2`, runs +`append-build-id.sh` first (bumping VERSION to `{major}.{minor+1}.0+{CI_COMMIT_SHA}` on +non-release branches; for pre-release versions like `1.2.3-beta1` it strips the +suffix to produce `1.2.3+{CI_COMMIT_SHA}` instead; no-op on tags and `ddtrace-` +release branches), then `composer self-update` + `composer update` ++ `make generate`, and lives in the `prepare` stage. `make generate` produces three +files via `classpreloader`: `_generated_api.php`, `_generated_tracer.php`, and +`_generated_opentelemetry.php`. + +### Package pipeline (generate-package.php) + +| CI Job | Image | What it does | +|--------|-------|--------------| +| `prepare code` | `composer:2` | `.gitlab/append-build-id.sh` (bumps VERSION first) + `composer self-update` + `composer update` + `make generate`; produces VERSION + generated bridge files | +| `cache cargo deps: [{arch}, {triplet}]` | `dd-trace-ci:php-8.1_{platform}` (alpine uses `php-compile-extension-alpine-8.1`) | `cargo fetch` to warm the Cargo cache for the given target triplet | +| `compile tracing extension: [{ver}, {arch}, {triplet}]` | `dd-trace-ci:php-{ver}_{platform}` | Builds NTS + debug + ZTS static archives (`.a`) and standalone `.so` via `build-tracing.sh` (debug skipped on alpine); outputs `ddtrace-{PHP_API}{suffix}[-debug\|-zts].{a,so}` under `extensions_{arch}/` and `standalone_{arch}/` | +| `compile tracing sidecar: [{arch}, {triplet}]` | `dd-trace-ci:php-8.1_{platform}` | Builds `libddtrace_php.{a,so}` (FFI bridge library; `ddtrace-php` crate in `components-rs/`) via `build-sidecar.sh` → `compile_rust.sh` → `cargo build`; profile `tracer-release` (LTO, 1 codegen unit, panic=abort); `RUSTFLAGS=--cfg tokio_unstable --cfg php_shared_build`; `SIDECAR_VERSION` embedded from 
`VERSION` file | +| `link tracing extension: [{arch}, {triplet}]` | `dd-trace-ci:php-8.1_{platform}` | Rewrites `-export-symbols` → `-Wl,--retain-symbols-file` in the `.ldflags` file via `sed -i`; links each per-version `.a` in `extensions_$(uname -m)/` against `libddtrace_php_$(uname -m)${suffix}.a` with `-whole-archive` and the rewritten ldflags, setting `-soname ddtrace.so`; all links run in parallel background processes; post-processes each `.so` with `objcopy --compress-debug-sections` | +| `aggregate tracing extension: [{arch}]` | `dd-trace-ci:php-7.4_bookworm-6` | No-op `ls` that aggregates artifacts from all `compile tracing extension` jobs for one arch into a single artifact set | +| `compile tracing extension asan: [{ver}, {arch}, {triplet}]` | `dd-trace-ci:php-{ver}_bookworm-6` | Switches to `debug-zts-asan` PHP; builds `ddtrace.so` directly with `RUST_DEBUG_BUILD=1` (Rust debug profile, no `.a` intermediate); copies to `extensions_$(uname -m)/ddtrace-${ABI_NO}-debug-zts.so`; post-processes with `objcopy --compress-debug-sections` | +| `compile appsec extension: [{ver}, {arch}, {triplet}]` | `dd-trace-ci:php-{ver}_{platform}` | Builds NTS and ZTS appsec extensions sequentially via cmake+make in `appsec/build/` and `appsec/build-zts/`; cmake flags: `-DCMAKE_BUILD_TYPE=RelWithDebInfo -DDD_APPSEC_BUILD_HELPER=OFF -DDD_APPSEC_TESTING=OFF -DDD_APPSEC_EXTENSION_STATIC_LIBSTDCXX=ON`; outputs `appsec_$(uname -m)/ddappsec-$PHP_API${suffix}[-zts].so`; post-processes with `objcopy --compress-debug-sections` | +| `compile appsec helper` | `registry.ddbuild.io/images/mirror/b1o7r7e0/nginx_musl_toolchain` (original gone) | Builds `libddappsec-helper.so` via cmake+make with musl toolchain (`-DCMAKE_TOOLCHAIN_FILE=/sysroot/$(arch)-none-linux-musl/Toolchain.cmake`); `DD_APPSEC_ENABLE_PATCHELF_LIBC=ON` strips musl libc dependency via patchelf; runs gtest suite (`make ddappsec_helper_test && ./tests/helper/ddappsec_helper_test`); copies `recommended.json` to 
`appsec_$(uname -m)/` | +| `compile appsec helper rust` | `dd-appsec-php-ci:nginx-fpm-php-8.5-release-musl` | Builds `libddappsec-helper-rust.so` via `cargo +nightly-$RUST_TARGET` with `--release -Zhost-config -Ztarget-applies-to-host --target $(uname -m)-unknown-linux-musl`; removes musl libc dep with `patchelf --remove-needed`; runs `cargo +nightly-$RUST_TARGET test --release` after build; output in `appsec_$(uname -m)/` | +| `compile profiler extension: [{ver}, {arch}, {triplet}]` | `dd-trace-ci:php-{ver}_{platform}` | Builds NTS and ZTS profiler extensions via `cargo build --profile profiler-release` in `profiling/`; for ZTS, `touch build.rs` forces the build script to re-run after `switch-php` to pick up ZTS headers; outputs `datadog-profiling[-zts].so` under a prefix dir; on alpine+aarch64 symlinks clang17 over clang20 to work around a bindgen incompatibility | +| `compile loader: [{host_os}, {arch}]` | `dd-trace-ci:php-8.3_{platform}` (alpine: `php-compile-extension-alpine-8.3`) | Builds `dd_library_loader-$(uname -m)-${HOST_OS}.so` (SSI loader) via `phpize`+`configure`+`make` in `loader/`; on musl installs build deps via `apk add`; embeds `PHP_DD_LIBRARY_LOADER_VERSION` from `VERSION` file in CFLAGS | +| `compile extension windows: [{ver}]` | `dd-trace-ci:php-{ver}_windows` | Runs a long-lived container via `docker run -d` + `docker exec`; builds NTS then ZTS via `phpize.bat` + `configure.bat --enable-debug-pack` + `nmake`; reuses NTS Rust `target/` for ZTS by moving it; outputs `extensions_x86_64/php_ddtrace-${ABI_NO}[-zts].dll` and `.pdb` debug symbols | +| `pecl build` | `dd-trace-ci:php-7.4_bookworm-6` | Runs `tooling/bin/pecl-build` via `make build_pecl_package`; regenerates PHP bridge files via `composer -dtooling/generation`; mutates `package.xml` (version, date, file list) and `Cargo.toml` (strips profiling workspace member) in-place; produces `datadog_trace-*.tgz` via `pear package`; requires a clean tree to re-run | + +Runner: `arch:{amd64,arm64}` 
(Linux jobs) or `windows-v2:2019` (Windows) +Matrix (tracing/appsec extension): PHP 7.0+ x 4 build platforms (x86_64-alpine-linux-musl, aarch64-alpine-linux-musl, x86_64-unknown-linux-gnu, aarch64-unknown-linux-gnu) +Matrix (profiler extension): PHP 7.1+ x same 4 platforms +Matrix (ASAN tracing): PHP 7.4+ x {x86_64-unknown-linux-gnu, aarch64-unknown-linux-gnu} +Matrix (Windows): PHP 7.2+ + +## What It Builds + +The package pipeline compile stage has a two-phase structure for the tracing extension: + +1. **Phase 1 -- per-version compilation:** `compile tracing extension` produces a `.a` + static archive in `extensions_$(uname -m)/` and a standalone `.so` in + `standalone_$(uname -m)/` for each PHP version (per ABI). The `.a` is consumed by + the link phase; the standalone `.so` is consumed by `aggregate tracing extension` + (for `package loader`). This is PHP-version-specific because each PHP ABI requires + different headers. At the same time, `compile tracing sidecar` builds the Rust + sidecar library (one per platform, not per PHP version). + +2. **Phase 2 -- linking:** `link tracing extension` takes all the per-version `.a` archives + and links each one against the single sidecar `.a` to produce the final `.so` shared + objects. This is done in parallel (one process per archive). + +**Aggregation (sibling of linking):** `aggregate tracing extension` is a pass-through +job that collects the per-version `.a` archives, standalone `.so` files, and `.ldflags` +from all `compile tracing extension` jobs for one architecture into a single artifact +set. Its sole downstream consumer is `package loader`. Note that `link tracing +extension` and `aggregate tracing extension` are siblings -- both depend on +`compile tracing extension` -- not sequential phases. + +The `compile extension: debug` jobs in the **tracer pipeline** are simpler: they compile +Rust + C in parallel and produce a single `ddtrace.so` per PHP version. 
These are used +by the test jobs, not by the packaging pipeline. + +## Build Platforms + +| Triplet | Arch | Host OS | Package targets | +|---------|------|---------|-----------------| +| `x86_64-alpine-linux-musl` | amd64 | linux-musl | `.apk.x86_64` | +| `aarch64-alpine-linux-musl` | arm64 | linux-musl | `.apk.aarch64` | +| `x86_64-unknown-linux-gnu` | amd64 | linux-gnu | `.rpm.x86_64`, `.deb.x86_64`, `.tar.gz.x86_64` | +| `aarch64-unknown-linux-gnu` | arm64 | linux-gnu | `.rpm.arm64`, `.deb.arm64`, `.tar.gz.aarch64` | +| `x86_64-pc-windows-msvc` | amd64 | windows-msvc | `dbgsym.tar.gz` | + +## Dependency Graph + +``` +prepare code cache cargo deps: [{arch}, {triplet}] + | | + | +-- compile tracing sidecar: [{arch}, {triplet}]* + | | | + | | +----. + | | | + | +-- compile profiler extension: [{ver}, {arch}, {triplet}]* + | + +-- compile tracing extension: [{ver}, {arch}, {triplet}] (prepare code only) + | | + | +-- aggregate tracing extension: [{arch}] + | +-- link tracing extension: [{arch}, {triplet}] <-- also needs compile tracing sidecar + | + +-- compile tracing extension asan: [{ver}, {arch}, {triplet}] + +-- compile appsec extension: [{ver}, {arch}, {triplet}] + +-- compile appsec helper + +-- compile appsec helper rust + +-- compile loader: [{host_os}, {arch}] + +-- compile extension windows: [{ver}] + +-- pecl build + +* also needs prepare code (not shown to keep the graph readable) +``` + +## Gotchas + +- The tracer pipeline's `compile extension: debug` and the package pipeline's `compile + tracing extension` are **different jobs** that produce differently-structured artifacts. + The tracer pipeline version produces a ready-to-load `ddtrace.so`; the package pipeline + version produces static `.a` archives that need a separate link step. + +- `link tracing extension` uses the `.ldflags` file generated during `compile tracing + extension` for the PHP 7.0 build specifically (`ddtrace_$(uname -m)${suffix}.ldflags`). 
+ The ldflags file contains the linker symbol-retention flags needed for all versions. + +- `aggregate tracing extension` does not actually compile or link anything -- its `script:` + is literally `ls ./`. Its sole purpose is to fan-in pre-link artifacts (`.a` archives, + standalone `.so` files, `.ldflags`) from all per-version `compile tracing extension` + jobs into a single artifact set for `package loader`. + +- `compile appsec helper rust` uses `cargo +nightly` with `-Zhost-config + -Ztarget-applies-to-host` to cross-compile for musl, then `patchelf --remove-needed` + to strip the musl libc dependency, making the binary work on both musl and glibc systems. + +- `compile appsec helper` (C++) runs its gtest suite as part of the build (`make + ddappsec_helper_test && ./tests/helper/ddappsec_helper_test`). A test failure will + fail the compile job. + +- `compile tracing sidecar` on alpine: the `-alpine` suffix variant force-installs + `bindgen-cli` via `cargo install --force --locked` before building, as a workaround + for `aws-lc-sys` build failures on musl targets. + +- The Cargo cache uses the default `pull-push` policy in `cache cargo deps` and + `policy: pull` (read-only) in `compile profiler extension` and `compile tracing + sidecar`. `compile tracing extension` has no `cache:` block at all (this is expected + since it runs `make static`, a pure C/PHP build; the Rust compilation is + handled by `compile tracing sidecar`). + +- Windows compile jobs use Docker on the Windows runner (not DinD): the script starts a + long-lived container with `docker run -d`, then drives it via `docker exec`. The + `GIT_STRATEGY: none` variable means the runner does not clone the repo -- instead the + job script manually clones via `git clone` + `git checkout`. + +- `ddtrace.sym` (repo root) is the export list for the final `ddtrace.so`. All symbols + not listed are hidden via `--retain-symbols-file` + `-fvisibility=hidden`. 
If you add + a new function that must be callable from appsec, profiler, or the SSI loader, add it + to `ddtrace.sym` or the linker will drop it. + +- `CARGO_TARGET_DIR` must not be set explicitly for `compile_rust.sh`. The default + (`target`) is resolved relative to the workspace root by Cargo. An explicit value + becomes CWD-relative; since `compile_rust.sh` `cd`s into `components-rs/`, this + silently breaks the build. + +- ASAN artifacts in the package pipeline have no "asan" in their filename: + `compile tracing extension asan` outputs `ddtrace-{ABI}-debug-zts.so`, which is + indistinguishable from a non-ASAN debug-zts build by filename alone. + +- Windows Cargo profile is `debug`: `config.w32` hardcodes + `ddtrace_cargo_profile = "debug"`. Unlike all Linux builds, the Windows `.dll` ships + with unoptimized Rust code. + +- Submodule requirements: `compile tracing sidecar` and + `compile extension: debug` need the `libdatadog` submodule; + `compile appsec helper rust` needs + `appsec/third_party/libddwaf-rust`. Local runs need + `git submodule update --init --recursive` before building. + +- **centos-7 vs bookworm images — do not mix them.** The package-pipeline + `compile tracing extension` jobs for `x86_64-unknown-linux-gnu` and + `aarch64-unknown-linux-gnu` use **centos-7** images (targeting GLIBC 2.17 + for maximum compatibility), not bookworm. Only the ASAN variant + (`compile tracing extension asan`) and the tracer-pipeline + `compile extension: debug` jobs use bookworm. Using the wrong image causes + the `switch-php` and `BASH_ENV` failures described below. + +- **`switch-php` variant naming differs between centos and bookworm.** On + centos-7 images, PHP variants under `/opt/php/` are version-prefixed: + `8.3`, `8.3-debug`, `8.3-zts`. On bookworm images, variants are bare names: + `nts`, `debug`, `zts`, `nts-asan`, `debug-zts-asan`. `build-tracing.sh` + calls `switch-php "${PHP_VERSION}"` (e.g. 
`switch-php 8.3`), which works on + centos but fails on bookworm. Conversely, `compile_extension.sh` uses + `switch-php debug` / `switch-php debug-zts-asan` (bookworm names). + +- **`CARGO_HOME=/rust/cargo/` is root-owned in CI images.** See + [building-locally.md](building-locally.md#cargo_home-is-root-owned-in-ci-images) + for the workaround. This affects `build-sidecar.sh` and any other + Rust build that does not use `--root`. + +- **Alpine images use a different naming convention.** Alpine/musl compile + images follow the pattern `php-compile-extension-alpine-{ver}` (e.g. + `php-compile-extension-alpine-8.3`), not the `php-{ver}_{os}-{N}` pattern + used by bookworm/centos images. + +- **`compile loader` on musl requires `--root`.** `build-loader.sh` runs + `apk add` to install build dependencies on Alpine, which needs root. This + only applies to the musl variant; the linux-gnu variant runs fine without + `--root`. + +- **`compile appsec extension` is pure C/C++ — no Rust/Cargo.** Unlike + tracing and profiler compile jobs, this build has no Cargo dependency and + no cache block. `DD_APPSEC_BUILD_HELPER=OFF` skips the heavy helper + dependencies (libddwaf, googletest, etc.); only the extension `.so` is + built. + +- **`compile appsec helper rust` sets `CARGO_TARGET_DIR` explicitly.** + Unlike `compile_rust.sh` (where `CARGO_TARGET_DIR` must NOT be set), + `build-appsec-helper-rust.sh` sets `CARGO_TARGET_DIR=/tmp/cargo-target` + intentionally. The `build.rs` also embeds `DDAPPSEC_VERSION` from the + `VERSION` file, which is why this job depends on `prepare code`. + +- **`compile extension: debug` Rust profile.** The "debug" in the job name + refers to the PHP debug build variant, not the Rust profile — but + coincidentally the Rust code also builds with the `debug` (dev) profile + (unoptimized). CI also sets `SHARED=1`, which adds `--cfg php_shared_build` + to `RUSTFLAGS`. 
The `debug-zts-asan` job inherits `SHARED=1` via + `extends:` — it is not visible in the job definition itself; do not omit + it when reproducing locally. + +- **`CI_COMMIT_BRANCH` on detached HEAD.** When running on a detached + HEAD (e.g., after `git checkout `), `git rev-parse --abbrev-ref HEAD` + returns the literal string `HEAD`. `append-build-id.sh` still works, but + the embedded version string will contain `HEAD` as the branch name. + +- **Silent final link step in `compile_extension.sh`.** The final `sed -i` + + `cc -shared` commands produce no output (no `set -x`). On a successful + build, the last visible log line is `compile_rust.sh`'s `Finished ...` + message. Verify success by checking the output exists: + ```bash + docker run --rm -v dd-ci-:/v alpine \ + ls -lh /v/upper/tmp/build_extension/modules/ddtrace.so + ``` + +- **`devtoolset-7` on centos-7.** The ancient CentOS 7 base ships GCC 4.8; + `build-tracing.sh` activates `devtoolset-7` (GCC 7) via `scl_source`. + This is specific to centos-7/glibc builds — bookworm has a modern GCC. + +- **`compile loader` is the simplest compile job.** Pure C (phpize + + configure + make), no Rust, no submodules, no `switch-php`. Takes seconds. + `HOST_OS` affects the output filename and controls whether + `apk add` installs build dependencies (musl only); `config.m4` + independently detects musl at compile time by checking whether + `ldd --version` output starts with `musl`. The build produces + `loader/modules/dd_library_loader.so`, then copies it to the project + root as `dd_library_loader-$(uname -m)-${HOST_OS}.so` (e.g., + `dd_library_loader-x86_64-linux-gnu.so`). + +## Local Reproduction + +For a quick-reference guide to building each artifact locally, see +[building-locally.md](building-locally.md). The commands below are +exact CI job equivalents with full environment variables. + +Use `.claude/ci/dockerh` (see `index.md`). 
Pass `CI_COMMIT_SHA` and +`CI_COMMIT_BRANCH` from the host so `append-build-id.sh` embeds the +correct version string. + +**Expected build times (first run, empty cache):** + +| Job | arm64 (Apple Silicon) | amd64 (Linux) | +|-----|-----------------------|---------------| +| compile extension: debug | ~2 min | ~2 min | +| compile extension: debug-zts-asan | ~2 min | ~2 min | +| compile tracing extension (per version) | — | ~1.5 min | +| compile tracing sidecar | — | ~3 min | +| compile appsec extension (per version) | ~2 min | ~2 min | +| compile appsec helper rust | ~3 min | ~3 min | +| compile profiler extension (per version) | — | ~2 min | +| compile loader | ~4 sec | ~4 sec | + +Subsequent runs with cached Rust artifacts: C-only changes rebuild +in ~10 s; Rust changes in ~30–60 s. + +Scripts that call `switch-php` internally (`compile_extension.sh`, +`build-tracing.sh`, `build-appsec.sh`, `build-profiler.sh`) need root to +modify `/usr/local/bin/` symlinks. Use `--root` for these — do **not** use +`--php` since the script already handles variant switching. Scripts that do +not call `switch-php` (`build-sidecar.sh`, `build-loader.sh`) run fine +without `--root` **on GNU/Linux images**. On Alpine (musl) images, +`build-loader.sh` requires `--root` because it runs `apk add` to install +build dependencies. 
+ +```bash +# compile extension: debug (tracer pipeline, PHP 8.3) +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --root \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -e SHARED=1 \ + -- bash .gitlab/compile_extension.sh + +# compile extension: debug-zts-asan (tracer pipeline, PHP 8.3) +.claude/ci/dockerh --cache tracer-8.3-debug-zts-asan --overlayfs --root \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -e WITH_ASAN=1 \ + -e SWITCH_PHP_VERSION=debug-zts-asan \ + -e SHARED=1 \ + -- bash .gitlab/compile_extension.sh + +# compile tracing extension (package pipeline, PHP 8.3, linux-gnu) +.claude/ci/dockerh --cache compile-tracing-8.3-gnu --overlayfs --root \ + datadog/dd-trace-ci:php-8.3_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'PHP_VERSION=8.3 bash .gitlab/build-tracing.sh' + +# compile tracing sidecar (linux-gnu) +# CARGO_HOME override needed — see building-locally.md +# HOST_OS is passed through to compile_rust.sh to select the Rust target triplet +# (linux-gnu vs linux-musl). Use linux-gnu for glibc, linux-musl for Alpine. 
+.claude/ci/dockerh --cache compile-sidecar-gnu --overlayfs \ + datadog/dd-trace-ci:php-8.1_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -e CARGO_HOME=/project/dd-trace-php/.cache/cargo \ + -- bash -c 'HOST_OS=linux-gnu bash .gitlab/build-sidecar.sh' + +# compile appsec extension (PHP 8.3, linux-gnu) +.claude/ci/dockerh --cache compile-appsec-8.3-gnu --overlayfs --root \ + datadog/dd-trace-ci:php-8.3_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'PHP_VERSION=8.3 bash .gitlab/build-appsec.sh' + +# compile profiler extension (PHP 8.3, linux-gnu) +.claude/ci/dockerh --cache compile-profiler-8.3-gnu --overlayfs --root \ + datadog/dd-trace-ci:php-8.3_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'PHP_VERSION=8.3 bash .gitlab/build-profiler.sh datadog-profiling/x86_64-unknown-linux-gnu/lib/php/20230831 nts' + +# compile profiler extension ZTS variant (PHP 8.3, linux-gnu) +# Reuse the same cache — build-profiler.sh calls switch-php internally +.claude/ci/dockerh --cache compile-profiler-8.3-gnu --overlayfs --root \ + datadog/dd-trace-ci:php-8.3_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'PHP_VERSION=8.3 bash .gitlab/build-profiler.sh datadog-profiling/x86_64-unknown-linux-gnu/lib/php/20230831 zts' + +# compile loader (linux-gnu) +.claude/ci/dockerh --cache compile-loader-gnu --overlayfs \ + datadog/dd-trace-ci:php-8.3_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'HOST_OS=linux-gnu bash .gitlab/build-loader.sh' + +# compile loader (linux-musl) -- requires --root for apk add +.claude/ci/dockerh --cache compile-loader-musl --overlayfs --root \ + 
datadog/dd-trace-ci:php-compile-extension-alpine-8.3 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash -c 'HOST_OS=linux-musl bash .gitlab/build-loader.sh' + +# compile appsec helper rust +.claude/ci/dockerh --cache compile-appsec-helper-rust --overlayfs \ + datadog/dd-appsec-php-ci:nginx-fpm-php-8.5-release-musl \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- bash .gitlab/build-appsec-helper-rust.sh + +# pecl build +.claude/ci/dockerh --cache compile-pecl --overlayfs \ + datadog/dd-trace-ci:php-7.4_bookworm-6 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + -- make build_pecl_package +``` + +`--overlayfs` is used for package pipeline jobs because their output directories +(`extensions_*/`, `standalone_*/`, `appsec_*/`, `datadog-profiling/`, etc.) are +written to the project root and to files like `VERSION` and `*.ldflags`. The +overlayfs mode mounts the checkout read-only as the lower dir and uses a Docker +named volume (`dd-ci-{NAME}`) as the upper dir, so all writes go to the volume +transparently via copy-up. This also handles `append-build-id.sh` modifying +`VERSION`, which would fail with a read-only mount. diff --git a/.claude/ci/docker-compose.services.yml b/.claude/ci/docker-compose.services.yml new file mode 100644 index 00000000000..49e785f1e5e --- /dev/null +++ b/.claude/ci/docker-compose.services.yml @@ -0,0 +1,122 @@ +# Standalone service definitions for local CI reproduction. +# Mirrors the relevant services from the root docker-compose.yml but without +# host port bindings, so multiple instances (different PHP versions, parallel +# runs) can coexist on the same host. +# +# NOTE: Volume paths in this file are relative to this file's location (.claude/ci/). 
+# ../../tests/snapshots → repo root tests/snapshots/ +# +# Usage (substitute your cache/project name): +# docker compose -p tracer-web-83 -f .claude/ci/docker-compose.services.yml \ +# up -d test-agent request-replayer httpbin-integration mysql-integration +# +# Cleanup: +# docker compose -p tracer-web-83 -f .claude/ci/docker-compose.services.yml down + +services: + test-agent: + # CI uses v1.22.1 — pin to that version to avoid dev build behavioural differences. + # Find the current CI version in tracer-web-tests.md or .gitlab/generate-common.php. + image: ghcr.io/datadog/dd-apm-test-agent/ddapm-test-agent:v1.22.1 + volumes: + - ../../tests/snapshots:/snapshots + environment: + - LOG_LEVEL=DEBUG + - TRACE_LANGUAGE=php + - DD_TRACE_AGENT_URL=http://request-replayer:80 + - PORT=9126 + - DD_SUPPRESS_TRACE_PARSE_ERRORS=true + - DD_POOL_TRACE_CHECK_FAILURES=true + - DD_DISABLE_ERROR_RESPONSES=true + - SNAPSHOTS_DIR=/snapshots + - SNAPSHOT_CI=0 + - SNAPSHOT_REMOVED_ATTRS=start,duration,metrics.php.compilation.total_time_ms,metrics.php.memory.peak_usage_bytes,metrics.php.memory.peak_real_usage_bytes,metrics.process_id + - ENABLED_CHECKS=trace_stall,trace_peer_service,trace_dd_service + - SNAPSHOT_REGEX_PLACEHOLDERS=path:/home/circleci/app|/project/dd-trace-php,httpbin:(?<=//)httpbin-integration:8080 + + request-replayer: + image: datadog/dd-trace-ci:php-request-replayer-2.0 + + httpbin-integration: + image: kong/httpbin:0.2.2 + # CI runs httpbin via gunicorn on port 8080. Without this, the default port + # is 80, which causes cli-server curl spans to record `:80` explicitly while + # HTTPBIN_INTEGRATION strips it, producing 10 mismatched assertions. 
+ command: ["pipenv", "run", "gunicorn", "-b", "0.0.0.0:8080", "httpbin:app", "-k", "gevent"] + + mysql-integration: + image: datadog/dd-trace-ci:php-mysql-dev-5.6 + environment: + - MYSQL_ROOT_PASSWORD=test + - MYSQL_PASSWORD=test + - MYSQL_USER=test + - MYSQL_DATABASE=test + + redis-integration: + image: datadog/dd-trace-ci:php-redis-5.0 + + rabbitmq-integration: + image: rabbitmq:3.9.20-alpine + + memcached-integration: + image: memcached:1.5-alpine + + elasticsearch7-integration: + image: elasticsearch:7.17.23 + environment: + - discovery.type=single-node + - "ES_JAVA_OPTS=-Xms1g -Xmx1g" + cap_add: + - IPC_LOCK + ulimits: + memlock: + soft: -1 + hard: -1 + + # elasticsearch1 tests use this (PHP 7.0-7.2 only) + elasticsearch2-integration: + image: elasticsearch:2 + platform: linux/amd64 + + mongodb-integration: + image: mongo:4.2.24 + environment: + - MONGO_INITDB_ROOT_USERNAME=test + - MONGO_INITDB_ROOT_PASSWORD=test + + zookeeper: + image: confluentinc/cp-zookeeper:7.8.0 + environment: + - ZOOKEEPER_CLIENT_PORT=2181 + - ZOOKEEPER_TICK_TIME=2000 + - ALLOW_ANONYMOUS_LOGIN=yes + - ZOOKEEPER_ADMIN_ENABLE_SERVER=false + - KAFKA_OPTS=-Dzookeeper.4lw.commands.whitelist=srvr,ruok + + kafka-integration: + image: confluentinc/cp-kafka:7.8.0 + depends_on: + - zookeeper + environment: + - KAFKA_BROKER_ID=111 + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://kafka-integration:9092 + - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT + - KAFKA_INTER_BROKER_LISTENER_NAME=PLAINTEXT + - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 + - KAFKA_TRANSACTION_STATE_LOG_MIN_ISR=1 + - KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR=1 + - KAFKA_AUTO_CREATE_TOPICS_ENABLE=true + - KAFKA_ZOOKEEPER_CONNECTION_TIMEOUT_MS=120000 + - KAFKA_ZOOKEEPER_SESSION_TIMEOUT_MS=120000 + + sqlsrv-integration: + image: mcr.microsoft.com/mssql/server:2019-CU15-ubuntu-20.04 + platform: linux/amd64 + environment: + - ACCEPT_EULA=Y + - MSSQL_SA_PASSWORD=Password12! 
+ - MSSQL_PID=Developer + + googlespanner-integration: + image: gcr.io/cloud-spanner-emulator/emulator:1.5.25 diff --git a/.claude/ci/docker-upper-cp b/.claude/ci/docker-upper-cp new file mode 100755 index 00000000000..7cb2a071145 --- /dev/null +++ b/.claude/ci/docker-upper-cp @@ -0,0 +1,34 @@ +#!/bin/bash +# Copy files from a dockerh overlayfs volume to the host. +# Usage: docker-upper-cp +# +# Example: +# docker-upper-cp dd-ci-st-compile-8.2-nts \ +# tmp/build_extension/modules/ddtrace.so \ +# extensions_x86_64/ddtrace-20220829.so + +set -euo pipefail + +if [[ $# -ne 3 ]]; then + echo "Usage: docker-upper-cp " >&2 + exit 1 +fi + +vol="$1" +src="$2" +dest="$3" + +# Detect whether src is a directory or file in the volume +src_type=$(docker run --rm -v "${vol}:/v:ro" alpine \ + sh -c "test -d \"/v/upper/$src\" && echo dir || echo file") + +if [[ "$src_type" == "dir" ]]; then + mkdir -p "$dest" + strip=$(echo "$src" | awk -F/ '{print NF}') + docker run --rm -v "${vol}:/v:ro" alpine \ + tar -cf - -C /v/upper "$src" | tar -xf - --strip-components="$strip" -C "$dest" +else + mkdir -p "$(dirname "$dest")" + docker run --rm -v "${vol}:/v:ro" alpine \ + cat "/v/upper/$src" > "$dest" +fi diff --git a/.claude/ci/dockerh b/.claude/ci/dockerh new file mode 100755 index 00000000000..5de6c9543eb --- /dev/null +++ b/.claude/ci/dockerh @@ -0,0 +1,477 @@ +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.11" +# /// +""" +dockerh - Docker helper for dd-trace-php local CI reproduction. + +Wraps `docker run` with automatic bind mounts for the repo checkout +(read-only) and a persistent cache overlay for build artifacts. 
+""" + +import argparse +import json +import os +import re +import shutil +import subprocess +import sys +import tempfile + +CONTAINER_PROJECT = "/project/dd-trace-php" +CONTAINER_LOWER = "/project/dd-trace-php-ro" +CONTAINER_CACHE = "/cache" + +# Overlays inside the project: repo-relative paths mounted as writable +# over the read-only checkout. Docker requires mountpoint dirs to exist +# on the base layer, so we create them in the checkout if missing (they +# are gitignored build artifact dirs). +PROJECT_OVERLAYS: list[str] = [ + "target", + "profiling/target", + "appsec/build", + "tmp", +] + + +_OVL_MOUNT_HELPER_C = r""" +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +struct cap_data { unsigned int effective, permitted, inheritable; }; +struct cap_header { unsigned int version; int pid; }; + +int main(int argc, char **argv) { + if (argc < 7) { + fprintf(stderr, "Usage: ovl_mount \n"); + return 1; + } + int uid = atoi(argv[1]), gid = atoi(argv[2]); + + /* Drop to target uid/gid but keep capabilities */ + prctl(PR_SET_KEEPCAPS, 1); + setgid(gid); + setuid(uid); + + /* Re-acquire caps needed for overlayfs setup: + * SYS_ADMIN(21): mount syscall + * DAC_OVERRIDE(1): workdir internal operations + * FOWNER(3): xattr operations on workdir + * CHOWN(0): ownership adjustments */ + struct cap_header h = { .version = 0x20080522 }; + struct cap_data d[2] = { + { .effective = (1u<<21)|(1u<<1)|(1u<<3)|(1u<<0), + .permitted = (1u<<21)|(1u<<1)|(1u<<3)|(1u<<0) }, + { 0 } + }; + syscall(SYS_capset, &h, d); + + char opts[4096]; + snprintf(opts, sizeof(opts), + "lowerdir=%s,upperdir=%s,workdir=%s,userxattr", + argv[3], argv[4], argv[5]); + if (mount("overlay", argv[6], "overlay", 0, opts)) { + fprintf(stderr, "ovl_mount: %s\n", strerror(errno)); + return 1; + } + return 0; +} +""" + +# Minimal static setpriv replacement. Supports the subset of flags that +# dockerh actually uses: --reuid, --regid, --clear-groups. 
Works on any +# Linux container (Alpine/BusyBox, Debian slim, scratch, …) because it is +# compiled as a static musl binary – no runtime dependencies at all. +_SETPRIV_HELPER_C = r""" +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +int main(int argc, char **argv) { + uid_t uid = 0; + gid_t gid = 0; + int got_uid = 0, got_gid = 0, clear_groups = 0; + int i; + + for (i = 1; i < argc; i++) { + if (strncmp(argv[i], "--reuid=", 8) == 0) { + uid = (uid_t)atoi(argv[i] + 8); + got_uid = 1; + } else if (strncmp(argv[i], "--regid=", 8) == 0) { + gid = (gid_t)atoi(argv[i] + 8); + got_gid = 1; + } else if (strcmp(argv[i], "--clear-groups") == 0) { + clear_groups = 1; + } else { + break; /* first non-option → start of command */ + } + } + + if (i >= argc) { + fprintf(stderr, "Usage: setpriv [--reuid=UID] [--regid=GID] [--clear-groups] CMD [ARGS...]\n"); + return 1; + } + + if (clear_groups && setgroups(0, NULL) != 0) { + perror("setgroups"); + return 1; + } + if (got_gid && setgid(gid) != 0) { + perror("setgid"); + return 1; + } + if (got_uid && setuid(uid) != 0) { + perror("setuid"); + return 1; + } + + execvp(argv[i], &argv[i]); + perror("execvp"); + return 127; +} +""" + + +def _get_ovl_mount_helper() -> str: + """Build (or return cached) static ovl_mount binary for each arch.""" + import platform + arch = platform.machine() # e.g. 
arm64, x86_64 + cache_dir = os.path.expanduser("~/.cache/dd-ci") + os.makedirs(cache_dir, exist_ok=True) + bin_path = os.path.join(cache_dir, f"ovl_mount-{arch}") + src_path = os.path.join(cache_dir, "ovl_mount.c") + + if os.path.exists(bin_path): + return bin_path + + with open(src_path, "w") as f: + f.write(_OVL_MOUNT_HELPER_C) + + # Cross-compile for both architectures using Docker + target = "aarch64-linux-musl" if arch == "arm64" else "x86_64-linux-musl" + print(f"Building ovl_mount helper for {arch}...", file=sys.stderr) + subprocess.run( + ["docker", "run", "--rm", "--platform", f"linux/{arch}", + "-v", f"{cache_dir}:/src", + "alpine:3.20", "sh", "-c", + f"apk add --no-cache gcc musl-dev >/dev/null 2>&1 && " + f"cc -static -o /src/ovl_mount-{arch} /src/ovl_mount.c && " + f"chmod 755 /src/ovl_mount-{arch}"], + check=True, + ) + return bin_path + + +def _get_setpriv_helper() -> str: + """Build (or return cached) static setpriv binary for each arch.""" + import platform + arch = platform.machine() # e.g. 
arm64, x86_64
    cache_dir = os.path.expanduser("~/.cache/dd-ci")
    os.makedirs(cache_dir, exist_ok=True)
    bin_path = os.path.join(cache_dir, f"setpriv-{arch}")
    src_path = os.path.join(cache_dir, "setpriv.c")

    # Cached per-arch binary: build once, reuse on later runs.
    if os.path.exists(bin_path):
        return bin_path

    with open(src_path, "w") as f:
        f.write(_SETPRIV_HELPER_C)

    print(f"Building setpriv helper for {arch}...", file=sys.stderr)
    # Static musl build inside Alpine so the helper runs on any base image
    # (Alpine/BusyBox, slim Debian, scratch, ...).
    subprocess.run(
        ["docker", "run", "--rm", "--platform", f"linux/{arch}",
         "-v", f"{cache_dir}:/src",
         "alpine:3.20", "sh", "-c",
         f"apk add --no-cache gcc musl-dev >/dev/null 2>&1 && "
         f"cc -static -o /src/setpriv-{arch} /src/setpriv.c && "
         f"chmod 755 /src/setpriv-{arch}"],
        check=True,
    )
    return bin_path


def repo_root() -> str:
    """Return the absolute path of the enclosing git checkout, or exit with an error."""
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True,
        )
        return result.stdout.strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Either git is missing or we are outside a work tree; both are fatal.
        sys.exit("error: not inside a git repository")



def main() -> None:
    """Entry point: parse dockerh options, assemble a docker run command, exec it."""
    # Split on first bare "--" to separate dockerh+docker options from the
    # container command.
    argv = sys.argv[1:]
    cmd_args: list[str] | None = None
    if "--" in argv:
        idx = argv.index("--")
        pre = argv[:idx]
        cmd_args = argv[idx + 1:]
    else:
        pre = argv

    # dockerh's own flags; anything unrecognized falls through to docker
    # via parse_known_args (so e.g. --privileged just works).
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--cache", dest="cache_name", default=None)
    parser.add_argument("--clean-cache", action="store_true", default=False)
    parser.add_argument("--no-cache-overlay", action="store_true", default=False)
    parser.add_argument("--writable-tree", action="store_true", default=False,
                        help="Mount the repo read-write instead of read-only")
    parser.add_argument("--overlayfs", action="store_true", default=False,
                        help="Use overlayfs instead of per-dir bind mounts; "
                             "all writes go to the cache dir transparently")
    parser.add_argument("--root", action="store_true", default=False,
                        help="Stay as root inside the container (skip privilege drop)")
    parser.add_argument("--php", dest="php_variant", default=None,
                        metavar="VARIANT",
                        help="Call switch-php VARIANT before running (nts, zts, debug, nts-asan, debug-zts-asan)")
    parser.add_argument("--help", action="store_true", default=False)

    known, remaining = parser.parse_known_args(pre)

    if known.help:
        print(
            "Usage: dockerh --cache NAME --overlayfs [OPTIONS] IMAGE [DOCKER_OPTIONS...] -- COMMAND [ARGS...]\n"
            "\n"
            "Wraps docker run with the repo mounted read-only and a persistent\n"
            "overlay so all writes go to the cache transparently.\n"
            "\n"
            "Options:\n"
            " --cache NAME Cache name (required)\n"
            " --overlayfs Use overlayfs (recommended); writes go to Docker\n"
            " volume dd-ci-NAME. Use --clean-cache to reset.\n"
            " --php VARIANT Call switch-php before running\n"
            " (nts, zts, debug, nts-asan, debug-zts-asan)\n"
            " --root Stay as root (skip privilege drop to host uid)\n"
            " --clean-cache Delete the cache (volume or dir), then continue\n"
            " --no-cache-overlay Skip cache mounts (--cache not required)\n"
            " --help Show this help\n"
            "\n"
            "Cache storage:\n"
            " --overlayfs Docker volume dd-ci-NAME (inspect with\n"
            " docker run --rm -v dd-ci-NAME:/v alpine ls /v/upper)\n"
            " legacy (no flag) Host directory ~/.cache/dd-ci/NAME/\n",
            file=sys.stderr,
        )
        sys.exit(0)

    # --cache is mandatory unless the caller opted out of all cache mounts.
    if known.cache_name is None and (not known.no_cache_overlay or known.clean_cache or known.overlayfs):
        sys.exit("error: --cache NAME is required. Run with --help for usage.")

    if known.overlayfs and known.writable_tree:
        sys.exit("error: --overlayfs and --writable-tree are mutually exclusive")

    if not remaining:
        sys.exit("error: IMAGE is required. Run with --help for usage.")

    # First unrecognized token is the image; the rest pass through to docker run.
    image = remaining[0]
    docker_opts = remaining[1:]

    # Translate bookworm-style --php values for centos-7 images.
    # centos-7 uses version-prefixed variants (8.3, 8.3-debug, 8.3-zts);
    # bookworm uses bare names (nts, debug, zts, nts-asan, debug-zts-asan).
    if known.php_variant:
        centos_match = re.search(r"php-(\d+\.\d+)_centos", image)
        if centos_match:
            php_ver = centos_match.group(1)
            variant = known.php_variant
            asan_variants = {"nts-asan", "debug-zts-asan"}
            if variant in asan_variants:
                sys.exit(f"error: --php {variant} is not available on centos-7 images "
                         f"(no ASAN builds). Use a bookworm image instead.")
            centos_map = {"nts": php_ver, "debug": f"{php_ver}-debug", "zts": f"{php_ver}-zts"}
            if variant in centos_map:
                mapped = centos_map[variant]
                print(f"Mapped --php {variant} to {mapped} for centos-7 image", file=sys.stderr)
                known.php_variant = mapped

    root = repo_root()
    # Legacy (non-overlayfs) cache location on the host.
    cache_dir = os.path.expanduser(f"~/.cache/dd-ci/{known.cache_name}") if known.cache_name else None

    # --clean-cache: wipe the matching storage (volume or dir), then continue.
    if known.clean_cache and known.cache_name:
        if known.overlayfs:
            vol_name = f"dd-ci-{known.cache_name}"
            result = subprocess.run(
                ["docker", "volume", "rm", "-f", vol_name],
                capture_output=True, text=True,
            )
            if result.returncode == 0 and vol_name in result.stdout:
                print(f"Removed volume {vol_name}", file=sys.stderr)
            else:
                print(f"Volume {vol_name} does not exist", file=sys.stderr)
        elif cache_dir:
            if os.path.exists(cache_dir):
                shutil.rmtree(cache_dir)
                print(f"Removed {cache_dir}", file=sys.stderr)
            else:
                print(f"Cache dir does not exist: {cache_dir}", file=sys.stderr)

    uid = os.getuid()
    gid = os.getgid()

    docker_cmd = ["docker", "run", "--rm"]

    # Only allocate a TTY when we actually have one.
    if sys.stdin.isatty():
        docker_cmd.append("-it")

    # Always start as root so we can register the host uid/gid in /etc/passwd,
    # /etc/group, and /etc/sudoers, then drop to the host uid via setpriv.
    # This means bind-mounted files and cache overlay dirs (owned by the host
    # user) are writable, files written appear as the host user on the host
    # filesystem, and scripts that call sudo (e.g. switch-php) always work.
    orig_ep = _image_entrypoint(image)
    tmpdir = tempfile.mkdtemp(prefix="dockerh-")
    entrypoint_path = os.path.join(tmpdir, "init.sh")
    # Use our static setpriv helper so this works on every base image
    # (Alpine/BusyBox, slim Debian, scratch, etc.).
    if known.root:
        # --root: stay as root, no privilege drop.
        if orig_ep:
            drop_and_exec = "exec " + " ".join(_shell_quote(a) for a in orig_ep) + ' "$@"'
        else:
            drop_and_exec = 'exec "$@"'
    elif orig_ep:
        # Preserve the image's original entrypoint, but run it (and the user
        # command) under the host uid/gid.
        drop_and_exec = (
            f"exec /dockerh-setpriv --reuid={uid} --regid={gid} --clear-groups "
            + " ".join(_shell_quote(a) for a in orig_ep) + ' "$@"'
        )
    else:
        drop_and_exec = f'exec /dockerh-setpriv --reuid={uid} --regid={gid} --clear-groups "$@"'

    # Redirect BASH_ENV to a writable location. Some CI images (e.g.
    # centos-7) bake BASH_ENV=/etc/profile.d/zzz-ddtrace.sh which is
    # owned by root. Build scripts append to $BASH_ENV, so it must be
    # writable after setpriv drops to the host uid. We copy the
    # original content (e.g. devtoolset-7 activation) to preserve it.
    bash_env_lines = (
        f'cp "${{BASH_ENV:-/dev/null}}" /tmp/.bash_env 2>/dev/null || true\n'
        f"export BASH_ENV=/tmp/.bash_env\n"
        f"chown {uid}:{gid} \"$BASH_ENV\"\n"
    )

    overlayfs_lines = ""
    if known.overlayfs:
        # The mount helper drops to the target uid/gid but retains the
        # capabilities needed for overlayfs setup (SYS_ADMIN, DAC_OVERRIDE,
        # FOWNER, CHOWN). This way overlayfs sees the lower-dir files with
        # the virtiofs uid mapping for the target user, producing correct
        # ownership in the merged view. After mount, capabilities are
        # dropped by the setpriv call in drop_and_exec.
        # NOTE: CONTAINER_CACHE / CONTAINER_PROJECT / CONTAINER_LOWER are
        # module-level constants defined earlier in this file (not shown here).
        overlayfs_lines = (
            f"mkdir -p {CONTAINER_CACHE}/upper {CONTAINER_CACHE}/work {CONTAINER_PROJECT}\n"
            f"chown {uid}:{gid} {CONTAINER_CACHE}/upper\n"
            f"chmod 777 {CONTAINER_CACHE}/work\n"
            f"/dockerh-ovl-mount {uid} {gid}"
            f" {CONTAINER_LOWER} {CONTAINER_CACHE}/upper {CONTAINER_CACHE}/work"
            f" {CONTAINER_PROJECT}\n"
            f"cd {CONTAINER_PROJECT}\n"
        )

    switch_php_lines = ""
    if known.php_variant:
        switch_php_lines = f"""switch-php {known.php_variant}
# Transfer ownership of the PHP extension/ini dirs to localuser so that make
# install (which runs plain cp/tee, no sudo) can write there as uid {uid}.
chown localuser "$(php-config --extension-dir)" "$(php -r 'echo PHP_CONFIG_FILE_SCAN_DIR;')" || true
# Allow nginx to create temp dirs under /var/lib/nginx/ when running as localuser.
chown localuser /var/lib/nginx/ 2>/dev/null || true
"""

    # Generated init script: register the host user, fix BASH_ENV, perform the
    # optional overlayfs mount and switch-php, then drop privileges and exec.
    with open(entrypoint_path, "w") as f:
        f.write(f"""#!/bin/sh
set -e
echo 'localuser:x:{uid}:{gid}::/tmp:/bin/sh' >> /etc/passwd
echo 'localgroup:x:{gid}:localuser' >> /etc/group
[ -f /etc/shadow ] && echo 'localuser:*:19000:0:99999:7:::' >> /etc/shadow || true
echo 'localuser ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers
{bash_env_lines}{overlayfs_lines}{switch_php_lines}{drop_and_exec}
""")
    os.chmod(entrypoint_path, 0o755)
    docker_cmd += ["--user", "root"]
    docker_cmd += ["-e", "HOME=/tmp"]
    docker_cmd += ["-v", f"{entrypoint_path}:/dockerh-init.sh:ro"]
    docker_cmd += ["--entrypoint", "/dockerh-init.sh"]

    # Always mount the static setpriv helper so privilege dropping works
    # on every base image (Alpine/BusyBox, slim Debian, scratch, etc.).
    setpriv_helper = _get_setpriv_helper()
    docker_cmd += ["-v", f"{setpriv_helper}:/dockerh-setpriv:ro"]

    if known.overlayfs:
        mount_helper = _get_ovl_mount_helper()
        docker_cmd += ["-v", f"{mount_helper}:/dockerh-ovl-mount:ro"]

    if known.overlayfs:
        # overlayfs mode: mount repo read-only as the lower dir, use a Docker
        # named volume for the overlay upper/work dirs (native VM filesystem
        # supports overlayfs properly, unlike virtiofs bind mounts).
        vol_name = f"dd-ci-{known.cache_name}"
        docker_cmd += ["--cap-add", "SYS_ADMIN"]
        docker_cmd += ["--security-opt", "apparmor=unconfined"]
        docker_cmd += ["-v", f"{root}:{CONTAINER_LOWER}:ro"]
        docker_cmd += ["-v", f"{vol_name}:{CONTAINER_CACHE}"]
    else:
        # Legacy mode: per-directory bind mount overlays.
        # Checkout mount (read-only by default; read-write with --writable-tree)
        tree_mode = "" if known.writable_tree else ":ro"
        docker_cmd += ["-v", f"{root}:{CONTAINER_PROJECT}{tree_mode}"]

        # Cache overlay mounts
        if not known.no_cache_overlay:
            # PROJECT_OVERLAYS is a module-level list of repo subdirs defined
            # earlier in this file (not visible in this chunk).
            for repo_subdir in PROJECT_OVERLAYS:
                # Ensure the mountpoint dir exists in the checkout so Docker
                # can layer the writable bind mount on top of the :ro base.
                os.makedirs(os.path.join(root, repo_subdir), exist_ok=True)
                host_path = os.path.join(cache_dir, repo_subdir)  # type: ignore[arg-type]
                os.makedirs(host_path, exist_ok=True)
                docker_cmd += ["-v", f"{host_path}:{CONTAINER_PROJECT}/{repo_subdir}"]

    docker_cmd += ["-w", CONTAINER_PROJECT]

    # Pass through any extra docker options
    docker_cmd += docker_opts

    docker_cmd.append(image)

    if cmd_args is not None:
        docker_cmd += cmd_args
    # If no --, remaining args after image were consumed as docker_opts;
    # no command means docker default entrypoint runs.

    # Echo the final command for reproducibility, then replace this process.
    print("+ " + " ".join(_shell_quote(a) for a in docker_cmd), file=sys.stderr)
    os.execvp("docker", docker_cmd)


def _image_entrypoint(image: str) -> list[str]:
    """Return the image's configured ENTRYPOINT as a list, or [] if none/unknown.

    Errors (image not inspectable, docker missing, bad JSON) are treated as
    "no entrypoint" rather than fatal.
    """
    try:
        result = subprocess.run(
            ["docker", "inspect", "--format", "{{json .Config.Entrypoint}}", image],
            capture_output=True, text=True, check=True,
        )
        val = json.loads(result.stdout.strip())
        # docker prints "null" when no entrypoint is set.
        return val if val else []
    except (subprocess.CalledProcessError, FileNotFoundError, json.JSONDecodeError):
        return []


def _shell_quote(s: str) -> str:
    """Quote s for POSIX sh; safe-charset strings are returned unchanged."""
    if s and not re.search(r"[^\w@%+=:,./-]", s):
        return s
    return "'" + s.replace("'", "'\\''") + "'"


if __name__ == "__main__":
    main()
diff --git a/.claude/ci/github-actions-other.md b/.claude/ci/github-actions-other.md
new file mode 100644
index 00000000000..8c0f9dfeb58
--- /dev/null
+++ b/.claude/ci/github-actions-other.md
@@ -0,0 +1,71 @@

# GitHub Actions — PR Automation and Release Tooling

## CI Jobs

**Source:**
- 
`.github/workflows/auto_check_snapshots.yml` — snapshot change summary +- `.github/workflows/auto_label_prs.yml` — PR auto-labeling +- `.github/workflows/auto_add_pr_to_miletone.yml` — milestone automation +- `.github/workflows/add-asset-to-gh-release.yml` — release asset upload +- `.github/workflows/update_latest_versions.yml` — version update automation +- `github-actions-helpers/build.sh` — .NET build script used by snapshot/label/milestone workflows + +Profiler tests (`prof_asan`, `prof_correctness`) are documented in +[github-actions-profiler.md](github-actions-profiler.md). + +| CI Job | Runner | Trigger | What it does | +|--------|--------|---------|--------------| +| `Check snapshots / check-snapshots` | `ubuntu-24.04` | `pull_request` | Summarizes snapshot changes in a PR comment | +| `Label PRs / add-labels` | `ubuntu-24.04` | `pull_request` | Auto-assigns labels to PRs based on changed files | +| `Auto add PR to vNext milestone / add_to_milestone` | `ubuntu-24.04` | `pull_request` (closed+merged to master/main) | Assigns merged PRs to the vNext milestone | +| `Add assets to release / add-assets-to-release` | `ubuntu-8-core-latest` | `workflow_dispatch` | Downloads `packages.tar.gz` and uploads contents to a GitHub release | +| `Update Latest Versions / update-latest-versions` | `ubuntu-24.04` | `schedule` (Mon 06:30 UTC) or `workflow_dispatch` | Runs `tests/PackageUpdater.php` and opens a PR updating pinned test dependency versions | + +## What They Do + +### auto_check_snapshots + +Uses .NET (`dotnet-7.0.101`) to build and run `SummaryOfSnapshotChanges` from +`github-actions-helpers/`. Posts a PR comment summarizing any snapshot file +changes (requires `fetch-depth: 0` for diff against base branch). + +### auto_label_prs + +Uses .NET to run `AssignLabelsToPullRequest`. Labels are applied based on which +files were changed. 
+ +### auto_add_pr_to_miletone + +Runs only when a PR is merged into `master`/`main` and the title does not start +with `[Version Bump]`. Uses .NET to run `AssignPullRequestToMilestone`, which +assigns the PR to the next open milestone. + +### add-asset-to-gh-release + +Manual-only (`workflow_dispatch`). Takes two inputs: a URL to `packages.tar.gz` +and a release version string. Downloads the tarball, extracts it, and uploads +all files under `build/packages/` to the specified GitHub release using `gh +release upload --clobber`. + +### update_latest_versions + +Weekly cron (Monday 06:30 UTC) or manual trigger. Installs the latest +dd-trace-php release, runs `make composer_tests_update` and `php +tests/PackageUpdater.php`, then opens a PR via +`peter-evans/create-pull-request` on a branch `update-latest-versions`. + +## Local Reproduction + +### Automation workflows (snapshots, labels, milestones, version updates) + +These workflows depend on GitHub API tokens, .NET tooling, and the PR event +context. They are not practical to reproduce locally. If a failure occurs, +inspect the workflow run log on GitHub Actions directly. + +## Gotchas + +- The .NET-based automation workflows (`auto_check_snapshots`, `auto_label_prs`, + `auto_add_pr_to_miletone`) all use dotnet 7.0.101 and share the + `github-actions-helpers/build.sh` entry point with different task names. +- `auto_add_pr_to_miletone.yml` has a typo in its filename (missing an "s" in + "milestone") — this is intentional/historical; do not rename it. 
diff --git a/.claude/ci/github-actions-profiler.md b/.claude/ci/github-actions-profiler.md new file mode 100644 index 00000000000..d362ef36646 --- /dev/null +++ b/.claude/ci/github-actions-profiler.md @@ -0,0 +1,277 @@ +# Profiler Tests (GitHub Actions) + +## CI Jobs + +**Source:** +- `.github/workflows/prof_correctness.yml` — correctness job definition, + matrix, build and run steps +- `Datadog/prof-correctness/analyze@main` — external action that + decompresses pprof output and checks it against JSON expectations +- `.github/workflows/prof_asan.yml` — ASAN job definition and matrix + +| CI Job | Runner | What it does | +|--------|--------|-------------| +| `Profiling correctness / prof-correctness ({ver}, nts)` | `ubuntu-24.04` | Builds profiler + runs NTS correctness test cases | +| `Profiling correctness / prof-correctness ({ver}, zts)` | `ubuntu-24.04` | Same + `exceptions_zts` (requires `parallel` PECL extension) | +| `Profiling ASAN Tests / prof-asan ({ver}, {arch})` | `arm-8core-linux` / `ubuntu-8-core-latest` | Builds profiler with ASAN + runs `.phpt` profiling tests | + +Correctness matrix: PHP 8.0+ × {nts, zts}. +ASAN matrix: PHP 8.3+ × {arm64, amd64}. + +## What It Tests + +Each job builds the profiler extension with `--features=trigger_time_sample`, then runs +PHP scripts that exercise profiling (allocations, wall/cpu time, exceptions, IO, timeline, +strange frames). The scripts output pprof files (zstd-compressed protobuf). The +`Datadog/prof-correctness/analyze` GitHub Action then checks each pprof against a JSON +expectations file. + +Test cases (NTS): `allocations`, `time`, `strange_frames`, `timeline`, `exceptions`, `io`, +`allocation_time_combined`, plus `allocations` re-run with 1-byte sampling distance (with +and without `USE_ZEND_ALLOC=0`). + +ZTS adds: `exceptions_zts`. 

## Local Reproduction

Use `.claude/ci/dockerh` with the `datadog/dd-trace-ci:php-{ver}_bookworm-{N}` image
matching the PHP version under test (see `index.md` for image version and contents). The CI
uses clang-19 on ubuntu-24.04; clang-17 in the image works fine.

Actions jobs use `shivammathur/setup-php` instead, but the same `dd-trace-ci`
image is a suitable local substitute.

**Image naming:** use `php-8.1_bookworm-N` for PHP 8.1 tests, `php-8.3_bookworm-N` for
8.3, etc. — the image is tagged by PHP version, so the version in the tag must match the
PHP version being tested.

**Cache naming:** use a separate `--cache` name per `(php-version, phpts)` pair (e.g.
`profiler-8.1-zts`) to avoid mixing NTS and ZTS build artifacts.

### Build the profiler extension

`cargo rustc` must be run from the `profiling/` subdirectory (the workspace `profiler-release`
profile is defined in the repo root `Cargo.toml`, but the crate itself lives in `profiling/`).

**`CARGO_TARGET_DIR` must be set explicitly** to `/project/dd-trace-php/target`. Without
it the `cbindgen` build script inside `libdatadog` calls `cargo locate-project --workspace`,
which resolves to the `libdatadog/` submodule's own workspace (not the repo root), and
tries to create `libdatadog/target/include/datadog/library-config.h`. That path is inside
the read-only source mount with no writable overlay, causing a
`ReadOnlyFilesystem (os error 30)` panic. Pointing `CARGO_TARGET_DIR` at the already-
overlaid `/project/dd-trace-php/target` fixes it.

```bash
# NTS example (PHP 8.3)
dockerh --cache profiler-8.3-nts --php nts datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c '
export CARGO_TARGET_DIR=/project/dd-trace-php/target
cd profiling && cargo rustc --features=trigger_time_sample --profile profiler-release --crate-type=cdylib
'

# ZTS example (PHP 8.1) — note --php zts, matching image version, and separate cache name
dockerh --cache profiler-8.1-zts --php zts datadog/dd-trace-ci:php-8.1_bookworm-6 -- bash -c '
export CARGO_TARGET_DIR=/project/dd-trace-php/target
cd profiling && cargo rustc --features=trigger_time_sample --profile profiler-release --crate-type=cdylib
'
```

Output: `/project/dd-trace-php/target/profiler-release/libdatadog_php_profiling.so`
(persisted, in legacy cache mode, at `~/.cache/dd-ci/<cache-name>/target/` on the host).

The second run reuses the build cache and completes in seconds. Never run `--clean-cache`
between iterations — the Rust build takes 5–15 minutes from scratch.

### Run a single test case

The `tmp/` directory is already a writable `dockerh` cache overlay, so
write pprof output there — no extra mounts needed:

```bash
dockerh --cache profiler-8.3-nts --php nts \
  datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c '
export CARGO_TARGET_DIR=/project/dd-trace-php/target
export DD_PROFILING_LOG_LEVEL=warn  # use "trace" only when debugging — trace is verbose and slows execution
export DD_PROFILING_EXPERIMENTAL_FEATURES_ENABLED=1
export DD_PROFILING_EXPERIMENTAL_EXCEPTION_SAMPLING_DISTANCE=1
export DD_PROFILING_EXCEPTION_MESSAGE_ENABLED=1
export EXECUTION_TIME=3  # default is 10s; 3s is enough for local testing; applies to ALL time-based tests

TEST_CASE=allocations
OUT=/project/dd-trace-php/tmp/correctness/$TEST_CASE
mkdir -p $OUT
DD_PROFILING_OUTPUT_PPROF=$OUT/test.pprof \
  php -d extension=/project/dd-trace-php/target/profiler-release/libdatadog_php_profiling.so \
  /project/dd-trace-php/profiling/tests/correctness/$TEST_CASE.php
ls -la $OUT/
'
```

The
output file is `test.pprof.1.zst` (zstd-compressed pprof protobuf). + +For `strange_frames`, the test is instant (no loop) and does not need `EXECUTION_TIME`. + +**Speed tip:** when investigating a single failing test case, run only that script and +inspect with `go tool pprof -top` (see below) rather than running the full suite. + +### Run the "no profile" check + +CI also verifies that with `DD_PROFILING_ENABLED=Off` no pprof file is produced: + +```bash +export DD_PROFILING_ENABLED=Off +# ... run the same php command ... +# Verify test.pprof.1.zst does NOT exist +``` + +**Note:** the CI script checks for the `.lz4` extension (an older format), but the current +profiler outputs `.zst`. This means the CI "no profile" check always passes regardless of +whether a `.zst` file is produced. Locally, check for `.zst` if you want a meaningful +verification. + +### Inspecting pprof output + +The pprof files are zstd-compressed protobuf. Use `go tool pprof` (available in the +dd-trace-ci image) to inspect them. Pass `--user root` so `apt-get install` works: + +```bash +dockerh --cache profiler-8.3-nts --php nts datadog/dd-trace-ci:php-7.3_bookworm-6 --user root -- bash -c ' +apt-get update -qq > /dev/null 2>&1 && apt-get install -y -qq zstd > /dev/null 2>&1 + +PPROF_DIR=/project/dd-trace-php/tmp/correctness/allocations +zstd -d $PPROF_DIR/test.pprof.1.zst -o $PPROF_DIR/test.pprof.1 + +# Top functions by alloc-size +go tool pprof -top -sample_index=alloc-size $PPROF_DIR/test.pprof.1 + +# Full stack traces with labels +go tool pprof -traces -sample_index=alloc-size $PPROF_DIR/test.pprof.1 +' +``` + +Available `-sample_index` values (matching the pprof value types): +`sample`, `wall-time`, `cpu-time`, `alloc-samples`, `alloc-size`, `timeline`, +`exception-samples`, `file-io-read-size`, `file-io-write-size`, +`socket-read-size`, `socket-write-size`, and their `-time` / `-samples` variants. 

### Understanding the JSON expectations

Each `profiling/tests/correctness/<test>.json` defines expected stack distributions.
Abbreviated structure (see the actual `.json` files for the full set of per-frame
expectation fields):

```json
{
  "scale_by_duration": true,
  "test_name": "php_allocations",
  "stacks": [
    {
      "profile-type": "alloc-size",
      "stack-content": [
        {
          "regular_expression": "<frame-name pattern>"
        }
      ]
    }
  ]
}
```

To check a profile manually, run `go tool pprof -top -sample_index=<type>` and check that
the cumulative percentages of the listed functions match the JSON expectations.

## `trigger_time_sample` Feature

This cargo feature (not for production) exposes a PHP function
`Datadog\Profiling\trigger_time_sample()` that forces an immediate time sample capture.
Used by `strange_frames.php` to get a deterministic single-sample profile for testing
frame name formatting. The implementation is in `profiling/src/capi.rs` and
`profiling/src/php_ffi.c`.

## Debug Build

For a debug (unoptimized) build:

```bash
cargo rustc --features=trigger_time_sample --profile dev --crate-type=cdylib
```

Output: `target/debug/libdatadog_php_profiling.so` (~144 MB vs ~20 MB for profiler-release).
Use the same `php -d extension=...` command, just point to the debug path.

## ZTS tests -- parallel PECL extension

The `exceptions_zts.php` test uses the `parallel` PECL extension. The dd-trace-ci bookworm
images already include it for PHP 8+ ZTS builds (installed by `build-extensions.sh`), so no
extra setup is needed when reproducing locally with `dockerh`.

CI, on the other hand, runs on a bare `ubuntu-24.04` runner and installs PHP via
`shivammathur/setup-php`, which does not include `parallel` by default. The workflow
installs version `v1.2.7` from GitHub via the `extensions` matrix parameter
(`parallel-krakjoe/parallel@v1.2.7`).

## ASAN Build

Builds the profiler with AddressSanitizer using a pinned nightly Rust toolchain
and clang-17, then runs the `.phpt` test suite with `--asan`.
+ +### Local reproduction + +```bash +dockerh --cache profiler-asan-8.3-nts --php nts-asan \ + datadog/dd-trace-ci:php-8.3_bookworm-6 --user root --privileged -- bash -c ' +export CARGO_TARGET_DIR=/project/dd-trace-php/target +export CC=clang-17 +export CFLAGS="-fsanitize=address -fno-omit-frame-pointer" +export LDFLAGS="-fsanitize=address -shared-libasan" +export RUSTC_LINKER=lld-17 +RUST_TOOLCHAIN=nightly-2025-06-13 + +cd profiling +triplet=$(uname -m)-unknown-linux-gnu +RUSTFLAGS="-Zsanitizer=address" cargo +${RUST_TOOLCHAIN} build -Zbuild-std=std,panic_abort \ + --target $triplet --profile profiler-release +cp -v "$CARGO_TARGET_DIR/$triplet/profiler-release/libdatadog_php_profiling.so" \ + "$(php-config --extension-dir)/datadog-profiling.so" + +# run-tests.php writes temp files next to .phpt files, so both must be in a writable dir. +# Use the tmp/ overlay which dockerh mounts writable over the read-only checkout. +PHPT_RUN=/project/dd-trace-php/tmp/phpt-run +rm -rf "$PHPT_RUN" && mkdir -p "$PHPT_RUN" +cp $(php-config --prefix)/lib/php/build/run-tests.php "$PHPT_RUN/" +cp -r /project/dd-trace-php/profiling/tests/phpt "$PHPT_RUN/" +cd "$PHPT_RUN" +DD_PROFILING_OUTPUT_PPROF=/tmp/pprof \ + php run-tests.php -j$(nproc) --show-diff --asan -d extension=datadog-profiling.so phpt +' +``` + +Requires `--user root --privileged` — ASAN needs both. + +The nightly toolchain version (`nightly-2025-06-13`) is pinned in +`.github/workflows/prof_asan.yml`, not in `profiling/rust-toolchain.toml`. Check +the workflow file for the current pinned version. + +## Gotchas + +- **Expected ASAN test counts:** 39 total, ~27 pass, ~12 skip (30%), 0 fail. The skips are normal + (platform/env conditions). A non-zero fail count indicates a real problem. +- The `profiler-release` profile is defined in the workspace root `Cargo.toml`, not in + `profiling/Cargo.toml`. It inherits from `release` with `panic = "abort"`. 
+- `dockerh` runs the container as your host UID so cache dirs are writable without any + permission tricks. Pass `--user root` after the image name if you need to install + packages with `apt-get`. +- CI checks for `.lz4` extension in the "no profile" test, but the current profiler + outputs `.zst` (zstandard). Both are valid pprof compression formats. diff --git a/.claude/ci/index.md b/.claude/ci/index.md new file mode 100644 index 00000000000..ce5a741985c --- /dev/null +++ b/.claude/ci/index.md @@ -0,0 +1,540 @@ +# CI Job Groups — Local Reproduction Guide + +This directory documents how to reproduce CI failures locally for each class of CI job. +Each file covers one group of jobs that share the same runner type, Docker image, and +execution model. Where possible, it also covers how to narrow the test run and substitute +debug binaries. + +## `dockerh` — Docker helper + +Most groups use `.claude/ci/dockerh` to run containers. It wraps +`docker run` with: + +- Repo checkout mounted **read-only** at `/project/dd-trace-php` +- With `--overlayfs` (recommended): a Linux overlayfs merges the + read-only checkout (lower) with a Docker named volume + `dd-ci-` (upper). All writes go to the volume transparently. +- With `--php`: starts as root, sets up the host uid in + `/etc/passwd`/sudoers, calls `switch-php`, then drops to host uid + via `setpriv` — the user's command runs as the host user with + full sudo access +- `-e HOME=/tmp` so tools that write to `~` don't fail + +Two small static binaries are bind-mounted into the container: +- `ovl_mount` — performs the overlayfs mount after dropping to the + host uid (retaining only the capabilities overlayfs needs) +- `setpriv` — drops privileges from root to the host uid/gid + (replaces the system `setpriv`, which is missing or incompatible + on Alpine/BusyBox) + +Both are compiled once as static musl binaries and cached at +`~/.cache/dd-ci/`. 
+ +``` +Usage: dockerh --cache NAME --overlayfs [OPTIONS] IMAGE [DOCKER_OPTIONS...] -- COMMAND [ARGS...] + + --cache NAME Cache name (required) + --overlayfs Use overlayfs; writes go to Docker volume dd-ci-NAME + --php VARIANT Call switch-php before running + (nts, zts, debug, nts-asan, debug-zts-asan) + --root Stay as root (skip privilege drop to host uid) + --clean-cache Delete the cache volume, then continue + --no-cache-overlay Skip cache mounts (--cache not required) + --help Show this help +``` + +Pass extra Docker options between `IMAGE` and `--`. + +Use `--root` for jobs that need to run as root (e.g., `apt-get +install`, writing to `/opt`). Without `--root`, dockerh drops to +the host uid/gid and makes `sudo` available (NOPASSWD). + +**Cache storage:** with `--overlayfs`, writes go to Docker volume +`dd-ci-`. To inspect: `docker run --rm -v dd-ci-NAME:/v +alpine ls /v/upper`. Use `--clean-cache` to reset. Without +`--overlayfs` (legacy mode), cache lives at `~/.cache/dd-ci//` +as bind-mounted directories. + +To extract files from the overlay to the host (e.g., to commit +generated output), copy from the volume: +```bash +docker run --rm -v dd-ci-NAME:/v -v "$PWD:/out" alpine \ + cp /v/upper/path/to/file /out/ +``` + +### Troubleshooting overlayfs + +**Stale root-owned files in the host checkout.** The host checkout +is the overlayfs lower layer. Root-owned files left by direct +`docker run` invocations or builds outside dockerh (e.g. in `tmp/`, +`extensions_*/`, `appsec_*/`) are visible through the overlay. When +overlayfs copies them up, it preserves root ownership — the +non-root container user then gets `Permission denied`. 
Clean them: +```bash +# If root-owned files exist in the checkout: +docker run --rm -v "$PWD:/w" alpine \ + sh -c 'find /w -maxdepth 3 -user root \ + -not -path "/w/.git/*" -exec chown '"$(id -u):$(id -g)"' {} +' +``` + +**Stale overlay cache from a different PHP version.** When reusing +a `--cache` name after switching PHP versions or images, the cached +`Makefile` / `config.status` in `tmp/build_extension/` references +the old PHP include paths. Builds fail silently. Use `--clean-cache` +to reset the overlay volume when switching versions. + +### PHP variant selection (`--php`) + +`datadog/dd-trace-ci` images ship multiple PHP builds under `/opt/php/` and a +`switch-php` command that symlinks the active build into `/usr/local/bin/`. +**The default `php` on `$PATH` is the debug build**, which rarely is the correct +default. Always pass `--php nts` (or another variant) when building or running +PHP code. Use `--php debug` when you need debug symbols for gdb or extra +runtime assertions to diagnose failures. + +`--php VARIANT` starts the container as root, registers the host uid/gid as +`localuser` in `/etc/passwd` and `/etc/sudoers`, calls `switch-php VARIANT`, +then drops to the host uid via `setpriv`. Your command runs as the host user +with passwordless sudo and `php` pointing to the selected build. + +**NTS vs ZTS matters for compiled extensions:** building a PHP extension with +NTS headers and loading it into a ZTS PHP (or vice versa) will crash. Always +pass `--php zts` when building or running extensions for ZTS PHP, and use a +separate `--cache` name per `(php-version, phpts, architecture)` tuple to avoid +mixing build artifacts. Conversely, jobs that share the same tuple (e.g. +unit tests and web tests both using PHP 8.3 debug on amd64) **can** share a +`--cache` name to reuse compiled artifacts and skip redundant builds. 
+ +## Image versions + +CI images are tagged `datadog/dd-trace-ci:php-{version}_bookworm-{N}` where `N` +is an iteration number shared across all GitLab appsec jobs. Find the current +value by searching for `bookworm-` in `.gitlab/generate-appsec.php` +. +The `php-8.3_bookworm-{N}` image contains: Rust (see +`profiling/rust-toolchain.toml` for the pinned version), clang-17, Go, and +multiple PHP builds under `/opt/php/` (nts, zts, debug, etc.). Use `--php nts` +(or another variant) with `dockerh` to select the right build — see the `--php` +section above. + +Images referenced as `registry.ddbuild.io/images/mirror/datadog/dd-trace-ci:TAG` +in CI scripts are mirrors of `datadog/dd-trace-ci:TAG` on Docker Hub. Pull them +directly without authentication — no registry login or image export/import needed: + +```bash +docker pull datadog/dd-trace-ci:php-8.3_bookworm-6 +``` + +(The exception is registry.ddbuild.io/images/mirror/b1o7r7e0/nginx_musl_toolchain, +which does not exist anymore in its original location) + +**Other image naming patterns:** + +- **centos-7 compile images:** `php-{ver}_centos-7` (e.g. + `php-8.3_centos-7`). Used by package pipeline compile jobs for + `x86_64-unknown-linux-gnu` / `aarch64-unknown-linux-gnu` to target + GLIBC 2.17 for maximum compatibility. +- **Alpine compile images:** `php-compile-extension-alpine-{ver}` (e.g. + `php-compile-extension-alpine-8.3`). No bookworm/centos suffix. +- **Appsec helper rust image:** `dd-appsec-php-ci:nginx-fpm-php-8.5-release-musl` + (on Docker Hub, unlike the defunct C++ helper image). + +**`switch-php` variant naming differs between images.** On centos-7 images, +PHP variants under `/opt/php/` are version-prefixed: `8.3`, `8.3-debug`, +`8.3-zts`. On bookworm images, variants are bare names: `nts`, `debug`, +`zts`, `nts-asan`, `debug-zts-asan`. Build scripts that call +`switch-php "${PHP_VERSION}"` (e.g. `build-tracing.sh`) work on centos but +fail on bookworm. Scripts that use bare names (e.g. 
`compile_extension.sh`
+with `switch-php debug`) work on bookworm but not centos.
+
+## Pipeline overview
+
+The main `.gitlab-ci.yml` generates five child pipelines via PHP scripts:
+
+| Pipeline | Generator | Child pipeline |
+|---|---|---|
+| appsec | `.gitlab/generate-appsec.php` | appsec-trigger |
+| tracer | `.gitlab/generate-tracer.php` | tracer-trigger |
+| profiler | `.gitlab/generate-profiler.php` | profiler-trigger |
+| package/release | `.gitlab/generate-package.php` | package-trigger |
+| shared | `.gitlab/generate-shared.php` | shared-trigger |
+
+Additionally, a small number of jobs run on **GitHub Actions** (`.github/workflows/`),
+not GitLab.
+
+## GitLab access
+
+The repo is mirrored from GitHub to GitLab at
+`DataDog/apm-reliability/dd-trace-php` (project ID **355**) via gitsync.
+All CI pipelines run on the GitLab mirror.
+
+### API token
+
+The environment variable `GITLAB_PERSONAL_ACCESS_TOKEN` should already be set.
+
+### Reading job logs
+
+```bash
+curl -s -H "PRIVATE-TOKEN: $GITLAB_PERSONAL_ACCESS_TOKEN" \
+  "https://gitlab.ddbuild.io/api/v4/projects/355/jobs/<job_id>/trace"
+```
+
+### Checking CI (Gitlab)
+
+Use `.claude/ci/check-ci` to follow a pipeline until all jobs complete.
+
+Results are written to `/tmp/gitlab_<pipeline_id>/`:
+- `success.txt` — `<job_name>\t<job_id>` per line
+- `failure.txt` — same format for failed jobs
+- `fail_logs/<job_id>.log` — full job trace for each failure
+
+Exit codes: 0 = all passed, 1 = failures or threshold reached.
+
+#### Invocation pattern
+
+Available options: `--commit <ref>` OR `--pipeline <id>`,
+`--discovery-timeout <seconds>` (default 60), `--poll-interval <seconds>` (default 60),
+`--max-failures <n>` (default 50), `--timeout <seconds>` (default 7200 = 2 h),
+`--list-jobs` (see below).
+
+##### `--list-jobs`
+
+Prints all jobs grouped by pipeline with their status, then exits
+immediately — does not monitor or download logs. Useful for a quick
+snapshot of what ran and what failed:
+
+```bash
+.claude/ci/check-ci --commit HEAD --list-jobs
+```
+
+Output format:
+
+```
+Pipeline 105413994 (status: failed):
+  failed   test_extension_ci: [7.2]
+  success  compile extension: debug [8.3]
+  ...
+```
+
+#### Monitor CI
+
+If --list-jobs is not passed, check-ci will run until all monitored pipelines
+finish, until a timeout, or until the maximum number of failures is reached.
+
+**Step 1 — Start check-ci in the background (Bash tool,
+`run_in_background: true`):**
+
+```bash
+PYTHONUNBUFFERED=1 .claude/ci/check-ci [OPTIONS]
+```
+
+Do NOT add `&` or `mktemp` — run the command directly and let
+`run_in_background: true` handle backgrounding. `PYTHONUNBUFFERED=1`
+is required so Python flushes stdout into the task output file.
+The Bash tool returns immediately with a line like:
+```
+Output is being written to: /path/to/tasks/<task_id>.output
+```
+Note that path — it is the output file for the next step.
+
+**Step 2 — Run ci-watch in the background (Bash tool,
+`run_in_background: true`):**
+
+```bash
+.claude/ci/ci-watch [--start-offset N] OUTPUT_FILE
+```
+
+`ci-watch` tails the output file and exits when there is something to
+act on. Run it with `run_in_background: true` — you will be notified
+when it completes. While it runs, you can do other work.
+
+Exit codes:
+- 0 — all pipelines completed (no failures)
+- 1 — one or more FAILED: lines detected
+- 2 — stale: no new output for 5 minutes
+- 3 — check-ci timed out
+
+On exit, ci-watch always prints `RESUME_OFFSET: <offset>`. Record this
+value — pass it as `--start-offset N` when re-running ci-watch to
+skip already-processed content and wait for further failures.
+
+When ci-watch completes, immediately call the `speak_when_done` MCP tool:
+- "All CI jobs passed" if exit 0.
+- "<N> CI jobs failed" if exit 1 (count is
+  `grep "^FAILED:" OUTPUT_FILE | wc -l`).
+- "CI monitor timed out" if exit 2 or 3.
+
+**Step 3 — Act on the result**
+
+Choose among these actions, as appropriate:
+
+- **Just report:** summarise the result to the user and stop.
+- **Investigate failures:** read `fail_logs/<job_id>.log` under the
+  output directory for each failed job and diagnose the root cause.
+- **Wait for more failures:** if check-ci is still running and you want
+  to keep watching after investigating, re-run ci-watch with
+  `--start-offset <RESUME_OFFSET>` (back to Step 2).
+- **Kill check-ci:** if you want to stop monitoring entirely, kill it
+  by its task ID or PID (noted from Step 1).
+- **Push fixes**: if a) the user asked you to (NOT OTHERWISE), AND b)
+  you have made changes to fix the CI failures AND c) the current
+  branch has an upstream branch, then commit and push. Then go back to
+  Step 1. If any of the three preconditions don't match, stop and
+  report the results (and your findings, if any).
+
+### Downloading artifacts
+
+Use `tooling/bin/download-artifacts` to download CI artifacts from GitLab jobs.
+
+**Modes:**
+- `--preset KEY` — download a well-known artifact by key (e.g., `ssi-amd64`,
+  `extension-amd64-gnu`, `datadog-setup`). Use `--list-presets` to see all.
+- `--job-name NAME` — download artifacts from a job matched by name (substring).
+- `--job-id ID` — download artifacts directly by job ID (no pipeline needed).
+- `--list-presets` — show available preset keys.
+
+**Pipeline source** (for `--preset` and `--job-name`):
+- `--pipeline ID` — use a specific pipeline.
+- `--commit REF` — resolve a git ref and find its pipeline (default: HEAD).
+ +```bash +# Download the SSI loader for amd64 from HEAD's pipeline +tooling/bin/download-artifacts --preset ssi-amd64 -o /tmp/artifacts + +# Download artifacts from a specific job by name +tooling/bin/download-artifacts --job-name "compile extension: debug [8.3]" --pipeline 12345 + +# Download directly by job ID +tooling/bin/download-artifacts --job-id 98765 -o /tmp/artifacts +``` + +--- + +## Building artifacts locally + +→ **[building-locally.md](building-locally.md)** +Consolidated reference for building each artifact locally (tracer +extension, appsec, profiler, sidecar, loader, release packages). +Covers common gotchas (CARGO_HOME, submodules, devtoolset-7, +`make` vs `make static`). Individual job group docs cross-reference +this file instead of duplicating build commands. + +## Job groups + +### Group A — Native Linux unit / extension / helper tests + +Runner: `arch:amd64` + `arch:arm64` +Image: `datadog/dd-trace-ci:php-{version}_bookworm-6` or `datadog/dd-trace-ci:bookworm-6` +No Docker daemon — tests run directly in the container. 
+ +→ **[appsec-native-tests.md](appsec-native-tests.md)** +Covers: `test appsec extension`, `test appsec helper asan`, `appsec lint`, `appsec code coverage` + +→ **[shared-zai-tea-tests.md](shared-zai-tea-tests.md)** +Covers: `Build & Test Tea`, `Extension Tea Tests`, `Zend Abstract Interface Tests`, +`ZAI Shared Tests`, `C components ASAN/UBSAN`, `Configuration Consistency` + +→ **[tracer-unit-tests.md](tracer-unit-tests.md)** +Covers: `Unit tests`, `PHP Language Tests`, `test_c`, `ASAN test_c`, `Opcache tests`, +`xDebug tests`, `test_extension_ci`, `test_distributed_tracing`, `test_composer`, +`test_auto_instrumentation`, `test_integration` + +--- + +### Group B — Native Linux web framework tests + +Runner: `arch:amd64` +Image: `datadog/dd-trace-ci:php-{version}_bookworm-6` +GitLab service containers: test-agent, httpbin, request-replayer + +→ **[tracer-web-tests.md](tracer-web-tests.md)** +Covers: `test_web_laravel_*`, `test_web_symfony_*`, `test_web_wordpress_*`, +`test_web_drupal_*`, `test_web_magento_*`, `test_web_slim_*`, `test_web_cakephp_*`, +`test_web_codeigniter_*`, `test_web_lumen_*`, `test_web_nette_*`, +`test_web_laminas_*`, `test_web_yii_*`, `test_web_zend_*`, `test_web_custom`, +`test_metrics` + +--- + +### Group C — Native Linux service integration tests + +Runner: `arch:amd64` +Image: `datadog/dd-trace-ci:php-{version}_bookworm-6` +GitLab service containers: MySQL, Redis, Kafka, Elasticsearch, MongoDB, etc. 
+ +→ **[tracer-integration-tests.md](tracer-integration-tests.md)** +Covers: `test_integrations_amqp*`, `test_integrations_curl`, `test_integrations_elasticsearch*`, +`test_integrations_guzzle*`, `test_integrations_kafka`, `test_integrations_memcach*`, +`test_integrations_mongodb*`, `test_integrations_monolog*`, `test_integrations_mysql*`, +`test_integrations_pdo`, `test_integrations_phpredis*`, `test_integrations_predis*`, +`test_integrations_roadrunner`, `test_integrations_swoole_5`, `test_integrations_openai_latest`, +`test_opentelemetry_*`, `test_opentracing_10`, `test_integrations_deferred_loading`, +`test_integrations_frankenphp`, `test_integrations_googlespanner_latest`, +`test_integrations_laminaslog2`, `test_integrations_pcntl`, `test_integrations_sqlsrv`, +`test_integrations_stripe_latest` + +--- + +### Group D — Native Linux compile / artifact build + +Runner: `arch:amd64` + `arch:arm64` +Image: `datadog/dd-trace-ci:php-{version}_bookworm-6` +Produces `.so` artifacts consumed by Groups B, C, H. 
+ +→ **[compile-artifacts.md](compile-artifacts.md)** +Covers: `compile extension: debug/release/zts/...` (tracer pipeline), +`compile tracing extension / sidecar / loader / asan` (package pipeline), +`compile appsec extension`, `compile appsec helper`, `compile appsec helper rust`, +`compile profiler extension`, `compile extension windows`, `link tracing extension`, +`aggregate tracing extension`, `pecl build`, `prepare code`, `cache cargo deps` + +--- + +### Group E — Docker-in-Docker Gradle integration tests (appsec) + +Runner: `docker-in-docker:amd64` +Image: `docker:24.0.4-gbi-focal` +Script: installs Java → Gradle → Gradle spins up Docker containers (PHP + helper + test-agent) + +→ **[appsec-gradle-integration.md](appsec-gradle-integration.md)** +Covers: `appsec integration tests [test7.0..test8.5-*]`, +`appsec integration tests (ssi) [test8.3-release-ssi]`, +`appsec integration tests (helper-rust) [test7.4, test8.1, test8.3-debug, test8.4-zts, test8.5-musl]`, +`helper-rust build and test`, `helper-rust code coverage`, `helper-rust integration coverage` + +*Note: Basic instructions are also in `appsec/helper-rust/CLAUDE.md`.* + +--- + +### Group F — System tests + +Runner: `docker-in-docker:amd64` +Image: `docker:24.0.4-gbi-focal` +Python-based `datadog/system-tests` framework; lives in `../../../system-tests/` + +→ **[system-tests.md](system-tests.md)** +Covers: `System Tests: [default]`, `System Tests: [parametric]`, +`System Tests: [APPSEC_API_SECURITY*]`, `System Tests: [INTEGRATIONS]`, +`System Tests: [CROSSED_TRACING_LIBRARIES]` + +→ **[system-tests-onboarding.md](system-tests-onboarding.md)** +Covers: `configure_system_tests` and onboarding/SSI scenario groups +(`simple_onboarding`, `lib-injection`, `docker-ssi`, etc.) — requires AWS +credentials; Vagrant path available but limited. 
+ +*Note: Basic instructions are also in `appsec/helper-rust/CLAUDE.md` under "System Tests".* + +--- + +### Group G — Docker-in-Docker package verification + +Runner: `docker-in-docker:amd64` +Image: `docker:24.0.4-gbi-focal` +Distinct from system tests; uses a different test harness. + +→ **[package-dind-verification.md](package-dind-verification.md)** +Covers: `framework test [flow, mongodb-driver, phpredis*, wordpress]` (and `*_no_ddtrace` variants), +`installer tests`, +`randomized tests [amd64, asan/no-asan, 1..5]` + +--- + +### Group H — Native Linux package verification (install / distribution smoke tests) + +Runner: `arch:amd64` + `arch:arm64` +Various distro base images (alpine, debian, centos) +Requires packaging artifacts from Group D / Group I. + +→ **[package-native-verification.md](package-native-verification.md)** +Covers: `verify alpine [*]`, `verify centos [*]`, `verify debian [*]`, +`verify .tar.gz`, `verify no json ext`, `verify windows`, +`Loader test on {amd64,arm64} {libc,alpine} [*]`, +`min install tests`, `pecl tests [*]`, `test early PHP 8.1`, +`x-profiling phpt tests on Alpine [*]` + +--- + +### Group I — Native Linux packaging & OCI publishing + +Runner: `arch:amd64` +Produces release packages and OCI images. Mostly relevant only on the release pipeline. 
+ +→ **[packaging-oci.md](packaging-oci.md)** +Covers: `package extension [*]`, `package loader [*]`, `package extension asan/windows`, +`datadog-setup.php`, `package-oci [*]`, `oci-internal-publish`, +`create-multiarch-lib-injection-image`, `kubernetes-injection-test-ecr-publish`, +`internal-publish-lib-init-tags`, `promote-oci-to-{staging,prod,prod-beta}`, +`bundle for reliability env`, `configure_system_tests`, `publishing-gate`, +`requirements_json_test`, `validate_supported_configurations_v2_local_file`, +`publish to public s3` + +--- + +### Group J — Benchmarks + +Runner: `runner:apm-k8s-tweaked-metal` / `runner:apm-k8s-same-cpu` +Dedicated performance hardware — not easily reproducible locally. + +→ **[benchmarks.md](benchmarks.md)** +Covers: `benchmarks-tracer`, `benchmarks-appsec`, `benchmarks-profiler`, +`macrobenchmarks [7.4]`, `macrobenchmarks [8.1]` + +--- + +### Group K — Windows + +Runner: `windows-v2:2019` + +→ **[windows-tests.md](windows-tests.md)** +Covers: `windows test_c`, `compile extension windows [*]`, `verify windows` + +--- + +### Group L — Docker image push (manual) + +Runner: `docker-in-docker:amd64/arm64` +Manual trigger only; pushes CI Docker images to ECR. + +→ **[docker-image-push.md](docker-image-push.md)** *(not yet written)* +Covers: `push appsec images [amd64/arm64]`, `push appsec docker images multiarch` + + + +--- + +### Group M — Lightweight utility / gate jobs + +Runner: `arch:amd64` | Minimal image, quick scripts — usually not the source of failures. + +Covers: `check libxml2 version`, `aggregate tested versions`, +`check-big-regressions`, `check-slo-breaches`, `notify-slo-breaches`, `finished` + +--- + +### Group N — GitHub Actions workflows + +Runs on GitHub-hosted `ubuntu-24.04` runners, not GitLab. Triggered on `pull_request` +and `schedule`. Completely separate CI system. 
+ +→ **[github-actions-profiler.md](github-actions-profiler.md)** +Covers: `Profiling correctness / prof-correctness [{8.0..8.5}, {nts,zts}]`, +`Profiling ASAN Tests / prof-asan [{8.3..8.5}, {arm64,amd64}]` + +→ **[github-actions-other.md](github-actions-other.md)** +Covers: `auto_check_snapshots`, `auto_label_prs`, `auto_add_pr_to_miletone`, +`add-asset-to-gh-release`, `update_latest_versions` + +→ **[github-actions-other.md](github-actions-other.md)** *(not yet written)* +Covers: `prof_asan`, `auto_check_snapshots`, `auto_label_prs`, +`auto_add_pr_to_miletone`, `add-asset-to-gh-release`, `update_latest_versions` + +## Improving job details + +See [meta-improv-instr.md](meta-improv-instr.md) for how to run the improvement +loop. In any case, should you find out-of-date or otherwise wrong information +in the job details files, or if you find undocumented but surprising details, +suggest to the user improvements to the files. Never apply those changes +without consulting the user. + +The common format of job details file is described in +[meta-job-group-doc.md](meta-job-group-doc.md). diff --git a/.claude/ci/meta-improv-instr.md b/.claude/ci/meta-improv-instr.md new file mode 100644 index 00000000000..1c4f107f783 --- /dev/null +++ b/.claude/ci/meta-improv-instr.md @@ -0,0 +1,106 @@ +# CI Documentation Improvement Loop + +This file describes a repeatable process for keeping CI reproduction docs +accurate and complete by running jobs locally and harvesting the gap between +what the docs say and what actually happens. + +## The Loop + +### Step 1 — Run the job locally via subagent + +Use the `/subagent_log` skill to launch a general-purpose subagent that reads +the CI docs and runs the job. The subagent prompt should be minimal and +self-contained — just name the job (e.g. "Execute locally the 'prof-correctness +(8.1, zts)'; do not give any other context or clues"). The subagent will read +the relevant CI docs itself and figure out what to run. 
+ +The `/subagent_log` skill wraps the Agent tool call, captures timing and token +usage, and returns a link to the HTML log. + +### Step 2 — Ask the subagent what was missing + +After the subagent finishes, resume it (via `SendMessage`) with: + +> What new information about running the tests did you find out that was not in +> `@.claude/ci/.md` (or `index.md`)? Suggest changes to speed up +> further evaluations. + +The subagent has the full context of what it actually ran, what failed, what it had to +work around, and what the docs said — so it can produce a precise diff of reality vs. +documentation. + +### Step 3 — Apply the changes + +Take the subagent's output and update the relevant docs: + +- **Job-specific gaps** → update the job-group `.md` file (e.g. + `github-actions-profiler.md`) +- **Gaps that apply across all job groups** → update `index.md` (e.g. NTS vs + ZTS crash behaviour, `--php` variant semantics, cache naming conventions) +- **Speed-up tips** → add inline in the relevant section, not in a separate + "tips" block + +Prefer editing existing sections over appending new ones — the goal is accurate +prose, not an ever-growing list of addenda. + +See `@.claude/ci/meta-job-group-doc.md` for a description of the format of the +job docs. + +## What Makes a Good Gap Report + +Ask the subagent to be concrete: exact commands, environment variables, file +paths, timing observations. Vague observations like "the docs could be clearer" +are not actionable. + +Good gaps to capture: + +- Commands that fail verbatim from the docs and require a fix +- Dependencies (packages, files, env vars) that must be present but are not + mentioned +- Paths or filenames that differ between the docs and reality +- Steps that are described as necessary but turn out to be no-ops (e.g. CI + checks `.lz4` but profiler emits `.zst` — the check is vacuously true) +- Speed differences large enough to matter (e.g. 
`EXECUTION_TIME=3` saves 50+ + seconds per run; `DD_PROFILING_LOG_LEVEL=warn` vs `trace` measurably affects + throughput) + +Not worth capturing: + +- Stylistic preferences +- Observations that are already implied by the docs +- Flaky test outcomes with no actionable fix + +## Fidelity to CI + +Reproduction scripts in the docs must match what CI actually does as closely as +possible. The source of truth is the CI job definition (e.g. +`.gitlab/generate-package.php`, `.github/workflows/`). When writing or updating +a "Reproducing Locally" example: + +- Read the real `before_script` / `script` and replicate the same commands, + environment variables, and package installs — do not invent steps or guess + what might be needed. +- Only deviate where the local environment genuinely differs (e.g. mounting + artifacts from a host path instead of relying on GitLab artifact download, + or using `dockerh` instead of a bare CI runner). +- If a step looks unnecessary, verify by checking the CI definition before + removing it — it may exist for a non-obvious reason (e.g. `apk add + ca-certificates` works around an Alpine TLS issue). 
+ +## Example + +**Job run:** `prof-correctness (8.1, zts)` via `/subagent_log` + +**Gaps found and applied:** + +| Gap | Where fixed | +|-----|-------------| +| `--php zts` required for ZTS builds (NTS headers crash ZTS PHP) | `index.md` — `--php` section | +| `parallel` PECL needs `libpcre2-dev` + GitHub URL for v1.2.7 | `github-actions-profiler.md` | +| `parallel.so` not persisted in overlay cache — copy to project dir | `github-actions-profiler.md` | +| Image tag must match PHP version under test | `github-actions-profiler.md` | +| `cargo rustc` must run from `profiling/` subdir | `github-actions-profiler.md` | +| Output path in docs differed from CI workflow path | `github-actions-profiler.md` | +| `EXECUTION_TIME=3` applies to all time-based tests, not just `allocations` | `github-actions-profiler.md` | +| `DD_PROFILING_LOG_LEVEL=warn` recommended (trace slows execution) | `github-actions-profiler.md` | +| "No profile" check is vacuously true (`.lz4` vs `.zst`) | `github-actions-profiler.md` | diff --git a/.claude/ci/meta-job-group-doc.md b/.claude/ci/meta-job-group-doc.md new file mode 100644 index 00000000000..2749b48afab --- /dev/null +++ b/.claude/ci/meta-job-group-doc.md @@ -0,0 +1,70 @@ +# Job-Group Documentation Format + +Each file in `.claude/ci/` documents one group of CI jobs that share the +same runner type, image, and execution model. + +## Section order + +1. **H1 title** — short name for the group +2. **`## CI Jobs`** — job names + source files + runner/image/matrix +3. **`## What It Tests`** — what the jobs build and run +4. **Reproduction sections** — one `##` per logical test target, each + with `### Full suite` → `### Single test` → named variants +5. **`## Gotchas`** — non-obvious facts that cause silent failure + +Omit any section that adds no value (e.g. trivial prerequisites). +Submodule init is documented in `general.md` — do not repeat it here. 
+ +## `## CI Jobs` format + +The **Source** line must come first — before the job table — so a reader +can jump straight to the definition. List every file that is materially +relevant to understanding or modifying the job (generator script, shell +scripts it calls, workflow file, action definitions, etc.). Then a table +with columns `CI Job`, `Image`, `What it does`: + +``` +## CI Jobs + +**Source:** +- `.gitlab/generate-appsec.php` — generates the appsec-trigger child + pipeline; defines the job matrix and `script:` sections inline +- `appsec/scripts/compile_extension.sh` — build script called by the job + +| CI Job | Image | What it does | +|--------|-------|--------------| +| `test appsec extension: [{ver}, {arch}, debug]` | `dd-trace-ci:php-{ver}_bookworm-6` | Builds extension + runs .phpt tests | +| `test appsec helper asan` | `dd-trace-ci:bookworm-6` | C++ helper ASAN gtest suite | +| `appsec code coverage` | `dd-trace-ci:php-8.3_bookworm-6` | (not needed locally) | + +Runner: `arch:amd64` + `arch:arm64` +Matrix: PHP 7.0+ × {debug, debug-zts, debug-zts-asan (7.4+)} +``` + +If there is only one source file, the single-line form is fine: + +``` +**Source:** `.github/workflows/prof_correctness.yml` +``` + +Use `{placeholder}` for matrix dimensions. Note any dimension that is +CI-only and has no effect on local commands (e.g. `{arch}` controls only +the runner tag). + +## Reproduction section rules + +- Show the full-suite command first; single-test second. Single-test is + the most frequent operation when iterating on a fix. +- Note CI/local divergences inline (not in Gotchas). E.g.: + - packages CI installs that are not needed locally + - env vars CI passes unconditionally that are harmless to include + - output paths that differ between CI and the local command +- Use `$(nproc)` not a hardcoded `-j4`. +- On macOS/Apple Silicon prefer `--platform linux/arm64`; use a separate + cache name per arch. 
+ +## Gotchas rules + +Each bullet must be a fact **not derivable from reading the commands** +that would cause silent failure or wasted time. Do not repeat anything +already stated inline in the reproduction commands. diff --git a/.claude/ci/package-dind-verification.md b/.claude/ci/package-dind-verification.md new file mode 100644 index 00000000000..256dce66eb4 --- /dev/null +++ b/.claude/ci/package-dind-verification.md @@ -0,0 +1,427 @@ +# Docker-in-Docker Package Verification + +These jobs verify that the packaged dd-trace-php extension works correctly +inside real-world framework containers and randomized PHP environments. They +run in the `verify` stage of the **package-trigger** child pipeline and require +Docker-in-Docker runners because they spin up containers internally. + +## CI Jobs + +**Source:** +- `.gitlab/generate-package.php` -- defines `framework test`, + `installer tests`, and `randomized tests` jobs +- `dockerfiles/frameworks/Makefile` -- Makefile invoked by + `framework test` +- `dockerfiles/frameworks/*.yml` -- docker-compose files for + each framework test suite +- `tests/randomized/` -- randomized test infrastructure +- `dockerfiles/verify_packages/` -- `test_installer` target + +| CI Job | Image | What it does | +|--------|-------|--------------| +| `framework test: [{suite}]` | `docker:24.0.4-gbi-focal` | Spins up a framework-specific Docker Compose stack and runs the framework's own test suite with ddtrace installed | +| `installer tests` | `docker:24.0.4-gbi-focal` | Runs `make -C dockerfiles/verify_packages test_installer`; verifies `datadog-setup.php` installer works on both amd64 and arm64 packages | +| `randomized tests: [amd64, {no-asan,asan}, {1..5}]` | `docker:24.0.4-gbi-focal` | Generates random PHP scenarios and runs them with ddtrace for 1m30s each; checks for crashes and unexpected behavior | + +Runner: `docker-in-docker:amd64` +Matrix (`framework test`): `{flow, flow_no_ddtrace, mongodb-driver, mongodb-driver_no_ddtrace, 
phpredis3, phpredis3_no_ddtrace, phpredis4, phpredis4_no_ddtrace, phpredis5, phpredis5_no_ddtrace, wordpress, wordpress_no_ddtrace}` +Matrix (`randomized tests`): `{no-asan, asan}` x `{1, 2, 3, 4, 5}` (arm64 variants exist but are commented out pending `docker-in-docker:arm64` runner availability) + +## What It Tests + +### framework test + +Each suite name maps to a docker-compose YAML in `dockerfiles/frameworks/`. The +Makefile: +1. Copies the built `.deb` package into + `dockerfiles/frameworks/nginx_file_server/ddtrace.deb` +2. Starts a docker-compose stack with the framework app + ddtrace installed +3. Runs the framework's own test suite against the app + +The `_no_ddtrace` variants run without ddtrace loaded, serving as a baseline to +confirm the framework itself is not broken. + +**Upstream artifacts needed:** +- `package extension: [amd64, x86_64-unknown-linux-gnu]` + +### installer tests + +Verifies that `datadog-setup.php` can correctly install the extension from the +built packages. Tests both amd64 and arm64 packages. + +**Upstream artifacts needed:** +- `package extension: [amd64, x86_64-unknown-linux-gnu]` +- `package extension: [arm64, aarch64-unknown-linux-gnu]` +- `datadog-setup.php` + +### randomized tests + +Generates 4 random PHP application scenarios per run, each exercising different +combinations of PHP versions, SAPIs (cli, fpm, apache), and extensions. Each +scenario runs for 1m30s with 2 concurrent jobs. The `analyze` step +post-processes results. + +The no-asan variant uses the regular glibc package; the asan variant uses the +ASAN-instrumented package to catch memory errors. + +**Upstream artifacts needed:** +- `package extension: [amd64, x86_64-unknown-linux-gnu]` (no-asan) or `package + extension asan` (asan variant) + +## Reproducing Locally + +All DinD verification jobs need packaged artifacts from upstream +compile/package jobs. 
Two ways to obtain them: + +- **From CI:** use `tooling/bin/download-artifacts` (e.g., `--preset + extension-amd64-gnu`, `--preset extension-asan`, `--preset datadog-setup`). + See "Downloading artifacts" in [index.md](index.md). +- **Build locally:** see the ".deb from source" section below. + +### Building a .deb from source + +The framework tests need a `.deb` containing `.so` variants for the PHP +version(s) used by the test containers. The wordpress test uses PHP 7.0 (API +20151012). + +`build-tracing.sh` produces both `.a` archives and standalone `.so` files. For +local builds, the standalone `.so` files can be used directly — no separate +sidecar build or link step needed. See also +[compile-artifacts.md](compile-artifacts.md) and +[packaging-oci.md](packaging-oci.md). + +**Step 1 — Compile tracing extension for PHP 7.0 (~49s):** + +```bash +cd ~/repos/dd-trace-php +git submodule update --init libdatadog + +.claude/ci/dockerh --cache compile-tracing-7.0-gnu \ + --overlayfs --root \ + datadog/dd-trace-ci:php-7.0_centos-7 \ + -e CI_COMMIT_SHA=$(git rev-parse HEAD) \ + -e CI_COMMIT_BRANCH=local-build \ + -- bash -c \ + 'PHP_VERSION=7.0 bash .gitlab/build-tracing.sh' +``` + +Produces `.a` archives in `extensions_x86_64/` and standalone `.so` files in +`standalone_x86_64/`, both in overlayfs volume `dd-ci-compile-tracing-7.0-gnu`. +Repeat with different `PHP_VERSION` and `--cache` for other versions. 
+ +**Step 2 — Package into .deb (~5s):** + +```bash +.claude/ci/dockerh --cache package-deb \ + --clean-cache --overlayfs --root \ + datadog/dd-trace-ci:php-8.1_centos-7 \ + -v dd-ci-compile-tracing-7.0-gnu:/cache-tracing:ro \ + -- bash -c ' + cp /cache-tracing/upper/standalone_x86_64/*.so \ + extensions_x86_64/ + make .deb.x86_64 + ' +``` + +Extract the `.deb`: + +```bash +mkdir -p build/packages +docker run --rm \ + -v dd-ci-package-deb:/cache:ro \ + -v $(pwd)/build/packages:/out \ + alpine sh -c \ + 'cp /cache/upper/build/packages/*.deb /out/' +``` + +**Step 3 — Run the framework test:** + +```bash +rm -f \ + dockerfiles/frameworks/nginx_file_server/ddtrace.deb +CI=true make -f dockerfiles/frameworks/Makefile wordpress +``` + +### framework test + +The `dockerfiles/frameworks/Makefile` has two modes: +- **Without `CI`:** auto-downloads a `.deb` from GitHub Releases (~8 MB). + Simplest for quick smoke tests. +- **With `CI=true`:** uses the `.deb` from `build/packages/`. Required to test + your own build. + +```bash +# Quick smoke test (downloads .deb from GitHub Releases): +make -f dockerfiles/frameworks/Makefile wordpress + +# Test with your own build: +tooling/bin/download-artifacts --preset extension-amd64-gnu +mkdir -p build/packages +cp packages/datadog-php-tracer_*.deb build/packages/ +CI=true make -f dockerfiles/frameworks/Makefile wordpress + +# No-ddtrace baseline: +make -f dockerfiles/frameworks/Makefile wordpress_no_ddtrace +``` + +### randomized tests + +Valid platform names are `centos7` and `buster` (defined in +`tests/randomized/config/platforms.php`), **not** `debian`. Only the no-asan + +centos7 combination currently works. See the `after_script` gotcha below for +why other combinations fail. + +The base services (Elasticsearch, etc.) must be started before the test +scenarios. + +```bash +# Place the package .tar.gz at the repo root +cp packages/dd-library-php-*-x86_64-linux-gnu.tar.gz . 
+ +# Start base services first +docker-compose \ + -f tests/randomized/lib/docker-compose.yml up -d + +# Generate and run (no-asan with centos7) +make -C tests/randomized library.local +make -C tests/randomized generate \ + PLATFORMS=centos7 NUMBER_OF_SCENARIOS=2 +make -C tests/randomized test \ + CONCURRENT_JOBS=2 DURATION=1m30s + +# Fix result file permissions (containers create as root) +docker run --rm \ + -v $(pwd)/tests/randomized/.tmp.scenarios/.results:/r \ + alpine chmod -R a+r /r +make -C tests/randomized analyze + +# Clean up +make -C tests/randomized clean +docker-compose \ + -f tests/randomized/lib/docker-compose.yml down +``` + +### installer tests + +The installer tests (`make -C dockerfiles/verify_packages test_installer`) run +~39 test scripts. Each spins up a Docker container, runs +`php ./build/packages/datadog-setup.php --php-bin php`, and verifies the +installed extension version matches `cat VERSION`. + +`datadog-setup.php` downloads tarballs from: +`{DD_TEST_INSTALLER_REPO}/releases/download/{RELEASE_VERSION}/dd-library-php-{RELEASE_VERSION}-{arch}-linux-{libc}.tar.gz` + +where `DD_TEST_INSTALLER_REPO` comes from `dockerfiles/verify_packages/.env` +and `RELEASE_VERSION` is baked into `datadog-setup.php` at build time (replaces +`@release_version@`). + +In CI, `generate-installers.sh` detects the `+` in the version and rewrites +the URL to point to S3. Running locally requires either waiting for the +`publish to public s3` CI job, or serving tarballs from a local HTTP server +(described below). + +#### Running with a local HTTP server (no S3 dependency) + +This approach works with both CI-downloaded and locally-built artifacts. 
+The key pieces that must all agree: + +- `VERSION` file must match the version baked into the compiled `.so` files +- `RELEASE_VERSION` in `build/packages/datadog-setup.php` must equal `VERSION` +- Tarball filenames must contain that version string +- `DD_TEST_INSTALLER_REPO` in `.env` must point to the HTTP server + +##### Step 1: Obtain the tarballs + +**Option A -- From CI artifacts:** + +```bash +tooling/bin/download-artifacts --preset extension-amd64-gnu \ + -o /tmp/ci-artifacts-gnu +tooling/bin/download-artifacts --preset extension-amd64-musl \ + -o /tmp/ci-artifacts-musl +``` + +The combined tarballs are the large files (~900MB gnu, ~700MB musl) whose +names do NOT contain a PHP API number. + +**Option B -- From local builds:** + +After running the full compile pipeline (see +[compile-artifacts.md](compile-artifacts.md)), generate tarballs: + +```bash +TRIPLET=x86_64-unknown-linux-gnu \ + bash tooling/bin/generate-final-artifact.sh "$(cat VERSION)" build/packages . +TRIPLET=x86_64-alpine-linux-musl \ + bash tooling/bin/generate-final-artifact.sh "$(cat VERSION)" build/packages . +``` + +The script needs compiled extensions in `extensions_x86_64/`, +`datadog-profiling/`, `appsec_x86_64/`, and `src/`. If built via +`dockerh --overlayfs`, extract from volumes first (see +[compile-artifacts.md](compile-artifacts.md)). 
+ +##### Step 2: Determine the version + +```bash +# From CI tarball filenames: +VERSION_STR=$(ls /tmp/ci-artifacts-gnu/dd-library-php-*-x86_64-linux-gnu.tar.gz \ + | sed 's|.*/dd-library-php-\(.*\)-x86_64-linux-gnu.tar.gz|\1|') + +# Or from locally-built packages: +VERSION_STR=$(cat VERSION) + +echo "Version: $VERSION_STR" +``` + +##### Step 3: Update VERSION and .env + +```bash +echo -n "$VERSION_STR" > VERSION + +# Docker bridge gateway IP (reachable from containers): +GATEWAY=$(docker network inspect bridge \ + --format '{{(index .IPAM.Config 0).Gateway}}') +echo "DD_TEST_INSTALLER_REPO=http://${GATEWAY}:8888" \ + > dockerfiles/verify_packages/.env +``` + +On Docker Desktop (macOS/Windows), use +`host.docker.internal` instead of `$GATEWAY`. + +##### Step 4: Build datadog-setup.php (non-CI path) + +The non-CI codepath preserves `DD_TEST_INSTALLER_REPO` +support (the CI path in `generate-installers.sh` hardcodes +S3 URLs): + +```bash +mkdir -p build/packages +sed "s|@release_version@|${VERSION_STR}|g" \ + ./datadog-setup.php > build/packages/datadog-setup.php +``` + +This step must come **after** writing `VERSION` (step 3). +The Makefile rule `build/packages/datadog-setup.php: VERSION` +re-runs `generate-installers.sh` whenever `VERSION` is newer +than `datadog-setup.php`. By running `sed` after the +`VERSION` write, the output file is naturally newer and Make +will not overwrite it. If `CI_JOB_ID` is set in the +environment, `generate-installers.sh` takes the CI branch +and hardcodes S3 URLs, breaking the local server setup. 
+ +##### Step 5: Set up directory structure and HTTP server + +```bash +mkdir -p "/tmp/fake-repo/releases/download/${VERSION_STR}/" + +# Copy combined tarballs (adjust source paths): +cp /tmp/ci-artifacts-gnu/dd-library-php-*-x86_64-linux-gnu.tar.gz \ + "/tmp/fake-repo/releases/download/${VERSION_STR}/" +cp /tmp/ci-artifacts-musl/dd-library-php-*-x86_64-linux-musl.tar.gz \ + "/tmp/fake-repo/releases/download/${VERSION_STR}/" + +# Also copy to build/packages/ for tests that use --file: +cp "/tmp/fake-repo/releases/download/${VERSION_STR}/dd-library-php-${VERSION_STR}-x86_64-linux-gnu.tar.gz" \ + build/packages/ + +# Start the server (proxies misses to GitHub for old +# versions needed by upgrade tests): +.claude/ci/serve-installer-packages /tmp/fake-repo & +``` + +##### Step 6: Run the tests + +```bash +# All tests: +make -C dockerfiles/verify_packages test_installer + +# Single test: +make -C dockerfiles/verify_packages test_first_install.sh +``` + +##### Step 7: Clean up + +```bash +git checkout VERSION dockerfiles/verify_packages/.env +kill %1 # stop HTTP server +``` + +#### How it works + +- `datadog-setup.php` first tries a per-PHP-API tarball (e.g., + `dd-library-php-{ver}-x86_64-linux-gnu-20190902.tar.gz`) which returns 404 + (only the combined tarball is served). It falls back to the combined tarball. +- Tests that install old versions download their `datadog-setup.php` from + GitHub, but those old scripts also read `DD_TEST_INSTALLER_REPO`. The proxy + forwards their requests to GitHub (`urllib` follows redirects). +- The `+` character in the version is safe in URL paths; Python's http.server + and PHP's curl handle it correctly. +- Building `datadog-setup.php` with plain `sed` instead of + `generate-installers.sh` avoids the CI codepath that rewrites URLs to S3. + +## Gotchas + +- The `framework test` job installs `docker-compose` v2.36.0 as a standalone + binary (`/usr/local/bin/docker-compose`), not the Docker Compose plugin. 
The + Makefile invokes `docker-compose` (hyphenated), not `docker compose`. + +- `randomized tests` arm64 variants are **commented out** in the generator, + waiting for a `docker-in-docker:arm64` runner. + +- Each `randomized tests` index (1--5) runs an independent set of 4 randomly + generated scenarios. The 5 parallel instances give coverage breadth; there is + no deduplication. + +- The `_no_ddtrace` framework test variants exist to detect false positives: if + `wordpress_no_ddtrace` also fails, the problem is in the test environment, + not in ddtrace. + +- `installer tests` needs packages from **both** architectures (amd64 + arm64) + even on an amd64 runner, because `datadog-setup.php` is tested for its + ability to select the correct package. + +- **`installer tests` VERSION mismatch.** The installer compares the installed + extension version against `VERSION`. CI runs `append-build-id.sh` which bumps + it (e.g. `1.17.0` -> `1.18.0+`). Without this step, tests fail with + "Wrong ddtrace version". + +- **`installer tests` downloads from S3 by default.** In CI, + `datadog-setup.php` fetches archives from S3. To run locally without waiting + for `publish to public s3`, use the local HTTP server approach described + above. + +- **Randomized test platform names are `centos7` and `buster`** (defined in + `tests/randomized/config/platforms.php`), not `debian`. Only the no-asan + + centos7 combination actually works (see the `after_script` gotcha below). + +- **Randomized test result files are root-owned.** Docker containers create + result files as `root:root` mode 600. Without root access, `make analyze` + fails with "Permission denied". Fix with: `docker run --rm -v + $(pwd)/tests/randomized/.tmp.scenarios/.results:/r alpine chmod -R a+r /r` + +- **Elasticsearch 7.17.4 crashes on modern kernels.** On hosts with cgroupv2, + the ES container in `tests/randomized/lib/docker-compose.yml` crashes. + Updating to `elasticsearch:7.17.28` fixes it. 
+ +- **Framework test mysql containers persist.** After running `wordpress`, the + mysql:5.7 container stays running. To fully clean up: `docker-compose -f + dockerfiles/frameworks/nginx_file_server.yml -f + dockerfiles/frameworks/wordpress.yml down` + +- **Randomized tests `analyze` runs in `after_script` -- failures are silently + ignored.** The `make ... analyze` step in `.gitlab/generate-package.php` + (line 806) runs in `after_script`, which GitLab treats as non-fatal. When + `analyze` exits with code 1, GitLab logs `WARNING: after_script failed` and + marks the job as succeeded. The result is that **randomized tests provide + zero coverage on buster** and **zero ASAN coverage on any platform**. Only + the no-asan + centos7 scenarios actually execute the extension, and even + those failures are masked. Root causes: + - **ASAN + buster:** the ASAN `.so` is built on bookworm (glibc 2.36) but + buster has glibc 2.28 -- `GLIBC_2.29 not found` at load time. + - **ASAN + centos7:** centos7 images only have NTS PHP, but the ASAN package + only contains `debug-zts` variants. + - **No-asan + buster:** buster images have `debug-zts` PHP, but the no-asan + package does not include debug-zts variants. To fix: move `analyze` from + `after_script` to `script`, and either create bookworm-based randomized + test images or restrict `RANDOMIZED_RESTRICT_PLATFORMS` per variant. diff --git a/.claude/ci/package-native-verification.md b/.claude/ci/package-native-verification.md new file mode 100644 index 00000000000..097132f5730 --- /dev/null +++ b/.claude/ci/package-native-verification.md @@ -0,0 +1,365 @@ +# Native Package Verification + +These jobs verify that the packaged dd-trace-php extension installs and runs correctly +on real distribution base images (Alpine, CentOS, Debian, Ubuntu, Windows) and in +specialized configurations (PECL, minimal installs, SSI loader, profiling). 
They run +in the `verify` stage of the **package-trigger** child pipeline and use native runners +(no Docker-in-Docker), though some use GitLab service containers. + +## CI Jobs + +**Source:** +- `.gitlab/generate-package.php` -- defines all jobs listed below +- `dockerfiles/verify_packages/verify.sh` -- main verification script for distro jobs +- `dockerfiles/verify_packages/{alpine,centos,debian}/install.sh` -- per-distro install helpers +- `dockerfiles/verify_packages/verify_tar_gz_root.sh` -- tar.gz ownership verification +- `dockerfiles/verify_packages/verify_no_ext_json.sh` -- JSON-less PHP verification +- `dockerfiles/verify_packages/verify_windows.ps1` -- Windows verification +- `loader/bin/test.sh` -- loader test runner + +| CI Job | Image | What it does | +|--------|-------|--------------| +| `verify alpine: [{packages}, {image}, {install_type}]` | `alpine:{ver}` or `php:{ver}-fpm-alpine` | Installs ddtrace on Alpine via php_installer or native_package; verifies CLI + FPM produce traces | +| `verify centos: [{php_ver}, {install_type}]` | `centos:7` | Installs ddtrace on CentOS 7 with remi PHP packages; verifies CLI + Apache produce traces | +| `verify debian: [{php_ver}, {install_type}, {image}]` | `debian:{bullseye,bookworm}-slim` | Installs ddtrace on Debian with sury PHP packages; verifies CLI + FPM + Apache produce traces | +| `verify .tar.gz: [{arch}]` | `debian:bullseye-slim` | Extracts `.tar.gz` package, verifies file ownership is root, runs `post-install.sh`, checks `php --ri=ddtrace` | +| `verify no json ext` | `alpine:3.12` | Installs ddtrace on Alpine without the JSON PHP extension; verifies it still loads and works | +| `verify windows` | Windows runner (no container image) | See [windows-tests.md](windows-tests.md) | +| `Loader test on {arch} libc: [{ver}, {flavour}]` | `dd-trace-ci:php-{ver}_{suffix}` | Extracts SSI loader package, runs `loader/bin/test.sh` phpt tests; optionally runs `check_glibc_version.sh` | +| `Loader test on {arch} 
alpine` | `alpine:3.20` | Installs PHP 8.3 from apk, extracts SSI loader musl package, runs `loader/bin/test.sh` | +| `min install tests` | `dd-trace-ci:php-8.0-shared-ext` | Installs `.deb` package via `dpkg`, runs `make run_tests` + `make test_c` against the installed extension | +| `pecl tests: [{ver}]` | `dd-trace-ci:php-{ver}_bookworm-6` | Installs ddtrace from PECL `.tgz`, runs `pecl run-tests` against the installed extension | +| `test early PHP 8.1` | `ubuntu:jammy` | Installs stock Ubuntu 22.04 PHP 8.1 (no sury), installs ddtrace via `datadog-setup.php`, runs `pecl run-tests` | +| `x-profiling phpt tests on Alpine: [{ver}]` | `dd-trace-ci:php-compile-extension-alpine-{ver}` | Installs full package on Alpine via `datadog-setup.php --enable-profiling`, runs profiling phpt tests | + +### Runners and matrices + +**verify alpine:** +Runner: `arch:amd64` +Matrix: (Alpine 3.8+ including latest, with `php7`/`php` packages) + (`php:{ver}-fpm-alpine` for PHP 7.0+) +Install types: `php_installer` (uses `datadog-setup.php`) and `native_package` (uses `.apk`) + +**verify centos:** +Runner: `arch:amd64` +Matrix: PHP 7.0--8.3 x {php_installer, native_package} + +**verify debian:** +Runner: `arch:amd64` +Matrix: PHP 7.0+ x {php_installer, native_package} x {bullseye-slim, bookworm-slim} + +**verify .tar.gz:** +Runner: `arch:{amd64,arm64}` (amd64 tests PHP 7.0 package, arm64 tests PHP 8.1 package) + +**verify no json ext:** +Runner: `arch:amd64` + +**verify windows:** See [windows-tests.md](windows-tests.md). 
+ +**Loader test on {arch} libc:** +Runner: `arch:{amd64,arm64}` +Matrix (amd64): PHP 5.6 (buster) + 7.0--7.3 (nts) + 7.4+ (nts + zts, with valgrind) +Matrix (arm64): PHP 7.0--7.3 (nts) + 7.4+ (nts + zts) + +**Loader test on {arch} alpine:** +Runner: `arch:{amd64,arm64}` + +**min install tests:** +Runner: `arch:amd64` + +**pecl tests:** +Runner: `arch:amd64` +Matrix: PHP 7.0+ + +**test early PHP 8.1:** +Runner: `arch:amd64` + +**x-profiling phpt tests on Alpine:** +Runner: `arch:amd64` +Matrix: PHP 7.1+ + +## What It Tests + +### verify {alpine,centos,debian} + +These are the primary distribution smoke tests. They: + +1. Install PHP from the distro's package manager (apk/yum/apt with sury) +2. Install ddtrace using either `datadog-setup.php` (`php_installer` type) or + the native package (`.apk`/`.rpm`/`.deb` via `native_package` type) +3. Run `dockerfiles/verify_packages/verify.sh` which: + - Starts a CLI PHP script and checks it produces traces (sent to + `request-replayer`) + - Starts Apache or FPM (depending on distro) and checks HTTP requests + produce traces + - Verifies `phpinfo()` shows ddtrace loaded + +The `request-replayer` GitLab service container acts as a mock trace agent, +recording submitted traces for verification. + +### verify .tar.gz + +Extracts the `.tar.gz` package to `/` and verifies: +- `/opt` and `/opt/datadog-php` are owned by root (not the build user) +- `post-install.sh` runs successfully +- `php --ri=ddtrace` shows the extension info + +### Loader tests + +Test the SSI (Single Step Instrumentation) library loader, which is a minimal +PHP extension that loads the full ddtrace extension at runtime. The loader +package is extracted and `loader/bin/test.sh` runs the loader's own phpt test +suite. The glibc version check (`check_glibc_version.sh`) verifies the loader +binary does not require a newer glibc than the target platform provides. 
+ +### pecl tests + +Verifies the PECL distribution path: installs from the `.tgz` built by `pecl +build`, enables the extension, and runs `pecl run-tests` which executes the +phpt test suite from the installed PECL package. + +### test early PHP 8.1 + +Specifically tests against Ubuntu 22.04's stock PHP 8.1 (without the sury PPA), +which is an older patchlevel than what the CI images ship. This catches +compatibility issues with early 8.1 builds (e.g., missing symbols, changed +APIs). + +### x-profiling phpt tests on Alpine + +Installs the full package with `--enable-profiling` on Alpine and runs the +profiling extension's phpt test suite. This verifies that the profiler works +correctly on musl libc. + +## Upstream Artifacts + +All jobs need artifacts from packaging jobs (Group I / Group D): + +| Job | Needs | +|-----|-------| +| `verify alpine` | `package extension: [amd64, x86_64-alpine-linux-musl]` + `datadog-setup.php` | +| `verify centos` | `package extension: [amd64, x86_64-unknown-linux-gnu]` + `datadog-setup.php` | +| `verify debian` | `package extension: [amd64, x86_64-unknown-linux-gnu]` + `datadog-setup.php` | +| `verify .tar.gz: [amd64]` | `package extension: [amd64, x86_64-unknown-linux-gnu]` + `datadog-setup.php` | +| `verify .tar.gz: [arm64]` | `package extension: [arm64, aarch64-unknown-linux-gnu]` + `datadog-setup.php` | +| `verify no json ext` | `package extension: [amd64, x86_64-alpine-linux-musl]` | +| `verify windows` | `package extension windows` + `datadog-setup.php` | +| `Loader test on {arch} libc` | `package loader: [{arch}]` | +| `Loader test on {arch} alpine` | `package loader: [{arch}]` | +| `min install tests` | `package extension: [amd64, x86_64-unknown-linux-gnu]` | +| `pecl tests` | `pecl build` | +| `test early PHP 8.1` | `package extension: [amd64, x86_64-unknown-linux-gnu]` + `datadog-setup.php` | +| `x-profiling phpt tests on Alpine` | `package extension: [amd64, x86_64-alpine-linux-musl]` + `datadog-setup.php` | + +## 
Reproducing Locally + +Most of these jobs are difficult to reproduce locally because they require packaged +artifacts from upstream compile/package jobs. Two ways to obtain them: + +- **From CI:** use `tooling/bin/download-artifacts` to download preset packages + (e.g., `--preset extension-amd64-gnu`, `--preset ssi-amd64`, `--preset datadog-setup`). + See the "Downloading artifacts" section in [index.md](index.md) for full usage. +- **Build locally:** follow [compile-artifacts.md](compile-artifacts.md) to compile + the extension and packaging artifacts from source. + +Once you have the artifacts, place them in the expected directory structure and +run the verification script inside the appropriate container via `dockerh`. + +All examples below use `dockerh --overlayfs`. Jobs that install packages or +write to system directories (verify distro, verify .tar.gz, verify no json ext) +need **`--root`** so the container stays as root. Jobs that only run tests as a +regular user (loader tests, pecl tests) omit `--root`. + +### verify {alpine,centos,debian} + +These scripts run `apt`/`apk`/`yum install` and start services → use `--root`. +The CI `before_script` does `mkdir build; mv packages build` then installs +`curl` (and `INSTALL_PACKAGES` on Alpine). The `script` is just +`./dockerfiles/verify_packages/verify.sh`. `datadog-setup.php` is used from +the repo checkout (CWD), not from the packages directory. + +```bash +# Example: Debian bookworm, PHP 8.3, php_installer. 
+# Start request-replayer first (needed for trace verification): +docker network create verify-net 2>/dev/null || true +docker rm -f replayer 2>/dev/null || true +docker run -d --name replayer --network verify-net \ + --network-alias request-replayer \ + datadog/dd-trace-ci:php-request-replayer-2.0 + +.claude/ci/dockerh --cache verify-debian-83 --overlayfs --root \ + debian:bookworm-slim \ + -v /path/to/packages:/artifacts:ro \ + --network verify-net \ + -e DD_AGENT_HOST=request-replayer -e DD_TRACE_AGENT_PORT=80 \ + -e DD_TRACE_AGENT_FLUSH_INTERVAL=1000 \ + -e PHP_VERSION=8.3 -e INSTALL_MODE=sury -e INSTALL_TYPE=php_installer \ + -- bash -c ' + mkdir -p build/packages + cp /artifacts/dd-library-php-*-x86_64-linux-gnu.tar.gz build/packages/ + apt update && apt-get install -y curl + ./dockerfiles/verify_packages/verify.sh + ' + +# Cleanup +docker rm -f replayer; docker network rm verify-net +``` + +For Alpine, replace the image and adjust the before_script to match CI: + +```bash +.claude/ci/dockerh --cache verify-alpine --overlayfs --root \ + alpine:3.20 \ + -v /path/to/packages:/artifacts:ro \ + --network verify-net \ + -e DD_AGENT_HOST=request-replayer -e DD_TRACE_AGENT_PORT=80 \ + -e DD_TRACE_AGENT_FLUSH_INTERVAL=1000 \ + -e VERIFY_APACHE=no -e INSTALL_TYPE=php_installer \ + -- sh -c ' + mkdir -p build/packages + cp /artifacts/dd-library-php-*-x86_64-linux-musl.tar.gz build/packages/ + cp /artifacts/*.apk build/packages/ + apk add --no-cache ca-certificates curl php php-fpm php-json + ./dockerfiles/verify_packages/verify.sh + ' +``` + +Without request-replayer, for a basic "does it load" check, just verify +`php --ri=ddtrace` works after installation. + +### verify .tar.gz + +The CI `before_script` is just `mkdir build; mv packages build`. 
+The `script` runs `./dockerfiles/verify_packages/verify_tar_gz_root.sh`, which +calls `dockerfiles/verify_packages/tar_gz/install.sh` (installs PHP from sury), +extracts the tar.gz to `/`, checks ownership, runs `post-install.sh`, then +`php --ri=ddtrace`. Uses `--root` (needs apt and writes to `/opt`). + +```bash +.claude/ci/dockerh --cache verify-targz --overlayfs --root \ + debian:bullseye-slim \ + -v /path/to/packages:/artifacts:ro \ + -e PHP_VERSION=7.0 \ + -- bash -c ' + mkdir -p build/packages + cp /artifacts/datadog-php-tracer-*.x86_64.tar.gz build/packages/ + ./dockerfiles/verify_packages/verify_tar_gz_root.sh + ' +``` + +Note: CI uses PHP 7.0 on amd64, PHP 8.1 on arm64. + +### verify no json ext + +The CI `before_script` is the same as `verify alpine` (the +`&verify_alpine_before_script` anchor): `mkdir build; mv packages build; +apk add ca-certificates curl`. The `script` runs +`./dockerfiles/verify_packages/verify_no_ext_json.sh`. Uses `--root` (needs +apk). + +```bash +.claude/ci/dockerh --cache verify-nojson --overlayfs --root \ + alpine:3.12 \ + -v /path/to/packages:/artifacts:ro \ + -- sh -c ' + mkdir -p build/packages + cp /artifacts/*.apk build/packages/ + apk add --no-cache ca-certificates curl + ./dockerfiles/verify_packages/verify_no_ext_json.sh + ' +``` + +### pecl tests + +Runs as non-root (no `--root`). + +```bash +.claude/ci/dockerh --cache pecl-8.3 --overlayfs --php nts \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- \ + bash -c ' + cp /project/dd-trace-php/pecl/datadog_trace-*.tgz ./datadog_trace.tgz + pecl install datadog_trace.tgz + echo "extension=ddtrace.so" | sudo tee $(php -i | awk -F"=> " "/Scan this dir/ {print \$2}")/ddtrace.ini + php --ri=ddtrace + ' +``` + +### Loader tests (libc) + +Runs as non-root (no `--root`). The CI `before_script` sets +`XDEBUG_SO_NAME` per PHP version, calls `switch-php $PHP_FLAVOUR`, extracts +the SSI package, and copies the loader `.so` into `loader/modules/`. 
With +`--overlayfs` the repo is read-only underneath, so copy `loader/` to `/tmp` +first. + +```bash +.claude/ci/dockerh --cache loader-8.3 --overlayfs --php nts \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + -v /path/to/packages:/artifacts:ro \ + -- bash -c ' + export XDEBUG_SO_NAME=xdebug-3.3.2.so + mkdir -p extracted/ + tar --no-same-owner --no-same-permissions --touch \ + -xzf /artifacts/dd-library-php-ssi-*-linux.tar.gz -C extracted/ + export DD_LOADER_PACKAGE_PATH=${PWD}/extracted/dd-library-php-ssi + cp -a loader /tmp/loader-work && cd /tmp/loader-work + mkdir -p modules + cp ${DD_LOADER_PACKAGE_PATH}/linux-gnu/loader/dd_library_loader.so modules/ + ./bin/test.sh + ./bin/check_glibc_version.sh + ' +``` + +### Loader tests (Alpine musl) + +Installs PHP from apk → use `--root`. The CI `before_script` installs +`curl-dev php83 php83-dev php83-pecl-xdebug bash`, sets `XDEBUG_SO_NAME`, +extracts the SSI package in-place, and copies the musl loader `.so`. + +```bash +.claude/ci/dockerh --cache loader-alpine --overlayfs --root \ + alpine:3.20 \ + -v /path/to/packages:/artifacts:ro \ + -- sh -c ' + apk add --no-cache curl-dev php83 php83-dev php83-pecl-xdebug bash + export XDEBUG_SO_NAME=xdebug.so + tar -xzf /artifacts/dd-library-php-ssi-*-x86_64-linux.tar.gz + export DD_LOADER_PACKAGE_PATH=${PWD}/dd-library-php-ssi + cp -a loader /tmp/loader-work && cd /tmp/loader-work + mkdir -p modules + cp ${DD_LOADER_PACKAGE_PATH}/linux-musl/loader/dd_library_loader.so modules/ + ./bin/test.sh + ' +``` + +## Gotchas + +- `verify centos` targets CentOS 7 which is **EOL**. The job contains + workarounds to use `vault.centos.org` instead of the defunct + `mirrorlist.centos.org`, with retry logic for the unreliable vault mirror. + These jobs may flake due to mirror issues. + +- The `INSTALL_TYPE` dimension has two values: `php_installer` (uses + `datadog-setup.php`, the recommended end-user path) and `native_package` + (uses distro package manager directly). 
Both must pass for a release. + +- `verify no json ext` specifically tests on Alpine 3.12 without the + `php7-json` package. The JSON extension was bundled into PHP core starting + with PHP 8.0, so this test is relevant for PHP 7.x on Alpine where JSON is a + separate package that users might not install. + +- `Loader test on amd64 libc` includes a `USE_VALGRIND: "true"` matrix + dimension for PHP 7.4+ that enables Valgrind leak checking. The arm64 variant + does not have this (Valgrind is too slow on emulated arm64). + +- `test early PHP 8.1` deliberately removes the opcache ini file (`rm + /etc/php/8.1/cli/conf.d/10-opcache.ini`) and blanks the sources_path setting + to test the extension in a minimal configuration matching what early Ubuntu + 22.04 users would have. + +- `x-profiling phpt tests on Alpine` uses the Alpine **compile** images + (`php-compile-extension-alpine-{ver}`), not the regular bookworm CI images, + because it needs a musl-based PHP installation. + +- The `verify` jobs use the `request-replayer` service container (not the + test-agent) as a mock trace backend. It records raw trace payloads at + `/replay` for assertions. 
diff --git a/.claude/ci/packaging-oci.md b/.claude/ci/packaging-oci.md new file mode 100644 index 00000000000..ce26172cc46 --- /dev/null +++ b/.claude/ci/packaging-oci.md @@ -0,0 +1,145 @@ +# Packaging and OCI Publishing + +## CI Jobs + +**Source:** +- `.gitlab/generate-package.php` — generates the package-trigger child pipeline; + all `script:` sections defined inline +- `.gitlab/prepare-oci-package.sh` — unpacks loader tar.gz and + strips debug files before OCI publish +- `tooling/bin/generate-final-artifact.sh` — assembles per-triplet release packages +- `tooling/bin/generate-ssi-package.sh` — assembles SSI (loader) package +- `.gitlab/one-pipeline.locked.yml` — includes the shared one-pipeline template + that defines all OCI, promotion, and publishing jobs + +All compile, link, and aggregate jobs (`compile tracing extension`, +`compile tracing sidecar`, `link tracing extension`, `aggregate tracing extension`, +`compile appsec extension`, `compile appsec helper`, `compile appsec helper rust`, +`compile profiler extension`, `compile loader`, `compile extension windows`) are +documented in [compile-artifacts.md](compile-artifacts.md). 
+ +| CI Job | Image | What it does | +|--------|-------|--------------| +| `package extension: [{arch}, {triplet}]` | `dd-trace-ci:php_fpm_packaging` | Assembles `.deb`, `.rpm`, `.tar.gz`, `.apk` packages for one platform | +| `package loader: [{arch}]` | same | Assembles the SSI loader package | +| `datadog-setup.php` | same | Builds the `datadog-setup.php` installer via `make` | +| `requirements_json_test` | (one-pipeline template) | Validates `loader/packaging/block_tests.json` / `allow_tests.json` | +| `validate_supported_configurations_v2_local_file` | (one-pipeline template) | Validates `metadata/supported-configurations.json` against central schema | +| `package-oci` | (one-pipeline template) | Packages loader artifacts into an OCI image layer | +| `oci-internal-publish` | (one-pipeline template) | Publishes the OCI image to internal ECR | +| `create-multiarch-lib-injection-image` | (one-pipeline template) | Creates amd64+arm64 multi-arch manifest | +| `kubernetes-injection-test-ecr-publish` | (one-pipeline template) | Publishes to ECR for Kubernetes injection tests | +| `promote-oci-to-staging` / `prod-beta` / `prod` | (one-pipeline template) | Progressive OCI promotion | +| `publishing-gate` | (one-pipeline template) | Final gate before production promotion | +| `publish to public s3` | `amazon/aws-cli:2.17.32` | Uploads packages to `s3://dd-trace-php-builds/{VERSION}/` | +| `publish release to github` | `php:8.2-cli` | Creates GitHub release + uploads assets (release branches only) | +| `bundle for reliability env` | `ci_docker_base:67145216` | Bundles setup script + tar for the reliability env | + +Runner: `arch:amd64` for all packaging and publishing jobs. 
+ +Platform matrix for `package extension`: +- `[amd64, x86_64-alpine-linux-musl]` — Alpine/musl +- `[arm64, aarch64-alpine-linux-musl]` — Alpine/musl arm64 +- `[amd64, x86_64-unknown-linux-gnu]` — glibc (centos-7 image) +- `[arm64, aarch64-unknown-linux-gnu]` — glibc arm64 + +## What It Produces + +- `.deb`, `.rpm`, `.tar.gz` for amd64 and arm64 (glibc) +- `.apk` for amd64 and arm64 (musl/Alpine) +- `dbgsym.tar.gz` — Windows debug symbols +- `dd-library-php-ssi-*-{x86_64,aarch64}-linux.tar.gz` — SSI loader packages +- `datadog-setup.php` — universal installer script +- OCI image — lib-injection image for Kubernetes auto-instrumentation + +## Data Flow + +``` +compile tracing extension ─┐ + + link tracing extension │ +compile appsec extension ─┤ +compile appsec helper ─┤→ generate-final-artifact.sh → .tar.gz +compile appsec helper rust─┤ │ +compile profiler extension─┤ v +compile loader ─┘ nfpm → .deb/.rpm/.apk + │ + prepare-oci-package.sh → OCI image +``` + +Intermediate artifacts (`extensions_*/`, `appsec_*/`, +`datadog-profiling/`) feed into `generate-final-artifact.sh`, which +produces per-platform `.tar.gz` tarballs. Those are then packaged +into `.deb`/`.rpm`/`.apk` by nfpm, and into OCI images by +`prepare-oci-package.sh`. See +[building-locally.md](building-locally.md#release-package-assembly) +for prerequisites and argument details. + +## Local Reproduction + +These jobs assemble release packages from compiled artifacts and +rarely fail. The most common need is to inspect the generated package +structure. See +[building-locally.md](building-locally.md#release-package-assembly) +for the `generate-final-artifact.sh` and `generate-ssi-package.sh` +commands. + +```bash +# Inspect what the OCI step unpacks. +# Requires packages/dd-library-php-ssi-*.tar.gz in the parent dir. +# OS and ARCH are required — the script silently exits without them. +mkdir -p oci-work && cd oci-work +OS=linux ARCH=amd64 bash ../.gitlab/prepare-oci-package.sh +ls -R sources/ +cd .. 
+``` + +**Jobs defined in the one-pipeline template** (`package-oci`, +`oci-internal-publish`, `create-multiarch-lib-injection-image`, +`kubernetes-injection-test-ecr-publish`, `promote-oci-to-*`, +`publishing-gate`, `requirements_json_test`, +`validate_supported_configurations_v2_local_file`) **cannot be fully +reproduced locally** — their scripts live in a remote GitLab template +at `gitlab-templates.ddbuild.io`. Only `prepare-oci-package.sh` +(the preparation step for `package-oci`) can be tested locally as +shown above. + +## Gotchas + +- **`one-pipeline.locked.yml` is auto-generated.** It contains a single + `include: remote:` pointing to `gitlab-templates.ddbuild.io`. The OCI, + promotion, and publishing-gate jobs are defined in that remote template — their + exact `script:` is not in this repo. + +- **`requirements_json_test` always runs** (its `rules:` force `when: on_success`). + Failures indicate a malformed JSON in `loader/packaging/`. + +- **`validate_supported_configurations_v2_local_file` needs no prerequisites** + (`needs: []`). It validates `metadata/supported-configurations.json` against the + Datadog-wide schema in the one-pipeline template infrastructure. + +- **`publish to public s3` is manual on non-master branches.** Automatic only on + `master` non-schedule runs. Requires `prepare code`, + `datadog-setup.php`, `package extension windows`, all + `package extension`, and all `package loader` jobs to have + succeeded. + +- **`bundle for reliability env` only runs on nightly builds or release branches.** + Manual with `allow_failure: true` on all other branches. + +- **Compile images for glibc packages use `centos-7`**, not + `bookworm`. See the "centos-7 vs bookworm" gotcha in + [compile-artifacts.md](compile-artifacts.md) for details. + +- **`package loader` depends on many upstream compile jobs** — appsec helper (C++ + and Rust), loader (glibc and musl), tracing extension aggregates, sidecar, all + appsec and profiler extension versions. 
A single upstream failure blocks packaging. + See [building-locally.md § SSI Loader Package Assembly](building-locally.md#ssi-loader-package-assembly) + for local reproduction and important caveats (empty stubs do not work; + `standalone_*/` not `extensions_*/`; must run on amd64). + +- **`php: command not found` warnings in `php_fpm_packaging`.** The packaging + image does not have `php` on PATH. Make evaluates all `$(shell ...)` variable + definitions at parse time (lines like `PHP_EXTENSION_DIR`, `PHP_MAJOR_MINOR`, + `ASAN`, `XDEBUG_SO_FILE`), even for targets that never use them. The packaging + targets only use `VERSION`, `ARCHITECTURE`, and fpm/tarball logic — the + PHP-dependent variables are irrelevant. The warnings are harmless. diff --git a/.claude/ci/serve-installer-packages b/.claude/ci/serve-installer-packages new file mode 100755 index 00000000000..3d15b6fa29b --- /dev/null +++ b/.claude/ci/serve-installer-packages @@ -0,0 +1,124 @@ +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.11" +# /// +""" +serve-installer-packages - HTTP server for local installer tests. + +Serves extension tarballs from a local directory for dd-trace-php installer +tests. Requests for files not found locally are proxied to GitHub releases, +so tests that install old released versions (e.g., test_upgrade_from_php_installer.sh) +still work. 
+
+Usage:
+    # Serve tarballs from /tmp/fake-repo on port 8888 (default):
+    serve-installer-packages /tmp/fake-repo
+
+    # Custom port:
+    serve-installer-packages /tmp/fake-repo --port 9999
+
+The directory must contain tarballs at the path:
+    releases/download/{VERSION}/dd-library-php-{VERSION}-{arch}-linux-{libc}.tar.gz
+
+Example setup:
+    VERSION=1.18.0+abc123
+    mkdir -p /tmp/fake-repo/releases/download/$VERSION
+    cp dd-library-php-$VERSION-x86_64-linux-gnu.tar.gz \\
+        /tmp/fake-repo/releases/download/$VERSION/
+    serve-installer-packages /tmp/fake-repo
+
+Then set DD_TEST_INSTALLER_REPO=http://<gateway-ip>:8888 (the Docker bridge
+gateway IP, or host.docker.internal on Docker Desktop) in
+dockerfiles/verify_packages/.env and run the tests.
+
+See package-dind-verification.md for the full procedure.
+"""
+
+import argparse
+import http.server
+import os
+import sys
+import urllib.request
+import urllib.error
+from pathlib import Path
+
+GITHUB_BASE = "https://github.com/DataDog/dd-trace-php"
+
+
+class ProxyHandler(http.server.SimpleHTTPRequestHandler):
+    """Serves local files; proxies misses to GitHub."""
+
+    serve_dir: str = "."
+ + def __init__(self, *args, **kwargs): + super().__init__(*args, directory=self.serve_dir, **kwargs) + + def log_message(self, format, *args): + sys.stderr.write(f"[http] {format % args}\n") + + def do_GET(self): + if os.path.isfile(self.translate_path(self.path)): + return super().do_GET() + self._proxy("GET") + + def do_HEAD(self): + if os.path.isfile(self.translate_path(self.path)): + return super().do_HEAD() + self._proxy("HEAD") + + def _proxy(self, method: str): + url = GITHUB_BASE + self.path + self.log_message("Proxying %s -> %s", method, url) + try: + req = urllib.request.Request(url, method=method) + with urllib.request.urlopen(req, timeout=120) as resp: + self.send_response(resp.status) + for h in ("Content-Type", "Content-Length", + "Content-Disposition"): + v = resp.getheader(h) + if v: + self.send_header(h, v) + self.end_headers() + if method != "HEAD": + while True: + chunk = resp.read(65536) + if not chunk: + break + self.wfile.write(chunk) + except urllib.error.HTTPError as e: + self.send_error(e.code, str(e)) + except Exception as e: + self.send_error(502, str(e)) + + +def main(): + parser = argparse.ArgumentParser( + description="HTTP server for local installer tests. 
" + "Serves local files, proxies misses to GitHub.", + ) + parser.add_argument( + "directory", type=Path, + help="Directory to serve (must contain releases/download/...)", + ) + parser.add_argument( + "--port", type=int, default=8888, + help="Port to listen on (default: 8888)", + ) + args = parser.parse_args() + + if not args.directory.is_dir(): + print(f"Error: {args.directory} is not a directory", file=sys.stderr) + sys.exit(1) + + ProxyHandler.serve_dir = str(args.directory.resolve()) + server = http.server.HTTPServer(("0.0.0.0", args.port), ProxyHandler) + print(f"Serving {args.directory.resolve()} on port {args.port}") + print(f" Misses proxied to {GITHUB_BASE}") + print(f" Ctrl+C to stop") + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nStopped.") + + +if __name__ == "__main__": + main() diff --git a/.claude/ci/shared-zai-tea-tests.md b/.claude/ci/shared-zai-tea-tests.md new file mode 100644 index 00000000000..567d5692d64 --- /dev/null +++ b/.claude/ci/shared-zai-tea-tests.md @@ -0,0 +1,283 @@ +# Shared Pipeline — ZAI, TEA, and C Components Tests + +## CI Jobs + +**Source:** `.gitlab/generate-shared.php` — generates the shared-trigger +child pipeline; all job definitions and matrices are inline. 
+ +| CI Job | Image | What it does | +|--------|-------|-------------| +| `Build & Test Tea` | `dd-trace-ci:php-{ver}_bookworm-6` | Builds the TEA (Test Execution Abstraction) library from `tea/`, runs its ctest suite, installs artifacts for downstream jobs | +| `Zend Abstract Interface Tests: [{ver}, {variant}]` | `dd-trace-ci:php-{ver}_bookworm-6` | Builds and tests the ZAI library (`zend_abstract_interface/`) against a specific PHP variant | +| `Extension Tea Tests: [{ver}, {variant}]` | `dd-trace-ci:php-{ver}_bookworm-6` | Builds ddtrace.so via `make install`, then builds and runs the extension-level TEA tests in `tests/tea/` | +| `ZAI Shared Tests: [{ver}]` | `dd-trace-ci:php-{ver}-shared-ext` | Runs ZAI tests with shared extensions (curl, json) on a special image; only PHP 7.4 and 8.0 | +| `C components ASAN` | `dd-trace-ci:centos-7`, `dd-trace-ci:php-compile-extension-alpine`, `dd-trace-ci:bookworm-6` | Builds C components (`components/`) with ASAN (on Debian) or plain Debug (on CentOS/Alpine), runs ctest | +| `C components UBSAN` | `dd-trace-ci:bookworm-6` | Builds C components with UBSAN, runs ctest with `--repeat until-fail:10` | +| `Configuration Consistency` | `dd-trace-ci:php-{latest}_bookworm-6` | Runs `tooling/generate-supported-configurations.sh` and verifies `metadata/supported-configurations.json` is up-to-date | + +Runner: `arch:amd64` (all jobs in this pipeline are amd64-only) + +Matrix: +- **Build & Test Tea**: PHP 7.0+ x {debug, debug-zts-asan (7.4+), + nts, zts}. Pre-7.4 versions skip `debug-zts-asan` and use + `debug-zts` instead. +- **ZAI Tests**: same matrix as TEA, plus UBSAN toolchain for `debug` + variant on PHP 7.4+. +- **Extension Tea Tests**: PHP 7.0+ x {debug, debug-zts-asan + (7.4+), nts, zts}. Pre-7.4 skips `debug-zts-asan`. +- **ZAI Shared Tests**: PHP 7.4, 8.0 only, `nts` variant only. +- **C components ASAN**: three images (centos-7, alpine, bookworm-6); + ASAN toolchain only on Debian (bookworm). 
+- **C components UBSAN**: bookworm-6 only. +- **Configuration Consistency**: latest PHP version, single run. + +## What It Tests + +**TEA** (`tea/`) is a small C library that wraps PHP's Zend Engine for +test scaffolding. `Build & Test Tea` compiles it with cmake, runs its +own tests, and installs it to `tmp/tea/{variant}/` so downstream jobs +can reference it as `Tea_ROOT`. + +**ZAI** (`zend_abstract_interface/`) contains C abstractions over Zend +internals (config, sandbox, interceptor, etc.). The ZAI tests link +against the TEA artifacts and exercise each ZAI component. The ASAN +variant uses `cmake/asan.cmake` and the UBSAN variant uses +`cmake/ubsan.cmake`. + +**Extension Tea Tests** (`tests/tea/`) test ddtrace extension internals +using the TEA framework. They first build ddtrace.so (`make install`) +then build the cmake project in `tests/tea/`. + +**ZAI Shared Tests** run on a special image (`php-{ver}-shared-ext`) +where PHP extensions like curl are shared (.so) rather than built-in. +This tests that ZAI works correctly when extensions are loaded via +`extension=curl.so`. Uses `-DRUN_SHARED_EXTS_TESTS=1` and +`TEA_INI_IGNORE=0`. + +**C components** (`components/`) are standalone C modules tested with +Catch2. ASAN and UBSAN runs detect memory errors and undefined +behavior respectively. + +**Configuration Consistency** verifies that the checked-in +`metadata/supported-configurations.json` matches what the generator +script produces from current source. Fails if they diverge. 
+ +## Build & Test Tea + +### Full suite + +```bash +.claude/ci/dockerh --cache tea-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +mkdir -p tmp/build-tea-debug +cd tmp/build-tea-debug +CMAKE_PREFIX_PATH=/opt/catch2 cmake \ + -DCMAKE_INSTALL_PREFIX=../../tmp/tea/debug \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TEA_TESTING=ON \ + ../../tea +make -j$(nproc) all +make install +make test ARGS="--output-on-failure" +' +``` + +Replace `8.3` and `debug` with the desired PHP version and variant. +For ASAN, add `-DCMAKE_TOOLCHAIN_FILE=../../cmake/asan.cmake` and use +`--php debug-zts-asan` with a separate cache name. + +### Single test + +```bash +.claude/ci/dockerh --cache tea-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +cd tmp/build-tea-debug +make test ARGS="--output-on-failure -R tea_sapi" +' +``` + +The `-R` flag is a ctest regex filter on test names. Use +`ctest --test-dir tmp/build-tea-debug -N` to list available tests. + +## Zend Abstract Interface Tests + +`--overlayfs` is required: the ZAI cmake build links `components_rs`, +which triggers a cargo build of `libdd-libunwind-sys`. That crate's +`build.rs` decided that running `git submodule update --init` from +inside a build script was a reasonable thing to do, so it writes into +`.git/modules/` — which fails on a read-only mount. + +### Full suite + +Requires TEA artifacts from the previous step to exist at +`tmp/tea/{variant}/`. 
+ +```bash +.claude/ci/dockerh --cache tea-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +mkdir -p tmp/build_zai && cd tmp/build_zai +CMAKE_PREFIX_PATH=/opt/catch2 Tea_ROOT=../../tmp/tea/debug \ + cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_ZAI_TESTING=ON \ + -DPhpConfig_ROOT=$(php-config --prefix) \ + ../../zend_abstract_interface +make -j$(nproc) all +make test ARGS="--output-on-failure" +grep -e "=== Total [0-9]+ memory leaks detected ===" \ + Testing/Temporary/LastTest.log && exit 1 || true +' +``` + +For ASAN variant: add `-DCMAKE_TOOLCHAIN_FILE=../../cmake/asan.cmake`, +use `--php debug-zts-asan`, and a separate cache name. + +### Single test + +```bash +.claude/ci/dockerh --cache tea-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +cd tmp/build_zai +ctest --output-on-failure -R config +' +``` + +## Extension Tea Tests + +### Full suite + +Requires TEA artifacts at `tmp/tea/{variant}/`. + +```bash +.claude/ci/dockerh --cache ext-tea-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +make install +mkdir -p tmp/build_ext-tea && cd tmp/build_ext-tea +CMAKE_PREFIX_PATH=/opt/catch2 Tea_ROOT=../../tmp/tea/debug \ + cmake -DCMAKE_BUILD_TYPE=Debug -S ../../tests/tea +cmake --build . --parallel +make test ARGS="--output-on-failure" +grep -e "=== Total [0-9]+ memory leaks detected ===" \ + Testing/Temporary/LastTest.log && exit 1 || true +' +``` + +`--overlayfs` is needed because `make install` and cmake write into the +source tree (see [index.md](index.md) for details). 
+ +### Single test + +```bash +.claude/ci/dockerh --cache ext-tea-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +cd tmp/build_ext-tea +ctest --output-on-failure -R "" +' +``` + +## C Components ASAN / UBSAN + +### Full suite (ASAN, bookworm) + +```bash +.claude/ci/dockerh --cache components-asan \ + datadog/dd-trace-ci:bookworm-6 -- bash -c ' +set -e +mkdir -p tmp/build_php_components_asan && cd tmp/build_php_components_asan +CMAKE_PREFIX_PATH=/opt/catch2 cmake \ + -DCMAKE_TOOLCHAIN_FILE=../../cmake/asan.cmake \ + -DCMAKE_BUILD_TYPE=Debug -DDATADOG_PHP_TESTING=ON \ + ../../components +make -j$(nproc) all +make test ARGS="--output-on-failure" +' +``` + +For UBSAN, replace `asan` with `ubsan` in the toolchain file and +directory name. UBSAN in CI runs with `--repeat until-fail:10` to +catch non-deterministic issues (mainly in the channel component). + +### Full suite (CentOS / Alpine) + +On CentOS-7 and Alpine images there is no ASAN toolchain file, so +cmake runs without `-DCMAKE_TOOLCHAIN_FILE`: + +```bash +.claude/ci/dockerh --cache components-centos \ + datadog/dd-trace-ci:centos-7 -- bash -c ' +set -e +if [ -f "/opt/libuv/lib/pkgconfig/libuv.pc" ]; then + export PKG_CONFIG_PATH="/opt/libuv/lib/pkgconfig:$PKG_CONFIG_PATH" +fi +if [ -d "/opt/catch2" ]; then + export CMAKE_PREFIX_PATH=/opt/catch2 +fi +mkdir -p tmp/build_php_components_asan && cd tmp/build_php_components_asan +cmake -DCMAKE_BUILD_TYPE=Debug -DDATADOG_PHP_TESTING=ON ../../components +make -j$(nproc) all +make test ARGS="--output-on-failure" +' +``` + +Replace `centos-7` with `php-compile-extension-alpine` for the Alpine +variant. + +### Single test + +```bash +cd tmp/build_php_components_asan +ctest --output-on-failure -R "" +``` + +## Configuration Consistency + +This job is quick and has no build step. 
It is unlikely to require +local reproduction, but if needed: + +```bash +.claude/ci/dockerh --cache config-consistency --overlayfs \ + --php nts \ + datadog/dd-trace-ci:php-8.5_bookworm-6 -- bash -c ' +bash tooling/generate-supported-configurations.sh +' +``` + +The script writes `ext/version.h` as a side effect; `--overlayfs` +absorbs this into the overlay volume. + +If the output differs from the committed +`metadata/supported-configurations.json`, the CI job fails. Fix by +running the script locally and committing the result. + +## Gotchas + +- **TEA must be built before ZAI or Extension Tea Tests, using the same `--cache` name.** + Each `dockerh` cache gets its own `tmp/` overlay. If the ZAI command uses a different + `--cache` than the TEA build, `tmp/tea/debug/` will be empty and cmake fails with + `Could not find a package configuration file provided by "Tea"`. The examples above + all use `--cache tea-8.3-debug` for both TEA and ZAI. + +- **The `--php` variant must match the TEA variant.** TEA artifacts + at `tmp/tea/debug/` are built against the debug PHP ABI. Using them + with `--php nts` (or vice versa) will produce link or runtime + errors. + +- **ZAI Shared Tests use a different image** (`php-{ver}-shared-ext`) + that is not available for all PHP versions. Only 7.4 and 8.0 are + tested. This image is not easily reproducible locally since the + shared-ext images are custom CI builds. + +- **C components tests do not require PHP.** The `bookworm-6` base + image (no PHP version suffix) is sufficient. The centos-7 and alpine + images need the `PKG_CONFIG_PATH` / `CMAKE_PREFIX_PATH` env vars + for libuv and Catch2 respectively. + +- **UBSAN test repeats are intentional.** The `--repeat until-fail:10` + flag in CI catches non-deterministic UB in the channel component. + Locally you can drop it for faster iteration. + +- **Memory leak grep.** Both TEA and ZAI jobs grep `LastTest.log` for + `=== Total [0-9]+ memory leaks detected ===` and fail if found. 
+ This catches PHP-level memory leaks that ctest itself does not + treat as failures. diff --git a/.claude/ci/system-tests-onboarding.md b/.claude/ci/system-tests-onboarding.md new file mode 100644 index 00000000000..617fb62542d --- /dev/null +++ b/.claude/ci/system-tests-onboarding.md @@ -0,0 +1,408 @@ +# Onboarding / SSI System Tests + +These tests validate Single-Step Instrumentation (SSI) and library injection on +real VMs or Docker containers. They live in a separate repository +(`datadog/system-tests`) and are orchestrated by the `one-pipeline` shared +template included via `.gitlab/one-pipeline.locked.yml`. The dd-trace-php CI +only configures which scenario groups to run. + +Unlike other CI job groups, these tests **cannot be reproduced with `dockerh`**. +They require either AWS infrastructure or (with severe limitations) Vagrant. + +## CI Jobs + +**Source:** +- `.gitlab/generate-package.php` -- defines `configure_system_tests` (lines 114-117) +- `.gitlab/one-pipeline.locked.yml` -- shared template that expands + `configure_system_tests` into the actual child jobs +- `system-tests` repo (`~/repos/system-tests`) -- test runner, scenarios, and + weblog definitions + +The `configure_system_tests` job in `generate-package.php` sets: + +```yaml +configure_system_tests: + variables: + SYSTEM_TESTS_SCENARIOS_GROUPS: "simple_onboarding,simple_onboarding_profiling,simple_onboarding_appsec,lib-injection,lib-injection-profiling,docker-ssi" + ALLOW_MULTIPLE_CHILD_LEVELS: "false" +``` + +The shared template expands each scenario group into jobs that: +1. Build the tracer OCI image from the pipeline's packaging artifacts +2. Spin up AWS EC2 instances (or Docker containers for `docker-ssi`) +3. Install the Datadog Agent + library injector + tracer via SSI +4. 
Run the weblog application and validate traces against the Datadog backend + +| Scenario group | What it tests | +|---|---| +| `simple_onboarding` | Basic SSI auto-injection on various OS/arch combinations | +| `simple_onboarding_profiling` | SSI with continuous profiling enabled | +| `simple_onboarding_appsec` | SSI with AppSec enabled | +| `lib-injection` | Kubernetes lib-injection (init container injection) | +| `lib-injection-profiling` | Kubernetes lib-injection with profiling | +| `docker-ssi` | Docker-based SSI (no VM, uses Docker-in-Docker) | + +Runner: `docker-in-docker:amd64` (all scenario groups) +Image: `docker:24.0.4-gbi-focal` + +### CI secrets + +CI fetches secrets from AWS SSM (parameter store). The relevant parameters: + +| SSM parameter | Maps to env var | +|---|---| +| `ci.dd-trace-php.dd-api-key-onboarding` | `DD_API_KEY_ONBOARDING` | +| `ci.dd-trace-php.dd-app-key-onboarding` | `DD_APP_KEY_ONBOARDING` | +| `ci.dd-trace-php.onboarding-aws-infra-subnet-id` | `ONBOARDING_AWS_INFRA_SUBNET_ID` | +| `ci.dd-trace-php.onboarding-aws-infra-security-groups-id` | `ONBOARDING_AWS_INFRA_SECURITY_GROUPS_ID` | + +Backend validation hits the production `system-tests` Datadog organization at +`dd.datadoghq.com`. The API/APP keys come from that org. + +## What It Tests + +The onboarding tests verify that the full SSI installation flow works +end-to-end: the Datadog Agent installs correctly, the injector injects the +tracer into the PHP process, and traces arrive at the Datadog backend. This +covers: + +- Package installation via the Datadog installer (`install.datad0g.com` for dev, + `install.datadoghq.com` for prod) +- Auto-injection of the PHP tracer into Apache/FPM/CLI processes +- Correct trace submission to the Agent and then to the backend +- Profiling and AppSec activation when those scenario groups are selected + +## Local Reproduction: AWS (recommended) + +This is the faithful reproduction path -- it uses the same infrastructure as CI. 
+ +### Prerequisites + +1. **AWS access** to the `dev-apm-dcs-system-tests` account. Request access via: + + +2. **Pulumi >= 3.69.0** installed and logged in locally: + ```bash + pulumi login --local + ``` + +3. **aws-vault** installed and the + `sso-dev-apm-dcs-system-tests-account-admin` SSO profile configured + in `~/.aws/config`. This specific account is required — it has the + AMI mappings, IAM instance profiles, and VPC networking that the + tests expect. Generic sandbox accounts (e.g. + `k9-security-ecosystems-sandbox`) will fail with + `collecting instance settings: empty result`. + + Request access via: + + + Authenticate via SSO, then verify: + ```bash + aws sso login --profile sso-dev-apm-dcs-system-tests-account-admin + aws-vault exec sso-dev-apm-dcs-system-tests-account-admin -- \ + aws sts get-caller-identity + ``` + See the AWS SSO setup guide: + + +4. **system-tests repo** cloned at `~/repos/system-tests` (sibling of + `dd-trace-php`). + +5. Build the runner image (installs Python deps + Pulumi providers): + ```bash + cd ~/repos/system-tests + ./build.sh -i runner + ``` + +### Required environment variables + +```bash +export DD_API_KEY_ONBOARDING= +export DD_APP_KEY_ONBOARDING= +export ONBOARDING_AWS_INFRA_SUBNET_ID=subnet-0597477128c3d3a6b +export ONBOARDING_AWS_INFRA_SECURITY_GROUPS_ID=sg-02e547f03cf2b5955 +export ONBOARDING_LOCAL_TEST=true +export SKIP_AMI_CACHE=true +export PULUMI_CONFIG_PASSPHRASE="" +``` + +The API/APP keys are from the system-tests Datadog organization: + + +The subnet and security group defaults above are documented in the wizard and +should work for the `dev-apm-dcs-system-tests` account. 
+ +### Running a test + +```bash +cd ~/repos/system-tests + +# Simple onboarding scenario, PHP 8.3 container weblog, dev env, Amazon Linux 2023 +aws-vault exec sso-dev-apm-dcs-system-tests-account-admin -- \ + ./run.sh SIMPLE_INSTALLER_AUTO_INJECTION \ + --vm-weblog test-app-php-container-83 \ + --vm-env dev \ + --vm-library php \ + --vm-provider aws \ + --vm-only Amazon_Linux_2023_amd64 +``` + +Key flags: +- `--vm-env dev` -- uses `install.datad0g.com` (dev snapshots); use `prod` for + released versions +- `--vm-provider aws` -- provisions real EC2 instances via Pulumi +- `--vm-only ` -- restricts to a single VM; without it, all VMs in the + matrix run (slow and expensive) +- `--vm-weblog ` -- selects the PHP weblog variant + +To test a custom tracer build from your pipeline: +```bash +export DD_INSTALLER_LIBRARY_VERSION="pipeline-" +``` + +### Using the wizard (interactive) + +The system-tests repo provides an interactive wizard that prompts for all +variables and builds the `run.sh` command: + +```bash +cd ~/repos/system-tests +./build.sh -i runner +source venv/bin/activate +bash utils/scripts/ssi_wizards/aws_onboarding_wizard.sh +``` + +The wizard will prompt for AWS credentials, scenario, weblog, VM, and +environment, then offer to execute the final command. + +### Keeping VMs alive for debugging + +```bash +export ONBOARDING_KEEP_VMS=true +``` + +When set, VMs are not destroyed after the test. You can SSH into them to +inspect logs. Remember to destroy the Pulumi stack manually when done: + +```bash +aws-vault exec sso-dev-apm-dcs-system-tests-account-admin -- pulumi destroy +``` + +## Local Reproduction: Vagrant (limited -- not recommended) + +Vagrant replaces AWS with local VMs. Replace `--vm-provider aws` with +`--vm-provider vagrant` in the `run.sh` invocation. This avoids the need for +AWS credentials and infrastructure. + +**This path has significant limitations and most tests will not pass.** + +### Why Vagrant does not work well + +1. 
**No real Amazon Linux 2023.** Vagrant maps `Amazon_Linux_2023_amd64` to + `generic/centos9s` (CentOS 9 Stream), which is not the same OS. Package + repositories, default packages, and system behavior differ. + +2. **`podman-docker` instead of Docker.** On CentOS 9, `yum install docker` + installs the `podman-docker` shim, not the actual Docker daemon. + `docker-compose` cannot connect to `/var/run/docker.sock`, so container + weblogs never start. + +3. **IPv6 resolution failures under QEMU.** QEMU's user-mode networking only + supports IPv4. Go programs (like the Datadog installer) bypass `gai.conf` + and attempt IPv6 resolution for `install.datad0g.com`, causing connection + failures. + +4. **Fabric API breakage.** The Vagrant code path uses Fabric 1.x syntax + (`from fabric.api import ...`), but the current `system-tests` virtualenv + installs Fabric 3.x, which removed `fabric.api`. + +5. **Bugs in VM provisioning code.** `utils/virtual_machine/virtual_machines.py` + has issues at lines 277/285: dict key access without `.get()` and + `self.name` vs `self.vm.name` mismatches that cause `KeyError` or + `AttributeError`. + +6. **Backend 401 errors.** Even if the VM starts, backend validation requires + valid `DD_API_KEY_ONBOARDING` / `DD_APP_KEY_ONBOARDING` pointing at the + system-tests Datadog org. Without these, trace validation fails with 401. + +**Net result:** the container weblog never starts on most VM types, and even +with workarounds (enable podman socket, symlink, disable IPv6 at kernel level), +backend validation still fails without the API keys. + +## Docker-SSI scenarios + +The `docker-ssi` scenario group does not use VMs. It runs entirely in +Docker and **can be reproduced locally** without AWS access. + +### Prerequisites + +1. **system-tests repo** cloned (e.g. `~/repos/system-tests`). +2. 
**Python venv** with system-tests installed: + ```bash + cd ~/repos/system-tests + uv venv venv --python 3.12 + source venv/bin/activate + uv pip install --upgrade pip setuptools==75.8.0 + uv pip install -e . + ``` + (Or use `./build.sh -i runner` if you have a system python3.12 + with venv/ensurepip support.) + Both approaches are cached — re-run only after pulling new commits + in the system-tests repo. +3. **Docker** available on the machine. +4. `DD_API_KEY_ONBOARDING` and `DD_APP_KEY_ONBOARDING` env vars. + Docker-SSI tests validate traces against a local test agent, + not the Datadog backend — any non-empty value works (e.g. + `export DD_API_KEY_ONBOARDING=deadbeef`). The AWS VM scenarios + require real keys from the system-tests Datadog org. + +### Non-interactive run + +Use `./run.sh` directly. The `--ssi-base-image` flag takes the +**Docker image name** (not the friendly name from the JSON matrix). +Resolve names via `utils/docker_ssi/docker_ssi_images.json`. + +```bash +cd ~/repos/system-tests +source venv/bin/activate + +# PHP docker-ssi on Ubuntu 22.04 amd64, prod env +DD_API_KEY_ONBOARDING= DD_APP_KEY_ONBOARDING= \ + ./run.sh DOCKER_SSI \ + --ssi-weblog php-app \ + --ssi-library php \ + --ssi-base-image 'public.ecr.aws/lts/ubuntu:22.04' \ + --ssi-arch linux/amd64 \ + --ssi-env prod + +# With AppSec enabled +DD_API_KEY_ONBOARDING= DD_APP_KEY_ONBOARDING= \ + ./run.sh DOCKER_SSI_APPSEC \ + --ssi-weblog php-app \ + --ssi-library php \ + --ssi-base-image 'public.ecr.aws/lts/ubuntu:22.04' \ + --ssi-arch linux/amd64 \ + --ssi-env prod +``` + +To test a specific PHP runtime version (matching the CI matrix): +```bash +./run.sh DOCKER_SSI \ + --ssi-weblog php-app \ + --ssi-library php \ + --ssi-base-image 'public.ecr.aws/lts/ubuntu:22.04' \ + --ssi-arch linux/amd64 \ + --ssi-env prod \ + --ssi-installable-runtime 8.3 +``` + +Without `--ssi-installable-runtime`, no specific PHP version is +installed — the base image's system PHP is used (PHP 8.1 on Ubuntu 
+22.04). In CI, every version from 5.6 to 8.3 runs as a separate +matrix cell (from `utils/docker_ssi/docker_ssi_runtimes.json`). + +Available PHP scenarios: `DOCKER_SSI`, `DOCKER_SSI_APPSEC`. +`DOCKER_SSI_APPSEC` does **not** exercise AppSec attack detection or +WAF rules — it only verifies that `DD_APPSEC_ENABLED=true` is +propagated through SSI injection and reported in telemetry. If +`DOCKER_SSI` passes but `DOCKER_SSI_APPSEC` fails for the same +runtime, the issue is in how the installer handles +`DD_APPSEC_ENABLED`, not in AppSec logic. + +Available PHP weblogs: `php-app`. +Available base images for PHP: `Ubuntu_22_amd64` (`public.ecr.aws/lts/ubuntu:22.04`), +`Ubuntu_22_arm64` (same image, `linux/arm64`). + +Additional flags: +- `-B` / `--ssi-force-build` — force rebuild all Docker layers + (skip local cache). Only needed when debugging install scripts or + testing changes to the Dockerfile chain. +- `--ssi-installable-runtime ` — install a specific PHP + runtime version. Available: 5.6, 7.0–7.4, 8.0–8.3. + +Use `--ssi-env dev` to test development snapshots (from +`install.datad0g.com`), or `--ssi-env prod` for released versions. + +To test a custom tracer build from a CI pipeline: +```bash +./run.sh DOCKER_SSI ... --ssi-library-version pipeline- +``` + +SSI tests (both Docker-SSI and VM-based) always install the tracer +from the OCI registry — there is no way to inject a locally built +`.tar.gz`. To test local changes via SSI, push a branch and use +`--ssi-library-version pipeline-`. + +To test locally built packages without SSI (via the traditional +`datadog-setup.php` install path), see +[system-tests.md](system-tests.md) § Reproducing Locally. Those +tests exercise the same tracer/appsec code but do not test the SSI +injection mechanism itself. 
+ +### Interactive wizard + +Alternatively, use the interactive wizard: + +```bash +cd ~/repos/system-tests +source venv/bin/activate +bash utils/scripts/ssi_wizards/docker_ssi_wizard.sh +``` + +The wizard prompts for language, scenario, weblog, and base image, +then constructs and runs the `./run.sh` command. + +## Gotchas + +- The `one-pipeline` shared template is fetched from a remote URL locked in + `.gitlab/one-pipeline.locked.yml`. The actual job definitions (runner tags, + script steps, secret mappings) are not visible in the dd-trace-php repo -- + you must look at the `system-tests` repo and the shared template to + understand what runs. + +- `--vm-env dev` uses `install.datad0g.com` (development package repository), + while `--vm-env prod` uses `install.datadoghq.com` (production). When + testing unreleased changes, always use `dev`. + +- `SKIP_AMI_CACHE=true` is required for local runs. Without it, the test + framework tries to look up cached AMIs that only exist in CI. + +- `ONBOARDING_LOCAL_TEST=true` adjusts behavior for local execution (e.g., + skipping CI-specific artifact paths). + +- EC2 instances cost money. Always use `--vm-only` to restrict to a single VM + when iterating. If using `ONBOARDING_KEEP_VMS=true`, destroy the stack when + done. + +- The subnet and security group values are specific to the + `dev-apm-dcs-system-tests` AWS account. Using a different account requires + different networking values. + +- `--ssi-base-image` takes the **Docker image name** (e.g. + `public.ecr.aws/lts/ubuntu:22.04`), not the friendly name from the + JSON matrix (e.g. `Ubuntu_22_amd64`). Passing the friendly name + causes `invalid reference format: repository name must be lowercase`. + Look up the mapping in `utils/docker_ssi/docker_ssi_images.json`. + +- Building system-tests venv requires `g++` (for the `brotli` C++ + extension used by `mitmproxy`). If `g++` is not available, install + it before running `uv pip install -e .`. 
+ +- The CI matrix for Docker-SSI PHP runs **every PHP version + (5.6–8.3) × every base image × every scenario** as separate + parallel jobs. For `php-app` on Ubuntu 22.04, that is 10 runtime + versions × 2 architectures × 2 scenarios = 40 jobs. Use + `--ssi-installable-runtime` to test a single version locally. + +- First local Docker-SSI run is slow (~5–7 min) due to Docker image + builds (OS deps, PHP runtime, SSI installer). Subsequent runs + reuse the Docker cache and take ~1–2 min. The CI ECR image cache + (`PRIVATE_DOCKER_REGISTRY`) is not available locally. + +- On Apple Silicon, Docker-SSI scenarios may need + `DOCKER_DEFAULT_PLATFORM=linux/amd64` to match CI behavior. + +- After a Docker-SSI run, logs are in `logs_docker_ssi/` (or + `logs_docker_ssi_appsec/`) under the system-tests directory. + The scenario name is lowercased by the framework. diff --git a/.claude/ci/system-tests.md b/.claude/ci/system-tests.md new file mode 100644 index 00000000000..70f0d91e342 --- /dev/null +++ b/.claude/ci/system-tests.md @@ -0,0 +1,313 @@ +# System Tests + +## CI Jobs + +**Source:** `.gitlab/generate-package.php` -- the `.system_tests` template and +individual job definitions immediately below it. The template defines the base +image, `before_script` (Docker + Python setup, clone of `system-tests` repo, +artifact placement, `./build.sh`), and artifact collection. Each concrete job +extends this template and runs `./run.sh` with the appropriate scenario. 
+ +| CI Job | Scenario argument | What it does | +|--------|-------------------|--------------| +| `System Tests: [default]` | *(none -- default scenario)* | Core tracer + AppSec default scenario | +| `System Tests: [APPSEC_API_SECURITY]` | `APPSEC_API_SECURITY` | API Security with schema types | +| `System Tests: [APPSEC_API_SECURITY_RC]` | `APPSEC_API_SECURITY_RC` | API Security remote config | +| `System Tests: [APPSEC_API_SECURITY_NO_RESPONSE_BODY]` | `APPSEC_API_SECURITY_NO_RESPONSE_BODY` | API Security without response body | +| `System Tests: [INTEGRATIONS]` | `INTEGRATIONS` | Library integrations scenario | +| `System Tests: [CROSSED_TRACING_LIBRARIES]` | `CROSSED_TRACING_LIBRARIES` | Cross-library distributed tracing | +| `System Tests: [parametric]` | `PARAMETRIC` | Parametric tests (language-agnostic API conformance) | +| `System Tests: [tracer-release]` | *(dynamic)* | Tracer release scenarios; master/scheduled only, 4h timeout | + +Runner: `docker-in-docker:amd64` +Image: `python:3.12-slim-bullseye` (the job itself installs Docker +inside the container) + +The `System Tests` job (the matrix one) uses `parallel: matrix:` to +expand into five parallel jobs, one per `TESTSUITE` value. The +`[default]` and `[parametric]` jobs are separate definitions. + +### Upstream dependencies (CI only) + +All system-tests jobs `needs:` three upstream jobs: + +1. `package extension: [amd64, x86_64-unknown-linux-gnu]` -- produces + `packages/dd-library-php-*-x86_64-linux-gnu.tar.gz` +2. `datadog-setup.php` -- produces `packages/datadog-setup.php` +3. `prepare code` -- runs `composer update` + `make generate` + +The `before_script` moves `datadog-setup.php` and the `.tar.gz` into +`system-tests/binaries/` before calling `./build.sh php`. + +The `[parametric]` job overrides `BUILD_SH_ARGS` to `-i runner` instead +of the default `php`. 
+ +### Not covered here + +Onboarding / SSI system tests (`configure_system_tests` job, which sets +`SYSTEM_TESTS_SCENARIOS_GROUPS` to onboarding/SSI groups) are documented +separately in `system-tests-onboarding.md`. Those jobs require AWS +credentials; the jobs listed above do not. + +## What It Tests + +The `system-tests` framework (https://github.com/DataDog/system-tests) +is a cross-language end-to-end test suite. It spins up Docker containers +(a "weblog" app built from the library under test, a mock Datadog Agent, +and various backend services) and runs pytest scenarios against them. + +For PHP, `./build.sh php` builds a Docker image that installs the PHP +package (via `datadog-setup.php` + the tarball placed in `binaries/`) +into a weblog app. `./run.sh ` then exercises that image. + +## Reproducing Locally + +### Prerequisites + +- **Docker with buildx:** system-tests uses `docker buildx build`. If + `docker buildx version` fails, install the plugin: + ```bash + mkdir -p ~/.docker/cli-plugins + curl -sSL "https://github.com/docker/buildx/releases/latest/download/buildx-$(uname -s | tr A-Z a-z)-amd64" \ + -o ~/.docker/cli-plugins/docker-buildx + chmod +x ~/.docker/cli-plugins/docker-buildx + ``` +- **Python 3.12:** Required by the system-tests runner. If installed + via `uv`, `build.sh` may fail at "Build virtual env" because the + `EXTERNALLY-MANAGED` marker blocks `ensurepip`. Workaround: create + the venv manually before running `build.sh`: + ```bash + cd system-tests + uv venv venv --python python3.12 --seed + ``` + +### 1. Build the PHP package locally + +The system tests need two artifacts in the `binaries/` directory of the +`system-tests` checkout: + +- `datadog-setup.php` +- `dd-library-php--x86_64-linux-gnu.tar.gz` + +Build them from the working tree. Before starting, ensure submodules are +initialised (see +[building-locally.md](building-locally.md#submodule-initialisation)). 
The build +has three parts: tracing extension, appsec components, and tarball assembly. +See [building-locally.md](building-locally.md) for all build information. In +particular, see the section "Slim package with debug binaries". The +alternative of generating/downloading ALL the binaries (full all +versions/variants) and invoking `generate-final-artifact.sh` is possible, but +strongly discouraged locally, even if CI does it. + +**Weblog PHP version:** the default weblog (`apache-mod-8.0`) may not be supported by +newer working-tree branches. Use `WEBLOG_VARIANT=apache-mod-8.2` if the default build +fails with "not supported". `apache-mod` variants stop at 8.2; for PHP 8.5 +use `WEBLOG_VARIANT=php-fpm-8.5` (there is no 8.3 or 8.4 weblog). + +**The weblog variant must match the PHP version you build.** Available +weblogs: `apache-mod-7.0` through `apache-mod-8.2`, and `php-fpm-7.0` +through `php-fpm-8.2` plus `php-fpm-8.5`. ZTS variants also exist +(`apache-mod-7.0-zts` through `apache-mod-8.2-zts`). + +```bash +WEBLOG_VARIANT=apache-mod-8.2 ./build.sh php +WEBLOG_VARIANT=apache-mod-8.2 ./run.sh + +# PHP 8.4 / 8.5 — fpm only +WEBLOG_VARIANT=php-fpm-8.5 ./build.sh php +WEBLOG_VARIANT=php-fpm-8.5 ./run.sh +``` + +### Alternative: place only the .so files you want to test + +`install_ddtrace.sh` in system-tests supports `.so` overrides: when no +`dd-library-php-*.tar.gz` is present it downloads the **latest released package** +from GitHub as the base, then replaces installed files with any `.so` files it finds +in `binaries/`. 
+

| File in `binaries/` | What it replaces |
|---------------------|-----------------|
| `ddtrace.so` | The installed `ddtrace.so` (searched under `/root`, `/opt`, `/usr/lib/php`) |
| `ddappsec.so` + `libddappsec-helper.so` | Both required together; replaces appsec extension and C++ helper |
| `libddappsec-helper-rust.so` | Placed alongside the C++ helper (enables `DD_APPSEC_HELPER_RUST_REDIRECTION`) |
| `libddwaf.so` | Placed alongside the C++ helper |

**Hard constraints — this approach only works if all three hold:**

1. **GLIBC compatibility.** The `.so` built by the normal `compile extension` CI job
   (bookworm image) requires GLIBC_2.34. The default weblog (`apache-mod-8.0`) runs on
   Debian Bullseye (GLIBC_2.31) — the extension loads but immediately crashes with
   `GLIBC_2.32 not found`. You must either build with a lower-glibc toolchain (the
   package pipeline's centos-7 image targets GLIBC_2.17) or use a weblog with a newer
   base OS.

2. **PHP ABI match.** The `.so` must be compiled for the same PHP version as the weblog.
   PHP ABIs are not cross-compatible. The default weblog uses PHP 8.0 (ABI `20200930`).

3. **Weblog install path.** `install_ddtrace.sh` searches `find /root /opt /usr/lib/php`
   for the installed extension. `apache-mod-*` weblogs (Debian-based) install under
   `/root/php/...`; `php-fpm-*` weblogs (Ubuntu + `ondrej/php` PPA) install under
   `/usr/lib/php/<version>/`. Both are covered.

**Summary:** in practice this approach is harder than it looks. The full-package path
(section 1) is more reliable. The `.so` override is most useful when you already have a
package-pipeline–built artifact (centos-7 compiled, GLIBC_2.17) and want to swap one
component without reassembling the full tarball.

**Caveats:**
- The base package comes from the GitHub **latest release**. Files it provides
  (`recommended.json`, `.ini` config, other extensions) reflect that released version. 
+

- `compile_extension.sh` requires CI env vars (`CI_COMMIT_SHA`, `CI_COMMIT_BRANCH`).
  Workaround: pass `-e CI_COMMIT_TAG=local-build` to the docker run so
  `append-build-id.sh` exits early.

### 2. Clone system-tests and place artifacts

```bash
git clone https://github.com/DataDog/system-tests.git
mkdir -p system-tests/binaries

# From a .tar.gz package (full or slim):
cp dd-library-php-*-linux-gnu.tar.gz system-tests/binaries/
cp datadog-setup.php system-tests/binaries/
```

### 3. Build the weblog image

```bash
cd system-tests

# For all jobs except [parametric]:
./build.sh php

# For [parametric]:
TEST_LIBRARY=php ./build.sh -i runner
```

### 4. Run scenarios

```bash
# Default scenario (System Tests: [default])
./run.sh

# A specific scenario (System Tests: [APPSEC_API_SECURITY], etc.)
./run.sh APPSEC_API_SECURITY

# Parametric tests
TEST_LIBRARY=php ./run.sh PARAMETRIC
```

### Running a single test

```bash
# By test file
./run.sh DEFAULT tests/path_to_test.py

# By test class
./run.sh DEFAULT tests/appsec/waf/test_addresses.py::Test_BodyJson

# By test method
./run.sh DEFAULT tests/appsec/waf/test_addresses.py::Test_BodyJson::test_body_json

# By pattern
./run.sh DEFAULT -k "test_pattern"
```

### Inspecting logs after a run

Logs are written to `logs_<scenario>/` (or `logs/` for default) and
`logs_parametric/` under the `system-tests/` directory. 
Inside: + +- `docker/weblog/logs/` -- PHP/weblog application logs +- `docker/weblog/logs/helper.log` -- AppSec helper logs (see + `appsec/helper-rust/CLAUDE.md` for how to distinguish C++ vs Rust + helper output) +- `interfaces/` -- captured agent traffic + +## Using a custom helper-rust binary + +To test with a locally-built Rust AppSec helper instead of the one +bundled in the package: + +```bash +# Build the helper via Gradle +cd appsec/tests/integration +./gradlew buildHelperRust --info + +# Extract from Docker volume +docker run -i --rm -v php-helper-rust:/vol alpine \ + cat /vol/libddappsec-helper.so > /path/to/system-tests/binaries/libddappsec-helper.so + +# Then rebuild the weblog and run as usual +cd /path/to/system-tests +./build.sh php +./run.sh APPSEC_API_SECURITY +``` + +## Gotchas + +- The CI job installs Docker (docker-ce, containerd, buildx) inside the + `python:3.12-slim-bullseye` container at runtime. Locally you just + need Docker already running on the host. + +- **Build timeout.** `build.sh` has a 10-minute timeout per attempt + (`SYSTEM_TEST_BUILD_TIMEOUT=600`). Cold builds with no Docker layer + cache can exceed this. Override with + `SYSTEM_TEST_BUILD_TIMEOUT=1200 ./build.sh php`. + +- **Log directories become root-owned.** After runs, `logs*/` directories + are owned by root. Remove them via Docker before re-running: + `docker run --rm -v $(pwd):/s alpine rm -rf /s/logs /s/logs_*` + +- **Run the orchestrator on the host, not in a container.** Volume mount + paths in `run.sh` resolve on the Docker host. If you run the orchestrator + inside a container with the Docker socket mounted, the paths won't match + and non-parametric scenarios will fail. + +- `system-tests` is cloned fresh from `main` on every CI run -- there is + no pinned commit or tag. A breaking change upstream can cause failures + unrelated to dd-trace-php changes. + +- The `.tar.gz` filename includes the library version (from the + `VERSION` file). 
If you build the package locally, make sure the + version in the filename matches what `datadog-setup.php` expects. + +- The `[parametric]` job uses `BUILD_SH_ARGS="-i runner"` instead of + `php`. This builds a different Docker image (the parametric test + runner) rather than the PHP weblog. + +- Non-parametric scenarios (DEFAULT, APPSEC_API_SECURITY, INTEGRATIONS, + etc.) always run sequentially — `run.sh` hardcodes `pytest_numprocesses=1` + for these. Only PARAMETRIC uses parallel workers (`-n auto`). CI sets + `PYTEST_XDIST_AUTO_NUM_WORKERS=8` but this only affects PARAMETRIC. + +- On Apple Silicon, `build.sh` targets `linux/arm64/v8` by default. + When using a locally-built x86_64 tarball, you **must** export + `DOCKER_DEFAULT_PLATFORM=linux/amd64` before running + `./build.sh php`, or `datadog-setup.php` will fail to match the + architecture in the filename. CI always sets this variable. + Alternatively, build an arm64 `.so` with `make` (not `make static`) + and use the `.so` override path — see + [../debugging-system-tests.md](../debugging-system-tests.md). + +- Artifacts are collected from `system-tests/logs_parametric/` and + `system-tests/logs/` -- these directories are always uploaded + regardless of job success or failure (`when: always`). + +- **AppSec events silently missing when appsec artifacts are absent.** When + `libddappsec-helper.so`, `ddappsec.so`, or `recommended.json` do not reach + the weblog Docker image, the sidecar's `maybe_start_appsec()` skips WAF + loading without any visible error. All AppSec-related scenarios (`default`, + `APPSEC_API_SECURITY*`) will report "No appsec event validates this + condition" or "No telemetry data to validate on". + After any change to `generate-package.php` or the tarball assembly that + touches helper packaging, confirm that + `libddappsec-helper.so` is present inside the assembled `.tar.gz` + before running system tests. 
+ +- **Sidecar log level must be `debug` to see startup confirmation.** + The default log level filters out the "Starting sidecar" message. When + diagnosing sidecar startup issues in the weblog container, set + `_DD_DEBUG_SIDECAR_LOG_METHOD=file:///tmp/sidecar.log` and + `_DD_DEBUG_SIDECAR_LOG_LEVEL=debug`. Logs appear under + `logs/docker/weblog/logs/sidecar.log` after the run. diff --git a/.claude/ci/tracer-integration-tests.md b/.claude/ci/tracer-integration-tests.md new file mode 100644 index 00000000000..5865fa54591 --- /dev/null +++ b/.claude/ci/tracer-integration-tests.md @@ -0,0 +1,283 @@ +# Tracer Service Integration Tests + +## CI Jobs + +**Source:** +- `.gitlab/generate-tracer.php` -- generates the tracer-trigger child + pipeline; integration test jobs are generated from `TEST_INTEGRATIONS_{XY}` + lists in the `Makefile`, expanded by the PHP loop starting at the + `foreach ($jobs as $type => $type_jobs)` block +- `Makefile` -- defines per-version `TEST_INTEGRATIONS_{XY}` (where XY is the + PHP major+minor digits, e.g. 
`70` for PHP 7.0) target lists and the + individual `test_integrations_*`, `test_opentelemetry_*`, + `test_opentracing_*` make targets +- `.gitlab/generate-common.php` -- shared service definitions (test-agent, + request-replayer, httpbin, mysql, redis, memcache, amqp, mongodb, kafka, + zookeeper, elasticsearch2/7, sqlsrv, googlespanner) + +| CI Job | Image | What it does | +|--------|-------|-------------| +| `test_integrations_amqp2: [{php}]` | `datadog/dd-trace-ci:php-{php}_bookworm-6` | AMQP v2 (RabbitMQ) integration tests | +| `test_integrations_amqp_latest: [{php}]` | same | AMQP latest version tests | +| `test_integrations_curl: [{php}]` | same | Curl integration tests | +| `test_integrations_deferred_loading: [{php}]` | same | Deferred loading tests (needs mysql + redis) | +| `test_integrations_elasticsearch{1,7,8,_latest}: [{php}]` | same | Elasticsearch integration tests (version-specific) | +| `test_integrations_guzzle{5,6,_latest}: [{php}]` | same | Guzzle HTTP client tests | +| `test_integrations_kafka: [{php}]` | same | Kafka integration tests (needs kafka + zookeeper) | +| `test_integrations_laminaslog2: [{php}]` | same | Laminas Log v2 tests | +| `test_integrations_memcache: [{php}]` | same | Memcache extension tests | +| `test_integrations_memcached: [{php}]` | same | Memcached extension tests | +| `test_integrations_mongodb_{1x,latest}: [{php}]` | same | MongoDB integration tests | +| `test_integrations_monolog{1,2,_latest}: [{php}]` | same | Monolog logging tests | +| `test_integrations_mysqli: [{php}]` | same | MySQLi integration tests | +| `test_integrations_openai_latest: [{php}]` | same | OpenAI integration tests | +| `test_integrations_pdo: [{php}]` | same | PDO integration tests (needs mysql) | +| `test_integrations_phpredis{3,4,5}: [{php}]` | same | PHPRedis extension tests (version-specific .so) | +| `test_integrations_predis_{1,2,latest}: [{php}]` | same | Predis library tests | +| `test_integrations_roadrunner: [{php}]` | same | 
RoadRunner server tests | +| `test_integrations_swoole_5: [{php}]` | same | Swoole integration tests | +| `test_integrations_frankenphp: [{php}]` | same | FrankenPHP integration tests | +| `test_integrations_ratchet: [{php}]` | same | Ratchet WebSocket tests | +| `test_integrations_pcntl: [{php}]` | same | PCNTL (process control) tests | +| `test_integrations_sqlsrv: [{php}]` | same | SQL Server integration tests | +| `test_integrations_googlespanner_latest: [{php}]` | same | Google Spanner emulator tests | +| `test_integrations_stripe_latest: [{php}]` | same | Stripe SDK tests | +| `test_opentelemetry_{1,beta}: [{php}]` | same | OpenTelemetry SDK bridge tests | +| `test_opentracing_10: [{php}]` | same | OpenTracing 1.0 bridge tests | + +Runner: `arch:amd64` +Matrix: PHP 7.0+ (varies per integration; see `TEST_INTEGRATIONS_{XY}` in Makefile) + +Stage: `integrations test` + +## What It Tests + +These jobs test the ddtrace PHP extension's automatic instrumentation of +service client libraries (databases, caches, message queues, HTTP clients, +etc.). Each make target: + +1. Installs composer dependencies (if any) for the library version +2. Runs PHPUnit tests from `tests/Integrations//` +3. Tests communicate with real service containers (MySQL, Redis, etc.) +4. Traces are sent to the **test-agent** for snapshot validation + +Like web tests, `DD_TRACE_DEBUG=1` is set and output is scanned for error +log lines. + +## Service Containers + +All integration test jobs get the base three services (test-agent, +request-replayer, httpbin). Additional services are matched by target name +substring. 
+ +### Base services (always present) + +| Service | Image | Alias | Port | Purpose | +|---------|-------|-------|------|---------| +| test-agent | `ddapm-test-agent:v1.22.1` | `test-agent` | 9126 | Receives traces; validates snapshots | +| request-replayer | `dd-trace-ci:php-request-replayer-*` | `request-replayer` | 80 | HTTP request replay | +| httpbin | `kong/httpbin:0.2.2` | `httpbin-integration` | 8080 | HTTP echo service | + +### Additional services by target substring + +The generator in `generate-tracer.php` matches substrings of the target +name against service keys defined in `generate-common.php`. The matching +logic is: if `str_contains($target, $part)` then include that service. + +| Target substring | Service(s) added | Image | Alias | Key ports | +|-----------------|-----------------|-------|-------|-----------| +| `elasticsearch1` | elasticsearch2 | `elasticsearch:2` | `elasticsearch2-integration` | 9200 | +| `elasticsearch7` | elasticsearch7 | `elasticsearch:7.17.23` | `elasticsearch7-integration` | 9200 | +| `elasticsearch8` | elasticsearch7 | `elasticsearch:7.17.23` | `elasticsearch7-integration` | 9200 | +| `elasticsearch_latest` | elasticsearch7 | `elasticsearch:7.17.23` | `elasticsearch7-integration` | 9200 | +| `mysql` (matches `mysqli` too) | mysql | `dd-trace-ci:php-mysql-dev-5.6` | `mysql-integration` | 3306 | +| `pdo` | mysql | `dd-trace-ci:php-mysql-dev-5.6` | `mysql-integration` | 3306 | +| `deferred_loading` | mysql | `dd-trace-ci:php-mysql-dev-5.6` | `mysql-integration` | 3306 | +| `deferred_loadin` | redis | `dd-trace-ci:php-redis-5.0` | `redis-integration` | 6379 | +| `redis` (matches `phpredis`, `predis`) | redis | `dd-trace-ci:php-redis-5.0` | `redis-integration` | 6379 | +| `memcache` (matches `memcached` too) | memcache | `memcached:1.5-alpine` | `memcached-integration` | 11211 | +| `amqp` | amqp | `rabbitmq:3.9.20-alpine` | `rabbitmq-integration` | 5672 | +| `mongodb` | mongodb | `mongo:4.2.24` | `mongodb-integration` | 27017 
| +| `kafka` | kafka + zookeeper | `cp-kafka:7.8.0` / `cp-zookeeper:7.8.0` | `kafka-integration` / `zookeeper` | 9092 / 2181 | +| `sqlsrv` | sqlsrv | `sqlserver:2019-CU15-ubuntu-20.04` | `sqlsrv-integration` | 1433 | +| `googlespanner` | googlespanner | `cloud-spanner-emulator/emulator:1.5.25` | `googlespanner-integration` | 9010 | + +Note: `deferred_loading` matches both `deferred_loading` (mysql) and +`deferred_loadin` (redis), so it gets both mysql and redis services. + +### WAIT_FOR variables + +All integration test jobs inherit `WAIT_FOR: test-agent:9126` from the shared +template. The following targets **replace** (not append to) that default: + +| Target | WAIT_FOR | +|--------|----------| +| `kafka` | `zookeeper:2181 kafka-integration:9092` | +| `sqlsrv` | `sqlsrv-integration:1433` | + +## Running Locally + +### Prerequisites + +Docker running. The job depends on the `compile extension: debug` artifact. + +### Step 1 -- Build the extension + +Use the **same cache name** as the test step (`tracer-integ-83`) so the built +`.so` is already in place when Step 3 runs — no copy needed. +`CI_COMMIT_TAG=local` prevents `append-build-id.sh` from aborting on a missing +`CI_COMMIT_SHA`; `SHARED=1` matches the CI job variable. + +```bash +.claude/ci/dockerh --cache tracer-integ-83 --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + -e CI_COMMIT_TAG=local \ + -e SHARED=1 \ + -- bash -c ' +set -e +.gitlab/compile_extension.sh +' +``` + +The compiled extension lands at +`~/.cache/dd-ci/tracer-integ-83/tmp/build_extension/modules/ddtrace.so`. + +### Step 2 -- Start service containers + +Use Docker Compose with a unique project name. Multiple runs can coexist on the +same host because Compose namespaces container names under the project. Within +the network, services are reachable by their short names (`test-agent`, +`redis-integration`, etc.) via Docker DNS — no env-var changes needed in Step 3. 
+

All services are defined in `.claude/ci/docker-compose.services.yml`. Start only
the ones your target needs:

```bash
PROJECT=tracer-integ-83 # use the same name as --cache; change to avoid conflicts
REPO=/path/to/dd-trace-php # absolute path to your dd-trace-php checkout

docker compose -p $PROJECT -f $REPO/.claude/ci/docker-compose.services.yml \
  up -d test-agent request-replayer httpbin-integration \
  [mysql-integration] [redis-integration] [rabbitmq-integration] \
  [memcached-integration] [elasticsearch7-integration] [mongodb-integration] \
  [kafka-integration] [sqlsrv-integration] [googlespanner-integration]
```

Quick reference — services per target substring:

| Targets | Extra services to add |
|---|---|
| mysqli, pdo, deferred_loading | `mysql-integration` |
| phpredis, predis, deferred_loading | `redis-integration` |
| amqp | `rabbitmq-integration` |
| memcache, memcached | `memcached-integration` |
| elasticsearch7, elasticsearch8, elasticsearch_latest | `elasticsearch7-integration` |
| mongodb | `mongodb-integration` |
| kafka | `kafka-integration` (brings zookeeper automatically via `depends_on`) |
| sqlsrv | `sqlsrv-integration` |
| googlespanner | `googlespanner-integration` |

### Step 3 -- Run tests

Replace `8.3` with your PHP version and `test_integrations_predis_latest`
with your target. Pass `--network ${PROJECT}_default` (matching the project
name from Step 2). The `--network` flag must appear **after** the image name. 
+ +#### Full suite + +```bash +.claude/ci/dockerh --cache tracer-integ-83 --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + --network ${PROJECT}_default \ + -e COMPOSER_MEMORY_LIMIT=-1 \ + -e DD_TRACE_ASSUME_COMPILED=1 \ + -e HTTPBIN_HOSTNAME=httpbin-integration \ + -e HTTPBIN_PORT=8080 \ + -e DATADOG_HAVE_DEV_ENV=1 \ + -- bash -c ' +set -e +composer update --no-interaction +make composer_tests_update +DD_TRACE_AGENT_TIMEOUT=1000 make test_integrations_predis_latest RUST_DEBUG_BUILD=1 +' +``` + +#### Single test + +```bash +# Same dockerh invocation as above, but change the final make command: +DD_TRACE_AGENT_TIMEOUT=1000 make test_integrations_predis_latest \ + RUST_DEBUG_BUILD=1 \ + TESTS="--filter=testCacheHit" +``` + +The `TESTS` variable is appended to the PHPUnit invocation. + +### Cleanup + +```bash +docker compose -p $PROJECT -f .claude/ci/docker-compose.services.yml down +``` + +## Gotchas + +- **Service matching is substring-based**: The generator uses + `str_contains($target, $part)`. This means `phpredis` matches `redis` + (getting the redis service), `mysqli` matches `mysql` (getting mysql), + and `memcached` matches `memcache` (getting memcached). This is + intentional. + +- **`deferred_loading` gets two services**: It matches both + `deferred_loading` -> mysql and `deferred_loadin` -> redis. The truncated + key `deferred_loadin` is deliberate to match the target. + +- **Kafka requires zookeeper**: The `kafka` service key is overridden in + the generator to include both `kafka` and `zookeeper`. The Kafka container + entrypoint waits for zookeeper to be ready before starting. CI also sets + `CI_DEBUG_SERVICES=true` for kafka jobs and `WAIT_FOR` includes both + `zookeeper:2181` and `kafka-integration:9092`. + +- **phpredis version-specific .so files**: `test_integrations_phpredis3` + through `phpredis5` load specific pre-built `.so` files via + `TEST_EXTRA_INI=-d extension=redis-X.Y.Z.so`. 
The `bookworm-6` CI image + only ships `redis-5.3.7.so` — **`redis-3.1.6.so` and `redis-4.3.0.so` are + absent**, so `test_integrations_phpredis3` and `test_integrations_phpredis4` + cannot be run locally against a PHP 8.3 image. Run them with a PHP 7.x image + where the older `.so` files are present. `test_integrations_phpredis5` + has a bug ([phpredis#1869](https://github.com/phpredis/phpredis/issues/1869)) + that manifests with PHP debug builds on PHP 8.0 — it is excluded from + `TEST_INTEGRATIONS_80` for this reason. On PHP 8.1+, the target is included + and the Makefile works around the bug via `DD_IGNORE_ARGINFO_ZPP_CHECK=1`; + `--php debug` works for those versions. + +- **googlespanner requires grpc extension**: The `test_integrations_googlespanner_latest` + target sets `TEST_EXTRA_INI=-d extension=grpc.so` and + `TEST_EXTRA_ENV=ZEND_DONT_UNLOAD_MODULES=1`. The grpc.so is pre-installed + in the CI image. + +- **sqlsrv requires the sqlsrv extension**: The target sets + `TEST_EXTRA_INI=-d extension=sqlsrv.so`, pre-installed in the CI image. + +- **openai_latest enables telemetry**: The `test_integrations_openai_latest` + target temporarily sets `TELEMETRY_ENABLED=1` (normally disabled in tests). + +- **No SAPI dimension**: Unlike web tests, integration tests do not vary + by SAPI. They run only once per PHP version, using `cli-server` as the + default `DD_TRACE_TEST_SAPI`. + +- **PHP-version-limited jobs**: Several jobs are excluded from newer PHP + versions in the CI matrix and will fail if run with the wrong PHP: + - `elasticsearch1` — PHP 7.0–7.2 only (uses ES 1.x client requiring PHP 5/7 API) + - `mongodb_1x` — PHP 7.0–8.0 only (`ext-mongodb 2.x` in PHP 8.1+ images is + incompatible with `mongodb/mongodb ^1.x`) + - `phpredis3`, `phpredis4` — PHP 7.x only (`.so` files absent from PHP 8.x images) + +- **frankenphp requires ZTS**: `test_integrations_frankenphp` skips all tests + on NTS PHP (`!ZEND_THREAD_SAFE`). 
Use `--php zts` and a separate ZTS cache + (e.g. `tracer-integ-83-zts`). The CI job currently runs with the `debug` + (NTS) variant, so all FrankenPHP tests are silently skipped in CI. + +- **CI images have service clients pre-installed**: The `dd-trace-ci` PHP + images include MySQL client (`mysqladmin`), `redis-cli`, `nc`, and other + tools used by `.gitlab/wait-for-service-ready.sh`. If running in a + different image, these readiness checks will fail silently. diff --git a/.claude/ci/tracer-unit-tests.md b/.claude/ci/tracer-unit-tests.md new file mode 100644 index 00000000000..97a1a5974ff --- /dev/null +++ b/.claude/ci/tracer-unit-tests.md @@ -0,0 +1,591 @@ +# Tracer Unit Tests and Native Extension Tests + +## CI Jobs + +**Source:** +- `.gitlab/generate-tracer.php` — generates the tracer-trigger child + pipeline; defines all job matrices and `script:` sections inline +- `.gitlab/compile_extension.sh` — compiles ddtrace.so (used by the + `compile extension: debug` prerequisite) +- `Makefile` — defines the `test_c`, `test_unit`, `test_opcache`, + `test_extension_ci`, etc. 
targets + +| CI Job | Image | What it does | +|--------|-------|-------------| +| `compile extension: debug` | `dd-trace-ci:php-{ver}_bookworm-6` | Compiles ddtrace.so in debug mode; produces artifact consumed by all test jobs below | +| `compile extension: debug-zts-asan` | same | Compiles ddtrace.so with ASAN+ZTS; used by ASAN test jobs | +| `Unit tests: [{ver}]` | `dd-trace-ci:php-{ver}_bookworm-6` | Runs PHPUnit `--testsuite=unit` | +| `API unit tests: [{ver}]` | same | Runs PHPUnit API unit tests | +| `test_extension_ci: [{ver}]` | same | Runs .phpt extension tests + valgrind wrapper, with test-agent | +| `PHP Language Tests: [{ver}]` | same | Runs the upstream PHP test suite with ddtrace loaded; uses an xfail list | +| `Opcache tests: [{ver}]` | same | Runs .phpt tests in `tests/opcache/` with opcache.so loaded | +| `xDebug tests: [{ver}, {xdebug_ver}]` | same | Runs xdebug-specific .phpt tests + unit tests with xdebug loaded | +| `Disabled test_c run: [{ver}]` | same | Runs `make test_c_disabled`; ignores test exit code, only fails if `.out` files contain leaks, segfaults, or assertion failures | +| `Internal api randomized tests: [{ver}]` | same | Stress-tests the internal tracing API with random calls | +| `test_auto_instrumentation: [{ver}]` | same | PHPUnit `--testsuite=auto-instrumentation` | +| `test_composer: [{ver}]` | same | PHPUnit `--testsuite=composer-tests` | +| `test_integration: [{ver}]` | same | PHPUnit `--testsuite=integration` (no external services beyond test-agent + mongodb) | +| `test_distributed_tracing: [{ver}, {sapi}]` | same | PHPUnit `--testsuite=distributed-tracing` with cli-server and cgi-fcgi SAPIs | +| `ASAN test_c: [{ver}, {arch}]` | same | .phpt extension tests under ASAN (ZTS debug build) | +| `ASAN Internal api randomized tests: [{ver}, {arch}]` | same | Internal API stress test under ASAN | +| `ASAN init hook tests: [{ver}]` | same | Tests init hook mechanism under ASAN | +| `ASAN test_c with multiple observers: [{ver}]` | 
same | .phpt tests with `zend_test.observer` enabled under ASAN (PHP 8.0+) | +| `ASAN Opcache tests: [{ver}]` | same | Opcache .phpt tests under ASAN | + +Runner: `arch:amd64` for all test jobs. Compile jobs also run on +`arch:arm64`. + +Matrix: +- **Non-ASAN jobs**: PHP 7.0+, amd64 only. +- **ASAN jobs**: PHP 7.4+ x {amd64, arm64} for `ASAN test_c`; + PHP 7.4+ amd64-only for other ASAN jobs. `ASAN test_c with + multiple observers` is PHP 8.0+ only. +- **xDebug tests**: specific (PHP, xdebug) version pairs; see the + `$xdebug_test_matrix` array in `generate-tracer.php`. Xdebug is + not yet supported on PHP 8.5. +- **`test_distributed_tracing`**: PHP 7.0+ x {cli-server, + cgi-fcgi}. The cgi-fcgi variant sets + `DD_DISTRIBUTED_TRACING=false`. +- **`test_auto_instrumentation`, `test_composer`, `test_integration`**: + PHP 7.0+, amd64. + +## Quick start: build once, run many + +Build the extension and install PHPUnit prerequisites once: + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e; make -j$(nproc) all; make install_all; +composer update --no-interaction; make generate' +``` + +Then reuse the cache for any test target: + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + -e DD_TRACE_ASSUME_COMPILED=1 -- bash -c 'make test_unit' +``` + +Replace `test_unit` with `test_c`, `test_opcache`, +`test_internal_api_randomized`, etc. + +## Prerequisites + +All test jobs (except compile jobs) depend on the +`compile extension: debug` (or `debug-zts-asan`) artifact. CI +downloads the pre-built `ddtrace.so` and places it in +`tmp/build_extension/modules/`. When reproducing locally, you must +either: + +1. Build ddtrace.so inside the container first (see "Building the + extension locally" below), or +2. Reuse a cached build from a previous `dockerh` run (the `tmp/` + overlay persists). 
+ +**`git submodule update --init libdatadog` is a hard requirement.** +If the `libdatadog` submodule is not initialised, `configure`/`cmake` +will fail with a missing directory. This must be run inside the +container before `make all`. + +PHPUnit-based jobs (`test_unit`, `test_integration`, +`test_auto_instrumentation`, `test_composer`, +`test_distributed_tracing`) also require `Prepare code` artifacts +(generated bridge files) and a `composer update`. + +**`DD_TRACE_ASSUME_COMPILED=1`** tells the Makefile that the extension +is already compiled and installed — it skips the `configure`/build +step that would otherwise run as a prerequisite of `make test_*`. Pass +it as a container env var (`-e DD_TRACE_ASSUME_COMPILED=1`) for all +PHPUnit-based test runs after a prior build. Without it, the Makefile +may attempt to recompile the extension in a context where that would +conflict with the cached build. + +## Building the extension locally + +See [building-locally.md](building-locally.md#for-test-jobs-bookworm-debug-build) +for the build command and common gotchas (submodules, CARGO_HOME, etc.). + +`make install` (not `make install_all`) is sufficient before `test_c` +and `test_opcache` — those jobs load `ddtrace.so` directly from the +build tree via `-d extension=$(SO_FILE)`. PHPUnit jobs use +`install_all` (which also installs ini files) and trigger it +automatically via their `global_test_run_dependencies` prerequisite. + +The `tmp/` cache overlay persists the build artifacts for subsequent +runs. + +## .phpt Extension Tests (test_c) + +### Full suite + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +make -j$(nproc) all +make test_c +' +``` + +CI runs this with test-agent, httpbin, and request-replayer service +containers. Locally, tests that contact these services will be +skipped (most .phpt tests are self-contained). 
+ +### Single test + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +make test_c TESTS=tests/ext/sandbox/auto_flush.phpt +' +``` + +`TESTS` accepts space-separated paths or a directory. Paths are +relative to the repo root. + +### Disabled test_c run + +The CI job runs `make test_c_disabled` — **not** +`DD_TRACE_CLI_ENABLED=0 make test_c`. These are not equivalent. + +The Makefile target ignores all test failures (`|| true`) and only +fails if `.out` files contain: memory leak flush messages, segfaults, +or `assert()` failures. **~450 test failures are expected by design** +(tests that call hooks, `active_span()`, etc. fail because CLI tracing +is disabled). The grep pattern for flush detection has been stale since +August 2022 and never matches. + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +make test_c_disabled +' +``` + +For the ASAN variant, see the [ASAN Tests](#asan-tests) section. + +## PHPUnit Unit Tests + +### Full suite + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +make -j$(nproc) all +make install_all +composer update --no-interaction +make generate +make test_unit +' +``` + +### Single test + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +make test_unit FILTER=testSomething +' +``` + +Use `FILTER=` (not `TESTS="--filter=..."`) — the Makefile appends +`--filter=$(FILTER)` itself, and passing `--filter` twice gives +unpredictable PHPUnit behaviour. 
+ +To also restrict to a specific file, add it via `TESTS`: +```bash +make test_unit TESTS=tests/Unit/SomeTest.php FILTER=testSomething +``` + +## Opcache Tests + +### Full suite + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +make -j$(nproc) all +make test_opcache +' +``` + +This runs .phpt tests from `tests/opcache/` with both ddtrace.so and +opcache.so loaded. On PHP < 8.5 the `-d zend_extension=opcache.so` +flag is passed automatically. + +### Single test + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +make test_opcache TESTS=tests/opcache/some_test.phpt +' +``` + +## Internal API Randomized Tests + +Requires a compiled `ddtrace.so` (see [Prerequisites](#prerequisites)). + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +make -j$(nproc) all +make test_internal_api_randomized +' +``` + +For the ASAN variant, see the [ASAN Tests](#asan-tests) section. 
+ +## PHP Language Tests + +### Full suite + +Start service containers, then run the test suite: + +```bash +PROJECT=tracer-lang-83 +docker compose -p $PROJECT -f .claude/ci/docker-compose.services.yml \ + up -d test-agent request-replayer + +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --root --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + --network ${PROJECT}_default \ + -e DD_AGENT_HOST=test-agent \ + -e DD_TRACE_AGENT_PORT=9126 \ + -- bash -c ' +set -e +make -j$(nproc) all install_all +export XFAIL_LIST=dockerfiles/ci/xfail_tests/8.3.list +export CI_PROJECT_DIR=/project/dd-trace-php +export PHP_MAJOR_MINOR=8.3 +export DD_TRACE_STARTUP_LOGS=0 +export DD_TRACE_WARN_CALL_STACK_DEPTH=0 +export DD_TRACE_GIT_METADATA_ENABLED=0 +export SKIP_ONLINE_TESTS=1 +.gitlab/run_php_language_tests.sh +' + +docker compose -p $PROJECT -f .claude/ci/docker-compose.services.yml down +``` + +This runs the upstream PHP test suite (from `/usr/local/src/php/`) +with ddtrace loaded. Expected failures are listed in +`dockerfiles/ci/xfail_tests/{ver}.list`. + +`--root` is required: the script deletes xfail `.phpt` files, modifies +test files in-place, and `run-tests.php` writes helper scripts — all +inside `/usr/local/src/php/`. + +## xDebug Tests + +### Full suite + +```bash +.claude/ci/dockerh --cache tracer-8.3-debug --overlayfs --root --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +make -j$(nproc) all install_all +php /usr/local/src/php/run-tests.php -g FAIL,XFAIL,BORK,WARN,LEAK,XLEAK,SKIP \ + -p $(which php) --show-all \ + -d zend_extension=xdebug-3.3.2.so \ + tests/xdebug/3.0.0 +' +``` + +Replace `3.3.2` with the xdebug version to test. The test directory +is `tests/xdebug/2.7.2`, `tests/xdebug/2.9.2`, `tests/xdebug/2.9.5`, +or `tests/xdebug/3.0.0` (xdebug 3.x all use the `3.0.0` dir). The +xdebug `.so` files are pre-installed in the CI images. 
+ +Some xdebug versions also run `make test_unit` with the xdebug +extension loaded via `TEST_EXTRA_INI`. + +## test_auto_instrumentation / test_composer / test_integration + +### Prerequisites — service containers + +These jobs need `test-agent`, `request-replayer`, and +`httpbin-integration`. Use a project name matching your cache key: + +```bash +docker compose -p tracer-integ-83 -f .claude/ci/docker-compose.services.yml \ + up -d test-agent request-replayer httpbin-integration +``` + +Wait a few seconds for the test-agent to be ready before running tests. + +### Full suite + +```bash +.claude/ci/dockerh --cache tracer-integ-83 --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + --network tracer-integ-83_default \ + -e DD_TRACE_ASSUME_COMPILED=1 \ + -e DDAGENT_HOSTNAME=test-agent \ + -e DD_AGENT_HOST=test-agent \ + -e DD_TRACE_AGENT_PORT=9126 \ + -e HTTPBIN_HOSTNAME=httpbin-integration \ + -e HTTPBIN_PORT=8080 \ + -e DATADOG_HAVE_DEV_ENV=1 \ + -- bash -c ' +set -e +make -j$(nproc) all +make install_all +composer update --no-interaction +make generate +DD_TRACE_AGENT_TIMEOUT=1000 make test_auto_instrumentation +' +``` + +`RUST_DEBUG_BUILD=1` (seen in CI) passes `--enable-ddtrace-rust-debug` +to `./configure`, but this is auto-detected from the PHP debug binary +and only matters during the configure step — it is not needed on test +target invocations after a `--php debug` build. + +Replace `test_auto_instrumentation` with `test_composer` or +`test_integration`. + +Note: `test_integration` additionally sets `DD_TRACE_AGENT_RETRIES=3 +DD_TRACE_AGENT_FLUSH_INTERVAL=333 DD_AGENT_HOST=test-agent +DD_TRACE_AGENT_PORT=9126` via `TEST_EXTRA_ENV` in the Makefile. +`test_composer` and `test_auto_instrumentation` do not — they rely on +the container-level `DD_AGENT_HOST` and `DD_TRACE_AGENT_PORT` env +vars set via `-e` above. 
+ +### Cleanup + +```bash +docker compose -p tracer-integ-83 -f .claude/ci/docker-compose.services.yml down +``` + +### Single test + +```bash +DD_TRACE_AGENT_TIMEOUT=1000 make test_integration \ + FILTER=testSomething +``` + +## test_distributed_tracing + +Requires the same service containers as the previous section. Start +them if not already running (see `test_auto_instrumentation` section). + +### Full suite + +```bash +.claude/ci/dockerh --cache tracer-integ-83 --overlayfs --php debug \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + --network tracer-integ-83_default \ + -e DD_TRACE_ASSUME_COMPILED=1 \ + -e DDAGENT_HOSTNAME=test-agent \ + -e DD_AGENT_HOST=test-agent \ + -e DD_TRACE_AGENT_PORT=9126 \ + -e HTTPBIN_HOSTNAME=httpbin-integration \ + -e HTTPBIN_PORT=8080 \ + -e DATADOG_HAVE_DEV_ENV=1 \ + -e DD_TRACE_TEST_SAPI=cli-server \ + -- bash -c ' +set -e +make -j$(nproc) all +make install_all +composer update --no-interaction +make generate +DD_TRACE_AGENT_TIMEOUT=1000 make test_distributed_tracing +' +``` + +CI runs this twice per PHP version: once with `cli-server` SAPI and +once with `cgi-fcgi` SAPI. For the `cgi-fcgi` variant, change the +env vars: +``` + -e DD_TRACE_TEST_SAPI=cgi-fcgi \ + -e DD_DISTRIBUTED_TRACING=false \ +``` + +## ASAN Tests + +All ASAN jobs use `--php debug-zts-asan` and a **separate cache** from +the normal debug build (e.g., `tracer-8.3-asan`). The `BUILD_DIR` +(`tmp/build_extension`) is **not** suffixed when `ASAN=1`, so reusing +a debug cache for ASAN will corrupt object files and produce crashes or +silent wrong behaviour. + +### Step 1 — Build (shared for all ASAN targets) + +```bash +.claude/ci/dockerh --cache tracer-8.3-asan --overlayfs \ + --php debug-zts-asan \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +export COMPILE_ASAN=1 +make -j$(nproc) all +' +``` + +`COMPILE_ASAN=1` must be set explicitly — it is not auto-detected. It +enables `-fsanitize=address` in the Rust sidecar build. 
`ASAN=1` is +auto-detected from the PHP binary, so the explicit export is optional. + +The `make` configure step automatically adds `--enable-ddtrace-sanitize` +when `ASAN=1` is detected, which adds `-fsanitize=address +-fno-omit-frame-pointer` to the C extension's CFLAGS and linker flags. + +### ASAN test_c + +```bash +.claude/ci/dockerh --cache tracer-8.3-asan --overlayfs \ + --php debug-zts-asan \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +set -e +export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 +make test_c +' +``` + +CI caps `MAX_TEST_PARALLELISM=2` for ASAN jobs (vs 8 normally) due to +memory overhead. Running with full `nproc` locally risks OOM kills or +false-positive sanitizer failures. Consider setting: +```bash +make test_c MAX_TEST_PARALLELISM=2 +``` + +Under ASAN, `run-tests.php --asan` sets `SKIP_ASAN=1` in the test +environment. Tests with `--XLEAK--` sections are skipped entirely (not +run). Tests that use `getenv("SKIP_ASAN")` in `--SKIPIF--` sections +(e.g., crashtracker tests) are also skipped. + +### ASAN Internal api randomized tests + +```bash +.claude/ci/dockerh --cache tracer-8.3-asan --overlayfs \ + --php debug-zts-asan \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 +make test_internal_api_randomized +' +``` + +### ASAN init hook tests + +These tests live in `tests/C2PHP/` and are run via `make +test_with_init_hook`. The only test in that directory +(`get_context_distributed_tracing_test.phpt`) requires +`httpbin-integration` to be reachable. It has **no `--SKIPIF--` +section** — if httpbin is missing or not yet ready, the test throws an +exception (hard failure, not a skip). 
+ +Start httpbin before running: +```bash +docker compose -p tracer-asan-83 -f .claude/ci/docker-compose.services.yml \ + up -d httpbin-integration +``` + +Then run: +```bash +.claude/ci/dockerh --cache tracer-8.3-asan --overlayfs \ + --php debug-zts-asan \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + --network tracer-asan-83_default \ + -e HTTPBIN_HOSTNAME=httpbin-integration \ + -e HTTPBIN_PORT=8080 \ + -- bash -c ' +export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 +make test_with_init_hook +' +``` + +Note: `make test_with_init_hook` loads the extension via +`-d datadog.trace.sources_path=$(TRACER_SOURCE_DIR)`, which is a +different load mechanism from `test_c` (which uses `-d +extension=$(SO_FILE)` directly). + +### ASAN test_c with multiple observers (PHP 8.0+ only) + +The `zend_test.observer` INI flags used by this target are undefined on +PHP 7.x — do not run against PHP 7.x. + +```bash +.claude/ci/dockerh --cache tracer-8.3-asan --overlayfs \ + --php debug-zts-asan \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 +make test_c_observer +' +``` + +### ASAN Opcache tests + +```bash +.claude/ci/dockerh --cache tracer-8.3-asan --overlayfs \ + --php debug-zts-asan \ + datadog/dd-trace-ci:php-8.3_bookworm-6 -- bash -c ' +export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 +make test_opcache +' +``` + +## Gotchas + +- **The `debug` PHP build is the default on PATH** in CI images, but + `dockerh --php debug` still explicitly calls `switch-php debug`. + Always use `--php` to be explicit. + +- **ASAN builds must use ZTS.** The ASAN PHP build is `debug-zts-asan`, + not `debug-asan`. NTS ASAN builds do not exist in the CI images. + +- **`--overlayfs` is required** — many steps write to the source tree + (`make generate`, `composer update`, `git checkout`). See + [index.md](index.md). 
+ +- **`make install_all` writes outside the overlay.** `install_all` + copies `ddtrace.so` to `/opt/php/debug/lib/php/extensions/` and + `ddtrace.ini` to `/opt/php/debug/conf.d/` — both outside + `/project/dd-trace-php`, so they are not persisted by `--overlayfs`. + Each new `dockerh` invocation must re-run `make install_all` even + when the compiled artifacts are cached. + +- **`test_extension_ci` uses a valgrind wrapper.** The Makefile + prepends `tests/ext/valgrind` to `$PATH`, which intercepts `php` + calls to run them under valgrind. This makes the job significantly + slower and is specific to CI. + +- **`PHP Language Tests` has retry:2 in CI.** These tests are + inherently flaky due to timing-sensitive PHP runtime tests. The CI + job retries up to 2 times on script failure. + +- **`test_integration` talks to test-agent on port 9126** and mongodb. + `test_composer`, `test_auto_instrumentation`, and + `test_distributed_tracing` also need the test-agent — unlike + `test_integration`, their Makefile targets do not set `DD_AGENT_HOST` + internally, so it must be provided via a container-level `-e` + argument. Without test-agent running and reachable, these tests + produce errors (not skips). + +- **`request-replayer` is required for integration tests.** The + test-agent Docker image is configured with + `DD_TRACE_AGENT_URL=http://request-replayer:80`. Tests that exercise + trace forwarding (e.g., `OrphansTest`) directly connect to + `request-replayer` by hostname. Start it alongside `test-agent`. + +- **`ASSERT_NO_MEMLEAKS` only exists in CI.** This is defined in + `generate-tracer.php` and appended directly to the GitLab `script:` + lines. It is not in the Makefile. Running `make test_unit` or `make + test_api_unit` locally does not pipe output through any memory-leak + grep. 
+

- **Service containers are not started by `dockerh`.** Start them
  with `docker compose -p <project> -f .claude/ci/docker-compose.services.yml up -d`
  before running `dockerh`, and connect the PHP container with
  `--network <project>_default`. `request-replayer` must be started
  alongside `test-agent` — the test-agent forwards to it, and some
  tests (e.g. `OrphansTest`) connect to it directly.

- **xdebug `.so` files are pre-installed in CI images.** They live at
  paths like `/opt/php/debug/lib/php/extensions/*/xdebug-3.3.2.so`.
  If reproducing locally with a non-CI PHP build, you would need to
  install xdebug separately.
diff --git a/.claude/ci/tracer-web-tests.md b/.claude/ci/tracer-web-tests.md
new file mode 100644
index 00000000000..ce287b232f3
--- /dev/null
+++ b/.claude/ci/tracer-web-tests.md
@@ -0,0 +1,294 @@
+# Tracer Web Framework Tests
+
+## CI Jobs
+
+**Source:**
+- `.gitlab/generate-tracer.php` -- generates the tracer-trigger child
+  pipeline; the web test jobs are generated from `TEST_WEB_{XY}` lists in
+  the `Makefile`, expanded by the PHP loop starting at the
+  `foreach ($jobs as $type => $type_jobs)` block
+- `Makefile` -- defines per-version `TEST_WEB_{XY}` target lists (where XY is
+  the PHP major+minor digits, e.g.
`70` for PHP 7.0) + and the individual `test_web_*` / `test_metrics` make targets +- `.gitlab/generate-common.php` -- shared service definitions (test-agent, + request-replayer, httpbin-integration, mysql) + +| CI Job | Image | What it does | +|--------|-------|-------------| +| `test_web_laravel_{ver}: [{php}, {sapi}]` | `datadog/dd-trace-ci:php-{php}_bookworm-6` | Laravel integration tests (4.2, 5.7, 5.8, 8.x, 9.x, 10.x, 11.x, latest, octane) | +| `test_web_symfony_{ver}: [{php}, {sapi}]` | same | Symfony integration tests (2.3--latest) | +| `test_web_wordpress_{ver}: [{php}, {sapi}]` | same | WordPress integration tests (4.8, 5.5, 5.9, 6.1) | +| `test_web_drupal_{ver}: [{php}, {sapi}]` | same | Drupal integration tests (8.9, 9.5, 10.1) | +| `test_web_magento_{ver}: [{php}, {sapi}]` | same | Magento integration tests (2.3, 2.4) | +| `test_web_slim_{ver}: [{php}, {sapi}]` | same | Slim integration tests (3.12, 4.8, latest) | +| `test_web_cakephp_{ver}: [{php}, {sapi}]` | same | CakePHP integration tests (2.8, 3.10, 4.5, latest) | +| `test_web_codeigniter_{ver}: [{php}, {sapi}]` | same | CodeIgniter integration tests (2.2, 3.1) | +| `test_web_lumen_{ver}: [{php}, {sapi}]` | same | Lumen integration tests (5.2--10.0) | +| `test_web_nette_{ver}: [{php}, {sapi}]` | same | Nette integration tests (2.4, 3.1, latest) | +| `test_web_laminas_{type}_{ver}: [{php}, {sapi}]` | same | Laminas MVC / REST integration tests | +| `test_web_yii_{ver}: [{php}, {sapi}]` | same | Yii integration tests (2.0.49, latest) | +| `test_web_zend_1: [{php}, {sapi}]` | same | Zend Framework 1 integration tests | +| `test_web_custom: [{php}, {sapi}]` | same | Custom framework integration tests | +| `test_metrics: [{php}, {sapi}]` | same | Metrics integration tests | + +Runner: `arch:amd64` +Matrix: PHP 7.0+ (varies per framework) x SAPI {cli-server, cgi-fcgi, apache2handler} +(PHP >= 7.2 gets all three SAPIs; PHP 7.0--7.1 gets only a bare run without SAPI dimension. 
+
`test_web_custom` additionally gets `fpm-fcgi`.)

Stage: `web test`

## What It Tests

These jobs test the ddtrace PHP extension's automatic instrumentation of
web frameworks. Each make target:

1. Installs composer dependencies for the framework version under
   `tests/Frameworks/<framework>/Version_X_Y/`
2. Runs PHPUnit test suites from `tests/Integrations/<framework>/`
3. The test harness starts a PHP web server (controlled by
   `DD_TRACE_TEST_SAPI`) and sends HTTP requests through it
4. Traces are sent to the **test-agent** service container, which validates
   them against snapshot files in `tests/snapshots/`

The `DD_TRACE_DEBUG=1` flag is always set (via `run_tests_debug`), and the
output is scanned for `[error]`, `[warning]`, or `[deprecated]` log lines
-- any such line fails the job.

## Service Containers

All web test jobs use four GitLab service containers:

| Service | Image | Alias | Port | Purpose |
|---------|-------|-------|------|---------|
| test-agent | `ddapm-test-agent:v1.22.1` | `test-agent` | 9126 | Receives traces; validates snapshots |
| request-replayer | `dd-trace-ci:php-request-replayer-*` | `request-replayer` | 80 | Replays HTTP requests for trace forwarding |
| httpbin | `kong/httpbin:0.2.2` | `httpbin-integration` | 80 | HTTP echo service for curl/guzzle tests |
| mysql | `dd-trace-ci:php-mysql-dev-5.6` | `mysql-integration` | 3306 | MySQL for WordPress, Drupal, Magento, etc. |

**All web test jobs always include the mysql service** (in addition to
test-agent, request-replayer, and httpbin) because many web frameworks use
a database. Some framework targets (e.g. `test_web_magento_*`) additionally
get `elasticsearch7` via the service-matching logic in the generator.

### Additional services by target

The generator matches substrings of the make target name against service
keys.
Matches relevant to web tests: + +| Target substring | Extra service | Image | +|-----------------|---------------|-------| +| `magento` | elasticsearch7 | `elasticsearch:7.17.23` (alias `elasticsearch7-integration`, port 9200) | + +## Running Locally + +### Prerequisites + +You need Docker running. The job depends on the `compile extension: debug` +artifact (`ddtrace.so`). Locally you must build it first. + +### Step 1 -- Build the extension + +Use the **same cache name** as Step 3 so that `tmp/build_extension/` is shared +between the compile and test containers: + +```bash +.claude/ci/dockerh --cache tracer-web-83 --overlayfs \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + -e CI_COMMIT_TAG=local \ + -- bash -c ' +set -e +.gitlab/compile_extension.sh +' +``` + +`CI_COMMIT_TAG=local` skips the `append-build-id.sh` version-stamping step +that requires GitLab CI env vars and is not needed locally. + +The compiled `ddtrace.so` will be at +`~/.cache/dd-ci/tracer-web-83/tmp/build_extension/modules/ddtrace.so`. + +### Step 2 -- Start service containers + +Use `.claude/ci/docker-compose.services.yml`, which mirrors the relevant +services from `docker-compose.yml` but without host port bindings. This lets +multiple PHP versions (or parallel runs) coexist on the same host — containers +communicate via Docker DNS and don't need host-side ports. + +**Each concurrent test run must use a separate project name.** The project +name namespaces both containers and the Docker network. If two tests share a +project name they share the same test-agent container, and traces from +different web servers cross-contaminate each other's snapshot sessions. + +#### Project naming convention + +Derive the project name from the make target: strip the `test_web_` prefix, +replace underscores with hyphens, and prefix with the PHP version (no dot). 
+
Examples:

| Make target | Project name |
|---|---|
| `test_web_laravel_latest` | `tracer-web-83-laravel-latest` |
| `test_web_symfony_latest` | `tracer-web-83-symfony-latest` |
| `test_web_nette_latest` | `tracer-web-83-nette-latest` |
| `test_web_slim_312` | `tracer-web-83-slim-312` |
| `test_metrics` | `tracer-web-83-metrics` |

Note: docker compose project names may not contain dots; use the PHP version
without a dot (e.g. `83` not `8.3`).

```bash
PROJECT=tracer-web-83-laravel-latest
docker compose -p $PROJECT -f .claude/ci/docker-compose.services.yml \
  up -d test-agent request-replayer httpbin-integration mysql-integration
```

For **magento** tests, also start:

```bash
docker compose -p $PROJECT -f .claude/ci/docker-compose.services.yml \
  up -d elasticsearch7-integration
```

The network is named `<project>_default` (e.g. `tracer-web-83-laravel-latest_default`).
Use that name when connecting the PHP container in Step 3.

### Step 3 -- Run tests

Connect the PHP container to the same network. The `dockerh` tool does not
manage service containers, so pass `--network ${PROJECT}_default` as an extra
Docker option. The build artifact cache (`--cache tracer-web-83`) is shared
across all targets for the same PHP version — only the service containers need
to be per-target.

`--overlayfs` is needed because composer writes to the repo tree (see
[index.md](index.md)). Some frameworks also run database migrations as
`post-autoload-dump` scripts — these need the service containers to be up
(Step 2) before Step 3 runs.

Replace `8.3` with your PHP version and `test_web_laravel_latest` with
your target.
+ +#### Full suite + +```bash +PROJECT=tracer-web-83-laravel-latest +.claude/ci/dockerh --cache tracer-web-83 --overlayfs \ + datadog/dd-trace-ci:php-8.3_bookworm-6 \ + --network ${PROJECT}_default \ + -e COMPOSER_MEMORY_LIMIT=-1 \ + -e DD_TRACE_ASSUME_COMPILED=1 \ + -e DDAGENT_HOSTNAME=test-agent \ + -e HTTPBIN_HOSTNAME=httpbin-integration \ + -e HTTPBIN_PORT=8080 \ + -e DATADOG_HAVE_DEV_ENV=1 \ + -e DD_TRACE_TEST_SAPI=cli-server \ + -- bash -c ' +set -e +DD_TRACE_AGENT_TIMEOUT=1000 make test_web_laravel_latest RUST_DEBUG_BUILD=1 +' +``` + +Key variables: +- `DD_TRACE_ASSUME_COMPILED=1` — tells the Makefile to skip recompiling the + extension and use the `.so` already built in Step 1. Without this the build + step runs again inside the test container. +- `RUST_DEBUG_BUILD=1` — builds the Rust sidecar with debug symbols, giving + more useful backtraces. Safe to omit if you don't need Rust debug output. + +#### Single test + +```bash +# ... same dockerh invocation as above, but change the final make command: +DD_TRACE_AGENT_TIMEOUT=1000 make test_web_laravel_latest \ + RUST_DEBUG_BUILD=1 \ + TESTS="--filter=testScenario" +``` + +The `TESTS` variable is appended to the PHPUnit command. Use `--filter` to +select specific test methods. + +#### SAPI variants + +Set `DD_TRACE_TEST_SAPI` to control which SAPI the test harness uses: + +| Value | Server type | +|-------|------------| +| `cli-server` | PHP built-in web server (default) | +| `cgi-fcgi` | PHP-CGI via nginx | +| `apache2handler` | Apache mod_php | +| `fpm-fcgi` | PHP-FPM (only `test_web_custom`) | + +### Cleanup + +```bash +PROJECT=tracer-web-83-laravel-latest +docker compose -p $PROJECT -f .claude/ci/docker-compose.services.yml down +``` + +## Gotchas + +- **SNAPSHOT_CI=0 locally**: In CI, `SNAPSHOT_CI=1` makes the test-agent + strictly validate snapshots. Locally, set it to `0` or omit it to allow + snapshot mismatches during development. Set to `1` to reproduce exact CI + behavior. 
+

- **SNAPSHOT_DIR must be writable**: The test-agent reads/writes snapshots
  from `$CI_PROJECT_DIR/tests/snapshots`. `--overlayfs` handles this
  automatically.

- **Do not run multiple test targets in parallel with the same `--cache`
  name**: the overlay volume is shared, and `run_composer_with_lock` deletes
  and recreates `tests/composer.lock-php*` on each run. Concurrent containers
  race on this, causing spurious failures. Run targets sequentially or choose
  different cache names (still avoiding creating many caches, as ddtrace will
  have to be built in each one).

- **Composer version pinning**: Symfony 2.x/3.0/3.3/4.0 jobs set
  `COMPOSER_VERSION: 2.2` in CI. If you see dependency resolution failures
  on old Symfony versions, run `sudo composer self-update --2.2` inside the
  container before `composer update`.

- **mysql is always present for web tests**: Unlike integration tests where
  services are matched by target name, web tests unconditionally include
  mysql. If mysql is not running, WordPress/Drupal/Magento tests will fail
  with connection errors and other tests may emit unexpected warnings.

- **laravel_octane gets extra memory**: CI allocates 6Gi for
  `test_web_laravel_octane_latest`. If you see OOM kills locally, increase
  Docker's memory limit.

- **Services must be up before Step 3 (and stamp files must match the live
  container)**: Framework-specific composer installs run as make prerequisites
  on first test run. Several frameworks run DB setup as `post-autoload-dump`
  scripts and therefore require mysql to be reachable:
  - **Symfony Latest** — `doctrine:database:create` + `doctrine:migrations:migrate`
  - **Laravel 8x/9x/10x/11x/latest** — `artisan migrate:fresh`
  - **Drupal 9.5/10.1** — `php core/scripts/drupal install minimal`
  - **WordPress** — creates tables on first request via WP install

  Two scenarios require deleting the stamp file to force re-run:
  1. 
Step 3 ran while services were down — stamp was written but migrations
     were skipped.
  2. The MySQL container was recreated (e.g. you stopped and restarted
     services, or started per-target containers for the first time) — the
     stamp exists from a previous container but the new container has an empty
     database.
  Fix for both: delete `tests/Frameworks/<framework>/composer.lock-php<version>`
  and re-run Step 3 with services running.

- **`make composer_tests_update` is not needed**: In CI, `composer update` at
  the repo root runs before the test. Locally, each framework's dependencies
  are installed on demand as make prerequisites the first time a test target
  runs. You do not need a separate `make composer_tests_update` step.

- **Stale Symfony Messenger queue**: The `messenger_messages` table persists
  across runs. In CI, each job gets a fresh database. Locally, leftover messages
  from previous runs contaminate the queue: `messenger:consume async --limit=1`
  picks up an old message instead of the one just dispatched by the test.
If + `MessengerTest` fails with an unexpected consumer span count, truncate the + table before re-running: + ```bash + docker exec tracer-web-83-symfony-latest-mysql-integration-1 \ + bash -c "mysql -utest -ptest symfonyLatest -e 'DELETE FROM messenger_messages;'" + ``` diff --git a/.claude/ci/windows-tests.md b/.claude/ci/windows-tests.md new file mode 100644 index 00000000000..e78c8dd7de2 --- /dev/null +++ b/.claude/ci/windows-tests.md @@ -0,0 +1,88 @@ +# Windows Tests + +**WARNING**: THIS FILE HAS NOT YET BEEN REVIEWED + +## CI Jobs + +**Source:** +- `.gitlab/generate-tracer.php` — defines `windows test_c` +- `.gitlab/generate-package.php` — defines `compile extension windows`, `package extension windows`, `verify windows` +- `.gitlab/generate-common.php` — `windows_git_setup()` and `windows_git_setup_with_packages()` helpers +- `dockerfiles/verify_packages/verify_windows.ps1` — smoke-test script + +| CI Job | Pipeline | What it does | +|--------|----------|--------------| +| `windows test_c: [{ver}]` | tracer | Builds NTS `php_ddtrace.dll` with `phpize`+`nmake`, runs `.phpt` extension tests | +| `compile extension windows: [{ver}]` | package | Builds NTS + ZTS `php_ddtrace.dll`; produces `.dll` + `.pdb` debug symbols | +| `package extension windows` | package | Assembles Windows DLLs into `dbgsym.tar.gz` release archive | +| `verify windows` | package | Installs packaged extension via Chocolatey, runs `verify_windows.ps1` smoke tests | + +Runner: `windows-v2:2019` +Image: `registry.ddbuild.io/images/mirror/datadog/dd-trace-ci:php-{ver}_windows` +Matrix: PHP 7.2+ + +## What It Tests + +`windows test_c` starts `httpbin-windows` and `php-request-replayer-*-windows` +service containers, builds `php_ddtrace.dll` from source inline (no pre-built +artifact needed), then runs `run-tests.php` against `tests/ext/`. 
+ +`compile extension windows` builds both NTS and ZTS DLLs; the Rust/libdatadog +build in `x64/Release/target` is moved to `x64/Release_TS/target` so ZTS can +reuse it without recompiling. + +## Local Reproduction + +Requires a Windows host with Docker configured for Windows containers. Not +reproducible on Linux/macOS. + +```powershell +docker pull registry.ddbuild.io/images/mirror/datadog/dd-trace-ci:php-8.3_windows + +git config --global core.longpaths true +git config --global core.symlinks true +git clone https://github.com/DataDog/dd-trace-php . + +$CONTAINER = "ddtrace-windows-test" +docker run -v ${pwd}:C:\Users\ContainerAdministrator\app -d --name $CONTAINER ` + registry.ddbuild.io/images/mirror/datadog/dd-trace-ci:php-8.3_windows ping -t localhost + +# Build +docker exec $CONTAINER powershell.exe "cd app; switch-php nts; C:\php\SDK\phpize.bat; .\configure.bat --enable-debug-pack; nmake" + +# Run all extension tests +docker exec $CONTAINER powershell.exe ` + 'cd app; C:\php\php.exe -n -d memory_limit=-1 -d output_buffering=0 run-tests.php -g FAIL,XFAIL,BORK,WARN,LEAK,XLEAK,SKIP --show-diff -p C:\php\php.exe -d "extension=${pwd}\x64\Release\php_ddtrace.dll" "${pwd}\tests\ext"' +``` + +### Single test + +```powershell +docker exec $CONTAINER powershell.exe ` + 'cd app; C:\php\php.exe -n -d memory_limit=-1 run-tests.php --show-diff -p C:\php\php.exe -d "extension=${pwd}\x64\Release\php_ddtrace.dll" "${pwd}\tests\ext\sandbox\exception_in_original_call.phpt"' +``` + +### Cleanup + +```powershell +docker stop -t 5 $CONTAINER && docker rm -f $CONTAINER +``` + +## Gotchas + +- **`GIT_STRATEGY: none` on all Windows jobs.** `windows_git_setup()` manually + wipes the workspace and re-clones because GitLab's default checkout fails on + Windows with deep symlink/junction paths. + +- **`windows_git_setup_with_packages()` preserves artifacts.** Jobs receiving + artifacts (e.g. 
`verify windows`) move `packages/` to `%TEMP%` before the
  workspace wipe and restore it after.

- **`windows test_c` builds the DLL inline** — there is no separate compile
  prerequisite in the tracer pipeline for Windows.

- **`verify windows` installs Chocolatey inline.** Requires outbound HTTPS from
  the runner. If Chocolatey's CDN is unreachable the job fails immediately.

- **Windows `switch-php` is a PowerShell script**, not the Linux bash version —
  it configures `C:\php\` rather than symlinking into `/usr/local/bin/`.
diff --git a/.claude/debugging-system-tests.md b/.claude/debugging-system-tests.md
new file mode 100644
index 00000000000..21595d61524
--- /dev/null
+++ b/.claude/debugging-system-tests.md
@@ -0,0 +1,268 @@
+# Debugging System Tests Locally (arm64)
+
+Combines Python debugging (pytest `--pdb`) with gdb inside the weblog
+container. For build/run instructions see
+[ci/system-tests.md](ci/system-tests.md). For gdb fundamentals see
+[gdb.md](gdb.md).
+
+## arm64-specific build notes
+
+On Apple Silicon, use `--platform linux/arm64` (native). amd64
+emulation is too slow.
+
+- Build images are multi-arch:
+  `datadog/dd-trace-ci:php-<version>_centos-7` works on arm64.
+- `make` output goes to `extensions_aarch64/`,
+  `standalone_aarch64/`.
+- The `php_fpm_packaging` image has **no arm64 variant**. For
+  `generate-final-artifact.sh` use `ubuntu:24.04` instead.
+- Weblog images support arm64 natively.
+
+## Building ddtrace.so with Rust linked
+
+See [ci/building-locally.md](ci/building-locally.md#for-system-tests-centos-7-release-like-build)
+for the build command, CARGO_HOME workaround, and `make` vs
+`make static` explanation.
+
+First build: ~20 min (Rust sidecar). Incremental (C-only): ~1 min.
+The result is at
+`~/.cache/dd-ci/systest-82/tmp/build_extension/modules/ddtrace.so`
+(~85 MB, with debug symbols from `-g -O2`).
+ +## Deploying via the .so override path + +Place just the `.so` in system-tests binaries; the install script +downloads the latest GitHub release as base, installs it, and +replaces the installed `ddtrace.so`: + +```bash +CACHE=~/.cache/dd-ci/systest-82 +cp $CACHE/tmp/build_extension/modules/ddtrace.so \ + ~/repos/system-tests/binaries/ddtrace.so + +# Remove any tarball/setup to trigger the download path +rm -f ~/repos/system-tests/binaries/dd-library-php-*.tar.gz +rm -f ~/repos/system-tests/binaries/datadog-setup.php + +cd ~/repos/system-tests +WEBLOG_VARIANT=php-fpm-8.2 ./build.sh php +./build.sh -i proxy +``` + +Build log should show: +``` +Overriding package ddtrace.so with custom binary from /binaries/ddtrace.so +Found installed ddtrace.so at /usr/lib/php/20220829/ddtrace.so, replacing +``` + +## Container lifecycle: when they're alive + +**Important:** system-tests stops containers and collects all data +BEFORE running test assertions (in `post_setup`, called at the end +of `pytest_collection_finish`). By the time `--pdb` pauses on a +test failure, containers are already gone. 
+ +This means: +- **pdb on failure** — you inspect offline data only, no new + requests +- **gdb** — you need `--sleep` mode to keep containers alive + +## Python debugging with pytest --pdb + +Run in tmux for interactive pdb: + +```bash +tmux new-session -d -s systest \ + "cd ~/repos/system-tests && \ + WEBLOG_VARIANT=php-fpm-8.2 ./run.sh DEFAULT \ + tests/test_semantic_conventions.py::Test_Meta::test_meta_http_status_code \ + -v -s --pdb" +``` + +Wait for the pdb prompt, then interact: + +```bash +# query what tags the span has +tmux send-keys -t systest \ + "[k for k in span['meta'] if 'status' in k]" Enter +sleep 1 +tmux capture-pane -t systest -p | tail -5 +``` + +Example output: +``` +(Pdb) [k for k in span['meta'] if 'status' in k] +['http.response.status_code'] +``` + +To set Python breakpoints before a specific point, add +`breakpoint()` in the test source (in the system-tests checkout). + +## gdb inside the weblog container + +Use `--sleep` to keep containers alive indefinitely: + +```bash +cd ~/repos/system-tests +WEBLOG_VARIANT=php-fpm-8.2 ./run.sh DEFAULT --sleep +``` + +No tests run; the full scenario (weblog, proxy, agent) stays up +until Ctrl-C. Container name: `system-tests-weblog`. + +### Install gdb + tmux (first time) + +```bash +docker exec system-tests-weblog \ + bash -c 'apt-get update -qq && \ + apt-get install -yqq gdb tmux procps 2>/dev/null' +``` + +### Attach gdb via tmux + +```bash +# Find a PHP-FPM worker PID (not the master) +docker exec system-tests-weblog ps aux | grep 'php-fpm.*pool' +# www-data 129 ... 
php-fpm: pool www + +# Attach +docker exec system-tests-weblog \ + tmux new-session -d -s gdb \ + "gdb -q -iex 'set pagination off' \ + -iex 'set confirm off' -p 129" + +sleep 3 +docker exec system-tests-weblog \ + bash -c 'tmux capture-pane -t gdb -p' | tail -5 +``` + +### Set breakpoints and trigger + +```bash +docker exec system-tests-weblog \ + bash -c "tmux send-keys -t gdb \ + 'break dd_set_entrypoint_root_span_props' Enter" +sleep 1 +docker exec system-tests-weblog \ + bash -c "tmux send-keys -t gdb 'continue' Enter" + +# Trigger (may need several requests, see FPM selection below) +for i in $(seq 1 5); do + curl -s http://localhost:7777/ >/dev/null +done + +# Check +docker exec system-tests-weblog \ + bash -c 'tmux capture-pane -t gdb -p' | tail -15 +``` + +Expected: +``` +Thread 1 "php-fpm8.2" hit Breakpoint 2, + dd_set_entrypoint_root_span_props (data=..., span=...) + at .../ext/serializer.c:609 +``` + +### FPM worker selection + +PHP-FPM round-robins between workers. With gdb attached, one +worker may be frozen in `accept()`, causing FPM to route all +requests to the other. Solutions: + +- Send multiple requests until one hits the attached worker. +- Attach gdb to both workers. +- Configure FPM: `docker exec system-tests-weblog bash -c + "echo 'pm.max_children = 1' >> /etc/php/8.2/fpm/pool.d/www.conf + && kill -USR2 1"` to use a single worker. + +### Stepping and inspecting + +```bash +# Backtrace +docker exec system-tests-weblog \ + bash -c "tmux send-keys -t gdb 'bt 5' Enter" +sleep 1 +docker exec system-tests-weblog \ + bash -c 'tmux capture-pane -t gdb -p' | tail -15 + +# Step +docker exec system-tests-weblog \ + bash -c "tmux send-keys -t gdb 'next' Enter" +``` + +### Inlined functions at -O2 + +With `-O2`, some `static` functions (e.g., +`dd_set_entrypoint_root_span_props_end`) are inlined and won't +appear in `info functions`. Use line breakpoints instead: + +```gdb +break serializer.c:1179 +``` + +Or rebuild with `-O0` for full symbol visibility. 
+ +### Sidecar (Rust) debugging + +See [gdb.md](gdb.md) "Attaching to the sidecar" section. Key: +- `file /proc//exe` before `attach` +- `set language rust` before Rust breakpoints +- `pgrep -f 'datadog-ipc.*ddog_daemon_entry_point'` for the PID. + +## Inspecting test data after a run + +Trace data in `logs/interfaces/library/`: + +```bash +python3 -c " +import json, glob +for f in sorted(glob.glob('logs/interfaces/library/*traces*.json')): + data = json.load(open(f)) + for trace in data.get('request',{}).get('content',[]): + for span in trace: + meta = span.get('meta', {}) + if any('status' in k for k in meta): + tags = {k:v for k,v in meta.items() if 'status' in k} + print(f, tags) +" +``` + +PHP/tracer logs: + +``` +logs/docker/weblog/logs/tracer.log ← tracer LOG(ERROR, ...) output +logs/docker/weblog/logs/php_error.log ← php_log_err() / error_log() / trigger_error() output +logs/docker/weblog/logs/appsec.log ← appsec extension mlog() output +logs/docker/weblog/logs/helper.log ← appsec C++ helper SPDLOG_*() output +logs/docker/weblog/logs/apache2/error.log ← profiler error!() output (if log level enabled) +``` + +### Adding debug output + +Use the project's own logging macros — they route to the collected log files +above. All verified empirically on the `apache-mod-8.2` weblog. + +**Reliable methods:** + +| Component | Macro | Lands in | +|---|---|---| +| Tracer (C) | `LOG(ERROR, "fmt", args)` | `tracer.log` | +| Tracer (C) | `php_log_err("msg")` | `php_error.log` | +| Appsec extension (C) | `mlog(dd_log_error, "fmt", args)` | `appsec.log` + `php_error.log` | +| Appsec C++ helper | `SPDLOG_ERROR("fmt", args)` | `helper.log` | +| Appsec Rust helper | `log::error!("fmt", args)` | `helper.log` (only when Rust helper is active) | +| Profiler (Rust) | `error!("msg")` | `apache2/error.log` (requires `datadog.profiling.log_level=error`) | + +**Methods that lose output** (verified with gdb): + +- **`fprintf(stderr, ...)`** — lost due to stdio buffering. 
`stderr` is fully + buffered (fd 2 points to a file, not a tty). Output sits in the `FILE*` + buffer and never reaches disk before system-tests stops the containers. + Confirmed: calling `fflush(stderr)` from gdb made the output appear. + +- **`trigger_error(E_USER_NOTICE)`** — silently dropped when `log_errors=Off` + (the weblog default). Works after adding `log_errors=On` to `php.ini`. + +- **Profiler `error!()`** — silently filtered when `datadog.profiling.log_level` + is `off` (the default). Works after setting `datadog.profiling.log_level=error` + in `php.ini`. diff --git a/.claude/debugging.md b/.claude/debugging.md new file mode 100644 index 00000000000..b61b55b8116 --- /dev/null +++ b/.claude/debugging.md @@ -0,0 +1,115 @@ +# Debugging with jdb + gdb in appsec integration tests +## General ad + +## Overview + +The appsec Gradle integration tests run inside Docker containers. Use +`--debug-jvm` with Gradle to pause the JVM for a Java debugger (jdb), then +attach gdb to processes inside the container (PHP-FPM workers, sidecar). + +See [gdb.md](gdb.md) for gdb-specific instructions. +See [ci/appsec-gradle-integration.md](ci/appsec-gradle-integration.md) for +Gradle test details. + +## Workflow + +``` +Gradle (--debug-jvm) -> jdb (port 5005) -> controls test flow + | + v + Docker container + ├── PHP-FPM worker (attach gdb for PHP/C code) + └── sidecar (attach gdb for Rust code) +``` + +### Step-by-step + +1. **Start Gradle** with `--debug-jvm`: + ```bash + ./gradlew test8.3-debug --info --tests "*TestClass*" --debug-jvm 2>&1 | tee /tmp/debug.log & + ``` + +2. **Connect jdb** and set breakpoints: + ```bash + tmux new-session -d -s jdb "jdb -connect com.sun.jdi.SocketAttach:hostname=localhost,port=5005" + ``` + +3. **Set breakpoints at the right moments** (see "Breakpoint strategy" below). + +4. **When jdb is paused**, attach gdb inside the container, set gdb + breakpoints, continue gdb. + +5. **Resume jdb** to trigger the action you want to observe. + +6. 
**Inspect** in gdb when it stops. + +## Breakpoint strategy — the critical part + +### Rule: prepare gdb breakpoints BEFORE the triggering action + +The most common mistake is setting gdb breakpoints **after** the event of +interest has already happened. The correct pattern: + +1. Use jdb to pause the test **before** the code that triggers the behavior + you want to observe. +2. While jdb is paused, attach gdb and set your breakpoints. +3. Continue gdb. +4. **Then** resume jdb to let the triggering action happen. + +### Example: capturing telemetry endpoint HTTP requests + +Wrong approach: break at `waitForAppEndpoints` (line 142) — endpoints already +collected by then, and they'll be sent soon. If we're trying to stop at the +moment they're sent, we may not be fast enough. + +Instead, break after there's been a request that has started sidecar, but has +not triggered the desired behavior -- you can introduce such a request for +testing if necessary. + +### Keeping the container alive + +The container is torn down when the test exits (pass or fail). Common causes +of premature container death: + +- **Test timeout**: `waitForAppEndpoints` has a 30s timeout. If the sidecar + is frozen by gdb and can't respond, the test's HTTP requests time out, + `waitForAppEndpoints` throws, and the container dies. +- **Test completion**: if jdb doesn't have another breakpoint after the + current one, the test finishes and tears down. + +**Mitigations:** +- Always set a **second jdb breakpoint** after the section you're + investigating (e.g., at the assert after `waitForAppEndpoints`) so the + test pauses before finishing. +- Use **non-stopping gdb breakpoints** (Python breakpoints that + auto-continue) so the sidecar keeps running while you filter for the + right event. +- Don't block the sidecar for extended periods — it needs to respond to IPC + from PHP and HTTP from the telemetry flush. 
+ +## Sidecar watchdog + +The sidecar has a watchdog thread (`datadog-sidecar/src/watchdog.rs`) that +checks a `still_alive` counter every 10 seconds. If the counter hasn't +changed for two intervals (~20s), it calls `abort()`. When gdb stops the +sidecar, the tokio runtime freezes but the watchdog thread keeps running. + +**Workaround**: temporarily patch the watchdog to disable the abort: +```rust +// In watchdog.rs, replace the abort block: +if maybe_stuck { + // watchdog disabled for debugging +} +``` +Then rebuild the tracer (`docker volume rm php-tracer-8.3-debug` to force). + +Remember to revert the patch after debugging. + +## Groovy line numbers in jdb + +Groovy compiles to different bytecode line numbers than the source. When +setting `stop at Class:LINE`: +- Some lines have no code (`No code at line N`). +- Try nearby lines (e.g., 162 instead of 161). +- The first line with executable code after a method declaration usually + works. diff --git a/.claude/gdb.md b/.claude/gdb.md new file mode 100644 index 00000000000..1a36fb8b301 --- /dev/null +++ b/.claude/gdb.md @@ -0,0 +1,183 @@ +# Debugging with GDB via tmux + +## Setup + +```bash +# Start gdb in tmux with pagination off BEFORE attach (-iex runs before target load) +tmux new-session -d -s gdb "gdb -q -iex 'set pagination off' -iex 'set confirm off' /path/to/binary -p PID" +sleep 5 # wait for symbol loading +``` + +Key: use `-iex` (not `-ex`) for settings that must apply before symbol +loading/attach. + +## Sending commands + +```bash +tmux send-keys -t gdb 'command here' Enter +sleep 0.5 # give gdb time to process +tmux capture-pane -t gdb -p | tail -15 # read output +``` + +- Always add a `sleep` between send and capture — gdb needs time. +- Use `tail -N` to read just the relevant output. +- For long output, use `tmux capture-pane -t gdb -p -S -100` to get scrollback. 
+ +## Avoid: interactive gdb loops via send-keys + +GDB's `while`/`end` interactive loop syntax does NOT work reliably via `tmux +send-keys`. The `end` keyword gets swallowed or the `>` continuation prompt +misaligns with the input. + +**Instead: use Python scripts.** + +Write a `.py` file and `source` it: + +```bash +cat > /tmp/script.py << 'PYEOF' +import gdb +val = gdb.parse_and_eval('some_var') +print(f'val = {val}') +PYEOF + +tmux send-keys -t gdb 'source /tmp/script.py' Enter +sleep 2 +tmux capture-pane -t gdb -p | tail -10 +``` + +This is the single most important lesson: **any logic beyond a flat sequence of +gdb commands should be a Python script sourced into gdb.** + +## Reading optimized-out variables + +Local variables are often optimized out at certain breakpoints. Workarounds: +- Read the value from the struct it came from (e.g., `ns->_ns_nloaded` may be + optimized out, but you can recompute it by walking the linked list in a + Python script). +- Break earlier (before the variable goes out of scope). +- Use `info registers` and correlate with disassembly. + +## Watchpoints + +Watchpoints are very effective via tmux — they don't require interaction: + +```bash +tmux send-keys -t gdb 'watch var->field' Enter +sleep 0.3 +tmux send-keys -t gdb 'c' Enter +sleep 5 +tmux capture-pane -t gdb -p | tail -15 +``` + +When the watchpoint fires, gdb shows old/new values and stops. Then inspect +with `bt`, `p`, etc. + +## Batch sequences + +For multi-step flows, chain commands with sleeps: + +```bash +docker exec container bash -c " +tmux send-keys -t gdb 'b some_function' Enter +sleep 0.3 +tmux send-keys -t gdb 'c' Enter +sleep 5 +tmux capture-pane -t gdb -p | tail -15 +" +``` + +## Language setting: C vs Rust + +The sidecar is Rust; PHP extensions are C. **gdb must be in the correct +language mode** for symbol resolution and expression evaluation. + +- After `attach`, gdb defaults to the language of the current frame (usually + C from a syscall). 
+- Set `set language rust` before working with Rust symbols (breakpoints, + `p` expressions). +- `(char*)` casts require C mode. Rust mode uses different syntax. +- When mixing: switch with `set language c` / `set language rust` as needed. + +### Common pitfall: `gdb.Breakpoint()` in Python + +`gdb.Breakpoint('rust::symbol::Name')` silently fails if gdb is in C +language mode — it returns without creating the breakpoint and without +raising an exception. Always ensure `set language rust` before creating +Rust breakpoints from Python. + +Interactive `python ... end` blocks work when `set language rust` is set +before the block. But `source script.py` may fail silently if the language +was wrong at the time. + +## Attaching to the sidecar + +GDB can't find the process image of sidecar (`datadog-ipc-helper`) without +help. You **must** use `file /proc//exe` before `attach`: + +```bash +docker exec CONTAINER tmux new-session -d -s gdb \ + "gdb -q -iex 'set pagination off' -iex 'set confirm off' \ + -ex 'file /proc/PID/exe' -ex 'attach PID' -ex 'set language rust'" +``` + +If you restart gdb or kill the tmux session, you must redo `file /proc/PID/exe` +before attach. + +Find the sidecar PID: +```bash +docker exec CONTAINER pgrep -f 'datadog-ipc.*ddog_daemon_entry_point' | tail -1 +``` + + +## Non-stopping Python breakpoints for filtering + +When you need to catch a specific payload among many, use a Python breakpoint +class that auto-continues on non-matching hits. 
Example: + +```python +import gdb, re, json + +class CatchMessageBatch(gdb.Breakpoint): + def stop(self): + try: + body_out = gdb.execute("output req.body", to_string=True) + m_ptr = re.search(r"ptr: (0x[0-9a-f]+)", body_out) + m_len = re.search(r"len: (\d+)", body_out) + if not m_ptr or not m_len: + return False + ptr = int(m_ptr.group(1), 16) + length = int(m_len.group(1)) + mem = gdb.selected_inferior().read_memory(ptr, length) + content = bytes(mem).decode("utf-8", errors="replace") + d = json.loads(content) + rt = d.get("request_type", "?") + if rt == "message-batch": + sub = [m.get("request_type", "?") for m in d.get("payload", [])] + print(f">>> message-batch: {sub}") + if "app-endpoints" in sub: + with open("/tmp/ep.json", "w") as f: + json.dump(d, f, indent=2) + print(">>> CAUGHT - saved to /tmp/ep.json") + return True # stop + except: + pass + return False # continue + +bp = CatchMessageBatch("libdd_telemetry::worker::TelemetryWorker::send_request") +``` + +## Async Rust stepping + +`next` in an async Rust function steps through the tokio state machine, not +the original source lines. Instead of stepping: +- Set breakpoints on specific functions (`build_request`, `send_request`) +- Use `continue` to jump between breakpoints +- Inspect state at each breakpoint rather than trying to step through + +## Cleanup + +```bash +tmux send-keys -t gdb 'quit' Enter +sleep 0.5 +tmux kill-server 2>/dev/null +``` diff --git a/.claude/general.md b/.claude/general.md new file mode 100644 index 00000000000..2c7d2e41d38 --- /dev/null +++ b/.claude/general.md @@ -0,0 +1,65 @@ +General/misc instructions +========================= + +¶1 When running long commands with Bash tool, don't use the pattern `my-cmd ... +| tail -10`, as that limits visibility into what's happening. Regardless of +whether the tool is invoked with or without `run_in_background`, use the +pattern `my-command ... 
| tee "$(mktemp /tmp/command_log_XXXXX.log)"`, or just +use the output file returned by the tool call. Then, use the Read/Glob/Grep +tools to inspect the output file. + +¶2 When writing .md files, use a line length of 80 characters. You may exceed it +when necessary (e.g. for tables). + +¶3 Run make commands with `-j$(nproc)`. + +¶4 Before running builds or tests that depend on submodules, ensure +the relevant submodules are initialised. See +[ci/building-locally.md](ci/building-locally.md#submodule-initialisation) +for which submodules each build target needs. Quick reference: + +```bash +git submodule update --init \ +    appsec/third_party/libddwaf \ +    appsec/third_party/msgpack-c \ +    appsec/third_party/cpp-base64 \ +    libdatadog +``` + +¶5 Never "fix" tests by disabling them. + +¶6 Before attempting to fix a problem, you must get to the bottom of it, and +present the ultimate (not proximal) reason for the problem. Your conclusions +must be accompanied by evidence: both from analyzing the code and from running +experiments. These experiments can include adding log messages and checking +their content or running debuggers. + +¶7 In particular, never conclude that a problem is pre-existing or that it's +unrelated to our changes without running tests on `git merge-base HEAD +origin/master` (create a new worktree for this purpose) and verifying that it +also happens there. + +¶8 Fixes must be verified by running tests. These may either be existing tests +or new tests that fail before the fix and pass afterwards. 
¶9 To enforce ¶6-8, when the user explicitly asks for a problem to be +investigated, you MUST use the following template: + +``` +The user presented this problem: + +> (fill in user query here) + +I inspected the source code, and I found the following: + +> (fill in your findings, with references to source files and lines) + +The following commands validate my hypothesis: + +> (include the relevant commands ran, debugging steps taken, and if they run +> before or after your tentative changes -- if any --, or both) + +My conclusion is: + +> (fill in your conclusion here) +``` diff --git a/.gitlab/build-tracing.sh b/.gitlab/build-tracing.sh index 51be8205f44..6080fc94bf1 100755 --- a/.gitlab/build-tracing.sh +++ b/.gitlab/build-tracing.sh @@ -31,6 +31,7 @@ fi switch-php "${PHP_VERSION}" make clean && make -j "${MAKE_JOBS}" static objcopy --compress-debug-sections tmp/build_extension/modules/ddtrace.so "standalone_$(uname -m)/ddtrace-${PHP_API}${suffix}.so" +chmod +x "standalone_$(uname -m)/ddtrace-${PHP_API}${suffix}.so" cp -v tmp/build_extension/modules/ddtrace.a "extensions_$(uname -m)/ddtrace-${PHP_API}${suffix}.a" if [ "${PHP_VERSION}" = "7.0" ]; then cp -v tmp/build_extension/ddtrace.ldflags "ddtrace_$(uname -m)${suffix}.ldflags" @@ -41,6 +42,7 @@ if [ "${suffix}" != "-alpine" ]; then switch-php "${PHP_VERSION}-debug" make clean && make -j "${MAKE_JOBS}" static objcopy --compress-debug-sections tmp/build_extension/modules/ddtrace.so "standalone_$(uname -m)/ddtrace-${PHP_API}${suffix}-debug.so" + chmod +x "standalone_$(uname -m)/ddtrace-${PHP_API}${suffix}-debug.so" cp -v tmp/build_extension/modules/ddtrace.a "extensions_$(uname -m)/ddtrace-${PHP_API}${suffix}-debug.a" fi @@ -49,4 +51,5 @@ switch-php "${PHP_VERSION}-zts" rm -r tmp/build_extension make clean && make -j "${MAKE_JOBS}" static objcopy --compress-debug-sections tmp/build_extension/modules/ddtrace.so "standalone_$(uname -m)/ddtrace-${PHP_API}${suffix}-zts.so" +chmod +x "standalone_$(uname 
-m)/ddtrace-${PHP_API}${suffix}-zts.so" cp -v tmp/build_extension/modules/ddtrace.a "extensions_$(uname -m)/ddtrace-${PHP_API}${suffix}-zts.a" diff --git a/.gitlab/compile_extension.sh b/.gitlab/compile_extension.sh index f52a043d41f..308d4de862a 100755 --- a/.gitlab/compile_extension.sh +++ b/.gitlab/compile_extension.sh @@ -24,6 +24,16 @@ fi make -j static & wait +# Compile solib_bootstrap.c separately: 'make static' uses --enable-ddtrace-rust-library-split +# (SSI flag) which excludes solib_bootstrap from ddtrace.a. For this non-SSI build we +# compile it here and inject it into the final link with the ELF entry point flag. +# solib_bootstrap.c uses only system headers, no PHP includes needed. +SOLIB_BOOTSTRAP_OBJ=${EXTENSION_DIR}/ext/solib_bootstrap.o +cc -c -fPIC -O2 -fvisibility=hidden -fno-stack-protector \ + ${EXTENSION_DIR}/ext/solib_bootstrap.c -o ${SOLIB_BOOTSTRAP_OBJ} + # Link extension sed -i 's/-export-symbols .*\/ddtrace\.sym/-Wl,--retain-symbols-file=ddtrace.sym/g' ${EXTENSION_DIR}/ddtrace.ldflags -cc -shared -Wl,-whole-archive ${MODULES_DIR}/ddtrace.a -Wl,-no-whole-archive $(cat ${EXTENSION_DIR}/ddtrace.ldflags) ${CARGO_TARGET_DIR}/debug/libddtrace_php.a -Wl,-soname -Wl,ddtrace.so -o ${MODULES_DIR}/ddtrace.so +cc -shared -Wl,-whole-archive ${MODULES_DIR}/ddtrace.a ${SOLIB_BOOTSTRAP_OBJ} -Wl,-no-whole-archive $(cat ${EXTENSION_DIR}/ddtrace.ldflags) ${CARGO_TARGET_DIR}/debug/libddtrace_php.a -Wl,-e,_dd_solib_start -Wl,-soname -Wl,ddtrace.so -o ${MODULES_DIR}/ddtrace.so +# ExecSolib requires execute permission +chmod +x ${MODULES_DIR}/ddtrace.so diff --git a/.gitlab/link-tracing-extension.sh b/.gitlab/link-tracing-extension.sh index f84f943cca4..e1f58144494 100755 --- a/.gitlab/link-tracing-extension.sh +++ b/.gitlab/link-tracing-extension.sh @@ -4,14 +4,24 @@ set -e -o pipefail suffix="${1:-}" sed -i 's/-export-symbols .*\/ddtrace\.sym/-Wl,--retain-symbols-file=ddtrace.sym/g' "ddtrace_$(uname -m)${suffix}.ldflags" + +# Compile solib_bootstrap.c: the 
split build (--enable-ddtrace-rust-library-split) excludes +# it from the per-PHP-version .a archives, but the final ddtrace.so still needs it as the +# ELF entry point for ExecSolib sidecar spawning. +SOLIB_BOOTSTRAP_OBJ=$(mktemp --suffix=.o) +cc -c -fPIC -O2 -fvisibility=hidden -fno-stack-protector \ + ext/solib_bootstrap.c -o "${SOLIB_BOOTSTRAP_OBJ}" + pids=() for archive in extensions_$(uname -m)/*.a; do ( - cc -shared -Wl,-whole-archive $archive -Wl,-no-whole-archive $(cat "ddtrace_$(uname -m)${suffix}.ldflags") "libddtrace_php_$(uname -m)${suffix}.a" -Wl,-soname -Wl,ddtrace.so -o ${archive%.a}.so + cc -shared -Wl,-whole-archive $archive "${SOLIB_BOOTSTRAP_OBJ}" -Wl,-no-whole-archive $(cat "ddtrace_$(uname -m)${suffix}.ldflags") "libddtrace_php_$(uname -m)${suffix}.a" -Wl,-e,_dd_solib_start -Wl,-soname -Wl,ddtrace.so -o ${archive%.a}.so objcopy --compress-debug-sections ${archive%.a}.so + chmod +x ${archive%.a}.so ) & pids+=($!) done for pid in "${pids[@]}"; do wait $pid done +rm -f "${SOLIB_BOOTSTRAP_OBJ}" diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000000..4d3d3bf46ba --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +See [.claude/general.md](.claude/general.md) and [.claude/ci/index.md](.claude/ci/index.md). 
diff --git a/Cargo.lock b/Cargo.lock index ba2f26ff501..21f2f4e9724 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1442,6 +1442,7 @@ dependencies = [ "anyhow", "bincode", "cbindgen 0.27.0", + "cc", "const-str", "datadog-ipc", "datadog-live-debugger", @@ -5226,6 +5227,7 @@ dependencies = [ "nix 0.29.0", "rlimit", "tempfile", + "tracing", "winapi 0.2.8", "windows 0.51.1", ] diff --git a/Makefile b/Makefile index e22fb80a3b8..1c9d85fb3dc 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ RUN_TESTS_CMD := DD_SERVICE= DD_ENV= REPORT_EXIT_STATUS=1 TEST_PHP_SRCDIR=$(PROJ C_FILES = $(shell find components components-rs ext src/dogstatsd zend_abstract_interface -name '*.c' -o -name '*.h' | awk '{ printf "$(BUILD_DIR)/%s\n", $$1 }' ) TEST_FILES = $(shell find tests/ext -name '*.php*' -o -name '*.inc' -o -name '*.json' -o -name '*.yaml' -o -name 'CONFLICTS' | awk '{ printf "$(BUILD_DIR)/%s\n", $$1 }' ) -RUST_FILES = $(BUILD_DIR)/Cargo.toml $(BUILD_DIR)/Cargo.lock $(shell find components-rs -name '*.c' -o -name '*.rs' -o -name 'Cargo.toml' | awk '{ printf "$(BUILD_DIR)/%s\n", $$1 }' ) $(shell find libdatadog/{build-common,datadog-ffe,datadog-ipc,datadog-ipc-macros,datadog-live-debugger,datadog-live-debugger-ffi,datadog-remote-config,datadog-sidecar,datadog-sidecar-ffi,datadog-sidecar-macros,libdd-alloc,libdd-capabilities,libdd-capabilities-impl,libdd-common,libdd-common-ffi,libdd-crashtracker,libdd-crashtracker-ffi,libdd-data-pipeline,libdd-ddsketch,libdd-dogstatsd-client,libdd-library-config,libdd-library-config-ffi,libdd-log,libdd-libunwind-sys,libdd-telemetry,libdd-telemetry-ffi,libdd-tinybytes,libdd-trace-*,spawn_worker,tools/{cc_utils,sidecar_mockgen},libdd-trace-*,Cargo.toml} \( -type l -o -type f \) \( -path "*/src*" -o -path "*/examples*" -o -path "*/libunwind*" -o -path "*Cargo.toml" -o -path "*/build.rs" -o -path "*/tests/dataservice.rs" -o -path "*/tests/service_functional.rs" \) -not -path "*/datadog-ipc/build.rs" -not -path "*/datadog-sidecar-ffi/build.rs") 
+RUST_FILES = $(BUILD_DIR)/Cargo.toml $(BUILD_DIR)/Cargo.lock $(shell find components-rs -name '*.c' -o -name '*.rs' -o -name 'Cargo.toml' | awk '{ printf "$(BUILD_DIR)/%s\n", $$1 }' ) $(shell find libdatadog/{build-common,datadog-ffe,datadog-ipc,datadog-ipc-macros,datadog-live-debugger,datadog-live-debugger-ffi,datadog-remote-config,datadog-sidecar,datadog-sidecar-ffi,datadog-sidecar-macros,libdd-alloc,libdd-capabilities,libdd-capabilities-impl,libdd-common,libdd-common-ffi,libdd-crashtracker,libdd-crashtracker-ffi,libdd-data-pipeline,libdd-ddsketch,libdd-dogstatsd-client,libdd-library-config,libdd-library-config-ffi,libdd-log,libdd-libunwind-sys,libdd-shared-runtime,libdd-telemetry,libdd-telemetry-ffi,libdd-tinybytes,libdd-trace-*,spawn_worker,tools/{cc_utils,sidecar_mockgen},libdd-trace-*,Cargo.toml} \( -type l -o -type f \) \( -path "*/src*" -o -path "*/examples*" -o -path "*/libunwind*" -o -path "*Cargo.toml" -o -path "*/build.rs" -o -path "*/libdd-libunwind-sys/buildscript*" -o -path "*/tests/dataservice.rs" -o -path "*/tests/service_functional.rs" \) -not -path "*/datadog-ipc/build.rs" -not -path "*/datadog-sidecar-ffi/build.rs") ALL_OBJECT_FILES = $(C_FILES) $(RUST_FILES) $(BUILD_DIR)/Makefile TEST_OPCACHE_FILES = $(shell find tests/opcache -name '*.php*' -o -name '.gitkeep' | awk '{ printf "$(BUILD_DIR)/%s\n", $$1 }' ) TEST_STUB_FILES = $(shell find tests/ext -type d -name 'stubs' -exec find '{}' -type f \; | awk '{ printf "$(BUILD_DIR)/%s\n", $$1 }' ) diff --git a/appsec/tests/integration/src/main/groovy/com/datadog/appsec/php/docker/AppSecContainer.groovy b/appsec/tests/integration/src/main/groovy/com/datadog/appsec/php/docker/AppSecContainer.groovy index fc8b265321f..be2bd60c1ed 100644 --- a/appsec/tests/integration/src/main/groovy/com/datadog/appsec/php/docker/AppSecContainer.groovy +++ b/appsec/tests/integration/src/main/groovy/com/datadog/appsec/php/docker/AppSecContainer.groovy @@ -94,7 +94,9 @@ class AppSecContainer> extends GenericContain withEnv 
'DD_INSTRUMENTATION_TELEMETRY_ENABLED', '1' // very verbose: withEnv '_DD_DEBUG_SIDECAR_LOG_METHOD', 'file:///tmp/logs/sidecar.log' - withEnv 'DD_SPAWN_WORKER_USE_EXEC', '1' // gdb fails following child with fdexec + // DD_SPAWN_WORKER_USE_EXEC was previously set here to work around gdb not + // being able to follow children with FdExec. ExecSolib (the new default) uses + // a direct execve like Exec does, so the workaround is no longer needed. withEnv 'DD_TELEMETRY_HEARTBEAT_INTERVAL', '10' withEnv 'DD_TELEMETRY_EXTENDED_HEARTBEAT_INTERVAL', '10' // withEnv '_DD_SHARED_LIB_DEBUG', '1' diff --git a/components-rs/Cargo.toml b/components-rs/Cargo.toml index 90b6851e1e4..617badfaa8e 100644 --- a/components-rs/Cargo.toml +++ b/components-rs/Cargo.toml @@ -53,6 +53,7 @@ hashbrown = "0.15" [build-dependencies] cbindgen = "0.27" +cc = "1" [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ['cfg(php_shared_build)'] } diff --git a/components-rs/build.rs b/components-rs/build.rs new file mode 100644 index 00000000000..801f217a7ca --- /dev/null +++ b/components-rs/build.rs @@ -0,0 +1,18 @@ +fn main() { + let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + // SHARED=1 is set by config.m4 when building in rust-library-split (SSI) mode. + // In that mode this crate is the cdylib that carries the Rust code. + // We compile ssi_entry.c into it to make libddtrace_php.so directly executable + // via the dynamic loader (ld.so). 
+ let shared_build = std::env::var("SHARED").as_deref() == Ok("1"); + + if target_os == "linux" && shared_build { + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + cc::Build::new() + .file(format!("{manifest_dir}/ssi_entry.c")) + .flag("-fvisibility=hidden") + .compile("ssi_entry"); + + println!("cargo:rustc-link-arg=-Wl,-e,_dd_ssi_entry"); + } +} diff --git a/components-rs/ssi_entry.c b/components-rs/ssi_entry.c new file mode 100644 index 00000000000..8290a27ce52 --- /dev/null +++ b/components-rs/ssi_entry.c @@ -0,0 +1,216 @@ +// SSI entry point for libddtrace_php.so executed directly. +// +// The spawner invokes libddtrace_php.so via the dynamic loader explicitly: +// execve(ld_path, [ld_path, lib_path, process_name, "", lib_path, deps..., symbol], envp) +// ld.so loads libc and all other dependencies, then jumps to _dd_ssi_entry. +// By that time the process is fully initialised (TLS, libc, allocator) so we +// can use ordinary C library calls. +// +// argv layout as seen by _dd_ssi_entry: +// ld.so strips its own path before calling the entry point, so we see: +// argv[0] = lib_path <- skip (ld.so already removed ld_path) +// argv[1] = process_name <- skip +// argv[2] = "" <- standard trampoline argv[1] +// argv[3] = lib_path <- standard trampoline argv[2] +// ... +// argv[argc-1] = symbol_name +// +// NOTE: .init_array constructors are NOT called automatically when a shared +// library is exec'd as the main program via ld.so. ld.so's _dl_init skips +// the main object, expecting __libc_start_main to handle it — but we never +// call __libc_start_main. We therefore call run_own_init_array() before +// entering any Rust code (Rust runtime init, TLS, allocator setup, ... +// all live in .init_array). 
+// +// Confirmed in glibc elf/dl-init.c call_init(): +// if (l->l_name[0] == '\0' && l->l_type == lt_executable) return; +// The main executable always gets l_name="" and l_type=lt_executable, +// regardless of whether it is ET_EXEC or an ET_DYN exec'd via ld.so. +// +// Implementation notes for run_own_init_array(): +// +// _DYNAMIC: the linker always defines this symbol, pointing to the .dynamic +// section of the current object. It is PC-relative and always accessible. +// +// DT_INIT_ARRAY d_ptr: a link-time virtual address (offset from load base). +// ld.so adds l_addr when using it; it has NO separate RELATIVE relocation. +// We add the load base ourselves. +// +// Load base: __ehdr_start is a linker-defined symbol at VMA 0 within the +// object. For a standard DSO (first PT_LOAD p_vaddr = 0) this equals the +// runtime load base. +// +// .init_array entries: each entry has an R_*_RELATIVE relocation applied by +// ld.so before our entry point runs, so they are already absolute VAs. +// We call them directly without adding the load base again. + +#include +#include +#include +#include + +// Minimal ELF dynamic section types — avoids depending on system elf.h, +// which some clang configurations cannot find. ssi_entry.c targets Linux +// LP64 (x86-64 and aarch64) only, so intptr_t/uintptr_t are the right widths. +typedef struct { + intptr_t d_tag; + union { + uintptr_t d_val; + uintptr_t d_ptr; + } d_un; +} SsiDyn; + +#define DT_NULL 0 +#define DT_INIT 12 +#define DT_INIT_ARRAY 25 +#define DT_INIT_ARRAYSZ 27 + +struct trampoline_data { + int argc; + char **argv; + char **dependency_paths; +}; + +// Direct references to the sidecar entry points defined in this same .so. +// Since ssi_entry.c is compiled into libddtrace_php.so alongside the Rust +// code, the linker resolves these at link time — no runtime dlsym needed. 
+extern void ddog_daemon_entry_point(struct trampoline_data *); +extern void ddog_crashtracker_entry_point(struct trampoline_data *); + +// Linker-defined symbol at VMA 0 of this DSO (= load base for a standard build). +extern __attribute__((visibility("hidden"))) char __ehdr_start; + +// Linker-defined pointer to the .dynamic section of this object. +extern __attribute__((visibility("hidden"))) SsiDyn _DYNAMIC[]; + +// ELF spec (glibc ldsodefs.h): init_array entries have signature +// void fn(int argc, char **argv, char **envp), matching dl_init_t. +typedef void (*dl_init_t)(int, char **, char **); + +// Call DT_INIT and DT_INIT_ARRAY for this library. +// +// ld.so skips the main object's .init_array (leaves it for __libc_start_main), +// so we must run it manually before calling any Rust code. +static void run_own_init_array(int argc, char **argv, char **envp) +{ + uintptr_t base = (uintptr_t)&__ehdr_start; + + int has_init = 0; + uintptr_t init_off = 0; + uintptr_t init_array_off = 0; + size_t init_array_sz = 0; + + for (SsiDyn *d = _DYNAMIC; d->d_tag != DT_NULL; d++) { + switch (d->d_tag) { + case DT_INIT: + has_init = 1; + init_off = d->d_un.d_ptr; + break; + case DT_INIT_ARRAY: + init_array_off = d->d_un.d_ptr; + break; + case DT_INIT_ARRAYSZ: + init_array_sz = d->d_un.d_val; + break; + } + } + + // ELF spec: DT_INIT runs before DT_INIT_ARRAY. + if (has_init) + ((dl_init_t)(base + init_off))(argc, argv, envp); + + if (init_array_off) { + // DT_INIT_ARRAY d_ptr is a link-time offset; add load base. + // The entries themselves are already absolute VAs (RELATIVE relocs applied). + dl_init_t *arr = (dl_init_t *)(base + init_array_off); + size_t n = init_array_sz / sizeof(*arr); + for (size_t i = 0; i < n; i++) { + // Slots with value 0 or -1 are sentinels meaning "empty". 
+ if (arr[i] && (uintptr_t)arr[i] != (uintptr_t)-1) + arr[i](argc, argv, envp); + } + } +} + +__attribute__((noreturn, used)) +static void ssi_main(int argc, char **argv) +{ + if (argc < 4) _exit(1); + + // envp sits just past the argv null terminator in the initial stack layout. + char **envp = argv + argc + 1; + + argc -= 2; + argv += 2; + + const char *symbol = argv[argc - 1]; + + void (*fn)(struct trampoline_data *) = NULL; + if (strcmp(symbol, "ddog_daemon_entry_point") == 0) + fn = ddog_daemon_entry_point; + else if (strcmp(symbol, "ddog_crashtracker_entry_point") == 0) + fn = ddog_crashtracker_entry_point; + + if (!fn) _exit(2); + + // Run our own .init_array before entering Rust code. ld.so skips it + // for the main executable, expecting __libc_start_main to handle it, + // but we never call __libc_start_main. + run_own_init_array(argc, argv, envp); + + struct trampoline_data td = { argc, argv, NULL }; + fn(&td); + _exit(0); +} + +// Architecture-specific _start-like stub: read argc/argv from the kernel +// stack and tail-call ssi_main. +// +// Stack alignment: +// +// x86-64: the kernel sets rsp % 16 == 0 at process entry. The SysV ABI +// requires rsp % 16 == 8 at the *start* of a C function (as if a 'call' +// had just pushed an 8-byte return address). Compiled code — including +// glibc internals — can use 'movaps' and other SSE instructions that +// require 16-byte aligned stack slots. If we jump to C code without +// fixing the alignment the first such instruction will SIGSEGV. +// +// Fix: 'and $-16, %rsp' (no-op since rsp is already 16-aligned at entry) +// followed by 'sub $8, %rsp' to simulate the return-address push. +// +// argc and argv must be read *before* the stack pointer is moved. +// +// aarch64: sp is required to be 16-byte aligned at all times by the ABI, +// and the kernel guarantees this at entry. There is no return-address-on- +// stack convention (lr carries it), so no adjustment is needed. 
+ +#if defined(__aarch64__) +__asm__( + ".text\n" + ".global _dd_ssi_entry\n" + ".type _dd_ssi_entry, @function\n" + "_dd_ssi_entry:\n" + " mov x29, #0\n" + " mov x30, #0\n" + " ldr x0, [sp]\n" /* argc */ + " add x1, sp, #8\n" /* argv */ + " b ssi_main\n" /* noreturn tail call */ + ".size _dd_ssi_entry, .-_dd_ssi_entry\n" +); +#elif defined(__x86_64__) +__asm__( + ".text\n" + ".global _dd_ssi_entry\n" + ".type _dd_ssi_entry, @function\n" + "_dd_ssi_entry:\n" + " xor %ebp, %ebp\n" + " movl (%rsp), %edi\n" /* argc — read before adjusting rsp */ + " lea 8(%rsp), %rsi\n" /* argv */ + " and $-16, %rsp\n" /* ensure 16-byte alignment */ + " sub $8, %rsp\n" /* simulate 'call': rsp%16==8 */ + " jmp ssi_main\n" /* noreturn tail call */ + ".size _dd_ssi_entry, .-_dd_ssi_entry\n" +); +#else +# error "ssi_entry.c: unsupported architecture" +#endif diff --git a/config.m4 b/config.m4 index 6202b96db96..3214c8c4fb6 100644 --- a/config.m4 +++ b/config.m4 @@ -242,6 +242,19 @@ if test "$PHP_DDTRACE" != "no"; then DD_TRACE_PHP_SOURCES="$DD_TRACE_PHP_SOURCES \ ext/compat_getrandom.c" + dnl On Linux, add the solib bootstrap (makes the library directly executable). + dnl In rust-library-split (SSI) mode the bootstrap goes into libddtrace_php.so + dnl via components-rs/build.rs instead, because that is the library that + dnl carries the Rust code (and therefore DD_TRAMPOLINE_BIN). + case $host_os in + linux*) + if test "$PHP_DDTRACE_RUST_LIBRARY_SPLIT" = "no"; then + DD_TRACE_PHP_SOURCES="$DD_TRACE_PHP_SOURCES \ + ext/solib_bootstrap.c" + fi + ;; + esac + ZAI_SOURCES="$EXTRA_ZAI_SOURCES \ zend_abstract_interface/config/config.c \ zend_abstract_interface/config/config_decode.c \ @@ -292,6 +305,22 @@ if test "$PHP_DDTRACE" != "no"; then EXTRA_CFLAGS="$EXTRA_CFLAGS -fvisibility=hidden" EXTRA_LDFLAGS="$EXTRA_LDFLAGS -export-symbols $ext_srcdir/ddtrace.sym -flto -fuse-linker-plugin" + dnl On Linux, set the ELF entry point so ddtrace.so can be executed directly. 
+ dnl In rust-library-split (SSI) mode the entry point is set on libddtrace_php.so + dnl via components-rs/build.rs instead. + case $host_os in + linux*) + if test "$PHP_DDTRACE_RUST_LIBRARY_SPLIT" = "no"; then + EXTRA_LDFLAGS="$EXTRA_LDFLAGS -Wl,-e,_dd_solib_start" + dnl ExecSolib requires execute permission; PHP's make install defaults to + dnl INSTALL_DATA = install -m 644. Override to 0755 so execve() works. + cat <<'EOT' >> Makefile.fragments +INSTALL_DATA = $(INSTALL) -m 0755 +EOT + fi + ;; + esac + PHP_SUBST(EXTRA_CFLAGS) PHP_SUBST(EXTRA_LDFLAGS) PHP_SUBST(DDTRACE_SHARED_LIBADD) diff --git a/datadog-setup.php b/datadog-setup.php index a86432fde7a..e2330d7ee03 100644 --- a/datadog-setup.php +++ b/datadog-setup.php @@ -956,6 +956,11 @@ function safe_copy_extension($source, $destination) $tmpName = $destination . '.tmp'; copy($source, $tmpName); + // Add execute permission: required for ExecSolib (the kernel execve's ddtrace.so + // directly to spawn the sidecar). Safe to apply unconditionally to .so files. + if (!IS_WINDOWS) { + chmod($tmpName, fileperms($tmpName) | 0111); + } rename($tmpName, $destination); echo "Copied '$source' to '$destination'\n"; } diff --git a/ext/solib_bootstrap.c b/ext/solib_bootstrap.c new file mode 100644 index 00000000000..a9e188754da --- /dev/null +++ b/ext/solib_bootstrap.c @@ -0,0 +1,1182 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 +// +// solib_bootstrap.c - Makes ddtrace.so directly executable without PT_INTERP. +// Works on both glibc and musl. No libc available - raw syscalls only. +// Must be compiled with -fno-stack-protector (runs before libc init). +// +// Two components: +// +// _dd_solib_start (asm, ELF entry point set via -Wl,-e): +// Saves sp, computes __ehdr_start (base) and _DYNAMIC via PC-relative +// ADRP/LEA, calls _dd_self_relocate(base, dynamic), then calls +// _dd_solib_bootstrap(original_sp). 
+//
+// _dd_self_relocate (C, hidden):
+//   Walks .dynamic, applies R_*_RELATIVE from DT_RELA, DT_JMPREL, DT_RELR.
+//   On aarch64, skips relocs targeting .dynamic (linker emits RELATIVE relocs
+//   for .dynamic d_ptr entries on aarch64; those are not needed since nothing
+//   reads those fields as runtime addresses after self-relocation).
+//
+// _dd_solib_bootstrap (C, noreturn):
+//   a. Maps the embedded TRAMPOLINE_BIN ELF into anonymous memory.
+//   b. Redirects AT_PHDR/AT_PHNUM/AT_ENTRY in the auxv to the trampoline.
+//   c. Creates a patched memfd copy of ddtrace.so: sendfile the whole binary,
+//      then pwrite-patch .dynsym (STB_GLOBAL/SHN_UNDEF -> STB_WEAK) and
+//      .dynamic (neutralize DT_NEEDED, DT_INIT/FINI, DT_VERNEED, etc.).
+//      This lets dlopen succeed despite PHP symbols being absent in the child.
+//   d. Loads ld.so (glibc or musl) from the __DD_LDSO_PATH env var.
+//   e. Sets AT_BASE to ld.so's load address.
+//   f. Restores sp and jumps to ld.so's entry point.
+//
+// ld.so then sees the trampoline as the main executable, loads its deps (libc
+// etc.), and calls trampoline main(), which does dlopen(memfd_path) + dlsym
+// to invoke the requested entry point.
+
+#ifdef __linux__
+
+#pragma GCC optimize("no-stack-protector")
+// This file runs before ld.so and before any runtime library (libc, ASAN,
+// etc.) is initialized. Any instrumentation that inserts calls to runtime
+// functions (ASAN __asan_load*, stack protector __stack_chk_fail, UBSan
+// __ubsan_handle_*) will produce unresolved PLT entries that crash before the
+// bootstrap completes.
+#ifdef __clang__
+# pragma clang attribute push(__attribute__((no_sanitize("address","undefined","thread","memory"))), apply_to = function)
+#endif
+
+#include <elf.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdnoreturn.h>
+
+// DD_TRAMPOLINE_BIN is defined in spawn_worker (Rust) as a &[u8] fat pointer,
+// i.e. { const u8 *ptr, usize len }.
+struct dd_slice { const unsigned char *ptr; uintptr_t len; };
+// Mark the symbol as hidden. 
This file is actually linked as part of a shared
+// library and we don't want the linker to think the symbol could be preempted,
+// especially because this could result in GLOB_DAT relocs that we currently
+// don't handle during our self-relocation.
+extern const struct dd_slice DD_TRAMPOLINE_BIN __attribute__((visibility("hidden")));
+
+// Actually, the "split" build configuration requires that, when building
+// ddtrace.la (yes, .la is apparently still a thing), we provide a definition for
+// the symbol, otherwise we get a linker error: "undefined reference to
+// `DD_TRAMPOLINE_BIN'" / "relocation R_X86_64_PC32 against undefined hidden
+// symbol `DD_TRAMPOLINE_BIN' can not be used when making a shared object". So
+// provide here a weak hidden definition in the hopes that the final linking
+// step will ignore it in favor of the non-weak version in the rust library
+// libddtrace_php.a.
+const struct dd_slice DD_TRAMPOLINE_BIN __attribute__((visibility("hidden"), weak)) = {0};
+
+// ---- Structs {{{
+
+struct loaded_lib {
+    uintptr_t base;
+    uintptr_t entry;
+    Elf64_Dyn *dynamic;
+    Elf64_Sym *dynsym;
+    const char *dynstr;
+    uint32_t *gnu_hash;
+    Elf64_Rela *rela;
+    long rela_count;
+    Elf64_Rela *jmprel;
+    long jmprel_count;
+};
+
+struct trampoline_map {
+    uintptr_t base;  // load bias: base + p_vaddr == runtime address of that vaddr
+    uintptr_t entry; // runtime entry point (base + e_entry)
+    uintptr_t phdr;  // runtime address of program header table
+    uint16_t phnum;  // number of program headers
+    long total_map;  // total bytes reserved
+};
+
+struct boot_args {
+    int argc;
+    char **argv;
+    char **envp;
+    Elf64_auxv_t *auxv;
+};
+
+// ddtrace.so's ELF header is at __ehdr_start (linker-defined, hidden). 
+extern const Elf64_Ehdr __ehdr_start __attribute__((visibility("hidden"))); + +// }}} +// ---- Forward declarations {{{ + +static void parse_stack(void *stack_top, struct boot_args *args); +static const char *find_env(char **envp, const char *name); +static Elf64_auxv_t *find_auxv_entry(Elf64_auxv_t *auxv, unsigned long type); +static unsigned long get_auxv(Elf64_auxv_t *auxv, unsigned long type); +static int elf_map_segments(int fd, long file_bias, const Elf64_Phdr *phdrs, + int phnum, uintptr_t base, long page_size); +static int elf_load_trampoline(const void *src, size_t src_len, + struct trampoline_map *out, long page_size); +static int elf_load(const char *path, struct loaded_lib *lib, long page_size); +static int create_patched_memfd(void); +static int uint_to_dec(unsigned int v, char *buf); +static void _dd_self_relocate(uintptr_t base, const Elf64_Dyn *dynamic); +static void _dd_apply_relr(uintptr_t base, const uint64_t *relr, long relrsz); +static int elf_check_header(const Elf64_Ehdr *ehdr, unsigned type_mask, int max_phnum); +static const Elf64_Phdr *find_phdr(const Elf64_Phdr *phdrs, int phnum, uint32_t type); +static int elf_reserve(const Elf64_Phdr *phdrs, int phnum, long page_size, + uintptr_t *base_out, long *total_out); +static int bs_strlen(const char *s); +static int bs_strncmp(const char *a, const char *b, int n); +static void bs_memcpy(void *dst, const void *src, long n); +static void bs_memset(void *dst, int c, long n); +static int elf_pf_to_prot(uint32_t pf); +static noreturn void bs_fatal(const char *msg, int code); +static long sys_readlink(const char *path, char *buf, long bufsiz); +static int sys_open_rdonly(const char *path); +static long sys_read(int fd, void *buf, long count); +static long sys_write(int fd, const void *buf, long count); +static int sys_close(int fd); +static void *sys_mmap(void *addr, long length, int prot, int flags, int fd, long offset); +static int sys_munmap(void *addr, long length); + +static noreturn void 
sys_exit_group(int status); +static int sys_memfd_create(const char *name, unsigned int flags); +static long sys_sendfile(int out_fd, int in_fd, long count); +static long sys_pwrite(int fd, const void *buf, long count, long offset); +// }}} + +// ---- Constants {{{ + +#define BS_PROT_NONE 0x0 +#define BS_PROT_READ 0x1 +#define BS_PROT_WRITE 0x2 +#define BS_PROT_EXEC 0x4 +#define BS_MAP_PRIVATE 0x02 +#define BS_MAP_FIXED 0x10 +#define BS_MAP_ANONYMOUS 0x20 +#define BS_MAP_FAILED ((void *)-1) + +#define BS_PAGE_DOWN(x, ps) ((uintptr_t)(x) & ~((uintptr_t)(ps) - 1)) +#define BS_PAGE_UP(x, ps) (((uintptr_t)(x) + (uintptr_t)(ps) - 1) & ~((uintptr_t)(ps) - 1)) + +// OS-specific ELF dynamic tags (not in all versions) +#ifndef DT_GNU_HASH +#define DT_GNU_HASH 0x6ffffef5 +#endif +#ifndef DT_FLAGS_1 +#define DT_FLAGS_1 0x6ffffffb +#endif +#ifndef DT_VERSYM +#define DT_VERSYM 0x6ffffff0 +#endif +#ifndef DT_VERNEED +#define DT_VERNEED 0x6ffffffe +#endif +#ifndef DT_VERNEEDNUM +#define DT_VERNEEDNUM 0x6fffffff +#endif +#ifndef DT_RELACOUNT +#define DT_RELACOUNT 0x6ffffff9 +#endif +#ifndef DT_RELR +#define DT_RELR 36 +#endif +#ifndef DT_RELRSZ +#define DT_RELRSZ 35 +#endif + +// Architecture-specific RELATIVE relocation type +#ifdef __x86_64__ +#define BS_R_RELATIVE R_X86_64_RELATIVE // = 8 +#elif defined(__aarch64__) +#define BS_R_RELATIVE R_AARCH64_RELATIVE // = 0x403 = 1027 +#endif +// }}} + +// ---- ELF entry point (file-scope asm) {{{ +// _dd_solib_bootstrap is noreturn - it transfers control directly to ld.so. +// The asm saves the original stack pointer and calls bootstrap with it. 
+ +#ifdef __x86_64__ +__asm__( + ".text\n" + ".global _dd_solib_start\n" + ".type _dd_solib_start, @function\n" + "_dd_solib_start:\n" + " xor %ebp, %ebp\n" + " mov %rsp, %r12\n" // r12 = original sp (callee-saved) + " lea __ehdr_start(%rip), %rdi\n" // arg1: base + " lea _DYNAMIC(%rip), %rsi\n" // arg2: &_DYNAMIC + " andq $-16, %rsp\n" + " call _dd_self_relocate\n" + " mov %r12, %rdi\n" // arg1: original sp for bootstrap + " mov %r12, %rsp\n" + " andq $-16, %rsp\n" + " call _dd_solib_bootstrap\n" // noreturn + ".size _dd_solib_start, .-_dd_solib_start\n" +); +#elif defined(__aarch64__) +__asm__( + ".text\n" + ".global _dd_solib_start\n" + ".type _dd_solib_start, @function\n" + "_dd_solib_start:\n" + " mov x29, #0\n" + " mov x30, #0\n" + " mov x19, sp\n" // x19 = original sp (callee-saved) + " adrp x0, __ehdr_start\n" + " add x0, x0, :lo12:__ehdr_start\n" // arg1: base + " adrp x1, _DYNAMIC\n" + " add x1, x1, :lo12:_DYNAMIC\n" // arg2: &_DYNAMIC + " mov x9, x19\n" + " and x9, x9, #-16\n" + " mov sp, x9\n" + " bl _dd_self_relocate\n" + " mov x0, x19\n" // arg1: original sp for bootstrap + " mov x9, x19\n" + " and x9, x9, #-16\n" + " mov sp, x9\n" + " bl _dd_solib_bootstrap\n" // noreturn + ".size _dd_solib_start, .-_dd_solib_start\n" +); +#endif + +// }}} +// ---- Bootstrap entry point {{{ + +__attribute__((visibility("hidden"), used)) +noreturn void _dd_solib_bootstrap(void *stack_top) { + struct boot_args args; + struct loaded_lib bs_ldso; + parse_stack(stack_top, &args); + + long page_size = (long)get_auxv(args.auxv, AT_PAGESZ); + if (page_size <= 0) page_size = 4096; + + const char *ldso_path = find_env(args.envp, "__DD_LDSO_PATH"); + if (!ldso_path) + bs_fatal("__DD_LDSO_PATH not set", 119); + + // Step 1: Map the embedded trampoline binary from memory + const unsigned char *trampoline_bytes = DD_TRAMPOLINE_BIN.ptr; + size_t trampoline_len = DD_TRAMPOLINE_BIN.len; + + if (!trampoline_bytes || !trampoline_len) + bs_fatal("TRAMPOLINE_BIN not available", 120); + + 
struct trampoline_map tmap;
+    if (elf_load_trampoline(trampoline_bytes, trampoline_len, &tmap, page_size) < 0)
+        bs_fatal("failed to map trampoline", 121);
+
+    // Step 2: Redirect auxv to the trampoline
+    // ld.so reads AT_PHDR/AT_PHNUM/AT_ENTRY to find and set up the main executable.
+    // We redirect these to the trampoline, so ld.so loads it instead of ddtrace.so.
+    Elf64_auxv_t *at_phdr = find_auxv_entry(args.auxv, AT_PHDR);
+    Elf64_auxv_t *at_phnum = find_auxv_entry(args.auxv, AT_PHNUM);
+    Elf64_auxv_t *at_entry = find_auxv_entry(args.auxv, AT_ENTRY);
+
+    if (at_phdr) at_phdr->a_un.a_val = tmap.phdr;
+    if (at_phnum) at_phnum->a_un.a_val = tmap.phnum;
+    if (at_entry) at_entry->a_un.a_val = tmap.entry;
+
+    // Zero AT_SYSINFO_EHDR (vDSO) so ld.so's setup_vdso() is a no-op.
+    // setup_vdso() calls elf_get_dynamic_info() on the vDSO's link_map, which
+    // runs ADJUST_DYN_INFO - writing to the vDSO's .dynamic in-place. The vDSO
+    // is mapped read-only by the kernel, so that write crashes on glibc versions
+    // that added DT_RELR support in ADJUST_DYN_INFO before adding the
+    // l_ld_readonly guard that skips the write for read-only .dynamic sections.
+    {
+        Elf64_auxv_t *at_sysinfo = find_auxv_entry(args.auxv, 33 /* AT_SYSINFO_EHDR */);
+        if (at_sysinfo) at_sysinfo->a_un.a_val = 0;
+    }
+
+    // Step 3: Create a patched memfd so the trampoline can dlopen without PHP
+    // Replace all argv entries pointing to ddtrace.so with /proc/self/fd/N
+    // so the trampoline loads the patched (symbol-weakened) copy instead. 
+ { + static char fd_path_buf[32]; + static char self_path_buf[512]; + int patched_mfd = create_patched_memfd(); + if (patched_mfd >= 0) { + // Build "/proc/self/fd/N" + const char *prefix = "/proc/self/fd/"; + int plen = bs_strlen(prefix); + bs_memcpy(fd_path_buf, prefix, plen); + int nlen = uint_to_dec((unsigned int)patched_mfd, fd_path_buf + plen); + fd_path_buf[plen + nlen] = '\0'; + + // Resolve the ddtrace.so path via /proc/self/exe + long self_len = sys_readlink("/proc/self/exe", self_path_buf, + (long)sizeof(self_path_buf) - 1); + if (self_len > 0) self_path_buf[self_len] = '\0'; + else self_len = 0; + + // Replace all argv entries (indices 2..argc-2) that match ddtrace.so + // with the patched memfd path. This covers both argv[2] (the main + // library to dlopen) and any argv[3+] (additional dependencies). + for (int i = 2; i < args.argc - 1; i++) { + const char *a = args.argv[i]; + if (!a || !*a) { + // empty slot → replace directly (this is the "temp file" slot) + args.argv[i] = fd_path_buf; + } else if (self_len > 0 && + bs_strlen(a) == (int)self_len && + bs_strncmp(a, self_path_buf, (int)self_len) == 0) { + // exact path match → replace + args.argv[i] = fd_path_buf; + } + } + } + } + + // Step 4: Load ld.so / musl + if (elf_load(ldso_path, &bs_ldso, page_size) < 0) + bs_fatal("failed to load dynamic linker", 122); + + // set AT_BASE so ld.so knows its own load address + Elf64_auxv_t *at_base = find_auxv_entry(args.auxv, AT_BASE); + if (at_base) at_base->a_un.a_val = bs_ldso.base; + + // Step 5: Jump to ld.so's entry + // ld.so will: + // * Find the "main executable" via AT_PHDR (the trampoline) + // * Load trampoline's DT_NEEDED (libc, libdl, libm, libpthread) + // * Relocate the trampoline fully + // * Call trampoline's _start + // * Trampoline main() does dlopen(argv[2]) + dlsym(argv[argc-1]) + calls it + // + // AT_ENTRY now points to trampoline's entry, so ld.so calls trampoline's + // _start. 
The original argv (process_name, "", ddtrace_path, deps..., + // symbol) is on the kernel stack and is passed unchanged to the + // trampoline's main(). + + // Restore sp to the original kernel stack and jump to ld.so entry. The + // function is noreturn - we transfer control via inline asm. + + uintptr_t ldso_entry = bs_ldso.entry; + + // Restore the original kernel stack and jump to ld.so's entry point. +#ifdef __x86_64__ + // On x86, the kernel could pass the rtld_fini function in edx. + // This is not the case for amd64 + __asm__ volatile( + "mov %[sp], %%rsp\n" + "jmp *%[entry]\n" + : + : [sp] "r"(stack_top), + [entry] "r"(ldso_entry) + : "memory" + ); +#elif defined(__aarch64__) + __asm__ volatile( + "mov sp, %0\n" + "br %1\n" + :: "r"(stack_top), "r"(ldso_entry) + : "memory" + ); +#endif + __builtin_unreachable(); +} + +// }}} + +// ---- Self-relocation {{{ +// Apply all R_*_RELATIVE relocations in DT_RELA, DT_JMPREL, and DT_RELR. +// Called from _dd_solib_start asm before any other C code runs. +// Constraints: only uses register arguments and stack locals - no global access. 
+__attribute__((used)) +static void _dd_self_relocate(uintptr_t base, const Elf64_Dyn *dynamic) { + const Elf64_Rela *rela = NULL; long relasz = 0; + const Elf64_Rela *jmprel = NULL; long pltrelsz = 0; + const uint64_t *relr = NULL; long relrsz = 0; + + const Elf64_Dyn *d = dynamic; + while (d->d_tag != DT_NULL) { + switch (d->d_tag) { + // eager relocations + case DT_RELA: rela = (const Elf64_Rela *)(base + d->d_un.d_ptr); break; + case DT_RELASZ: relasz = (long)d->d_un.d_val; break; + // lazy (w/out BIND_NOW) relocations + case DT_JMPREL: jmprel = (const Elf64_Rela *)(base + d->d_un.d_ptr); break; + case DT_PLTRELSZ: pltrelsz = (long)d->d_un.d_val; break; + // compact relative relocations + case DT_RELR: relr = (const uint64_t *)(base + d->d_un.d_ptr); break; + case DT_RELRSZ: relrsz = (long)d->d_un.d_val; break; + default: break; + } + d++; + } + // d now points to DT_NULL; d+1 is the first byte past .dynamic + +#ifdef __aarch64__ + // On aarch64 the linker emits R_AARCH64_RELATIVE for .dynamic d_ptr entries. + // These relocs are not needed: we already extracted what we needed from + // .dynamic in the loop above and are not consulting .dynamic again. 
+ uintptr_t dyn_start = (uintptr_t)dynamic; + uintptr_t dyn_end = (uintptr_t)(d + 1); // first byte past DT_NULL +#endif + + // Apply RELATIVE relocs from DT_RELA + if (rela) { + const Elf64_Rela *end = (const Elf64_Rela *)((const char *)rela + relasz); + for (const Elf64_Rela *r = rela; r < end; r++) { + if (ELF64_R_TYPE(r->r_info) != (unsigned)BS_R_RELATIVE) continue; +#ifdef __aarch64__ + uintptr_t target = base + (uintptr_t)r->r_offset; + if (target >= dyn_start && target < dyn_end) continue; +#endif + *(uint64_t *)(base + (uintptr_t)r->r_offset) = + base + (uint64_t)r->r_addend; + } + } + + // Apply RELATIVE relocs from DT_JMPREL (PLT entries; processed eagerly) + if (jmprel) { + const Elf64_Rela *end = (const Elf64_Rela *)((const char *)jmprel + pltrelsz); + for (const Elf64_Rela *r = jmprel; r < end; r++) { + if (ELF64_R_TYPE(r->r_info) == (unsigned)BS_R_RELATIVE) + *(uint64_t *)(base + (uintptr_t)r->r_offset) = + base + (uint64_t)r->r_addend; + } + } + + // Apply DT_RELR compact relative relocations + if (relr) _dd_apply_relr(base, relr, relrsz); +} + +// }}} +// ---- Raw syscall wrappers {{{ + +#ifdef __x86_64__ + +static long _syscall1(long n, long a1) { + long ret; + __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a1) : "rcx", "r11", "memory"); + return ret; +} +static long _syscall2(long n, long a1, long a2) { + long ret; + __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2) : "rcx", "r11", "memory"); + return ret; +} +static long _syscall3(long n, long a1, long a2, long a3) { + long ret; + __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2), "d"(a3) : "rcx", "r11", "memory"); + return ret; +} +static long _syscall4(long n, long a1, long a2, long a3, long a4) { + long ret; + register long r10 __asm__("r10") = a4; + __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10) : "rcx", "r11", "memory"); + return ret; +} +static long _syscall6(long n, long a1, long a2, long a3, long a4, long a5, long 
a6) { + long ret; + register long r10 __asm__("r10") = a4; + register long r8 __asm__("r8") = a5; + register long r9 __asm__("r9") = a6; + __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9) : "rcx", "r11", "memory"); + return ret; +} + +#define SYS_READ 0 +#define SYS_WRITE 1 +#define SYS_OPEN 2 +#define SYS_CLOSE 3 +#define SYS_LSEEK 8 +#define SYS_MMAP 9 +#define SYS_MPROTECT 10 +#define SYS_MUNMAP 11 +#define SYS_PWRITE64 18 +#define SYS_SENDFILE 40 +#define SYS_READLINK 89 +#define SYS_EXIT_GROUP 231 +#define SYS_MEMFD_CREATE 319 + +#elif defined(__aarch64__) + +static long _syscall1(long n, long a1) { + register long x8 __asm__("x8") = n; + register long x0 __asm__("x0") = a1; + __asm__ volatile("svc 0" : "+r"(x0) : "r"(x8) : "memory"); + return x0; +} +static long _syscall2(long n, long a1, long a2) { + register long x8 __asm__("x8") = n; + register long x0 __asm__("x0") = a1; + register long x1 __asm__("x1") = a2; + __asm__ volatile("svc 0" : "+r"(x0) : "r"(x8), "r"(x1) : "memory"); + return x0; +} +static long _syscall3(long n, long a1, long a2, long a3) { + register long x8 __asm__("x8") = n; + register long x0 __asm__("x0") = a1; + register long x1 __asm__("x1") = a2; + register long x2 __asm__("x2") = a3; + __asm__ volatile("svc 0" : "+r"(x0) : "r"(x8), "r"(x1), "r"(x2) : "memory"); + return x0; +} +static long _syscall4(long n, long a1, long a2, long a3, long a4) { + register long x8 __asm__("x8") = n; + register long x0 __asm__("x0") = a1; + register long x1 __asm__("x1") = a2; + register long x2 __asm__("x2") = a3; + register long x3 __asm__("x3") = a4; + __asm__ volatile("svc 0" : "+r"(x0) : "r"(x8), "r"(x1), "r"(x2), "r"(x3) : "memory"); + return x0; +} +static long _syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6) { + register long x8 __asm__("x8") = n; + register long x0 __asm__("x0") = a1; + register long x1 __asm__("x1") = a2; + register long x2 __asm__("x2") = a3; + 
register long x3 __asm__("x3") = a4; + register long x4 __asm__("x4") = a5; + register long x5 __asm__("x5") = a6; + __asm__ volatile("svc 0" : "+r"(x0) : "r"(x8), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5) : "memory"); + return x0; +} + +#define SYS_READ 63 +#define SYS_OPENAT 56 +#define SYS_CLOSE 57 +#define SYS_LSEEK 62 +#define SYS_WRITE 64 +#define SYS_SENDFILE 71 +#define SYS_READLINKAT 78 +#define SYS_PWRITE64 68 +#define SYS_EXIT_GROUP 94 +#define SYS_MMAP 222 +#define SYS_MUNMAP 215 +#define SYS_MPROTECT 226 +#define SYS_MEMFD_CREATE 279 +#ifndef AT_FDCWD +#define AT_FDCWD -100 +#endif + +#else +#error "solib_bootstrap: unsupported architecture" +#endif + +// }}} +// ---- Stack / auxv parsing {{{ + +static void parse_stack(void *stack_top, struct boot_args *args) { + long *sp = (long *)stack_top; + args->argc = (int)*sp; + args->argv = (char **)(sp + 1); + args->envp = args->argv + args->argc + 1; + char **ep = args->envp; + while (*ep) ep++; + args->auxv = (Elf64_auxv_t *)(ep + 1); +} + +static const char *find_env(char **envp, const char *name) { + int name_len = bs_strlen(name); + for (char **ep = envp; *ep; ep++) { + if (bs_strncmp(*ep, name, name_len) == 0 && (*ep)[name_len] == '=') + return *ep + name_len + 1; + } + return NULL; +} + +static Elf64_auxv_t *find_auxv_entry(Elf64_auxv_t *auxv, unsigned long type) { + for (Elf64_auxv_t *a = auxv; a->a_type != AT_NULL; a++) { + if (a->a_type == type) return a; + } + return NULL; +} + +static unsigned long get_auxv(Elf64_auxv_t *auxv, unsigned long type) { + Elf64_auxv_t *a = find_auxv_entry(auxv, type); + return a ? a->a_un.a_val : 0; +} + +// }}} +// ---- Shared ELF helpers {{{ + +// Validate ELF magic, class, type, and phnum limit. 
+// type_mask: bitmask of permitted e_type values, eg (1u << ET_DYN).
+static int elf_check_header(const Elf64_Ehdr *ehdr, unsigned type_mask, int max_phnum) {
+    if (ehdr->e_ident[EI_MAG0] != ELFMAG0 || ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
+        ehdr->e_ident[EI_MAG2] != ELFMAG2 || ehdr->e_ident[EI_MAG3] != ELFMAG3 ||
+        ehdr->e_ident[EI_CLASS] != ELFCLASS64) return -1;
+    if (!(type_mask & (1u << ehdr->e_type))) return -1;
+    if (ehdr->e_phnum > max_phnum) return -1;
+    return 0;
+}
+
+// Find the first phdr of a given type, or NULL.
+static const Elf64_Phdr *find_phdr(const Elf64_Phdr *phdrs, int phnum, uint32_t type) {
+    for (int i = 0; i < phnum; i++)
+        if (phdrs[i].p_type == type) return &phdrs[i];
+    return NULL;
+}
+
+// Compute the page-aligned vaddr range of all PT_LOAD segments, reserve it with
+// a PROT_NONE mmap, and return the load bias (base + p_vaddr = runtime
+// address). Sets *total to the reserved byte count. Returns -1 on failure.
+static int elf_reserve(const Elf64_Phdr *phdrs, int phnum, long page_size,
+                       uintptr_t *base_out, long *total_out) {
+    uintptr_t lo = (uintptr_t)-1, hi = 0;
+    for (int i = 0; i < phnum; i++) {
+        if (phdrs[i].p_type != PT_LOAD) continue;
+        uintptr_t slo = BS_PAGE_DOWN(phdrs[i].p_vaddr, page_size);
+        uintptr_t shi = BS_PAGE_UP(phdrs[i].p_vaddr + phdrs[i].p_memsz, page_size);
+        if (slo < lo) lo = slo;
+        if (shi > hi) hi = shi;
+    }
+    if (lo == (uintptr_t)-1) return -1;
+    long total = (long)(hi - lo);
+    void *base_map = sys_mmap(NULL, total, BS_PROT_NONE,
+                              BS_MAP_PRIVATE | BS_MAP_ANONYMOUS, -1, 0);
+    if (base_map == BS_MAP_FAILED) return -1;
+    *base_out = (uintptr_t)base_map - lo;
+    *total_out = total;
+    return 0;
+}
+
+// }}}
+// ---- Common PT_LOAD segment mapper {{{
+//
+// Maps all PT_LOAD segments from an open fd into a pre-reserved address space.
+// `file_bias` is added to each segment's page-aligned file offset:
+//   * pass 0 for a standalone file (e.g. 
ld.so loaded from its own path)
+//   * pass (DD_TRAMPOLINE_BIN.ptr - &__ehdr_start) for the trampoline embedded
+//     in /proc/self/exe
+//
+// Does NOT close fd on failure; caller is responsible.
+//
+
+static int elf_map_segments(int fd, long file_bias, const Elf64_Phdr *phdrs,
+                            int phnum, uintptr_t base, long page_size) {
+    if (file_bias != BS_PAGE_DOWN(file_bias, page_size)) {
+        bs_fatal("file_bias not page-aligned", 123);
+        __builtin_unreachable();
+    }
+
+    for (int i = 0; i < phnum; i++) {
+        if (phdrs[i].p_type != PT_LOAD) continue;
+
+        uintptr_t seg_start = BS_PAGE_DOWN(phdrs[i].p_vaddr, page_size);
+        uintptr_t seg_file_end = phdrs[i].p_vaddr + phdrs[i].p_filesz;
+        uintptr_t seg_mem_end = phdrs[i].p_vaddr + phdrs[i].p_memsz;
+        uintptr_t file_page_end = BS_PAGE_UP(seg_file_end, page_size);
+        uintptr_t mem_page_end = BS_PAGE_UP(seg_mem_end, page_size);
+        int prot = elf_pf_to_prot(phdrs[i].p_flags);
+
+        if (phdrs[i].p_filesz > 0) {
+            // ELF spec (gABI): p_vaddr ≡ p_offset (mod p_align), so
+            // PAGE_DOWN(p_offset) places p_vaddr at the correct address.
+            long file_offset = file_bias + (long)BS_PAGE_DOWN(phdrs[i].p_offset, page_size);
+            long file_map_len = (long)(file_page_end - seg_start);
+            void *seg = sys_mmap((void *)(base + seg_start), file_map_len,
+                                 prot, BS_MAP_PRIVATE | BS_MAP_FIXED, fd, file_offset);
+            if (seg == BS_MAP_FAILED) return -1;
+
+            // Zero tail within the last file-backed page (writable only).
+            // Both glibc and musl do this in dlopen. One can't trust the linkers to
+            // have the zeros in the file.
+            if (seg_mem_end > seg_file_end && (phdrs[i].p_flags & PF_W))
+                bs_memset((void *)(base + seg_file_end), 0,
+                          (long)(file_page_end - seg_file_end));
+        }
+
+        // Anonymous pages for BSS. For pure-BSS segments (p_filesz==0) start
+        // at seg_start; for mixed segments start after the last file-backed page.
+        uintptr_t anon_start = (phdrs[i].p_filesz > 0) ? 
file_page_end : seg_start; + if (mem_page_end > anon_start) { + void *bss = sys_mmap((void *)(base + anon_start), + (long)(mem_page_end - anon_start), + prot, + BS_MAP_PRIVATE | BS_MAP_FIXED | BS_MAP_ANONYMOUS, + -1, 0); + if (bss == BS_MAP_FAILED) return -1; + } + } + return 0; +} + +// }}} +// ---- Load trampoline ELF from /proc/self/exe {{{ +// +// The trampoline binary is embedded in ddtrace.so at DD_TRAMPOLINE_BIN.ptr. +// Since ddtrace.so is /proc/self/exe when executed directly, we open that file +// and mmap each PT_LOAD segment directly from it with the correct permissions. + +static int elf_load_trampoline(const void *src, size_t src_len, + struct trampoline_map *out, long page_size) { + bs_memset(out, 0, sizeof(*out)); + + if (src_len < sizeof(Elf64_Ehdr)) return -1; + const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *)src; + + if (elf_check_header(ehdr, 1u << ET_DYN, 32) < 0) return -1; + // The trampoline must be a PIE (ET_DYN) executable. ET_EXEC cannot be + // loaded at a random base address. build.rs enforces -fPIE/-pie; if + // something goes wrong in the build and ET_EXEC slips through, abort + // loudly rather than silently misbehaving. + if (ehdr->e_type != ET_DYN) __builtin_trap(); + if (ehdr->e_phoff + (uint64_t)ehdr->e_phnum * sizeof(Elf64_Phdr) > src_len) return -1; + + const Elf64_Phdr *phdrs = (const Elf64_Phdr *)((const char *)src + ehdr->e_phoff); + + // Compute the file offset of the trampoline ELF within /proc/self/exe. + // __ehdr_start is the runtime load address of ddtrace.so's own ELF header. 
+ uintptr_t tramp_file_bias = (uintptr_t)src - (uintptr_t)&__ehdr_start; + if (tramp_file_bias & ((uintptr_t)page_size - 1)) + return -1; // DD_TRAMPOLINE_BIN not page-aligned within ddtrace.so + + int fd = sys_open_rdonly("/proc/self/exe"); + if (fd < 0) return -1; + + uintptr_t base; long total; + if (elf_reserve(phdrs, ehdr->e_phnum, page_size, &base, &total) < 0) { + sys_close(fd); return -1; + } + if (elf_map_segments(fd, (long)tramp_file_bias, phdrs, ehdr->e_phnum, + base, page_size) < 0) { + sys_close(fd); return -1; + } + sys_close(fd); + + out->base = base; + out->entry = base + ehdr->e_entry; + out->phnum = ehdr->e_phnum; + out->total_map = total; + // Use PT_PHDR.p_vaddr for the phdr runtime address when present; + // e_phoff is a file offset and works as a vaddr only when the first + // PT_LOAD has p_vaddr==0 (true for standard PIE, but PT_PHDR is portable). + out->phdr = base + ehdr->e_phoff; + const Elf64_Phdr *pt_phdr = find_phdr(phdrs, ehdr->e_phnum, PT_PHDR); + if (pt_phdr) out->phdr = base + pt_phdr->p_vaddr; + return 0; +} + +// }}} +// ---- ELF loader (for ld.so from file) {{{ + +static long bs_read_full(int fd, void *buf, long count) { + long total = 0; + while (total < count) { + long n = sys_read(fd, (char *)buf + total, count - total); + if (n <= 0) return -1; + total += n; + } + return total; +} + +// Load an ELF shared library from file (for ld.so / musl) +static int elf_load(const char *path, struct loaded_lib *lib, long page_size) { + bs_memset(lib, 0, sizeof(*lib)); + + int fd = sys_open_rdonly(path); + if (fd < 0) return -1; + + Elf64_Ehdr ehdr; + if (bs_read_full(fd, &ehdr, sizeof(ehdr)) < 0) { sys_close(fd); return -1; } + if (elf_check_header(&ehdr, 1u << ET_DYN, 32) < 0) { sys_close(fd); return -1; } + + Elf64_Phdr phdrs[32]; + long hdr_map_size = (long)BS_PAGE_UP(ehdr.e_phoff + ehdr.e_phnum * sizeof(Elf64_Phdr), + page_size); + void *hdr_map = sys_mmap(NULL, hdr_map_size, BS_PROT_READ, BS_MAP_PRIVATE, fd, 0); + if (hdr_map == 
BS_MAP_FAILED) { sys_close(fd); return -1; } + bs_memcpy(phdrs, (char *)hdr_map + ehdr.e_phoff, ehdr.e_phnum * sizeof(Elf64_Phdr)); + sys_munmap(hdr_map, hdr_map_size); + + uintptr_t base; long total; + if (elf_reserve(phdrs, ehdr.e_phnum, page_size, &base, &total) < 0) { + sys_close(fd); return -1; + } + lib->base = base; + lib->entry = base + ehdr.e_entry; + + if (elf_map_segments(fd, 0, phdrs, ehdr.e_phnum, base, page_size) < 0) { + sys_close(fd); return -1; + } + sys_close(fd); + + const Elf64_Phdr *pt_dyn = find_phdr(phdrs, ehdr.e_phnum, PT_DYNAMIC); + if (!pt_dyn) return -1; + lib->dynamic = (Elf64_Dyn *)(base + pt_dyn->p_vaddr); + + for (Elf64_Dyn *d = lib->dynamic; d->d_tag != DT_NULL; d++) { + switch (d->d_tag) { + case DT_SYMTAB: lib->dynsym = (Elf64_Sym *)(base + d->d_un.d_ptr); break; + case DT_STRTAB: lib->dynstr = (const char *)(base + d->d_un.d_ptr); break; + case DT_GNU_HASH: lib->gnu_hash = (uint32_t *)(base + d->d_un.d_ptr); break; + case DT_RELA: lib->rela = (Elf64_Rela *)(base + d->d_un.d_ptr); break; + case DT_RELASZ: lib->rela_count = (long)(d->d_un.d_val / sizeof(Elf64_Rela)); break; + case DT_JMPREL: lib->jmprel = (Elf64_Rela *)(base + d->d_un.d_ptr); break; + case DT_PLTRELSZ: lib->jmprel_count = (long)(d->d_un.d_val / sizeof(Elf64_Rela)); break; + } + } + return 0; +} + +// }}} +// ---- Patched memfd: make a copy of ddtrace.so with weakened symbols {{{ + +static long gnu_hash_symcount(const uint32_t *ht); +static long vaddr_to_file_offset(const Elf64_Phdr *phdrs, int phnum, uintptr_t vaddr); + +// The trampoline calls dlopen(ddtrace.so) in a process with no PHP loaded. +// Without patching, ld.so aborts on hundreds of unresolved STB_GLOBAL symbols +// (OnUpdateString, zend_hash_find_ex, ...) that normally come from the PHP +// binary. 
+//
+// Strategy: sendfile the entire ddtrace.so into a memfd, then patch two
+// sections of the memfd via pwrite:
+//
+// Step 1: .dynamic section:
+// neutralize DT_NEEDED (no PHP deps loaded), DT_INIT/FINI (no PHP init
+// called), DT_VERNEED/VERSYM (no version checking), DT_BIND_NOW / DF flags
+// (lazy PLT ok)
+//
+// Step 2: .dynsym table:
+// every STB_GLOBAL/SHN_UNDEF symbol → STB_WEAK, so unresolved PHP symbols
+// silently resolve to NULL rather than aborting dlopen
+//
+// The patching reads ELF structures from the already exec'd mapping
+// (__ehdr_start) to locate the right sections and compute their file offsets,
+// then pwrite's the patches into the memfd. The exec'd mapping itself is never
+// modified.
+
+
+// Create a patched memfd from /proc/self/exe.
+// Returns the memfd fd (>= 0) on success, -1 on error.
+static int create_patched_memfd(void) {
+ // Source: the exec'd binary is /proc/self/exe (same inode as ddtrace.so)
+ int src = sys_open_rdonly("/proc/self/exe");
+ if (src < 0) return -1;
+
+ int mfd = sys_memfd_create("ddtrace_patched", 0);
+ if (mfd < 0) {
+ bs_fatal("Failed to create memfd", 123);
+ __builtin_unreachable();
+ }
+
+ // Copy entire file into the memfd via sendfile (in-kernel, no userspace buf)
+ for (;;) {
+ long n = sys_sendfile(mfd, src, 0x10000000 /*256 MB chunk*/);
+ if (n == 0) break; /* EOF – copy complete */
+ if (n == -4) continue; /* EINTR – retry the interrupted syscall */
+ if (n < 0) {
+ bs_fatal("Failed to copy ddtrace.so to memfd", 122);
+ __builtin_unreachable();
+ }
+ }
+ sys_close(src);
+
+ // Use the exec'd mapping (__ehdr_start) as a read-only guide to locate
+ // .dynamic and .dynsym. vaddr_to_file_offset converts each virtual address
+ // to the corresponding file offset; that offset is where pwrite patches the memfd.
+ uintptr_t base = (uintptr_t)&__ehdr_start; + const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *)base; + const Elf64_Phdr *phdrs = (const Elf64_Phdr *)(base + ehdr->e_phoff); + + // Locate PT_DYNAMIC + long dyn_foff = -1; + uintptr_t dyn_vaddr = 0; + for (int i = 0; i < ehdr->e_phnum; i++) { + if (phdrs[i].p_type == PT_DYNAMIC) { + dyn_foff = (long)phdrs[i].p_offset; + dyn_vaddr = phdrs[i].p_vaddr; + break; + } + } + if (dyn_foff < 0) { sys_close(mfd); return -1; } + + const Elf64_Dyn *dyn = (const Elf64_Dyn *)(base + dyn_vaddr); + + uintptr_t dynsym_vaddr = 0; + uintptr_t strtab_vaddr = 0; + const uint32_t *hash = NULL, *gnu_hash = NULL; + + // Pre-pass: find DT_STRTAB (needed to look up DT_NEEDED library names) + for (long i = 0; dyn[i].d_tag != DT_NULL; i++) { + if (dyn[i].d_tag == DT_STRTAB) { strtab_vaddr = dyn[i].d_un.d_ptr; break; } + } + + // Pass 1: patch dynamic section tags; collect symtab/hash pointers + for (long i = 0; dyn[i].d_tag != DT_NULL; i++) { + Elf64_Xword new_tag = 0; + Elf64_Xword new_val = 0; + int patch_val = 0; + + // DT_TOMBSTONE must NOT collide with any tag the dynamic linker + // recognises. The previous value 0x6ffffef5 == DT_GNU_HASH, which + // caused musl to treat every neutralised entry as a GNU hash table + // pointer, corrupting dso->ghashtab and crashing in gnu_lookup_filtered. +#define DT_TOMBSTONE 0x6ffffef4 + switch (dyn[i].d_tag) { + // Neutralize: deps (with exceptions), init/fini, version info, binding flags. + case DT_NEEDED: + // Keep libgcc_s and libunwind: they provide _Unwind_RaiseException, + // which Rust needs for panic handling. Without these, any Rust panic + // (common in debug builds) causes SIGSEGV. All other DT_NEEDED + // entries are tombstoned to prevent ld.so from loading PHP-specific + // or problematic deps (e.g. ld-linux-x86-64.so.2, libcurl). 
+ if (strtab_vaddr) { + const char *libname = (const char *)(base + strtab_vaddr) + dyn[i].d_un.d_val; + if (bs_strncmp(libname, "libgcc_s", 8) == 0 || + bs_strncmp(libname, "libunwind", 9) == 0) { + break; /* keep this DT_NEEDED */ + } + } + new_tag = DT_TOMBSTONE; + break; + case DT_BIND_NOW: + case DT_INIT: + case DT_INIT_ARRAY: case DT_INIT_ARRAYSZ: + case DT_FINI: + case DT_FINI_ARRAY: case DT_FINI_ARRAYSZ: + case DT_VERNEED: + case DT_VERNEEDNUM: + case DT_VERSYM: + case DT_RELACOUNT: /* neutralize: ld.so doesn't need the count hint */ + new_tag = DT_TOMBSTONE; + break; + // Clear DF_BIND_NOW (bit 3) from DT_FLAGS + case DT_FLAGS: + new_val = dyn[i].d_un.d_val & ~8ULL; + patch_val = 1; + break; + // Clear DF_1_NOW (bit 0) from DT_FLAGS_1 + case DT_FLAGS_1: + new_val = dyn[i].d_un.d_val & ~1ULL; + patch_val = 1; + break; + // Track symtab, strtab, and hash tables for dynsym patching + case DT_SYMTAB: dynsym_vaddr = dyn[i].d_un.d_ptr; break; + case DT_STRTAB: strtab_vaddr = dyn[i].d_un.d_ptr; break; + case DT_HASH: hash = (const uint32_t *)(base + dyn[i].d_un.d_ptr); break; + case DT_GNU_HASH: gnu_hash = (const uint32_t *)(base + dyn[i].d_un.d_ptr); break; + default: break; + } + + if (new_tag) { + long entry_off = dyn_foff + i * (long)sizeof(Elf64_Dyn); + sys_pwrite(mfd, &new_tag, sizeof(new_tag), entry_off); + } + if (patch_val) { + long val_off = dyn_foff + i * (long)sizeof(Elf64_Dyn) + 8; + sys_pwrite(mfd, &new_val, sizeof(new_val), val_off); + } + } + + // Pass 2: weaken STB_GLOBAL/SHN_UNDEF dynsym entries + if (!dynsym_vaddr) { sys_close(mfd); return -1; } + + long dynsym_foff = vaddr_to_file_offset(phdrs, ehdr->e_phnum, dynsym_vaddr); + if (dynsym_foff < 0) { sys_close(mfd); return -1; } + + long symcount = 0; + if (hash) + symcount = (long)hash[1]; // nchain = total symbol count + else if (gnu_hash) + symcount = gnu_hash_symcount(gnu_hash); + if (!symcount) { sys_close(mfd); return -1; } + + const Elf64_Sym *syms = (const Elf64_Sym *)(base + 
dynsym_vaddr); + for (long i = 0; i < symcount; i++) { + if (ELF64_ST_BIND(syms[i].st_info) == STB_GLOBAL && + syms[i].st_shndx == SHN_UNDEF) { + unsigned char new_info = (unsigned char)ELF64_ST_INFO( + STB_WEAK, ELF64_ST_TYPE(syms[i].st_info)); + // st_info is at offset 4 within Elf64_Sym (after st_name) + sys_pwrite(mfd, &new_info, 1, + dynsym_foff + i * (long)sizeof(Elf64_Sym) + 4); + } + } + + return mfd; +} + +// Count dynamic symbols via GNU hash table. +static long gnu_hash_symcount(const uint32_t *ht) { + uint32_t nbuckets = ht[0]; + uint32_t symndx = ht[1]; + uint32_t maskwords = ht[2]; // count of 64-bit bloom filter words + const uint32_t *buckets = &ht[4 + maskwords * 2]; + const uint32_t *chains = &buckets[nbuckets]; + + uint32_t max_sym = symndx; + for (uint32_t b = 0; b < nbuckets; b++) { + uint32_t idx = buckets[b]; + if (!idx) continue; + uint32_t off = idx - symndx; + for (;;) { + if (idx > max_sym) max_sym = idx; + if (chains[off] & 1) break; + off++; idx++; + } + } + return (long)(max_sym + 1); +} + +// Compute file offset of a virtual address within a PT_LOAD segment. +static long vaddr_to_file_offset(const Elf64_Phdr *phdrs, int phnum, uintptr_t vaddr) { + for (int i = 0; i < phnum; i++) { + if (phdrs[i].p_type != PT_LOAD) continue; + if (vaddr >= phdrs[i].p_vaddr && + vaddr < phdrs[i].p_vaddr + phdrs[i].p_filesz) + return (long)(phdrs[i].p_offset + (vaddr - phdrs[i].p_vaddr)); + } + return -1; +} + +// Write an unsigned int as decimal into buf (no nul terminator). Returns len. +static int uint_to_dec(unsigned int v, char *buf) { + char tmp[12]; int n = 0; + if (!v) { buf[0] = '0'; return 1; } + while (v) { tmp[n++] = '0' + (v % 10); v /= 10; } + for (int i = 0; i < n; i++) buf[i] = tmp[n - 1 - i]; + return n; +} + +// }}} +// ---- DT_RELR (compact relative relocations) {{{ +// Called from _dd_solib_start asm after DT_RELA/DT_JMPREL RELATIVE relocs have +// been applied. Uses only register arguments and stack locals - no GOT access. 
+// Algorithm matches musl ldso/dlstart.c (and glibc ELF_DYNAMIC_DO_RELR):
+// * Entry with low bit 0: absolute address -> *addr += base; addr++
+// * Entry with low bit 1: bitmap -> for each set bit i in bits[1..63]: addr[i] += base;
+// then addr += 63 (covers 63 slots per bitmap word)
+__attribute__((used))
+static void _dd_apply_relr(uintptr_t base, const uint64_t *relr, long relrsz) {
+ uint64_t *addr = NULL;
+ for (; relrsz > 0; relr++, relrsz -= 8) {
+ if ((*relr & 1) == 0) {
+ addr = (uint64_t *)(base + *relr);
+ *addr++ += base;
+ } else {
+ uint64_t bitmap = *relr >> 1;
+ for (uint64_t i = 0; bitmap; bitmap >>= 1, i++)
+ if (bitmap & 1) addr[i] += base;
+ addr += 63;
+ }
+ }
+}
+
+// }}}
+// ---- Error output {{{
+
+static void bs_write_str(const char *s) {
+ sys_write(2, s, bs_strlen(s));
+}
+
+static noreturn void bs_fatal(const char *msg, int code) {
+ bs_write_str("dd_solib_bootstrap: ");
+ bs_write_str(msg);
+ bs_write_str("\n");
+ sys_exit_group(code);
+}
+
+// }}}
+// ---- Minimal string/memory utilities {{{
+
+// The compiler is too smart for its own good. We must prevent it from
+// replacing these calls with PLT calls to libc strlen/memcpy/memset,
+// which would crash because those GOT entries are unresolved when the bootstrap
+// runs. (TODO: try harder with -fno-builtin?)
+__attribute__((noinline)) +static int bs_strlen(const char *s) { + int n = 0; + while (s[n]) { __asm__ volatile("" : "+r"(n)); n++; } + return n; +} + +__attribute__((noinline)) +static int bs_strncmp(const char *a, const char *b, int n) { + for (int i = 0; i < n; i++) { + __asm__ volatile("" : "+r"(i)); + if (a[i] != b[i]) return (unsigned char)a[i] - (unsigned char)b[i]; + if (!a[i]) return 0; + } + return 0; +} + +__attribute__((noinline)) +static void bs_memcpy(void *dst, const void *src, long n) { + char *d = dst; const char *s = src; + while (n--) { __asm__ volatile("" : "+r"(d) : "r"(s)); *d++ = *s++; } +} + +__attribute__((noinline)) +static void bs_memset(void *dst, int c, long n) { + char *d = dst; + while (n--) { __asm__ volatile("" : "+r"(d)); *d++ = (char)c; } +} + +static int elf_pf_to_prot(uint32_t pf) { + int prot = 0; + if (pf & PF_R) prot |= BS_PROT_READ; + if (pf & PF_W) prot |= BS_PROT_WRITE; + if (pf & PF_X) prot |= BS_PROT_EXEC; + return prot; +} + +// }}} +// ---- Syscall convenience wrappers {{{ + +static int sys_open_rdonly(const char *path) { +#ifdef SYS_OPEN + return (int)_syscall2(SYS_OPEN, (long)path, 0 /*O_RDONLY*/); +#else + return (int)_syscall3(SYS_OPENAT, AT_FDCWD, (long)path, 0 /*O_RDONLY*/); +#endif +} + +static long sys_read(int fd, void *buf, long count) { + return _syscall3(SYS_READ, fd, (long)buf, count); +} + +static long sys_write(int fd, const void *buf, long count) { + return _syscall3(SYS_WRITE, fd, (long)buf, count); +} + +static int sys_close(int fd) { + return (int)_syscall1(SYS_CLOSE, fd); +} + +static void *sys_mmap(void *addr, long length, int prot, int flags, int fd, long offset) { + return (void *)_syscall6(SYS_MMAP, (long)addr, length, prot, flags, fd, offset); +} + +static int sys_munmap(void *addr, long length) { + return (int)_syscall2(SYS_MUNMAP, (long)addr, length); +} + +static noreturn void sys_exit_group(int status) { + _syscall1(SYS_EXIT_GROUP, status); + __builtin_unreachable(); +} + +static long 
sys_readlink(const char *path, char *buf, long bufsiz) { +#ifdef __x86_64__ + return _syscall3(SYS_READLINK, (long)path, (long)buf, bufsiz); +#elif defined(__aarch64__) + // aarch64 has readlinkat only + return _syscall4(SYS_READLINKAT, AT_FDCWD, (long)path, (long)buf, bufsiz); +#endif +} + +static int sys_memfd_create(const char *name, unsigned int flags) { + return (int)_syscall2(SYS_MEMFD_CREATE, (long)name, (long)flags); +} + +static long sys_sendfile(int out_fd, int in_fd, long count) { + return _syscall4(SYS_SENDFILE, out_fd, in_fd, 0L /*offset=NULL*/, count); +} + +static long sys_pwrite(int fd, const void *buf, long count, long offset) { + return _syscall4(SYS_PWRITE64, fd, (long)buf, count, offset); +} + +// }}} + +#ifdef __clang__ +# pragma clang attribute pop +#endif +#endif // __linux__ + +// vim: foldmethod=marker diff --git a/libdatadog b/libdatadog index 11d4111c934..40b06ad3ddf 160000 --- a/libdatadog +++ b/libdatadog @@ -1 +1 @@ -Subproject commit 11d4111c934d9af49d8124b8266dbbdda5857cb4 +Subproject commit 40b06ad3ddfc8042981c4f0d4723bcdc51937b42 diff --git a/loader/dd_library_loader.c b/loader/dd_library_loader.c index 18f69bb370f..5e3cdd408bb 100644 --- a/loader/dd_library_loader.c +++ b/loader/dd_library_loader.c @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include #include
#include diff --git a/loader/tests/functional/test_configuration_telemetry.php b/loader/tests/functional/test_configuration_telemetry.php index 6a15c892a45..f27ba9f1057 100644 --- a/loader/tests/functional/test_configuration_telemetry.php +++ b/loader/tests/functional/test_configuration_telemetry.php @@ -16,7 +16,7 @@ assertMatchesFormat($output, '%A"loaded_by_ssi":true%s%A'); // Let time to write the telemetry log -usleep(10000); +usleep(100000); $content = file_get_contents($telemetryLogPath); assertContains($content, '{"name":"instrumentation_source","value":"ssi","origin":"default","config_id":null,"seq_id":null}'); diff --git a/src/DDTrace/Integrations/Frankenphp/FrankenphpIntegration.php b/src/DDTrace/Integrations/Frankenphp/FrankenphpIntegration.php index 0c1226ead48..aa3cf391770 100644 --- a/src/DDTrace/Integrations/Frankenphp/FrankenphpIntegration.php +++ b/src/DDTrace/Integrations/Frankenphp/FrankenphpIntegration.php @@ -105,18 +105,18 @@ static function (HookData $hookData) { $res = notify_commit( $rootSpan, - \http_response_code(), + \http_response_code() ?: 200, self::convertHeaders(\headers_list()), null /* response body is available through special mechanisms */ ); // we did not block before and were now told to block - if (!$hookData->data && $res) { + if (!isset($hookData->data) && $res) { $hookData->data = new FrankenphpAppSecException(); self::commitBlockingResponse($res); } - if ($hookData->data && !$rootSpan->exception) { + if (isset($hookData->data) && !$rootSpan->exception) { $rootSpan->exception = $hookData->data; } }, diff --git a/tests/Frameworks/Symfony/Latest/.env b/tests/Frameworks/Symfony/Latest/.env index 4c4d4e3ac63..7ff1b34b186 100644 --- a/tests/Frameworks/Symfony/Latest/.env +++ b/tests/Frameworks/Symfony/Latest/.env @@ -35,3 +35,9 @@ DATABASE_URL="postgresql://app:!ChangeMe!@127.0.0.1:5432/app?serverVersion=15&ch # MESSENGER_TRANSPORT_DSN=redis://localhost:6379/messages MESSENGER_TRANSPORT_DSN=doctrine://default?auto_setup=0 ###< 
symfony/messenger ### + +###> symfony/routing ### +# Configure how to generate URLs in non-HTTP contexts, such as CLI commands. +# See https://symfony.com/doc/current/routing.html#generating-urls-in-commands +DEFAULT_URI=http://localhost +###< symfony/routing ###