Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,30 @@ include:
- local: ".gitlab/macrobenchmarks.yml"
- local: ".gitlab/exploration-tests.yml"
- local: ".gitlab/ci-visibility-tests.yml"
- project: 'DataDog/apm-reliability/apm-sdks-benchmarks'
file: '.gitlab/ci-java-spring-petclinic.yml'
ref: 'main'
- project: 'DataDog/apm-reliability/apm-sdks-benchmarks'
file: '.gitlab/ci-java-insecure-bank.yml'
ref: 'main'
- project: 'DataDog/apm-reliability/apm-sdks-benchmarks'
file: '.gitlab/ci-java-dacapo.yml'
ref: 'main'

stages:
- build
- publish
- shared-pipeline
- benchmarks
- infrastructure
- java-spring-petclinic-tests
- java-spring-petclinic-macrobenchmarks
- java-startup-microbenchmarks
- java-load-microbenchmarks
- java-dacapo-microbenchmarks
- benchmark-comparison
- generate-slos
- upload-to-bp-api
- macrobenchmarks
- tests
- exploration-tests
Expand Down
177 changes: 69 additions & 108 deletions .gitlab/benchmarks.yml
Original file line number Diff line number Diff line change
@@ -1,111 +1,4 @@
.benchmarks:
stage: benchmarks
timeout: 1h
tags: ["runner:apm-k8s-tweaked-metal"]
image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-java-benchmarks
needs: [ "build", "publish-artifacts-to-s3" ]
rules:
- if: '$POPULATE_CACHE'
when: never
- if: '$CI_COMMIT_TAG =~ /^v?[0-9]+\.[0-9]+\.[0-9]+$/'
when: manual
allow_failure: true
- if: '$CI_COMMIT_BRANCH == "master"'
when: on_success
interruptible: false
- when: on_success
interruptible: true
script:
- export ARTIFACTS_DIR="$(pwd)/reports" && mkdir -p "${ARTIFACTS_DIR}"
- git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/"
- git clone --branch dd-trace-java/tracer-benchmarks-parallel https://github.com/DataDog/benchmarking-platform.git /platform && cd /platform
artifacts:
name: "reports"
paths:
- reports/
expire_in: 3 months
variables:
UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current project. This ID is unique across all projects on the GitLab instance.
UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # "dd-trace-java"
UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built.
UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for.

benchmarks-startup:
extends: .benchmarks
script:
- !reference [ .benchmarks, script ]
- ./steps/capture-hardware-software-info.sh
- ./steps/run-benchmarks.sh startup
- ./steps/analyze-results.sh startup

benchmarks-load:
extends: .benchmarks
script:
- !reference [ .benchmarks, script ]
- ./steps/capture-hardware-software-info.sh
- ./steps/run-benchmarks.sh load
- ./steps/analyze-results.sh load

benchmarks-dacapo:
extends: .benchmarks
script:
- !reference [ .benchmarks, script ]
- ./steps/capture-hardware-software-info.sh
- ./steps/run-benchmarks.sh dacapo
- ./steps/analyze-results.sh dacapo

benchmarks-post-results:
extends: .benchmarks
tags: ["arch:amd64"]
script:
- !reference [ .benchmarks, script ]
- ./steps/upload-results-to-s3.sh
- ./steps/post-pr-comment.sh
needs:
- job: benchmarks-startup
artifacts: true
- job: benchmarks-load
artifacts: true
- job: benchmarks-dacapo
artifacts: true

check-big-regressions:
extends: .benchmarks
needs:
- job: benchmarks-startup
artifacts: true
- job: benchmarks-dacapo
artifacts: true
when: on_success
tags: ["arch:amd64"]
rules:
- if: '$POPULATE_CACHE'
when: never
- if: '$CI_COMMIT_BRANCH =~ /backport-pr-/'
when: never
- if: '$CI_COMMIT_BRANCH !~ /^(master|release\/)/'
when: on_success
- when: never
# ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/
# need to convert them
script:
- !reference [ .benchmarks, script ]
- |
for benchmarkType in startup dacapo; do
find "$ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do
relpath="${file#$ARTIFACTS_DIR/$benchmarkType/}"
prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json
prefix="${prefix#./}" # Remove any leading ./
prefix="${prefix//\//-}" # Replace / with -
case "$file" in
*benchmark-baseline.json) type="baseline" ;;
*benchmark-candidate.json) type="candidate" ;;
esac
echo "Moving $file to $ARTIFACTS_DIR/${type}-${prefix}.converted.json"
cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json"
done
done
- bp-runner $CI_PROJECT_DIR/.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
# Insert more benchmark logic here

.dsm-kafka-benchmarks:
stage: benchmarks
Expand Down Expand Up @@ -187,3 +80,71 @@ debugger-benchmarks:
UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built.
UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for.
FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true"

.benchmark-compare-template:
stage: benchmark-comparison
image: registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:6845f3c7
Comment thread
sarahchen6 marked this conversation as resolved.
Outdated
tags: ["arch:amd64"]
interruptible: true
rules:
- if: '$POPULATE_CACHE'
when: never
- when: on_success
script:
- mkdir -p "$(pwd)/reports/${BENCHMARK_TYPE}"
- .gitlab/scripts/get-baseline-commit-info.sh "$(pwd)/reports/baseline-info.env" "$(pwd)/reports/${BENCHMARK_TYPE}/fallback_to_master.txt"
- .gitlab/scripts/benchmark-compare.sh "${BENCHMARK_TYPE}"
artifacts:
name: "benchmark-compare-${BENCHMARK_TYPE}"
when: always
paths:
- reports/${BENCHMARK_TYPE}/
expire_in: 3 months
variables:
TARGET_BRANCH: "master"

# Compare candidate vs. baseline results for the startup benchmarks.
# All job mechanics (stage, image, rules, artifacts) come from the template.
benchmarks-compare-startup:
  extends: .benchmark-compare-template
  variables:
    BENCHMARK_TYPE: startup

# Compare candidate vs. baseline results for the load benchmarks.
# All job mechanics (stage, image, rules, artifacts) come from the template.
benchmarks-compare-load:
  extends: .benchmark-compare-template
  variables:
    BENCHMARK_TYPE: load

# Compare candidate vs. baseline results for the DaCapo benchmarks.
# All job mechanics (stage, image, rules, artifacts) come from the template.
benchmarks-compare-dacapo:
  extends: .benchmark-compare-template
  variables:
    BENCHMARK_TYPE: dacapo

# Posts the benchmark comparison results back to the pull request
# (via .gitlab/scripts/post-benchmark-pr-comment.sh).
benchmarks-post-pr-comment:
  stage: benchmark-comparison
  image: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1
  tags: ["arch:amd64"]
  interruptible: true
  # Requires the artifacts of all three comparison jobs so the comment can
  # cover startup, load and dacapo results.
  needs:
    - job: benchmarks-compare-startup
      artifacts: true
    - job: benchmarks-compare-load
      artifacts: true
    - job: benchmarks-compare-dacapo
      artifacts: true
  rules:
    - if: '$POPULATE_CACHE'
      when: never
    # NOTE(review): `always` makes this run even when a compare job fails —
    # presumably so partial results still get posted; confirm intended.
    - when: always
  # OIDC token exchanged through dd-octo-sts for a short-lived GitHub token.
  id_tokens:
    DDOCTOSTS_ID_TOKEN:
      aud: dd-octo-sts
  before_script:
    - dd-octo-sts token --scope DataDog/${UPSTREAM_PROJECT_NAME} --policy "${DDOCTOSTS_POLICY}" > github-token.txt
    - export GITHUB_TOKEN="$(cat github-token.txt)"
  script:
    - .gitlab/scripts/post-benchmark-pr-comment.sh
  after_script:
    # Best-effort revocation of the short-lived GitHub token.
    - dd-octo-sts revoke -t "$(cat github-token.txt)" || true
  variables:
    DDOCTOSTS_POLICY: "self.gitlab.github-access"
    UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME
    UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME
6 changes: 0 additions & 6 deletions .gitlab/macrobenchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,6 @@ check-slo-breaches:
artifacts: true
- job: otel-latest
artifacts: true
- job: benchmarks-startup
artifacts: true
- job: benchmarks-load
artifacts: true
- job: benchmarks-dacapo
artifacts: true
script:
# macrobenchmarks are located here, files are already in "converted" format
- export ARTIFACTS_DIR="$(pwd)/platform/artifacts/" && mkdir -p "${ARTIFACTS_DIR}"
Expand Down
96 changes: 96 additions & 0 deletions .gitlab/scripts/append-dacapo-report
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env python3

import json
import os
import sys
from io import StringIO

import numpy as np

from python_utils import compute_confidence_interval, import_benchmark_values, round_value

# Name of the variant run without the tracer agent attached; used below as the
# reference point for overhead. Read from the environment — assumed always set
# by CI; TODO confirm (None here would break the report).
NO_AGENT_VARIANT = os.getenv("NO_AGENT_VARIANT")
# Confidence level passed to compute_confidence_interval and shown in headers.
CI_INTERVAL = 0.99
# Unit of measure label used when importing and formatting benchmark values.
UOM = "µs"
# Human-readable identifiers of the two builds being compared (from CI env).
CANDIDATE = os.getenv("CANDIDATE_VERSION")
BASELINE = os.getenv("BASELINE_VERSION")

# Root folder containing one sub-directory per benchmarked application.
input_folder = sys.argv[1]


def _parse_benchmarks(folder: str, build: str) -> list[float]:
    """Load ``benchmark-<build>.json`` from ``folder`` and return its
    execution-time samples (first entry of the ``benchmarks`` list)."""
    path = f"{folder}/benchmark-{build}.json"
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    first_benchmark = list(data["benchmarks"])[0]
    return import_benchmark_values(first_benchmark, "execution_time", UOM)


def _sort_variants(item: str) -> str:
    """Sort key that orders the no-agent variant first by mapping it to ``""``;
    every other variant sorts by its own name."""
    return "" if item == NO_AGENT_VARIANT else item


def _build_application_results(folder: str) -> str:
    """Render a markdown report for one application's benchmark results.

    ``folder`` is an application directory containing one sub-directory per
    variant, each holding ``benchmark-baseline.json`` and
    ``benchmark-candidate.json``. The report is a collapsible ``<details>``
    block with a mermaid gantt chart of mean execution times (with confidence
    intervals) plus one markdown table per build showing the overhead relative
    to the no-agent variant.
    """
    application = os.path.basename(folder)
    chart = StringIO()
    tables = {
        "baseline": StringIO(),
        "candidate": StringIO(),
    }
    values = {
        "baseline": dict(),
        "candidate": dict(),
    }
    # BUG FIX: the original scanned the module-level global `application_folder`
    # (the caller's loop variable) instead of the `folder` parameter; it only
    # worked because the caller happened to pass that same global. Use the
    # parameter so the function no longer depends on global state.
    for variant_folder in [f.path for f in os.scandir(folder) if f.is_dir()]:
        variant = os.path.basename(variant_folder)
        values["candidate"][variant] = _parse_benchmarks(variant_folder, "candidate")
        values["baseline"][variant] = _parse_benchmarks(variant_folder, "baseline")
    # Mean execution time of the no-agent variant per build — the reference
    # used to compute each variant's overhead below.
    no_agent_means = {
        "baseline": float(np.mean(values["baseline"][NO_AGENT_VARIANT])),
        "candidate": float(np.mean(values["candidate"][NO_AGENT_VARIANT])),
    }
    # No-agent variant first, remaining variants alphabetically.
    variants = sorted(values["candidate"].keys(), key=_sort_variants)
    chart.write(
        f"""
```mermaid
gantt
title {application} - execution time [CI {CI_INTERVAL}] : candidate={CANDIDATE}, baseline={BASELINE}
dateFormat X
axisFormat %s
"""
    )

    for build in ("baseline", "candidate"):
        table = tables[build]
        table.write(f"\n|Variant|Execution Time [CI {CI_INTERVAL}]|Δ {NO_AGENT_VARIANT}|\n")
        table.write("|---|---|---|\n")
        build_values = values[build]
        no_agent_mean = no_agent_means[build]
        chart.write(f"section {build}\n")
        for variant in variants:
            variant_values = build_values[variant]
            mean = float(np.mean(variant_values))
            lower, upper = compute_confidence_interval(variant_values, CI_INTERVAL)
            overhead = mean - no_agent_mean
            overhead_pct = overhead * 100 / no_agent_mean
            # Gantt bar spans the confidence interval; the milestone marks the mean.
            chart.write(f"{variant} ({round_value(mean, UOM)}) : {round(lower)}, {round(upper)}\n")
            chart.write(f". : milestone, {round(mean)},\n")
            # Overhead column shows "-" for the no-agent reference itself.
            table.write(
                f"|{variant}|{round_value(mean, UOM)} [{round_value(lower, UOM)}, {round_value(upper, UOM)}]"
                f"|{'-' if variant == NO_AGENT_VARIANT else f'{round_value(overhead, UOM)} ({round(overhead_pct, 1)}%)'}|\n"
            )

    chart.write("```\n")

    result = StringIO()
    result.write(f"\n\n<details><summary>Execution time for {application}</summary>\n")
    result.write(chart.getvalue())
    for build, table in tables.items():
        result.write(f"\n* **{build}** results\n")
        result.write(table.getvalue())
    result.write("\n</details>\n")
    return result.getvalue()


# Emit one report section per application directory found under input_folder,
# printed to stdout (presumably captured/appended by the calling CI step —
# TODO confirm against the pipeline definition).
for application_folder in [f.path for f in os.scandir(input_folder) if f.is_dir()]:
    print(_build_application_results(application_folder))
Loading