Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
86397a8
Changes needed to get clickhouse e2e test working with external click…
ddelnano Apr 13, 2026
f590005
Implement parquet export format
ddelnano Apr 16, 2026
3510794
Allow prometheus recorders to specifiy different kubeconfig or kubeco…
ddelnano Apr 16, 2026
5a8fb65
Fix parquet file overflow bug
ddelnano Apr 16, 2026
17188d5
Add duck db wasm visualization file
ddelnano Apr 16, 2026
63f7d5f
Temporary changes to make load testing easier
ddelnano Apr 17, 2026
839af02
Add clickhouse perf_tool suite, ability to query cross kubeconfig/kub…
ddelnano Apr 20, 2026
06a8d3a
Ensure px delete works with external k8s ApiService
ddelnano Apr 20, 2026
1f9c121
Add github workflow for perf clickhouse suite
ddelnano Apr 22, 2026
5ecab7c
Ignore non alphabetic characters in the service account json
ddelnano Apr 22, 2026
5112a10
Add tailscale debugging info for perf workflow
ddelnano Apr 22, 2026
bb80ebb
Initial sovereign_soc suite, which segfaults kelvin on first run
ddelnano Apr 22, 2026
f1302fd
Fix segfault issues, but fails with missing alerts clickhouse table
ddelnano Apr 22, 2026
cf29e2b
Add --skaffold_stderr_file to perf_tool to ease github workflow debug…
ddelnano Apr 23, 2026
026e3eb
Add x86_64_sysroot in profile
ddelnano Apr 23, 2026
6dd6107
Don't use verbose logging
ddelnano Apr 23, 2026
267ea25
Remove verbosity flag that was missed
ddelnano Apr 24, 2026
5c0eb9f
fix protocol_loadtest build
ddelnano Apr 24, 2026
d9b9adc
Install the px cli
ddelnano Apr 24, 2026
78f2853
Use correct cloud
ddelnano Apr 24, 2026
eb1abb3
Reduce test time
ddelnano Apr 24, 2026
dfcf602
Get redis-attack experiment working
ddelnano Apr 25, 2026
1d6ad69
Add perf github action for soc attack
ddelnano Apr 25, 2026
7cf848f
Don't let cronjobs fail the build
ddelnano Apr 25, 2026
296dbb0
Only attempt job once
ddelnano Apr 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions .github/workflows/perf_clickhouse.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
---
name: perf-eval-clickhouse
on:
workflow_dispatch:
inputs:
ref:
description: 'Branch or commit'
required: false
type: string
tags:
description: 'Tags (comma separated)'
required: false
type: string
permissions:
contents: read
packages: write
jobs:
get-dev-image-with-extras:
uses: ./.github/workflows/get_image.yaml
with:
image-base-name: "dev_image_with_extras"
ref: ${{ inputs.ref }}

clickhouse-export-perf:
name: ClickHouse export perf eval
needs: get-dev-image-with-extras
runs-on: oracle-vm-16cpu-64gb-x86-64
container:
image: ${{ needs.get-dev-image-with-extras.outputs.image-with-tag }}
options: --cap-add=NET_ADMIN --device=/dev/net/tun
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.ref }}
fetch-depth: 0
- name: Add pwd to git safe dir
run: git config --global --add safe.directory `pwd`
- id: get-commit-sha
run: echo "commit-sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT

# TODO(ddelnano): swap TAILSCALE_AUTH_KEY for an OAuth client once one is
# provisioned in the k8sstormcenter tailnet. Use
# `tailscale/github-action@v2` with `oauth-client-id` and `oauth-secret`
# inputs (`TS_OAUTH_CLIENT_ID` / `TS_OAUTH_CLIENT_SECRET` secrets) so
# credentials rotate automatically instead of expiring on a fixed cadence.
- name: Start Tailscale sidecar
env:
TS_AUTHKEY: ${{ secrets.TAILSCALE_AUTH_KEY }}
run: |
curl -fsSL https://tailscale.com/install.sh | sh
mkdir -p /var/run/tailscale /var/lib/tailscale
tailscaled \
--socket=/var/run/tailscale/tailscaled.sock \
--state=/var/lib/tailscale/tailscaled.state &
until tailscale status --json >/dev/null 2>&1; do sleep 1; done
tailscale up \
--authkey="${TS_AUTHKEY}" \
--accept-routes \
--hostname="pixie-perf-ci-${GITHUB_RUN_ID}"

- name: Write kubeconfig
env:
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
run: |
mkdir -p "${RUNNER_TEMP}"
echo "${KUBECONFIG_B64}" | base64 -d > "${RUNNER_TEMP}/kubeconfig"
chmod 600 "${RUNNER_TEMP}/kubeconfig"

# Fail fast if Tailscale can't reach the cluster API, before the 2+ minute
# bazel/skaffold build wastes time.
- name: Tailscale connectivity probe
  env:
    KUBECONFIG: ${{ runner.temp }}/kubeconfig
  run: |
    # Print tailnet state first so a failed probe below has context in the log.
    tailscale status
    tailscale netcheck
    # Reduce the kubeconfig server URL to host[:port].
    api_server="$(kubectl --kubeconfig="$KUBECONFIG" config view --minify -o jsonpath='{.clusters[0].cluster.server}')"
    api_host="$(printf '%s' "${api_server}" | sed -E 's|https?://||; s|/.*||')"
    api_ip="${api_host%%:*}"
    # A server URL without an explicit port would otherwise yield the host
    # itself as the "port"; fall back to the default port for the scheme.
    case "${api_host}" in
      *:*) api_port="${api_host##*:}" ;;
      *)
        case "${api_server}" in
          http://*) api_port=80 ;;
          *) api_port=443 ;;
        esac
        ;;
    esac
    echo "--- tailscale ping ${api_ip} ---"
    # Diagnostic only: a failed ping must not fail the step.
    tailscale ping --c 3 --until-direct=false "${api_ip}" || true
    echo "--- tcp probe ${api_ip}:${api_port} ---"
    # Opening /dev/tcp is a bash feature, hence the explicit `bash -c`.
    if timeout 5 bash -c "</dev/tcp/${api_ip}/${api_port}"; then
      echo "API port reachable"
    else
      echo "API port UNREACHABLE"
      exit 1
    fi
    echo "--- kubectl get nodes ---"
    kubectl --kubeconfig="$KUBECONFIG" get nodes

- name: Use github bazel config
uses: ./.github/actions/bazelrc
with:
download_toplevel: 'true'
BB_API_KEY: ${{ secrets.BB_IO_API_KEY }}

# TODO(ddelnano): revert to `./.github/actions/gcloud_creds` once GCP_SA_KEY
# is re-uploaded with `base64 -w0`. The shared composite uses plain
# `base64 --decode` which rejects the wrapped (multi-line/CRLF) value
# currently stored in the secret.
- id: gcloud-creds
env:
SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SA_KEY }}
run: |
printf '%s' "$SERVICE_ACCOUNT_KEY" | base64 -di > /tmp/gcloud.json
chmod 600 /tmp/gcloud.json
echo "gcloud-creds=/tmp/gcloud.json" >> $GITHUB_OUTPUT
- name: Activate gcloud service account
env:
GOOGLE_APPLICATION_CREDENTIALS: ${{ steps.gcloud-creds.outputs.gcloud-creds }}
run: |
service_account="$(jq -r '.client_email' "$GOOGLE_APPLICATION_CREDENTIALS")"
gcloud auth activate-service-account "${service_account}" --key-file="$GOOGLE_APPLICATION_CREDENTIALS"
gcloud auth configure-docker

- name: Log in to GHCR
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: echo "${GH_TOKEN}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin

- name: Build and install px CLI
run: |
bazel build --config=x86_64_sysroot //src/pixie_cli:px
install -m 0755 bazel-bin/src/pixie_cli/px_/px /usr/local/bin/px
px version

- name: Run clickhouse-export perf
  env:
    PX_API_KEY: ${{ secrets.PX_API_KEY }}
    GOOGLE_APPLICATION_CREDENTIALS: ${{ steps.gcloud-creds.outputs.gcloud-creds }}
    KUBECONFIG: ${{ runner.temp }}/kubeconfig
    # The free-form workflow_dispatch input is passed through the environment
    # rather than expanded into the script, so quotes or `$(...)` in the tags
    # cannot alter the command line.
    PERF_TAGS: ${{ inputs.tags }}
  run: |
    bazel run //src/e2e_test/perf_tool:perf_tool -- run \
      --api_key="${PX_API_KEY}" \
      --cloud_addr=pixie.austrianopencloudcommunity.org:443 \
      --commit_sha="${{ steps.get-commit-sha.outputs.commit-sha }}" \
      --experiment_name=clickhouse-export \
      --suite=clickhouse-exec \
      --use_local_cluster \
      --export_backend=parquet-gcs \
      --gcs_bucket=k8sstormcenter-soc-perf \
      --container_repo=ghcr.io/k8sstormcenter \
      --prom_recorder_override 'clickhouse-operator=:k8ss-forensic' \
      --tags "${PERF_TAGS}"

- name: Upload skaffold stderr log
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: skaffold-stderr-${{ github.run_id }}-${{ github.run_attempt }}
path: ${{ runner.temp }}/skaffold-stderr.log
if-no-files-found: ignore

- name: Deactivate gcloud service account
if: always()
run: gcloud auth revoke || true

- name: Tailscale logout
if: always()
run: tailscale logout || true
158 changes: 158 additions & 0 deletions .github/workflows/perf_soc_attack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
---
name: perf-eval-soc-attack
on:
workflow_dispatch:
inputs:
ref:
description: 'Branch or commit'
required: false
type: string
tags:
description: 'Tags (comma separated)'
required: false
type: string
permissions:
contents: read
packages: write
jobs:
get-dev-image-with-extras:
uses: ./.github/workflows/get_image.yaml
with:
image-base-name: "dev_image_with_extras"
ref: ${{ inputs.ref }}

soc-attack-perf:
name: Sovereign SOC redis-attack perf eval
needs: get-dev-image-with-extras
runs-on: oracle-vm-16cpu-64gb-x86-64
container:
image: ${{ needs.get-dev-image-with-extras.outputs.image-with-tag }}
options: --cap-add=NET_ADMIN --device=/dev/net/tun
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.ref }}
fetch-depth: 0
- name: Add pwd to git safe dir
run: git config --global --add safe.directory `pwd`
- id: get-commit-sha
run: echo "commit-sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT

# TODO(ddelnano): swap TAILSCALE_AUTH_KEY for an OAuth client once one is
# provisioned in the k8sstormcenter tailnet. Use
# `tailscale/github-action@v2` with `oauth-client-id` and `oauth-secret`
# inputs (`TS_OAUTH_CLIENT_ID` / `TS_OAUTH_CLIENT_SECRET` secrets) so
# credentials rotate automatically instead of expiring on a fixed cadence.
- name: Start Tailscale sidecar
env:
TS_AUTHKEY: ${{ secrets.TAILSCALE_AUTH_KEY }}
run: |
curl -fsSL https://tailscale.com/install.sh | sh
mkdir -p /var/run/tailscale /var/lib/tailscale
tailscaled \
--socket=/var/run/tailscale/tailscaled.sock \
--state=/var/lib/tailscale/tailscaled.state &
until tailscale status --json >/dev/null 2>&1; do sleep 1; done
tailscale up \
--authkey="${TS_AUTHKEY}" \
--accept-routes \
--hostname="pixie-perf-ci-${GITHUB_RUN_ID}"

- name: Write kubeconfig
env:
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
run: |
mkdir -p "${RUNNER_TEMP}"
echo "${KUBECONFIG_B64}" | base64 -d > "${RUNNER_TEMP}/kubeconfig"
chmod 600 "${RUNNER_TEMP}/kubeconfig"

# Fail fast if Tailscale can't reach the cluster API, before the 2+ minute
# bazel/skaffold build wastes time.
- name: Tailscale connectivity probe
  env:
    KUBECONFIG: ${{ runner.temp }}/kubeconfig
  run: |
    # Print tailnet state first so a failed probe below has context in the log.
    tailscale status
    tailscale netcheck
    # Reduce the kubeconfig server URL to host[:port].
    api_server="$(kubectl --kubeconfig="$KUBECONFIG" config view --minify -o jsonpath='{.clusters[0].cluster.server}')"
    api_host="$(printf '%s' "${api_server}" | sed -E 's|https?://||; s|/.*||')"
    api_ip="${api_host%%:*}"
    # A server URL without an explicit port would otherwise yield the host
    # itself as the "port"; fall back to the default port for the scheme.
    case "${api_host}" in
      *:*) api_port="${api_host##*:}" ;;
      *)
        case "${api_server}" in
          http://*) api_port=80 ;;
          *) api_port=443 ;;
        esac
        ;;
    esac
    echo "--- tailscale ping ${api_ip} ---"
    # Diagnostic only: a failed ping must not fail the step.
    tailscale ping --c 3 --until-direct=false "${api_ip}" || true
    echo "--- tcp probe ${api_ip}:${api_port} ---"
    # Opening /dev/tcp is a bash feature, hence the explicit `bash -c`.
    if timeout 5 bash -c "</dev/tcp/${api_ip}/${api_port}"; then
      echo "API port reachable"
    else
      echo "API port UNREACHABLE"
      exit 1
    fi
    echo "--- kubectl get nodes ---"
    kubectl --kubeconfig="$KUBECONFIG" get nodes

- name: Use github bazel config
uses: ./.github/actions/bazelrc
with:
download_toplevel: 'true'
BB_API_KEY: ${{ secrets.BB_IO_API_KEY }}

# TODO(ddelnano): revert to `./.github/actions/gcloud_creds` once GCP_SA_KEY
# is re-uploaded with `base64 -w0`. The shared composite uses plain
# `base64 --decode` which rejects the wrapped (multi-line/CRLF) value
# currently stored in the secret.
- id: gcloud-creds
env:
SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SA_KEY }}
run: |
printf '%s' "$SERVICE_ACCOUNT_KEY" | base64 -di > /tmp/gcloud.json
chmod 600 /tmp/gcloud.json
echo "gcloud-creds=/tmp/gcloud.json" >> $GITHUB_OUTPUT
- name: Activate gcloud service account
env:
GOOGLE_APPLICATION_CREDENTIALS: ${{ steps.gcloud-creds.outputs.gcloud-creds }}
run: |
service_account="$(jq -r '.client_email' "$GOOGLE_APPLICATION_CREDENTIALS")"
gcloud auth activate-service-account "${service_account}" --key-file="$GOOGLE_APPLICATION_CREDENTIALS"
gcloud auth configure-docker

- name: Log in to GHCR
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: echo "${GH_TOKEN}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin

- name: Build and install px CLI
run: |
bazel build --config=x86_64_sysroot //src/pixie_cli:px
install -m 0755 bazel-bin/src/pixie_cli/px_/px /usr/local/bin/px
px version

# The sovereign-soc suite installs Kubescape + Vector on the experiment
# cluster as part of the run (see KubescapeVectorWorkload). The
# kubescape-operator chart is pre-rendered under
# src/e2e_test/perf_tool/pkg/suites/k8s/sovereign-soc/helm-rendered/
# and applied via PrerenderedDeploy, so no extra ./scripts step is needed.
#
# ClickHouse operator metrics are scraped on the forensic cluster via
# the prom_recorder_override; the kubescape node-agent prom recorder
# is intentionally NOT overridden — kubescape runs on the experiment
# cluster (where redis+bobctl drive traffic), so the recorder uses the
# default kubeconfig.
- name: Run sovereign-soc redis-attack perf
  env:
    PX_API_KEY: ${{ secrets.PX_API_KEY }}
    GOOGLE_APPLICATION_CREDENTIALS: ${{ steps.gcloud-creds.outputs.gcloud-creds }}
    KUBECONFIG: ${{ runner.temp }}/kubeconfig
    # The free-form workflow_dispatch input is passed through the environment
    # rather than expanded into the script, so quotes or `$(...)` in the tags
    # cannot alter the command line.
    PERF_TAGS: ${{ inputs.tags }}
  run: |
    # Every flag line except the last must end in a backslash; without it the
    # shell ends the perf_tool command early and runs the remainder as a
    # separate command.
    bazel run //src/e2e_test/perf_tool:perf_tool -- run \
      --api_key="${PX_API_KEY}" \
      --cloud_addr=pixie.austrianopencloudcommunity.org:443 \
      --commit_sha="${{ steps.get-commit-sha.outputs.commit-sha }}" \
      --experiment_name=redis-attack \
      --suite=sovereign-soc \
      --use_local_cluster \
      --export_backend=parquet-gcs \
      --gcs_bucket=k8sstormcenter-soc-perf \
      --container_repo=ghcr.io/k8sstormcenter \
      --prom_recorder_override 'clickhouse-operator=:k8ss-forensic' \
      --max_retries=1 \
      --tags "${PERF_TAGS}"

# Mirrors the cleanup in perf_clickhouse.yaml: this workflow activates a
# gcloud service account earlier in the job, so revoke it even when a
# previous step failed. `|| true` keeps cleanup best-effort.
- name: Deactivate gcloud service account
  if: always()
  run: gcloud auth revoke || true

# Best-effort removal of this run's node from the tailnet.
- name: Tailscale logout
  if: always()
  run: tailscale logout || true
11 changes: 8 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ require (
github.com/ory/dockertest/v3 v3.8.1
github.com/ory/hydra-client-go v1.9.2
github.com/ory/kratos-client-go v0.10.1
github.com/parquet-go/parquet-go v0.25.1
github.com/phayes/freeport v0.0.0-20171002181615-b8543db493a5
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/client_model v0.3.0
Expand Down Expand Up @@ -115,6 +116,7 @@ require (
github.com/VividCortex/ewma v1.1.1 // indirect
github.com/a8m/envsubst v1.3.0 // indirect
github.com/alecthomas/participle/v2 v2.0.0-beta.5 // indirect
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/andybalholm/cascadia v1.1.0 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
Expand Down Expand Up @@ -171,7 +173,7 @@ require (
github.com/google/go-querystring v1.1.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/googleapis/gax-go/v2 v2.7.0 // indirect
github.com/gorilla/securecookie v1.1.1 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
Expand All @@ -191,7 +193,7 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/jstemmer/go-junit-report v0.9.1 // indirect
github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd // indirect
github.com/klauspost/compress v1.17.2 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/kr/pretty v0.2.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
Expand Down Expand Up @@ -232,6 +234,7 @@ require (
github.com/patrickmn/go-cache v2.1.0+incompatible // indirect
github.com/pelletier/go-toml v1.9.3 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/procfs v0.9.0 // indirect
Expand Down Expand Up @@ -276,7 +279,7 @@ require (
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.29.1 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/launchdarkly/go-jsonstream.v1 v1.0.1 // indirect
Expand Down Expand Up @@ -317,3 +320,5 @@ replace (
google.golang.org/grpc => google.golang.org/grpc v1.43.0
gopkg.in/yaml.v2 => gopkg.in/yaml.v2 v2.4.0
)

replace google.golang.org/protobuf => google.golang.org/protobuf v1.29.1
Loading
Loading