Skip to content

refactor(tests): split suites by execution level and speed up CI #5949

refactor(tests): split suites by execution level and speed up CI

refactor(tests): split suites by execution level and speed up CI #5949

Workflow file for this run

# Events that start this workflow: manual dispatch, pushes to main and to
# v* tags, merge-queue groups, and pull requests.
on:
  workflow_dispatch:
  push:
    branches:
      - "main"
    tags:
      - "v*"
  merge_group:
  pull_request:
    # NOTE(review): in GitHub's filter syntax "*" does not match branch names
    # that contain "/"; switch to "**" if PRs into such branches must run CI.
    branches:
      - "*"

# Least privilege for GITHUB_TOKEN: every job below only checks out the
# repository and runs local tooling, so read access to contents is enough.
permissions:
  contents: read

# One active run per pull request (or per ref for non-PR events); a newer run
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
linting:
  # Lint job: installs the locked dependency set and runs `ruff check` over
  # the repository root.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

    - name: Install uv and set Python version
      uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
      with:
        python-version: '3.13'
        version: '0.11.2'
        enable-cache: true

    - name: Install dependencies
      run: uv sync --locked

    - name: Run Ruff
      run: uv run --frozen ruff check .
type-checking:
  # Type-check job: runs mypy against the `langfuse` package on Python 3.13.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

    - name: Install uv and set Python version
      uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
      with:
        python-version: '3.13'
        version: '0.11.2'
        enable-cache: true

    # Keep ./.mypy_cache between runs. The exact key is a hash of all Python
    # sources plus pyproject.toml; the `mypy-` restore key lets a run fall
    # back to an earlier cache entry when that hash has changed.
    - name: Cache mypy cache
      uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5
      with:
        path: ./.mypy_cache
        key: mypy-${{ hashFiles('**/*.py', 'pyproject.toml') }}
        restore-keys: |
          mypy-

    - name: Install dependencies
      run: uv sync --locked

    - name: Run mypy type checking
      run: uv run --frozen mypy langfuse --no-error-summary
unit-tests:
  # Unit-test job: runs tests/unit once per Python version in the matrix.
  # fail-fast is off so one failing version does not cancel the others.
  runs-on: ubuntu-latest
  timeout-minutes: 30
  # NOTE(review): these look like placeholder credentials (no secrets are
  # referenced here) — confirm the unit suite never contacts a real server.
  env:
    LANGFUSE_BASE_URL: "http://localhost:3000"
    LANGFUSE_PUBLIC_KEY: "pk-lf-test"
    LANGFUSE_SECRET_KEY: "sk-lf-test"
    OPENAI_API_KEY: "test-openai-key"
  strategy:
    fail-fast: false
    matrix:
      # Quoted so that e.g. "3.10" is not parsed as the float 3.1.
      python-version:
        - "3.10"
        - "3.11"
        - "3.12"
        - "3.13"
        - "3.14"
  name: Unit tests on Python ${{ matrix.python-version }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
    - name: Install uv and set Python version
      uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
      with:
        version: "0.11.2"
        python-version: ${{ matrix.python-version }}
        enable-cache: true
    - name: Install the project dependencies
      run: uv sync --locked
    # Ask uv for the interpreter it will actually use. A bare `python` may
    # resolve to the runner's preinstalled interpreter instead of the matrix
    # version, which would make this log line misleading. This mirrors the
    # version check in the e2e job.
    - name: Check Python version
      run: uv run --frozen python --version
    - name: Run the automated tests
      run: |
        uv run --frozen python --version
        uv run --frozen pytest -n auto --dist worksteal -s -v --log-cli-level=INFO tests/unit
e2e-tests:
  # End-to-end job: boots a Langfuse server with docker compose and runs one
  # of three matrix entries against it — two shards of the `e2e` suite and
  # one `live_provider` suite. All entries use Python 3.13.
  runs-on: ubuntu-latest
  timeout-minutes: 30
  strategy:
    fail-fast: false
    matrix:
      include:
        - suite: e2e
          job_name: E2E shard 1 tests on Python 3.13
          shard_name: shard-1
          shard_index: 0
          shard_count: 2
        - suite: e2e
          job_name: E2E shard 2 tests on Python 3.13
          shard_name: shard-2
          shard_index: 1
          shard_count: 2
        - suite: live_provider
          job_name: E2E live-provider tests on Python 3.13
          shard_name: live-provider
  env:
    # Client-side settings used by the SDK under test.
    LANGFUSE_BASE_URL: "http://localhost:3000"
    LANGFUSE_PUBLIC_KEY: "pk-lf-1234567890"
    LANGFUSE_SECRET_KEY: "sk-lf-1234567890"
    # Seed values for the server. The public/secret key pair matches the
    # client keys above, and the health check below asserts that the server
    # reports LANGFUSE_INIT_PROJECT_ID.
    # NOTE(review): presumably read by the downloaded docker-compose.yml via
    # the inherited environment — confirm against that file.
    LANGFUSE_INIT_ORG_ID: "0c6c96f4-0ca0-4f16-92a8-6dd7d7c6a501"
    LANGFUSE_INIT_ORG_NAME: "SDK Test Org"
    LANGFUSE_INIT_PROJECT_ID: "7a88fb47-b4e2-43b8-a06c-a5ce950dc53a"
    LANGFUSE_INIT_PROJECT_NAME: "SDK Test Project"
    LANGFUSE_INIT_PROJECT_PUBLIC_KEY: "pk-lf-1234567890"
    LANGFUSE_INIT_PROJECT_SECRET_KEY: "sk-lf-1234567890"
    LANGFUSE_INIT_USER_EMAIL: "sdk-tests@langfuse.local"
    LANGFUSE_INIT_USER_NAME: "SDK Tests"
    LANGFUSE_INIT_USER_PASSWORD: "langfuse-ci-password"
    LANGFUSE_E2E_READ_TIMEOUT_SECONDS: "60"
    LANGFUSE_E2E_READ_INTERVAL_SECONDS: "0.5"
    # Real provider credentials, taken from repository secrets.
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    # SERPAPI_API_KEY: ${{ secrets.SERPAPI_API_KEY }}
    HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  name: ${{ matrix.job_name }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
    - name: Install uv and set Python version
      uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8
      with:
        version: "0.11.2"
        python-version: "3.13"
        enable-cache: true
    - name: Install the project dependencies
      run: uv sync --locked
    - name: Check uv Python version
      run: uv run --frozen python --version
    # Resolve the current HEAD commit of langfuse/langfuse and download the
    # docker-compose.yml from exactly that commit; the SHA is echoed so the
    # server revision is visible in the log.
    - name: Prepare langfuse server compose
      run: |
        mkdir -p ./langfuse-server
        LANGFUSE_SERVER_SHA="$(git ls-remote https://github.com/langfuse/langfuse.git HEAD | cut -f1)"
        curl -fsSL "https://raw.githubusercontent.com/langfuse/langfuse/${LANGFUSE_SERVER_SHA}/docker-compose.yml" \
          -o ./langfuse-server/docker-compose.yml
        echo "${LANGFUSE_SERVER_SHA}"
    - name: Run langfuse server
      run: |
        cd ./langfuse-server
        echo "::group::Start langfuse server"
        TELEMETRY_ENABLED=false \
        NEXT_PUBLIC_LANGFUSE_RUN_NEXT_INIT=true \
        LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT=http://localhost:9090 \
        LANGFUSE_INGESTION_QUEUE_DELAY_MS=10 \
        LANGFUSE_INGESTION_CLICKHOUSE_WRITE_INTERVAL_MS=10 \
        LANGFUSE_EXPERIMENT_INSERT_INTO_EVENTS_TABLE=true \
        QUEUE_CONSUMER_EVENT_PROPAGATION_QUEUE_IS_ENABLED=true \
        LANGFUSE_ENABLE_EVENTS_TABLE_V2_APIS=true \
        LANGFUSE_ENABLE_EVENTS_TABLE_OBSERVATIONS=true \
        docker compose up -d
        echo "::endgroup::"
    # Poll until the health endpoint answers AND the SDK resolves the seeded
    # project id. Gives up after max_retries attempts (5 s apart) and dumps
    # compose status and logs before failing.
    - name: Health check for langfuse server
      run: |
        echo "Checking if the langfuse server is up..."
        retry_count=0
        max_retries=20
        until curl --output /dev/null --silent --head --fail http://localhost:3000/api/public/health && \
          uv run --frozen python -c "from langfuse import Langfuse; client = Langfuse(); project_id = client._get_project_id(); assert project_id == '7a88fb47-b4e2-43b8-a06c-a5ce950dc53a', project_id; print(project_id)"
        do
          retry_count=$((retry_count + 1))
          echo "Attempt $retry_count of $max_retries..."
          if [ "$retry_count" -ge "$max_retries" ]; then
            echo "Langfuse server did not respond in time. Printing logs..."
            (cd ./langfuse-server && docker compose ps)
            (cd ./langfuse-server && docker compose logs langfuse-web langfuse-worker)
            echo "Failing the step..."
            exit 1
          fi
          sleep 5
        done
        echo "Langfuse server is up and running!"
    # The selector runs twice: once with --json purely for the log, once
    # writing the file list that the two pytest steps below read.
    - name: Select e2e shard files
      if: ${{ matrix.suite == 'e2e' }}
      run: |
        uv run --frozen python scripts/select_e2e_shard.py \
          --shard-index ${{ matrix.shard_index }} \
          --shard-count ${{ matrix.shard_count }} \
          --json
        uv run --frozen python scripts/select_e2e_shard.py \
          --shard-index ${{ matrix.shard_index }} \
          --shard-count ${{ matrix.shard_count }} \
          > "$RUNNER_TEMP/e2e-shard-files.txt"
        cat "$RUNNER_TEMP/e2e-shard-files.txt"
    - name: Run the parallel end-to-end tests
      if: ${{ matrix.suite == 'e2e' }}
      run: |
        uv run --frozen python --version
        mapfile -t e2e_files < "$RUNNER_TEMP/e2e-shard-files.txt"
        # Without any path arguments pytest falls back to its default
        # collection and would run far more than this shard, so an empty
        # shard is skipped explicitly.
        if [ "${#e2e_files[@]}" -eq 0 ]; then
          echo "No e2e files assigned to this shard."
          exit 0
        fi
        # pytest exits with 5 when nothing was collected; that is acceptable
        # for a shard whose files hold no tests matching the marker.
        set +e
        uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO "${e2e_files[@]}" -m "not serial_e2e"
        status=$?
        set -e
        if [ "$status" -eq 5 ]; then
          echo "No parallel e2e tests selected for this shard."
        elif [ "$status" -ne 0 ]; then
          exit "$status"
        fi
    - name: Run serial end-to-end tests
      if: ${{ matrix.suite == 'e2e' }}
      run: |
        mapfile -t e2e_files < "$RUNNER_TEMP/e2e-shard-files.txt"
        # Same empty-shard guard as the parallel step above.
        if [ "${#e2e_files[@]}" -eq 0 ]; then
          echo "No e2e files assigned to this shard."
          exit 0
        fi
        # No -n here: tests marked serial_e2e run in a single process.
        set +e
        uv run --frozen pytest -s -v --log-cli-level=INFO "${e2e_files[@]}" -m "serial_e2e"
        status=$?
        set -e
        if [ "$status" -eq 5 ]; then
          echo "No serial e2e tests selected for this shard."
        elif [ "$status" -ne 0 ]; then
          exit "$status"
        fi
    - name: Run live-provider tests
      if: ${{ matrix.suite == 'live_provider' }}
      run: |
        uv run --frozen python --version
        uv run --frozen pytest -n 4 --dist worksteal -s -v --log-cli-level=INFO tests/live_provider -m "live_provider"
all-tests-passed:
  # Single aggregate status for branch protection, so a rule can require this
  # one job instead of every individual matrix entry.
  runs-on: ubuntu-latest
  needs: [unit-tests, e2e-tests, linting, type-checking]
  # always() makes this job run even when a needed job did not succeed, so it
  # can report the failure instead of being skipped.
  if: always()
  steps:
    # A cancelled dependency counts as not passed: checking only for
    # 'failure' would let this gate succeed when a needed job was cancelled.
    - name: Successful deploy
      if: ${{ !(contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) }}
      run: exit 0
    - name: Failing deploy
      if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
      run: exit 1