Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions .github/scripts/aiter_prebuild_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""Summarize Aiter prebuild timing from setup.py build logs."""

from __future__ import annotations

import argparse
import glob
import os
import re
from pathlib import Path

# Matches setup.py's per-module completion line, e.g.
# "finish build [module_name], cost 12.3s"; group 1 is the module name and
# group 2 the raw cost token (digits and dots, validated when parsed).
FINISH_RE = re.compile(r"finish build \[([^\]]+)\], cost ([0-9.]+)s")


def parse_module_costs(log_path: Path) -> list[tuple[str, float]]:
    """Collect per-module build costs from a prebuild log.

    Every line matching ``FINISH_RE`` contributes one ``(name, seconds)``
    pair, kept in the order the lines appear in the log.

    Args:
        log_path: Path to the captured build log.

    Returns:
        A list of ``(module_name, cost_seconds)`` tuples. The list is empty
        when the log cannot be opened or contains no matching lines.

    This function is best-effort and does not raise for bad input: an
    unreadable log or a cost token that is not a valid number (the pattern
    admits strings such as ``1.2.3``) is reported as a ``::warning::`` line
    on stdout and otherwise ignored.
    """
    module_costs: list[tuple[str, float]] = []
    try:
        with log_path.open(encoding="utf-8", errors="replace") as log:
            for line in log:
                match = FINISH_RE.search(line)
                if match is None:
                    continue
                name, raw_cost = match.groups()
                try:
                    cost = float(raw_cost)
                except ValueError:
                    # One garbled line must not abort the whole summary.
                    print(
                        f"::warning::Ignoring malformed build cost "
                        f"{raw_cost!r} for module {name}"
                    )
                    continue
                module_costs.append((name, cost))
    except OSError as exc:
        print(f"::warning::Unable to read prebuild log {log_path}: {exc}")
    return module_costs


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the prebuild summary script.

    Args:
        argv: Argument strings to parse instead of the process command line.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``, so
            existing no-argument callers behave exactly as before.

    Returns:
        A namespace with ``log`` (``Path``), ``build_status``, ``start`` and
        ``end`` (all ``int``) and ``kernel_glob`` (``str``).

    Raises:
        SystemExit: Raised by argparse when a required option is missing or a
            value cannot be converted to its declared type.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--log", required=True, type=Path, help="Path to the tee'd prebuild log"
    )
    parser.add_argument(
        "--build-status", required=True, type=int, help="setup.py exit code"
    )
    parser.add_argument(
        "--start", required=True, type=int, help="Prebuild start timestamp in seconds"
    )
    parser.add_argument(
        "--end", required=True, type=int, help="Prebuild end timestamp in seconds"
    )
    parser.add_argument(
        "--kernel-glob",
        default="aiter/jit/*.so",
        help="Glob used to count prebuilt kernel shared objects",
    )
    return parser.parse_args(argv)


def main() -> int:
    """Print the Aiter prebuild summary to stdout.

    Reads the command-line options, extracts per-module build costs from the
    log, counts the files matching the kernel glob, and prints a report
    containing the build environment, wall time, and module timings.

    Returns:
        Always ``0``; the build's own exit status is only echoed in the report.
    """
    options = parse_args()
    costs = parse_module_costs(options.log)
    kernel_paths = sorted(glob.glob(options.kernel_glob))
    # Clamp to zero so swapped or skewed timestamps never show a negative time.
    elapsed = max(0, options.end - options.start)
    compile_total = sum(seconds for _, seconds in costs)

    print("=== Aiter prebuild summary ===")
    # (report label, environment variable) pairs echoed verbatim.
    echoed_env = (
        ("Runner", "AITER_RUNNER_NAME"),
        ("GPU_ARCHS", "GPU_ARCHS"),
        ("PREBUILD_KERNELS", "PREBUILD_KERNELS"),
        ("MAX_JOBS", "MAX_JOBS"),
    )
    for label, variable in echoed_env:
        print(f"{label}: {os.environ.get(variable, 'unknown')}")
    print(f"Build status: {options.build_status}")
    print(f"Prebuild wall time: {elapsed}s ({elapsed / 60:.1f} min)")
    print(f"Kernel count: {len(kernel_paths)}")
    print(f"Module builds observed: {len(costs)}")
    compile_minutes = compile_total / 60
    print(f"Total module compile cost: {compile_total:.1f}s ({compile_minutes:.1f} min)")

    if not costs:
        print("::warning::No module build cost lines were found in the prebuild log")
        return 0

    print("Top slowest module builds:")
    slowest_first = sorted(costs, key=lambda entry: entry[1], reverse=True)
    for name, seconds in slowest_first[:20]:
        print(f"  {name}: {seconds:.1f}s")

    print("All module build costs (seconds):")
    # Plain tuple ordering: by module name, then by cost.
    for name, seconds in sorted(costs):
        print(f"  {name}: {seconds:.1f}")

    return 0


if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
43 changes: 41 additions & 2 deletions .github/workflows/aiter-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ env:
GPU_ARCH_LIST: "gfx942;gfx950"
AITER_TEST: "op_tests"
AITER_WHEEL_ARTIFACT_NAME: aiter-whl-${{ github.run_id }}
AITER_PREBUILD_MAX_JOBS: "96"

jobs:
check-signal:
Expand Down Expand Up @@ -93,10 +94,13 @@ jobs:
set -euo pipefail
docker run --rm \
--network=host \
-e AITER_PREBUILD_MAX_JOBS="${{ env.AITER_PREBUILD_MAX_JOBS }}" \
-e AITER_RUNNER_NAME="${RUNNER_NAME:-unknown}" \
-v "${{ github.workspace }}:/workspace" \
-w /workspace \
${{ env.DOCKER_IMAGE }} \
bash -lc '
set -euo pipefail
git config --global --add safe.directory /workspace &&
git -C /workspace rev-parse HEAD >/dev/null &&
shopt -s nullglob &&
Expand All @@ -107,11 +111,46 @@ jobs:
pip install --upgrade "ninja>=1.11.1" &&
pip install --upgrade setuptools_scm &&
pip install tabulate &&
echo "Prebuilding kernels with GPU_ARCHS: ${{ env.GPU_ARCH_LIST }}, PREBUILD_KERNELS: 1, and MAX_JOBS: 128" &&
PREBUILD_KERNELS=1 MAX_JOBS=128 GPU_ARCHS="${{ env.GPU_ARCH_LIST }}" python setup.py bdist_wheel &&
echo "Prebuilding kernels with GPU_ARCHS: ${{ env.GPU_ARCH_LIST }}, PREBUILD_KERNELS: 1, and MAX_JOBS: ${AITER_PREBUILD_MAX_JOBS}" &&
export PREBUILD_KERNELS=1 &&
export MAX_JOBS="${AITER_PREBUILD_MAX_JOBS}" &&
export GPU_ARCHS="${{ env.GPU_ARCH_LIST }}" &&
prebuild_start=$(date +%s) &&
set +e &&
python setup.py bdist_wheel 2>&1 | tee .aiter-prebuild.log
build_status=${PIPESTATUS[0]}
set -e
prebuild_end=$(date +%s)
Comment on lines +119 to +123
{
echo "BUILD_STATUS=${build_status}"
echo "PREBUILD_START=${prebuild_start}"
echo "PREBUILD_END=${prebuild_end}"
} > .aiter-prebuild.env
if [ "${build_status}" -ne 0 ]; then
exit "${build_status}"
fi
ls -lh dist/*.whl
'

# Reporting step: `always()` makes it run even when an earlier step failed,
# so timing data is still printed for failed builds.
- name: Summarize Aiter prebuild
if: ${{ always() }}
run: |
set -euo pipefail
# The metadata file is written by the build step; without it there is
# nothing to summarize, so emit a warning annotation and succeed.
if [ ! -f .aiter-prebuild.env ]; then
echo "::warning::Aiter prebuild metadata was not generated"
exit 0
fi
# Load BUILD_STATUS, PREBUILD_START and PREBUILD_END into this shell.
. ./.aiter-prebuild.env
# The variables below are set only for the summary script, which echoes
# them in its report.
AITER_RUNNER_NAME="${RUNNER_NAME:-unknown}" \
GPU_ARCHS="${{ env.GPU_ARCH_LIST }}" \
PREBUILD_KERNELS=1 \
MAX_JOBS="${{ env.AITER_PREBUILD_MAX_JOBS }}" \
python3 .github/scripts/aiter_prebuild_summary.py \
--log .aiter-prebuild.log \
--build-status "${BUILD_STATUS}" \
--start "${PREBUILD_START}" \
--end "${PREBUILD_END}"

- name: Verify prebuilt kernels in wheel
run: |
set -euo pipefail
Expand Down
Loading