Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 30 additions & 4 deletions .github/workflows/_accuracy_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,27 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'

wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
wget -q --no-proxy ${fd_archive_url} || {
echo "ERROR: Failed to download archive from ${fd_archive_url}"
exit 1
}

tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}

rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
Expand Down Expand Up @@ -145,7 +160,10 @@ jobs:
docker rm -f ${runner_name} || true
fi

docker run --rm --ipc=host --pid=host --net=host \
docker run --rm --net=host \
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

大小写不一致

此处的 --shm-size=64g 使用了小写 g,而其他 workflow 文件(如 _unit_test_coverage.yml)使用大写 G。虽然 Docker 对大小写不敏感,但建议保持一致。

--shm-size=64g \
--sysctl kernel.msgmax=1048576 \
--sysctl kernel.msgmnb=268435456 \
--name ${runner_name} \
-v $(pwd):/workspace \
-w /workspace \
Expand All @@ -160,6 +178,7 @@ jobs:
-v "${CACHE_DIR}/.cache:/root/.cache" \
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
-e TZ="Asia/Shanghai" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

Expand Down Expand Up @@ -204,3 +223,10 @@ jobs:
fi
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
exit ${TEST_EXIT_CODE}

- name: Terminate and delete the container
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug 容器清理逻辑存在漏洞

当容器已停止时(例如测试失败导致容器退出),docker exec -t ${{ runner.name }} 会失败,导致 workspace 内容不会被清理,但容器仍会被 docker rm -f 删除。

建议修改为:

- name: Terminate and delete the container
  if: always()
  run: |
    set +e
    # 清理 workspace(如果容器仍在运行)
    docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' 2>/dev/null || true
    # 强制删除容器
    docker rm -f ${{ runner.name }}
    # 如果容器已停止但 workspace 残留,直接清理宿主机上的 workspace
    find "$(pwd)" -mindepth 1 -delete 2>/dev/null || true

if: always()
run: |
set +e
docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'
docker rm -f ${{ runner.name }}
23 changes: 21 additions & 2 deletions .github/workflows/_base_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,14 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'
Expand Down Expand Up @@ -111,7 +118,11 @@ jobs:
exit 1
fi
tar -xf FastDeploy.tar.gz
tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
Expand Down Expand Up @@ -200,6 +211,7 @@ jobs:
-v "${CACHE_DIR}/.cache:/root/.cache" \
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
-e TZ="Asia/Shanghai" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
Expand Down Expand Up @@ -294,3 +306,10 @@ jobs:
fi
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
exit ${TEST_EXIT_CODE}
- name: Terminate and delete the container
if: always()
run: |
set +e
docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'
docker rm -f ${{ runner.name }}
12 changes: 11 additions & 1 deletion .github/workflows/_build_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ jobs:
git config --global user.name "FastDeployCI"
git config --global user.email "fastdeploy_ci@example.com"
git log -n 3 --oneline
- name: FastDeploy Build
shell: bash
env:
Expand Down Expand Up @@ -156,7 +157,8 @@ jobs:
PARENT_DIR=$(dirname "$WORKSPACE")
echo "PARENT_DIR:$PARENT_DIR"
docker run --rm --net=host \
--cap-add=SYS_PTRACE --privileged --shm-size=64G \
--cap-add=SYS_PTRACE --shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache:/root/.cache" \
Expand All @@ -171,6 +173,7 @@ jobs:
-e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
-e "BRANCH_REF=${BRANCH_REF}" \
-e "CCACHE_MAXSIZE=50G" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
if [[ -n "${FD_VERSION}" ]]; then
export FASTDEPLOY_VERSION=${FD_VERSION}
Expand Down Expand Up @@ -248,3 +251,10 @@ jobs:
target_path_stripped="${target_path#paddle-github-action/}"
WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
echo "wheel_path=${WHEEL_PATH}" >> $GITHUB_OUTPUT
- name: Terminate and delete the container
if: always()
run: |
set +e
docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'
docker rm -f ${{ runner.name }}
12 changes: 11 additions & 1 deletion .github/workflows/_build_linux_cu129.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ jobs:
git config --global user.name "FastDeployCI"
git config --global user.email "fastdeploy_ci@example.com"
git log -n 3 --oneline

- name: FastDeploy Build
shell: bash
env:
Expand Down Expand Up @@ -143,7 +144,8 @@ jobs:
PARENT_DIR=$(dirname "$WORKSPACE")
echo "PARENT_DIR:$PARENT_DIR"
docker run --rm --net=host \
--cap-add=SYS_PTRACE --privileged --shm-size=64G \
--cap-add=SYS_PTRACE --shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache:/root/.cache" \
Expand All @@ -158,6 +160,7 @@ jobs:
-e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
-e "BRANCH_REF=${BRANCH_REF}" \
-e "CCACHE_MAXSIZE=50G" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
if [[ -n "${FD_VERSION}" ]]; then
export FASTDEPLOY_VERSION=${FD_VERSION}
Expand Down Expand Up @@ -235,3 +238,10 @@ jobs:
target_path_stripped="${target_path#paddle-github-action/}"
WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
echo "wheel_path_cu129=${WHEEL_PATH}" >> $GITHUB_OUTPUT

- name: Terminate and delete the container
if: always()
run: |
set +e
docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'
docker rm -f ${{ runner.name }}
12 changes: 11 additions & 1 deletion .github/workflows/_build_linux_cu130.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ jobs:
git config --global user.name "FastDeployCI"
git config --global user.email "fastdeploy_ci@example.com"
git log -n 3 --oneline

- name: FastDeploy Build
shell: bash
env:
Expand Down Expand Up @@ -143,7 +144,8 @@ jobs:
PARENT_DIR=$(dirname "$WORKSPACE")
echo "PARENT_DIR:$PARENT_DIR"
docker run --rm --net=host \
--cap-add=SYS_PTRACE --privileged --shm-size=64G \
--cap-add=SYS_PTRACE --shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache_cu130:/root/.cache" \
Expand All @@ -158,6 +160,7 @@ jobs:
-e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
-e "BRANCH_REF=${BRANCH_REF}" \
-e "CCACHE_MAXSIZE=50G" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
if [[ -n "${FD_VERSION}" ]]; then
export FASTDEPLOY_VERSION=${FD_VERSION}
Expand Down Expand Up @@ -235,3 +238,10 @@ jobs:
target_path_stripped="${target_path#paddle-github-action/}"
WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
echo "wheel_path_cu130=${WHEEL_PATH}" >> $GITHUB_OUTPUT

- name: Terminate and delete the container
if: always()
run: |
set +e
docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'
docker rm -f ${{ runner.name }}
12 changes: 11 additions & 1 deletion .github/workflows/_build_linux_fd_router.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ jobs:
git config --global user.name "FastDeployCI"
git config --global user.email "fastdeploy_ci@example.com"
git log -n 3 --oneline

- name: FastDeploy FD_ROUTER Build
shell: bash
env:
Expand Down Expand Up @@ -137,7 +138,8 @@ jobs:
PARENT_DIR=$(dirname "$WORKSPACE")
echo "PARENT_DIR:$PARENT_DIR"
docker run --rm --net=host \
--cap-add=SYS_PTRACE --privileged --shm-size=64G \
--cap-add=SYS_PTRACE --shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache:/root/.cache" \
Expand All @@ -151,6 +153,7 @@ jobs:
-e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
-e "BRANCH_REF=${BRANCH_REF}" \
-e "CCACHE_MAXSIZE=50G" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
if [[ -n "${FD_VERSION}" ]]; then
export FASTDEPLOY_VERSION=${FD_VERSION}
Expand Down Expand Up @@ -211,3 +214,10 @@ jobs:
target_path_stripped="${target_path#paddle-github-action/}"
FD_ROUTER_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/fd-router
echo "fd_router_path=${FD_ROUTER_PATH}" >> $GITHUB_OUTPUT

- name: Terminate and delete the container
if: always()
run: |
set +e
docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'
docker rm -f ${{ runner.name }}
13 changes: 12 additions & 1 deletion .github/workflows/_build_linux_rl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ on:
wheel_path_rl:
description: "Output path of the generated wheel"
value: ${{ jobs.fd-build-rl.outputs.wheel_path_rl }}

jobs:
fd-build-rl:
runs-on: [self-hosted, GPU-Build-RL]
Expand Down Expand Up @@ -107,6 +108,7 @@ jobs:
git config --global user.name "FastDeployCI"
git config --global user.email "fastdeploy_ci@example.com"
git log -n 3 --oneline

- name: FastDeploy Build
shell: bash
env:
Expand Down Expand Up @@ -137,7 +139,8 @@ jobs:
PARENT_DIR=$(dirname "$WORKSPACE")
echo "PARENT_DIR:$PARENT_DIR"
docker run --rm --net=host \
--cap-add=SYS_PTRACE --privileged --shm-size=64G \
--cap-add=SYS_PTRACE --shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache_rl:/root/.cache" \
Expand All @@ -151,6 +154,7 @@ jobs:
-e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
-e "BRANCH_REF=${BRANCH_REF}" \
-e "CCACHE_MAXSIZE=50G" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
if [[ -n "${FD_VERSION}" ]]; then
export FASTDEPLOY_VERSION=${FD_VERSION}
Expand Down Expand Up @@ -202,3 +206,10 @@ jobs:
target_path_stripped="${target_path#paddle-github-action/}"
WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
echo "wheel_path_rl=${WHEEL_PATH}" >> $GITHUB_OUTPUT

- name: Terminate and delete the container
if: always()
run: |
set +e
docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'
docker rm -f ${{ runner.name }}
29 changes: 26 additions & 3 deletions .github/workflows/_golang_router_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,27 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
wget -q --no-proxy ${fd_archive_url} || {
echo "ERROR: Failed to download archive from ${fd_archive_url}"
exit 1
}
tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
Expand Down Expand Up @@ -191,6 +206,7 @@ jobs:
-e "fd_router_url=${fd_router_url}" \
-e "BASE_REF=${BASE_REF}" \
-e "IS_PR=${IS_PR}" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
git config --global --add safe.directory /workspace/FastDeploy
Expand All @@ -211,3 +227,10 @@ jobs:
bash scripts/run_golang_router.sh
'
- name: Terminate and delete the container
if: always()
run: |
set +e
docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'
docker rm -f ${{ runner.name }}
Loading