diff --git a/.github/workflows/_accuracy_test.yml b/.github/workflows/_accuracy_test.yml index 4efb008da17..87994625c58 100644 --- a/.github/workflows/_accuracy_test.yml +++ b/.github/workflows/_accuracy_test.yml @@ -69,12 +69,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -145,7 +160,10 @@ jobs: docker rm -f ${runner_name} || true fi - docker run --rm --ipc=host --pid=host --net=host \ + docker run --rm --net=host \ + --shm-size=64g \ + --sysctl kernel.msgmax=1048576 \ + --sysctl kernel.msgmnb=268435456 \ --name ${runner_name} \ -v $(pwd):/workspace \ -w /workspace \ @@ -160,6 +178,7 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ @@ -204,3 +223,10 @@ jobs: fi echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" exit ${TEST_EXIT_CODE} + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml index b114bad15d4..3eb022725e5 100644 --- a/.github/workflows/_base_test.yml +++ b/.github/workflows/_base_test.yml @@ -81,7 +81,14 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' @@ -111,7 +118,11 @@ jobs: exit 1 fi - tar -xf FastDeploy.tar.gz + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -200,6 +211,7 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ @@ -294,3 +306,10 @@ jobs: fi echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" exit ${TEST_EXIT_CODE} + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_build_linux.yml b/.github/workflows/_build_linux.yml index 172f07cfd73..1431df353cb 100644 --- a/.github/workflows/_build_linux.yml +++ b/.github/workflows/_build_linux.yml @@ -125,6 +125,7 @@ jobs: git config --global user.name "FastDeployCI" git config --global user.email "fastdeploy_ci@example.com" git log -n 3 --oneline + - name: FastDeploy Build shell: bash env: @@ -156,7 +157,8 @@ jobs: PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host \ - --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + --cap-add=SYS_PTRACE --shm-size=64G \ + --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache:/root/.cache" \ @@ -171,6 +173,7 @@ jobs: -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ -e "BRANCH_REF=${BRANCH_REF}" \ -e "CCACHE_MAXSIZE=50G" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c ' if [[ -n "${FD_VERSION}" ]]; then export FASTDEPLOY_VERSION=${FD_VERSION} @@ -248,3 +251,10 @@ jobs: target_path_stripped="${target_path#paddle-github-action/}" WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name} echo "wheel_path=${WHEEL_PATH}" >> $GITHUB_OUTPUT + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_build_linux_cu129.yml b/.github/workflows/_build_linux_cu129.yml index 6370268c7cb..61108a82b40 100644 --- a/.github/workflows/_build_linux_cu129.yml +++ b/.github/workflows/_build_linux_cu129.yml @@ -112,6 +112,7 @@ jobs: git config --global user.name "FastDeployCI" git config --global user.email "fastdeploy_ci@example.com" git log -n 3 --oneline + - name: FastDeploy Build shell: bash env: @@ -143,7 +144,8 @@ jobs: PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host \ - --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + --cap-add=SYS_PTRACE --shm-size=64G \ + --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache:/root/.cache" \ @@ -158,6 +160,7 @@ jobs: -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ -e "BRANCH_REF=${BRANCH_REF}" \ -e "CCACHE_MAXSIZE=50G" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c ' if [[ -n "${FD_VERSION}" ]]; then export FASTDEPLOY_VERSION=${FD_VERSION} @@ -235,3 +238,10 @@ jobs: target_path_stripped="${target_path#paddle-github-action/}" WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name} echo "wheel_path_cu129=${WHEEL_PATH}" >> $GITHUB_OUTPUT + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_build_linux_cu130.yml b/.github/workflows/_build_linux_cu130.yml index 278aff6956b..7c2aee69c6f 100644 --- a/.github/workflows/_build_linux_cu130.yml +++ b/.github/workflows/_build_linux_cu130.yml @@ -112,6 +112,7 @@ jobs: git config --global user.name "FastDeployCI" git config --global user.email "fastdeploy_ci@example.com" git log -n 3 --oneline + - name: FastDeploy Build shell: bash env: @@ -143,7 +144,8 @@ jobs: PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host \ - --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + --cap-add=SYS_PTRACE --shm-size=64G \ + --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache_cu130:/root/.cache" \ @@ -158,6 +160,7 @@ jobs: -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ -e "BRANCH_REF=${BRANCH_REF}" \ -e "CCACHE_MAXSIZE=50G" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c ' if [[ -n "${FD_VERSION}" ]]; then export FASTDEPLOY_VERSION=${FD_VERSION} @@ -235,3 +238,10 @@ jobs: target_path_stripped="${target_path#paddle-github-action/}" WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name} echo "wheel_path_cu130=${WHEEL_PATH}" >> $GITHUB_OUTPUT + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_build_linux_fd_router.yml b/.github/workflows/_build_linux_fd_router.yml index b600cc2328e..9e93290d509 100644 --- a/.github/workflows/_build_linux_fd_router.yml +++ b/.github/workflows/_build_linux_fd_router.yml @@ -107,6 +107,7 @@ jobs: git config --global user.name "FastDeployCI" git config --global user.email "fastdeploy_ci@example.com" git log -n 3 --oneline + - name: FastDeploy FD_ROUTER Build shell: bash env: @@ -137,7 +138,8 @@ jobs: PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host \ - --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + --cap-add=SYS_PTRACE --shm-size=64G \ + --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache:/root/.cache" \ @@ -151,6 +153,7 @@ jobs: -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ -e "BRANCH_REF=${BRANCH_REF}" \ -e "CCACHE_MAXSIZE=50G" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c ' if [[ -n "${FD_VERSION}" ]]; then export FASTDEPLOY_VERSION=${FD_VERSION} @@ -211,3 +214,10 @@ jobs: target_path_stripped="${target_path#paddle-github-action/}" FD_ROUTER_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/fd-router echo "fd_router_path=${FD_ROUTER_PATH}" >> $GITHUB_OUTPUT + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_build_linux_rl.yml b/.github/workflows/_build_linux_rl.yml index 9e809d59a59..1a131adb1a1 100644 --- a/.github/workflows/_build_linux_rl.yml +++ b/.github/workflows/_build_linux_rl.yml @@ -52,6 +52,7 @@ on: wheel_path_rl: description: "Output path of the generated wheel" value: ${{ jobs.fd-build-rl.outputs.wheel_path_rl }} + jobs: fd-build-rl: runs-on: [self-hosted, GPU-Build-RL] @@ -107,6 +108,7 @@ jobs: git config --global user.name "FastDeployCI" git config --global user.email "fastdeploy_ci@example.com" git log -n 3 --oneline + - name: FastDeploy Build shell: bash env: @@ -137,7 +139,8 @@ jobs: PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host \ - --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + --cap-add=SYS_PTRACE --shm-size=64G \ + --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache_rl:/root/.cache" \ @@ -151,6 +154,7 @@ jobs: -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ -e "BRANCH_REF=${BRANCH_REF}" \ -e "CCACHE_MAXSIZE=50G" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c ' if [[ -n "${FD_VERSION}" ]]; then export FASTDEPLOY_VERSION=${FD_VERSION} @@ -202,3 +206,10 @@ jobs: target_path_stripped="${target_path#paddle-github-action/}" WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name} echo "wheel_path_rl=${WHEEL_PATH}" >> $GITHUB_OUTPUT + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_golang_router_test.yml b/.github/workflows/_golang_router_test.yml index 4964f3a3a05..bbb1bc7799f 100644 --- a/.github/workflows/_golang_router_test.yml +++ b/.github/workflows/_golang_router_test.yml @@ -76,12 +76,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -191,6 +206,7 @@ jobs: -e "fd_router_url=${fd_router_url}" \ -e "BASE_REF=${BASE_REF}" \ -e "IS_PR=${IS_PR}" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c ' git config --global --add safe.directory /workspace/FastDeploy @@ -211,3 +227,10 @@ jobs: bash scripts/run_golang_router.sh ' + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_gpu_4cards_case_test.yml b/.github/workflows/_gpu_4cards_case_test.yml index 02a16b8b93b..5c9a51aa809 100644 --- a/.github/workflows/_gpu_4cards_case_test.yml +++ b/.github/workflows/_gpu_4cards_case_test.yml @@ -81,12 +81,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -186,6 +201,7 @@ jobs: -e "fd_wheel_url=${fd_wheel_url}" \ -e "BASE_REF=${BASE_REF}" \ -e "IS_PR=${IS_PR}" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -c ' git config --global --add safe.directory /workspace/FastDeploy @@ -204,3 +220,10 @@ jobs: export CUDA_VISIBLE_DEVICES=0,1,2,3 bash scripts/run_gpu_4cards.sh ' + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_logprob_test_linux.yml b/.github/workflows/_logprob_test_linux.yml index 47486cef243..0a014d26854 100644 --- a/.github/workflows/_logprob_test_linux.yml +++ b/.github/workflows/_logprob_test_linux.yml @@ -78,11 +78,27 @@ jobs: if ls /workspace/* >/dev/null 2>&1; then echo "ERROR: Failed to clean /workspace/* after multiple attempts" ls -ld /workspace/* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls /workspace/* >/dev/null 2>&1; then + echo "ERROR: Force cleanup failed. Exiting..." + exit 1 + else + echo "Force cleanup succeeded." + fi fi ' - wget -q --no-proxy ${paddletest_archive_url} - tar -xf PaddleTest.tar.gz + + wget -q --no-proxy ${paddletest_archive_url} || { + echo "ERROR: Failed to download archive from ${paddletest_archive_url}" + exit 1 + } + + tar --no-same-owner -xf PaddleTest.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf PaddleTest.tar.gz cd PaddleTest git config --global user.name "FastDeployCI" @@ -171,6 +187,7 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ @@ -223,3 +240,10 @@ jobs: run: | echo "logprob test failed with exit code ${{ env.LOGPROB_EXIT_CODE }}" exit 8 + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_pre_ce_test.yml b/.github/workflows/_pre_ce_test.yml index 9e313606a36..8c5e20e2de0 100644 --- a/.github/workflows/_pre_ce_test.yml +++ b/.github/workflows/_pre_ce_test.yml @@ -83,12 +83,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -182,6 +197,7 @@ jobs: -e "FD_ZMQ_SEND_RESPONSE_SERVER_PORT=${FD_ZMQ_SEND_RESPONSE_SERVER_PORT}" \ -e "FD_ZMQ_CONTROL_CMD_SERVER_PORTS=${FD_ZMQ_CONTROL_CMD_SERVER_PORTS}" \ -e "fd_wheel_url=${fd_wheel_url}" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c ' git config --global --add safe.directory /workspace/FastDeploy cd FastDeploy @@ -189,3 +205,10 @@ jobs: python -m pip install ${fd_wheel_url} bash scripts/run_pre_ce.sh ' + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_stable_test.yml b/.github/workflows/_stable_test.yml index dd4ce4e811d..11ae14927ef 100644 --- a/.github/workflows/_stable_test.yml +++ b/.github/workflows/_stable_test.yml @@ -81,12 +81,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -176,6 +191,7 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/ @@ -221,3 +237,10 @@ jobs: fi echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" exit ${TEST_EXIT_CODE} + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_unit_test_coverage.yml b/.github/workflows/_unit_test_coverage.yml index 75aef3e937a..4096fe4f4c0 100644 --- a/.github/workflows/_unit_test_coverage.yml +++ b/.github/workflows/_unit_test_coverage.yml @@ -86,12 +86,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -178,10 +193,12 @@ jobs: --sysctl kernel.msgmnb=268435456 \ --name ${runner_name} \ --cap-add=SYS_PTRACE --cap-add=IPC_LOCK \ - --shm-size=64G \ + --shm-size=128G \ ${RDMA_DEVICES} \ --device=/dev/infiniband/rdma_cm \ --ulimit memlock=-1:-1 \ + --ulimit nofile=65536:65536 \ + --ulimit nproc=8192:8192 \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache:/root/.cache" \ @@ -201,6 +218,7 @@ jobs: -e "fd_wheel_url=${fd_wheel_url}" \ -e "BASE_REF=${BASE_REF}" \ -e "IS_PR=${IS_PR}" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c ' git config --global --add safe.directory /workspace/FastDeploy @@ -388,6 +406,13 @@ jobs: echo "coverage passed" exit 0 + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} + diff_coverage_report: needs: run_tests_with_coverage if: always() diff --git a/scripts/run_pre_ce.sh b/scripts/run_pre_ce.sh index 8eafe280346..928aa2e7cef 100644 --- a/scripts/run_pre_ce.sh +++ b/scripts/run_pre_ce.sh @@ -7,7 +7,11 @@ python -m pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/p python -m pip install -r requirements.txt python -m pip install jsonschema aistudio_sdk==0.3.5 -python -m pip install xgrammar==0.1.19 torch==2.6.0 +# Use prebuilt wheel files to install xgrammar==0.1.19 and torch==2.6.0 specifically for the CI environment +python -m pip install \ + https://paddle-qa.bj.bcebos.com/FastDeploy/torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl \ + https://paddle-qa.bj.bcebos.com/FastDeploy/triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl \ + https://paddle-qa.bj.bcebos.com/FastDeploy/xgrammar-0.1.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl failed_files=() run_path="$DIR/../tests/ci_use/"