Skip to content

Commit 07f8672

Browse files
EmmaQiaoCh and chzblych authored
[TRTLLM-9132][infra] Update to ignore failure for release check and building images (#9871)
Signed-off-by: qqiao <qqiao@nvidia.com> Signed-off-by: Emma Qiao <qqiao@nvidia.com> Co-authored-by: Yanchao Lu <yanchaol@nvidia.com>
1 parent 48e283e commit 07f8672

2 files changed

Lines changed: 69 additions & 48 deletions

File tree

jenkins/BuildDockerImage.groovy

Lines changed: 34 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -706,39 +706,41 @@ pipeline {
706706
}
707707
steps {
708708
script {
709-
container("python3") {
710-
trtllm_utils.llmExecStepWithRetry(this, script: "pip3 install --upgrade pip")
711-
trtllm_utils.llmExecStepWithRetry(this, script: "pip3 install --upgrade requests")
712-
def nspect_commit = "4cb9c0c42d44ebeeba1e40d2c3eb6aab6fb90173"
713-
def override_commit = env."NSPECT_OVERRIDE_${nspect_commit}"
714-
if (override_commit) {
715-
echo "Overriding nspect_commit with value from environment variable \$NSPECT_OVERRIDE_${nspect_commit}: ${override_commit}"
709+
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
710+
container("python3") {
711+
trtllm_utils.llmExecStepWithRetry(this, script: "pip3 install --upgrade pip")
712+
trtllm_utils.llmExecStepWithRetry(this, script: "pip3 install --upgrade requests")
713+
def nspect_commit = "4cb9c0c42d44ebeeba1e40d2c3eb6aab6fb90173"
714+
def override_commit = env."NSPECT_OVERRIDE_${nspect_commit}"
715+
if (override_commit) {
716+
echo "Overriding nspect_commit with value from environment variable \$NSPECT_OVERRIDE_${nspect_commit}: ${override_commit}"
716717
nspect_commit = override_commit
717-
}
718-
withCredentials([string(credentialsId: "TRTLLM_NSPECT_REPO", variable: "NSPECT_REPO")]) {
719-
trtllm_utils.checkoutSource("${NSPECT_REPO}", nspect_commit, "nspect")
720-
}
721-
def nspect_env = params.nspect_env ? params.nspect_env : "prod"
722-
def program_version_name = params.program_version_name ? params.program_version_name : "PostMerge"
723-
def cmd = """./nspect/nspect.py \
724-
--env ${nspect_env} \
725-
--nspect_id ${params.nspect_id} \
726-
--program_version_name '${program_version_name}' \
727-
"""
728-
if (params.register_images) {
729-
cmd += "--register "
730-
}
731-
if (params.osrb_ticket) {
732-
cmd += "--osrb_ticket ${params.osrb_ticket} "
733-
}
734-
if (params.wait_success_seconds) {
735-
cmd += "--check_launch_api "
736-
cmd += "--wait_success ${params.wait_success_seconds} "
737-
}
738-
cmd += "--image "
739-
cmd += imageKeyToTag.values().join(" ")
740-
withCredentials([usernamePassword(credentialsId: "NSPECT_CLIENT-${nspect_env}", usernameVariable: 'NSPECT_CLIENT_ID', passwordVariable: 'NSPECT_CLIENT_SECRET')]) {
741-
trtllm_utils.llmExecStepWithRetry(this, script: cmd, sleepInSecs: 600, numRetries: 6, shortCommondRunTimeMax: 7200)
718+
}
719+
withCredentials([string(credentialsId: "TRTLLM_NSPECT_REPO", variable: "NSPECT_REPO")]) {
720+
trtllm_utils.checkoutSource("${NSPECT_REPO}", nspect_commit, "nspect")
721+
}
722+
def nspect_env = params.nspect_env ? params.nspect_env : "prod"
723+
def program_version_name = params.program_version_name ? params.program_version_name : "PostMerge"
724+
def cmd = """./nspect/nspect.py \
725+
--env ${nspect_env} \
726+
--nspect_id ${params.nspect_id} \
727+
--program_version_name '${program_version_name}' \
728+
"""
729+
if (params.register_images) {
730+
cmd += "--register "
731+
}
732+
if (params.osrb_ticket) {
733+
cmd += "--osrb_ticket ${params.osrb_ticket} "
734+
}
735+
if (params.wait_success_seconds) {
736+
cmd += "--check_launch_api "
737+
cmd += "--wait_success ${params.wait_success_seconds} "
738+
}
739+
cmd += "--image "
740+
cmd += imageKeyToTag.values().join(" ")
741+
withCredentials([usernamePassword(credentialsId: "NSPECT_CLIENT-${nspect_env}", usernameVariable: 'NSPECT_CLIENT_ID', passwordVariable: 'NSPECT_CLIENT_SECRET')]) {
742+
trtllm_utils.llmExecStepWithRetry(this, script: cmd, sleepInSecs: 600, numRetries: 0, shortCommondRunTimeMax: 7200)
743+
}
742744
}
743745
}
744746
}

jenkins/L0_MergeRequest.groovy

Lines changed: 35 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,8 @@ STAGE_CHOICE_NORMAL = "normal"
5252
STAGE_CHOICE_SKIP = "skip"
5353
STAGE_CHOICE_IGNORE = "ignore"
5454

55-
RELESE_CHECK_CHOICE = env.releaseCheckChoice ? env.releaseCheckChoice : STAGE_CHOICE_NORMAL
55+
RELEASE_CHECK_CHOICE = env.releaseCheckChoice ? env.releaseCheckChoice : STAGE_CHOICE_NORMAL
56+
BUILD_CHECK_CHOICE = env.buildCheckChoice ? env.buildCheckChoice : STAGE_CHOICE_NORMAL
5657
X86_TEST_CHOICE = env.x86TestChoice ? env.x86TestChoice : STAGE_CHOICE_NORMAL
5758
SBSA_TEST_CHOICE = env.SBSATestChoice ? env.SBSATestChoice : STAGE_CHOICE_NORMAL
5859

@@ -437,10 +438,14 @@ def launchReleaseCheck(pipeline, globalVars)
437438
sh "cd ${LLM_ROOT} && confidentiality-scan \$(find . -type f ${ignoreList.collect { "-not -path \"${it}\"" }.join(' ')}) 2>&1 | tee scan.log"
438439
def lastLine = sh(script: "tail -n 1 ${LLM_ROOT}/scan.log", returnStdout: true).trim()
439440
if (lastLine.toLowerCase().contains("error")) {
440-
error "Guardwords Scan Failed."
441+
error "GUARDWORDS_WARN: Guardwords Scan Failed."
441442
}
442-
} catch (Exception e) {
443+
} catch (InterruptedException e) {
443444
throw e
445+
} catch (Exception e) {
446+
catchError(buildResult: 'SUCCESS', stageResult: 'UNSTABLE') {
447+
error "Release Check failed (warn-only): ${e.getMessage()}"
448+
}
444449
} finally {
445450
trtllm_utils.uploadArtifacts("${LLM_ROOT}/scan.log", "${UPLOAD_PATH}/guardwords-scan-results/")
446451
echo "Guardwords Scan Results: https://urm.nvidia.com/artifactory/${UPLOAD_PATH}/guardwords-scan-results/scan.log"
@@ -488,7 +493,7 @@ def launchReleaseCheck(pipeline, globalVars)
488493
stageName = "Release-Check"
489494
trtllm_utils.launchKubernetesPod(pipeline, createKubernetesPodConfig(image, "package"), "trt-llm", {
490495
stage("[${stageName}] Run") {
491-
if (RELESE_CHECK_CHOICE == STAGE_CHOICE_SKIP) {
496+
if (RELEASE_CHECK_CHOICE == STAGE_CHOICE_SKIP) {
492497
echo "Release Check job is skipped due to Jenkins configuration"
493498
return
494499
}
@@ -498,7 +503,7 @@ def launchReleaseCheck(pipeline, globalVars)
498503
} catch (InterruptedException e) {
499504
throw e
500505
} catch (Exception e) {
501-
if (RELESE_CHECK_CHOICE == STAGE_CHOICE_IGNORE) {
506+
if (RELEASE_CHECK_CHOICE == STAGE_CHOICE_IGNORE) {
502507
catchError(
503508
buildResult: 'SUCCESS',
504509
stageResult: 'FAILURE') {
@@ -1275,19 +1280,33 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
12751280
script {
12761281
def testStageName = "[Build-Docker-Images] Remote Run"
12771282
stage(testStageName) {
1278-
def branch = env.gitlabBranch ? env.gitlabBranch : "main"
1279-
if (globalVars[GITHUB_PR_API_URL]) {
1280-
branch = "github-pr-" + globalVars[GITHUB_PR_API_URL].split('/').last()
1281-
}
1283+
try {
1284+
def branch = env.gitlabBranch ? env.gitlabBranch : "main"
1285+
if (globalVars[GITHUB_PR_API_URL]) {
1286+
branch = "github-pr-" + globalVars[GITHUB_PR_API_URL].split('/').last()
1287+
}
12821288

1283-
def additionalParameters = [
1284-
'branch': branch,
1285-
'action': "push",
1286-
'triggerType': env.JOB_NAME ==~ /.*PostMerge.*/ ? "post-merge" : "pre-merge",
1287-
'runSanityCheck': env.JOB_NAME ==~ /.*PostMerge.*/ ? true : false,
1288-
]
1289+
def additionalParameters = [
1290+
'branch': branch,
1291+
'action': "push",
1292+
'triggerType': env.JOB_NAME ==~ /.*PostMerge.*/ ? "post-merge" : "pre-merge",
1293+
'runSanityCheck': env.JOB_NAME ==~ /.*PostMerge.*/ ? true : false,
1294+
]
12891295

1290-
launchJob(pipeline, "/LLM/helpers/BuildDockerImages", false, enableFailFast, globalVars, "x86_64", additionalParameters)
1296+
launchJob(pipeline, "/LLM/helpers/BuildDockerImages", false, enableFailFast, globalVars, "x86_64", additionalParameters)
1297+
} catch (InterruptedException e) {
1298+
throw e
1299+
} catch (Exception e) {
1300+
if (BUILD_CHECK_CHOICE == STAGE_CHOICE_IGNORE) {
1301+
catchError(
1302+
buildResult: 'SUCCESS',
1303+
stageResult: 'FAILURE') {
1304+
error "Build-Docker-Images job failed but ignored due to Jenkins configuration"
1305+
}
1306+
} else {
1307+
throw e
1308+
}
1309+
}
12911310
}
12921311
}
12931312
}

0 commit comments

Comments
 (0)