@@ -16,13 +16,18 @@ name: Approve Test Queue
1616
1717on :
1818 schedule :
19- - cron : ' */5 * * * *' # Runs every 5 minutes
20- workflow_dispatch : # Allows manual triggering
19+ - cron : " */5 * * * *" # Runs every 5 minutes
20+ workflow_dispatch : # Allows manual triggering
2121
2222jobs :
2323 approve-queue :
2424 runs-on : ubuntu-latest
2525 environment : main
26+ if : github.repository == 'NVIDIA-NeMo/Automodel'
27+ strategy :
28+ matrix :
29+ branch : [main, others, workflow_dispatch]
30+ contributor_type : [internal, external]
2631 steps :
2732 - name : Checkout repository
2833 uses : actions/checkout@v6
@@ -37,22 +42,53 @@ jobs:
3742 python -m pip install --upgrade pip
3843 pip install requests
3944
45+ - name : Download SSO users list
46+ run : |
47+ gh release download v0.1.0 \
48+ --repo NVIDIA-GitHub-Management/github-audits \
49+ --pattern users_sso.json \
50+ --output users_sso.json || echo '{}' > users_sso.json
51+ env :
52+ GH_TOKEN : ${{ secrets.NVIDIA_MANAGEMENT_ORG_PAT }}
53+
4054 - name : Approve waiting deployments
4155 env :
4256 GITHUB_TOKEN : ${{ secrets.PAT }}
4357 MAX_CONCURRENCY : ${{ vars.MAX_CONCURRENCY || 1 }}
58+ MAX_CONCURRENCY_EXTERNAL : ${{ vars.MAX_CONCURRENCY_EXTERNAL || 3 }}
59+ MAX_CONCURRENCY_WORKFLOW_DISPATCH : ${{ vars.MAX_CONCURRENCY || 1 }}
60+ CONTRIBUTOR_TYPE : ${{ matrix.contributor_type }}
61+ MATRIX_BRANCH : ${{ matrix.branch }}
62+ SSO_USERS_FILE : users_sso.json
63+ PYTHONUNBUFFERED : 1
64+ shell : python
4465 run : |
45- python - <<EOF
4666 import os
67+ import json
4768 import requests
69+ import re
4870 import time
4971
50-
5172 # GitHub API configuration
5273 GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
5374 REPO = os.environ["GITHUB_REPOSITORY"]
54- MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY"])
55- API_BASE = f"https://api.github.com/repos/{REPO}"
75+ CONTRIBUTOR_TYPE = os.environ["CONTRIBUTOR_TYPE"]
76+ MATRIX_BRANCH = os.environ["MATRIX_BRANCH"]
77+ if MATRIX_BRANCH == "workflow_dispatch":
78+ MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY_WORKFLOW_DISPATCH"])
79+ API_BASE = f"https://api.github.com/repos/{REPO}"
80+ WORKFLOW_NAME = "CICD NeMo"
81+ else:
82+ if CONTRIBUTOR_TYPE == "external":
83+ MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY_EXTERNAL"])
84+ else:
85+ MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY"])
86+ API_BASE = "https://api.github.com/repos/NVIDIA-NeMo/Automodel"
87+ WORKFLOW_NAME = "CICD NeMo"
88+
89+ # Load SSO users for internal/external classification
90+ with open(os.environ["SSO_USERS_FILE"]) as f:
91+ sso_users = json.load(f)
5692
5793 # Headers for GitHub API
5894 headers = {
@@ -94,7 +130,91 @@ jobs:
94130 print(f"Max retries ({max_retries}) exceeded for {endpoint}")
95131 return None
96132
133+ def is_internal_contributor(pr_info):
134+ """Return True if the PR author is a member of NVIDIA or NVIDIA-NeMo org (is_org_member)."""
135+ login = pr_info.get("user", {}).get("login", "")
136+ org_roles = sso_users.get(login, {}).get("org_roles", [])
137+ return any(role in ("NVIDIA:Member", "NVIDIA-NeMo:Member") for role in org_roles)
138+
139+ def get_pr_base_branch(workflow_run):
140+ """
141+ Return the base branch of the PR associated with a workflow run, or None.
142+ Extracts PR number from head branch like 'pull-request/1913' and fetches PR info.
143+ Returns (base_branch, pr_info) tuple, or (None, None) if not a PR run.
144+ """
145+ print(workflow_run.get("head_branch", ""))
146+ head_branch = workflow_run.get("head_branch", "")
147+ match = re.match(r"pull-request/(\d+)", head_branch)
148+ if not match:
149+ return None, None # Not a PR branch pattern
150+
151+ pr_number = int(match.group(1))
152+
153+ # Fetch PR info from GitHub API
154+ pr_info = make_request(f"pulls/{pr_number}")
155+ if not pr_info:
156+ print(f"Failed to fetch PR #{pr_number}")
157+ return None, None
158+
159+ base_branch = pr_info.get("base", {}).get("ref")
160+ return base_branch, pr_info
161+
162+ def is_internal_actor(workflow_run):
163+ """Return True if the actor who triggered the workflow run is an NVIDIA/NVIDIA-NeMo member."""
164+ login = (workflow_run.get("triggering_actor") or workflow_run.get("actor") or {}).get("login", "")
165+ org_roles = sso_users.get(login, {}).get("org_roles", [])
166+ return any(role in ("NVIDIA:Member", "NVIDIA-NeMo:Member") for role in org_roles)
167+
168+ def is_pr_run(workflow_run):
169+ """Return True if this run was triggered by a PR (head_branch matches pull-request/<number>)."""
170+ return bool(re.match(r"pull-request/\d+", workflow_run.get("head_branch", "")))
171+
172+ def is_workflow_dispatch_run(workflow_run):
173+ """Return True if this run was manually triggered (head_branch starts with mcore-testing-)."""
174+ return workflow_run.get("head_branch", "").startswith("mcore-testing-")
175+
176+ def matches_queue(workflow_run, target_branch, contributor_type):
177+ """
178+ Return True if the workflow run belongs to this queue cell:
179+ matching target branch AND matching contributor type (internal/external).
180+
181+ workflow_dispatch runs (head_branch: mcore-testing-*) are routed to the 'workflow_dispatch' queue only.
182+ PR runs (head_branch: pull-request/<n>) are routed to 'main' or 'others' queues only.
183+ """
184+ if target_branch == "workflow_dispatch":
185+ if not is_workflow_dispatch_run(workflow_run):
186+ return False
187+ internal = is_internal_actor(workflow_run)
188+ contributor_match = (contributor_type == "internal") == internal
189+ if contributor_match:
190+ actor = (workflow_run.get("triggering_actor") or workflow_run.get("actor") or {}).get("login", "unknown")
191+ print(f"workflow_dispatch run by {actor}, contributor_type={contributor_type} (internal={internal})")
192+ return contributor_match
193+
194+ # PR queue: skip non-PR runs
195+ if not is_pr_run(workflow_run):
196+ return False
197+
198+ base_branch, pr_info = get_pr_base_branch(workflow_run)
199+ if base_branch is None:
200+ return False
201+
202+ branch_match = (
203+ (base_branch == target_branch) or
204+ (base_branch != "main" and base_branch != "dev" and target_branch == "others")
205+ )
206+ if not branch_match:
207+ return False
208+
209+ pr_number = re.match(r"pull-request/(\d+)", workflow_run.get("head_branch", "")).group(1)
210+ internal = is_internal_contributor(pr_info)
211+ contributor_match = (contributor_type == "internal") == internal
212+ if branch_match and contributor_match:
213+ print(f"PR #{pr_number} targets {target_branch}, contributor_type={contributor_type} (internal={internal})")
214+ return branch_match and contributor_match
215+
97216 # Get current running and queued workflows
217+ print(f"\n=== Queue cell: branch=${{ matrix.branch }}, contributor_type={CONTRIBUTOR_TYPE} ===")
98218 print("Fetching workflow runs...")
99219 queued_resp = make_request("actions/runs?status=queued")
100220 if queued_resp is None:
@@ -107,13 +227,25 @@ jobs:
107227 exit(1)
108228 in_progress_workflow_runs = in_progress_resp.get("workflow_runs", [])
109229
230+ def log_and_filter(runs, label):
231+ cicd_runs = [r for r in runs if r["name"] == WORKFLOW_NAME]
232+ print(f"{label}: {len(runs)} total, {len(cicd_runs)} {WORKFLOW_NAME}")
233+ for r in cicd_runs:
234+ actor = (r.get("triggering_actor") or r.get("actor") or {}).get("login", "unknown")
235+ matched = matches_queue(r, "${{ matrix.branch }}", CONTRIBUTOR_TYPE)
236+ print(f" run={r['id']} head_branch={r.get('head_branch')} event={r.get('event')} actor={actor} -> matched={matched}")
237+ return [r for r in cicd_runs if matches_queue(r, "${{ matrix.branch }}", CONTRIBUTOR_TYPE)]
238+
239+ queued_workflow_runs = log_and_filter(queued_workflow_runs, "queued")
240+ in_progress_workflow_runs = log_and_filter(in_progress_workflow_runs, "in_progress")
241+
110242 # Count running and queued workflows
111- queued_workflows = sum(1 for run in queued_workflow_runs if run["name"] == "CICD NeMo" )
112- in_progress_workflows = sum(1 for run in in_progress_workflow_runs if run["name"] == "CICD NeMo" )
243+ queued_workflows = len( queued_workflow_runs)
244+ in_progress_workflows = len( in_progress_workflow_runs)
113245
114246 total_workflows = queued_workflows + in_progress_workflows
115- print(f"Current queued workflows: {queued_workflows}")
116- print(f"Current running workflows: {in_progress_workflows}")
247+ print(f"Current queued workflows (PRs targeting ${{ matrix.branch }}, {CONTRIBUTOR_TYPE}) : {queued_workflows}")
248+ print(f"Current running workflows (PRs targeting ${{ matrix.branch }}, {CONTRIBUTOR_TYPE}) : {in_progress_workflows}")
117249 print(f"Total workflows: {total_workflows}")
118250 print(f"Max concurrency: {MAX_CONCURRENCY}")
119251
@@ -122,20 +254,19 @@ jobs:
122254 exit(0)
123255
124256 # Get waiting CI workflows for test environment
125- print("Fetching deployments...")
257+ print("Fetching waiting deployments...")
126258 waiting_resp = make_request("actions/runs?status=waiting")
127259 if waiting_resp is None:
128260 print("Failed to fetch waiting workflow runs after retries, exiting")
129261 exit(1)
130- pending_workflows = waiting_resp.get("workflow_runs", [])
131- pending_workflows = [run for run in pending_workflows if run["name"] == "CICD NeMo"]
262+ pending_workflows = log_and_filter(waiting_resp.get("workflow_runs", []), "waiting")
132263
133264 # Sort deployments by creation date (oldest first)
134265 print("Sorting workflows...")
135266 pending_workflows = sorted(pending_workflows, key=lambda x: x["created_at"])
136267
137268 # Process each deployment
138- print("Processing ...")
269+ print(f "Processing {len(pending_workflows)} pending workflows ...")
139270 for workflow in pending_workflows:
140271 if total_workflows >= MAX_CONCURRENCY:
141272 print("Maximum concurrency reached, stopping approvals")
@@ -166,8 +297,6 @@ jobs:
166297 else:
167298 print(f"Failed to approve deployment {deployment['id']}")
168299 exit(1)
169-
170- EOF
171300 notify :
172301 if : failure()
173302 runs-on : ubuntu-latest
0 commit comments