55from typing import List , Tuple , Optional
66import requests
77
8- from bugzooka .core .constants import MAX_CONTEXT_SIZE
8+ from bugzooka .core .constants import GCSWEB_BASE_URL , MAX_CONTEXT_SIZE
99from bugzooka .analysis .prompts import ERROR_SUMMARIZATION_PROMPT
1010from bugzooka .analysis .failure_keywords import FAILURE_KEYWORDS
1111from bugzooka .core .utils import (
1212 download_file_from_gcs ,
13+ extract_gcs_path ,
1314 filter_most_frequent_errors ,
15+ gcs_basename ,
1416 list_gcs_files ,
1517 run_shell_command ,
18+ strip_step_prefixes ,
1619)
1720
1821logger = logging .getLogger (__name__ )
@@ -56,9 +59,9 @@ def get_prow_inner_artifact_files(gcs_path):
5659 # Identify nested log folder (match last segment with gcs_path)
5760 log_folder = next (
5861 (
59- f . strip ( "/" ). split ( "/" )[ - 1 ]
62+ gcs_basename ( f )
6063 for f in top_files
61- if f . strip ( "/" ). split ( "/" )[ - 1 ] in gcs_path
64+ if gcs_basename ( f ) in gcs_path
6265 ),
6366 None ,
6467 )
@@ -71,34 +74,78 @@ def get_prow_inner_artifact_files(gcs_path):
7174 return log_folder_path , inner_files
7275
7376
74- def download_prow_orion_jsons (gcs_path , output_dir ):
def list_orion_step_dirs(gcs_path, exclude_report=False):
    """
    Discover orion step directories under a Prow job's artifacts.

    Only entries returned by get_prow_inner_artifact_files that end with
    "/" (after trailing whitespace is stripped) are considered, and only
    those whose basename contains "orion" are kept.

    :param gcs_path: raw GCS path (no gs:// prefix)
    :param exclude_report: if True, skip folders containing 'orion-report'
    :return: list of (folder_name, step_artifacts_gcs_path) tuples; empty
        list when the log folder cannot be resolved
    """
    log_folder_path, inner_files = get_prow_inner_artifact_files(gcs_path)
    if not log_folder_path:
        return []

    def _is_wanted(name):
        # Keep orion folders; optionally drop the aggregated report step.
        if "orion" not in name:
            return False
        return not (exclude_report and "orion-report" in name)

    # Directory entries are recognised by their trailing slash.
    dir_names = (
        gcs_basename(entry)
        for entry in inner_files
        if entry.rstrip().endswith("/")
    )
    return [
        (name, f"{log_folder_path}{name}/artifacts/")
        for name in dir_names
        if _is_wanted(name)
    ]
100+
101+
def download_prow_orion_jsons(step_dirs, output_dir):
    """
    Downloads orion jsons from pre-discovered step directories.

    Creates a subdirectory per step using strip_step_prefixes so that
    scan_orion_jsons can identify which workload each JSON belongs to
    using the same names as the visualization URLs.

    Every file whose basename ends with ".json" is downloaded, except
    "prowjob.json". A subprocess failure while handling one step is logged
    and does not prevent the remaining steps from being processed.

    :param step_dirs: list of (folder, artifacts_gcs_path) tuples
    :param output_dir: output directory to store artifacts
    :return: None
    """
    for folder, step_artifacts in step_dirs:
        # Isolate failures per step: one broken listing/download must not
        # abort the downloads for every other orion step.
        try:
            step_dir = os.path.join(output_dir, strip_step_prefixes(folder))
            os.makedirs(step_dir, exist_ok=True)
            for gcs_file in list_gcs_files(step_artifacts):
                basename = gcs_basename(gcs_file)
                if basename.endswith(".json") and basename != "prowjob.json":
                    download_file_from_gcs(gcs_file, step_dir)
        except subprocess.CalledProcessError as e:
            logger.error("Error processing Orion JSONs: %s", e.stderr)
90127
91- orion_jsons = []
92- for folder in orion_folders :
93- json_path = f"{ log_folder_path } { folder } /artifacts/"
94- json_files = list_gcs_files (json_path )
95- orion_jsons .extend (f for f in json_files if f .endswith (".json" ))
96128
97- for json_url in orion_jsons :
98- download_file_from_gcs (json_url , output_dir )
def download_prow_orion_report_summary(step_dirs, output_dir):
    """
    Download orion-report-summary.txt from the report step's artifacts.

    Only steps whose folder name contains "orion-report" are inspected.
    The function returns as soon as the first matching summary file has
    been downloaded. A subprocess failure is logged, not raised.

    :param step_dirs: list of (folder, artifacts_gcs_path) tuples
                      (should include the report step)
    :param output_dir: output directory to store the summary file
    :return: None
    """
    summary_name = "orion-report-summary.txt"
    try:
        report_artifact_paths = (
            artifacts_path
            for folder_name, artifacts_path in step_dirs
            if "orion-report" in folder_name
        )
        for artifacts_path in report_artifact_paths:
            summary_file = next(
                (
                    entry
                    for entry in list_gcs_files(artifacts_path)
                    if gcs_basename(entry) == summary_name
                ),
                None,
            )
            if summary_file is not None:
                download_file_from_gcs(summary_file, output_dir)
                return
    except subprocess.CalledProcessError as e:
        logger.error("Error downloading orion report summary: %s", e.stderr)
102149
103150
104151def download_prow_cluster_operators (gcs_path , output_dir ):
@@ -137,7 +184,7 @@ def download_prow_logs(url, output_dir="/tmp/"):
137184 if "view/gs/" not in url :
138185 raise ValueError ("Invalid Prow URL: GCS path not found." )
139186
140- gcs_path = url . split ( "view/gs/" )[ 1 ]
187+ gcs_path = extract_gcs_path ( url )
141188
142189 log_dir = os .path .join (output_dir , build_id )
143190 orion_dir = os .path .join (log_dir , "orion" )
@@ -147,51 +194,92 @@ def download_prow_logs(url, output_dir="/tmp/"):
147194 download_prow_build_log (gcs_path , log_dir )
148195 download_prow_junit_operator_xml (gcs_path , log_dir )
149196 download_prow_cluster_operators (gcs_path , log_dir )
150- download_prow_orion_jsons (gcs_path , orion_dir )
197+
198+ # Discover orion step dirs once to avoid redundant GCS listings
199+ all_step_dirs = list_orion_step_dirs (gcs_path )
200+ individual_dirs = [(f , p ) for f , p in all_step_dirs if "orion-report" not in f ]
201+ download_prow_orion_jsons (individual_dirs , orion_dir )
202+ download_prow_orion_report_summary (all_step_dirs , log_dir )
151203
152204 return log_dir
153205
154206
def construct_visualization_url(view_url, step_name):
    """
    Build gcsweb URL(s) pointing to visualization artifacts.

    Dispatches on the step name: when it is non-empty and contains
    'orion-report' (the deferred report step) the work is delegated to
    _construct_deferred_viz_urls; every other step name, including an
    empty or None one, is delegated to _construct_single_viz_url.

    :param view_url: prow view URL
    :param step_name: raw step name from junit_operator.xml
    :return: whatever the selected helper returns (a single URL string,
        a dict of test name to URL, or None)
    """
    is_report_step = bool(step_name) and "orion-report" in step_name
    if not is_report_step:
        return _construct_single_viz_url(view_url, step_name)
    return _construct_deferred_viz_urls(view_url)
223+
224+
def _construct_deferred_viz_urls(view_url):
    """
    Collect visualization URLs for the deferred orion-report step.

    Walks every non-report orion step directory, lists its artifacts and,
    when at least one ".html" file is present, records a gcsweb URL for
    the first one under the step's stripped name.

    :param view_url: prow view URL
    :return: dict of {test_name: url}, or None when nothing was found or
        an unexpected error occurred (the error is logged)
    """
    try:
        gcs_path = extract_gcs_path(view_url)
        urls_by_test = {}
        step_dirs = list_orion_step_dirs(gcs_path, exclude_report=True)
        for step_folder, artifacts_gcs in step_dirs:
            try:
                listing = list_gcs_files(artifacts_gcs)
            except Exception:
                # Best effort: a step whose artifacts cannot be listed is skipped.
                continue

            html_entries = [entry for entry in listing if entry.endswith(".html")]
            if not html_entries:
                continue

            first_html = gcs_basename(html_entries[0])
            test_name = strip_step_prefixes(step_folder)
            # gcsweb paths carry no gs:// scheme, so strip it from the GCS path.
            artifacts_url = f"{GCSWEB_BASE_URL}{artifacts_gcs.replace('gs://', '')}"
            urls_by_test[test_name] = f"{artifacts_url}{first_html}"

        return urls_by_test or None
    except Exception as e:
        logger.error("Failed to construct deferred viz URLs: %s", e)
        return None
250+
251+
252+ def _construct_single_viz_url (view_url , step_name ):
253+ """
254+ Build a gcsweb URL pointing to a single step's viz HTML.
162255 """
163256 try :
164- gcs_path = view_url .split ("view/gs/" )[1 ]
165- base = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/"
257+ gcs_path = extract_gcs_path (view_url )
166258 artifact_root = f"gs://{ gcs_path } /artifacts/"
167259 top_folders = list_gcs_files (artifact_root )
168260
169- # Find the folder that actually contains the step as a subfolder.
170- # The junit step_name often includes the log_folder as a prefix
171- # (e.g. "payload-control-plane-6nodes-openshift-qe-orion-udn-density")
172- # while the GCS folder is just "openshift-qe-orion-udn-density".
173261 for entry in top_folders :
174262 if not entry .rstrip ().endswith ("/" ):
175263 continue
176- folder = entry .strip ("/" ).split ("/" )[- 1 ]
177- # Try with prefix stripped first, then the raw step_name
264+ folder = gcs_basename (entry )
178265 candidates = [step_name ]
179266 prefix = folder + "-"
180267 if step_name .startswith (prefix ):
181- candidates .insert (0 , step_name [len (prefix ) :])
268+ candidates .insert (0 , step_name [len (prefix ):])
182269 for candidate in candidates :
183270 step_artifacts = f"{ artifact_root } { folder } /{ candidate } /artifacts/"
184271 try :
185272 files = list_gcs_files (step_artifacts )
186273 except Exception :
187274 continue
188- artifacts_path = f"{ gcs_path } /artifacts/{ folder } /{ candidate } /artifacts/"
275+ artifacts_url = (
276+ f"{ GCSWEB_BASE_URL } { gcs_path } /artifacts/"
277+ f"{ folder } /{ candidate } /artifacts/"
278+ )
189279 html_files = [f for f in files if f .endswith (".html" )]
190280 if html_files :
191- html_name = html_files [0 ].strip ("/" ).split ("/" )[- 1 ]
192- return f"{ base } { artifacts_path } { html_name } "
193- return f"{ base } { artifacts_path } "
194-
281+ return f"{ artifacts_url } { gcs_basename (html_files [0 ])} "
282+ return artifacts_url
195283 return None
196284 except Exception as e :
197285 logger .error ("Failed to construct visualization URL: %s" , e )
0 commit comments