Skip to content

Commit f4633fc

Browse files
authored
Merge pull request #103 from mohit-sheth/deferred-orion-report-support
add deferred orion-report step support for changepoint detection
2 parents 713e099 + 850605c commit f4633fc

7 files changed

Lines changed: 289 additions & 128 deletions

File tree

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,6 @@ BugZooka/
387387
│ └── analysis/ # Log analysis and processing
388388
│ ├── __init__.py
389389
│ ├── failure_keywords.py # Failure pattern detection
390-
│ ├── jsonparser.py # JSON parsing utilities
391390
│ ├── log_analyzer.py # Main log analysis orchestration
392391
│ ├── log_summarizer.py # Log summarization functionality
393392
│ ├── pr_analyzer.py # PR performance analysis with Gemini+MCP

bugzooka/analysis/jsonparser.py

Lines changed: 0 additions & 61 deletions
This file was deleted.

bugzooka/analysis/log_summarizer.py

Lines changed: 126 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,17 @@
55
from typing import List, Tuple, Optional
66
import requests
77

8-
from bugzooka.core.constants import MAX_CONTEXT_SIZE
8+
from bugzooka.core.constants import GCSWEB_BASE_URL, MAX_CONTEXT_SIZE
99
from bugzooka.analysis.prompts import ERROR_SUMMARIZATION_PROMPT
1010
from bugzooka.analysis.failure_keywords import FAILURE_KEYWORDS
1111
from bugzooka.core.utils import (
1212
download_file_from_gcs,
13+
extract_gcs_path,
1314
filter_most_frequent_errors,
15+
gcs_basename,
1416
list_gcs_files,
1517
run_shell_command,
18+
strip_step_prefixes,
1619
)
1720

1821
logger = logging.getLogger(__name__)
@@ -56,9 +59,9 @@ def get_prow_inner_artifact_files(gcs_path):
5659
# Identify nested log folder (match last segment with gcs_path)
5760
log_folder = next(
5861
(
59-
f.strip("/").split("/")[-1]
62+
gcs_basename(f)
6063
for f in top_files
61-
if f.strip("/").split("/")[-1] in gcs_path
64+
if gcs_basename(f) in gcs_path
6265
),
6366
None,
6467
)
@@ -71,34 +74,78 @@ def get_prow_inner_artifact_files(gcs_path):
7174
return log_folder_path, inner_files
7275

7376

74-
def download_prow_orion_jsons(gcs_path, output_dir):
77+
def list_orion_step_dirs(gcs_path, exclude_report=False):
    """
    Collect the orion step folders found under a Prow job's artifacts.

    :param gcs_path: raw GCS path (no gs:// prefix)
    :param exclude_report: when True, folders whose name contains
        'orion-report' are left out of the result
    :return: list of (folder_name, step_artifacts_gcs_path) tuples; an
        empty list when the nested log folder cannot be resolved
    """
    log_folder_path, inner_files = get_prow_inner_artifact_files(gcs_path)
    if not log_folder_path:
        return []

    def _is_wanted(name):
        # Keep orion folders, optionally dropping the report step.
        if "orion" not in name:
            return False
        return not (exclude_report and "orion-report" in name)

    # Only entries with a trailing slash are directories.
    dir_names = [
        gcs_basename(entry)
        for entry in inner_files
        if entry.rstrip().endswith("/")
    ]
    return [
        (name, f"{log_folder_path}{name}/artifacts/")
        for name in dir_names
        if _is_wanted(name)
    ]
100+
101+
102+
def download_prow_orion_jsons(step_dirs, output_dir):
    """
    Fetch the orion JSON files for each pre-discovered step directory.

    Every step gets its own subdirectory of output_dir, named with
    strip_step_prefixes(folder), so that downstream consumers can tell
    which workload each JSON belongs to. 'prowjob.json' is never
    downloaded.

    :param step_dirs: list of (folder, artifacts_gcs_path) tuples
    :param output_dir: output directory to store artifacts
    :return: None
    """
    try:
        for folder, step_artifacts in step_dirs:
            target_dir = os.path.join(output_dir, strip_step_prefixes(folder))
            os.makedirs(target_dir, exist_ok=True)
            for remote_file in list_gcs_files(step_artifacts):
                name = gcs_basename(remote_file)
                # Skip anything that is not an orion result JSON.
                if name == "prowjob.json" or not name.endswith(".json"):
                    continue
                download_file_from_gcs(remote_file, target_dir)
    except subprocess.CalledProcessError as err:
        logger.error("Error processing Orion JSONs: %s", err.stderr)
90127

91-
orion_jsons = []
92-
for folder in orion_folders:
93-
json_path = f"{log_folder_path}{folder}/artifacts/"
94-
json_files = list_gcs_files(json_path)
95-
orion_jsons.extend(f for f in json_files if f.endswith(".json"))
96128

97-
for json_url in orion_jsons:
98-
download_file_from_gcs(json_url, output_dir)
129+
def download_prow_orion_report_summary(step_dirs, output_dir):
    """
    Fetch orion-report-summary.txt from the report step's artifacts.

    Only folders whose name contains 'orion-report' are inspected, and
    the function stops after the first matching summary file has been
    downloaded.

    :param step_dirs: list of (folder, artifacts_gcs_path) tuples
                      (should include the report step)
    :param output_dir: output directory to store the summary file
    :return: None
    """
    try:
        report_paths = (
            artifacts_path
            for name, artifacts_path in step_dirs
            if "orion-report" in name
        )
        for artifacts_path in report_paths:
            for remote_file in list_gcs_files(artifacts_path):
                if gcs_basename(remote_file) != "orion-report-summary.txt":
                    continue
                download_file_from_gcs(remote_file, output_dir)
                return
    except subprocess.CalledProcessError as err:
        logger.error("Error downloading orion report summary: %s", err.stderr)
102149

103150

104151
def download_prow_cluster_operators(gcs_path, output_dir):
@@ -137,7 +184,7 @@ def download_prow_logs(url, output_dir="/tmp/"):
137184
if "view/gs/" not in url:
138185
raise ValueError("Invalid Prow URL: GCS path not found.")
139186

140-
gcs_path = url.split("view/gs/")[1]
187+
gcs_path = extract_gcs_path(url)
141188

142189
log_dir = os.path.join(output_dir, build_id)
143190
orion_dir = os.path.join(log_dir, "orion")
@@ -147,51 +194,92 @@ def download_prow_logs(url, output_dir="/tmp/"):
147194
download_prow_build_log(gcs_path, log_dir)
148195
download_prow_junit_operator_xml(gcs_path, log_dir)
149196
download_prow_cluster_operators(gcs_path, log_dir)
150-
download_prow_orion_jsons(gcs_path, orion_dir)
197+
198+
# Discover orion step dirs once to avoid redundant GCS listings
199+
all_step_dirs = list_orion_step_dirs(gcs_path)
200+
individual_dirs = [(f, p) for f, p in all_step_dirs if "orion-report" not in f]
201+
download_prow_orion_jsons(individual_dirs, orion_dir)
202+
download_prow_orion_report_summary(all_step_dirs, log_dir)
151203

152204
return log_dir
153205

154206

155207
def construct_visualization_url(view_url, step_name):
    """
    Resolve the gcsweb URL(s) of the visualization artifacts for a step.

    A step_name containing 'orion-report' is handled as a deferred
    report step and delegated to _construct_deferred_viz_urls; any other
    value (including an empty or None step_name) is delegated to
    _construct_single_viz_url.

    :param view_url: prow view URL
    :param step_name: raw step name from junit_operator.xml
    :return: str, dict[str, str], or None
    """
    if not step_name or "orion-report" not in step_name:
        return _construct_single_viz_url(view_url, step_name)
    return _construct_deferred_viz_urls(view_url)
223+
224+
225+
def _construct_deferred_viz_urls(view_url):
    """
    Build visualization URLs for the deferred orion-report step.

    Each individual orion step directory (the report step itself is
    excluded) is listed, and the first ``.html`` file found in its
    artifacts becomes that step's visualization URL.

    :param view_url: prow view URL
    :return: dict of {test_name: url}, where test_name is
        strip_step_prefixes(folder); None when no step has an HTML file
        or when URL construction fails
    """
    try:
        gcs_path = extract_gcs_path(view_url)
        viz_urls = {}
        step_dirs = list_orion_step_dirs(gcs_path, exclude_report=True)
        for folder, step_artifacts in step_dirs:
            try:
                files = list_gcs_files(step_artifacts)
            except Exception as e:
                # Best effort: one unreadable step must not hide the
                # others, but record why its link is missing.
                logger.warning(
                    "Skipping orion step %s: could not list %s: %s",
                    folder,
                    step_artifacts,
                    e,
                )
                continue

            first_html = next((f for f in files if f.endswith(".html")), None)
            if first_html is None:
                continue

            test_name = strip_step_prefixes(folder)
            artifacts_url = f"{GCSWEB_BASE_URL}{step_artifacts.replace('gs://', '')}"
            viz_urls[test_name] = f"{artifacts_url}{gcs_basename(first_html)}"

        return viz_urls or None
    except Exception as e:
        logger.error("Failed to construct deferred viz URLs: %s", e)
        return None
250+
251+
252+
def _construct_single_viz_url(view_url, step_name):
253+
"""
254+
Build a gcsweb URL pointing to a single step's viz HTML.
162255
"""
163256
try:
164-
gcs_path = view_url.split("view/gs/")[1]
165-
base = "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/"
257+
gcs_path = extract_gcs_path(view_url)
166258
artifact_root = f"gs://{gcs_path}/artifacts/"
167259
top_folders = list_gcs_files(artifact_root)
168260

169-
# Find the folder that actually contains the step as a subfolder.
170-
# The junit step_name often includes the log_folder as a prefix
171-
# (e.g. "payload-control-plane-6nodes-openshift-qe-orion-udn-density")
172-
# while the GCS folder is just "openshift-qe-orion-udn-density".
173261
for entry in top_folders:
174262
if not entry.rstrip().endswith("/"):
175263
continue
176-
folder = entry.strip("/").split("/")[-1]
177-
# Try with prefix stripped first, then the raw step_name
264+
folder = gcs_basename(entry)
178265
candidates = [step_name]
179266
prefix = folder + "-"
180267
if step_name.startswith(prefix):
181-
candidates.insert(0, step_name[len(prefix) :])
268+
candidates.insert(0, step_name[len(prefix):])
182269
for candidate in candidates:
183270
step_artifacts = f"{artifact_root}{folder}/{candidate}/artifacts/"
184271
try:
185272
files = list_gcs_files(step_artifacts)
186273
except Exception:
187274
continue
188-
artifacts_path = f"{gcs_path}/artifacts/{folder}/{candidate}/artifacts/"
275+
artifacts_url = (
276+
f"{GCSWEB_BASE_URL}{gcs_path}/artifacts/"
277+
f"{folder}/{candidate}/artifacts/"
278+
)
189279
html_files = [f for f in files if f.endswith(".html")]
190280
if html_files:
191-
html_name = html_files[0].strip("/").split("/")[-1]
192-
return f"{base}{artifacts_path}{html_name}"
193-
return f"{base}{artifacts_path}"
194-
281+
return f"{artifacts_url}{gcs_basename(html_files[0])}"
282+
return artifacts_url
195283
return None
196284
except Exception as e:
197285
logger.error("Failed to construct visualization URL: %s", e)

0 commit comments

Comments
 (0)