Skip to content

Commit 970a8a4

Browse files
committed
[INFRA] Reconcile primary JIRA components with the PR title in merge_spark_pr.py
When merging a PR, merge_spark_pr.py now compares the primary component tags in the normalized PR title against the primary components on the linked JIRA ticket. On a mismatch it prompts the committer to overwrite the JIRA's primary components with the PR title's, append them, or keep JIRA unchanged (the default). Non-primary tags such as [TEST] and non-primary JIRA components such as "Optimizer" are ignored by the comparison and preserved by both updates, so a common title like [SQL][TEST] no longer prompts against a SQL-only ticket. The JIRA summary printed during a merge now also lists the ticket's components.

Generated-by: Claude Code (Opus 4.8)
1 parent 90f6bab commit 970a8a4

1 file changed

Lines changed: 129 additions & 6 deletions

File tree

dev/merge_spark_pr.py

Lines changed: 129 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -652,13 +652,107 @@ def print_jira_issue_summary(issue):
652652
assignee = assignee.displayName
653653
assignee = "Assignee\t%s\n" % assignee
654654
status = "Status\t\t%s\n" % issue.fields.status.name
655+
components = "Components\t%s\n" % [x.name for x in issue.fields.components]
655656
url = "Url\t\t%s/%s\n" % (JIRA_BASE, issue.key)
656657
target_versions = "Affected\t%s\n" % [x.name for x in issue.fields.versions]
657658
fix_versions = ""
658659
if len(issue.fields.fixVersions) > 0:
659660
fix_versions = "Fixed\t\t%s\n" % [x.name for x in issue.fields.fixVersions]
660661
print("=== JIRA %s ===" % issue.key)
661-
print("%s%s%s%s%s%s" % (summary, assignee, status, url, target_versions, fix_versions))
662+
print(
663+
"%s%s%s%s%s%s%s"
664+
% (summary, assignee, status, components, url, target_versions, fix_versions)
665+
)
666+
667+
668+
def jira_components_from_title_tags(tags, primary_only=False):
    """Translate PR-title component tags into canonical SPARK JIRA component names.

    Every tag is looked up with ``Component.find``. A tag contributes its
    ``jira_name`` only when the lookup succeeds and that name is non-empty, so
    status markers ([FOLLOWUP], [MINOR]), version tags ([4.X]) and unknown tags
    add nothing, while aliases collapse to the canonical JIRA name. When
    ``primary_only`` is true, components whose ``primary`` flag is false (e.g.
    [TEST], [SHUFFLE]) are left out as well. Names are returned in first-seen
    order with duplicates removed.

    >>> jira_components_from_title_tags(["SQL", "CORE"])
    ['SQL', 'Spark Core']
    >>> jira_components_from_title_tags(["PYSPARK", "DOCS"])
    ['PySpark', 'Documentation']
    >>> jira_components_from_title_tags(["SQL", "FOLLOWUP", "4.X", "BOGUS"])
    ['SQL']
    >>> jira_components_from_title_tags(["SQL", "SQL"])
    ['SQL']
    >>> jira_components_from_title_tags(["SQL", "TEST"], primary_only=True)
    ['SQL']
    >>> jira_components_from_title_tags(["TEST", "SHUFFLE"], primary_only=True)
    []
    """
    # A dict keeps insertion order, so it doubles as an ordered set of names.
    ordered_names = {}
    for tag in tags:
        component = Component.find(tag)
        if component is None or not component.jira_name:
            # Not a JIRA component (unknown tag, status marker, version tag).
            continue
        if not component.primary and primary_only:
            continue
        ordered_names.setdefault(component.jira_name, None)
    return list(ordered_names)
697+
698+
699+
def reconcile_jira_components(issue, title_components):
    """Offer to align a JIRA issue's primary components with the PR title.

    ``title_components`` holds the normalized PR-title component tags (e.g.
    ["SQL", "TEST"]). The title's primary tags are mapped to canonical JIRA
    names and compared, as sets, with the primary components currently on
    ``issue``. JIRA components that ``Component.find_by_jira_name`` does not
    recognise, or that are not primary, are grouped together as "non-primary":
    they take no part in the comparison and are kept by both update choices.

    Nothing happens when the title has no primary component (e.g. [MINOR]) or
    when the two primary sets already match. Otherwise the committer is asked
    to overwrite JIRA's primary components with the title's, append the
    title's primary components, or keep JIRA unchanged (the default). A
    failure while updating the issue is reported through ``print_error``
    instead of being raised.
    """
    wanted = jira_components_from_title_tags(title_components, primary_only=True)
    if not wanted:
        return

    # Split the issue's current components into primary and everything else.
    existing = [c.name for c in issue.fields.components]
    jira_primary = []
    jira_other = []
    for name in existing:
        known = Component.find_by_jira_name(name)
        is_primary = known is not None and known.primary
        (jira_primary if is_primary else jira_other).append(name)

    if set(jira_primary) == set(wanted):
        return

    banner = "=" * 80
    print()
    print(banner)
    print("PR title primary components differ from JIRA %s:" % issue.key)
    print(" PR title: %s" % ", ".join(wanted))
    jira_shown = ", ".join(jira_primary) if jira_primary else "(none)"
    print(" JIRA: %s" % jira_shown)
    if jira_other:
        print(" (non-primary JIRA components, preserved: %s)" % ", ".join(jira_other))
    print(banner)

    answer = get_input(
        "[o]verwrite JIRA primaries with PR title / [a]ppend PR title / [k]eep JIRA as is "
        "(default: keep): ",
        {"o": ["o", "overwrite"], "a": ["a", "append"], "k": ["k", "keep", ""]},
    )
    if answer == "k":
        print("Keeping JIRA %s components unchanged." % issue.key)
        return

    # "o" swaps the primary components for the title's and keeps the
    # non-primary ones; any other answer ("a") adds the title's primary
    # components after everything already on the issue.
    combined = wanted + jira_other if answer == "o" else existing + wanted
    new_names = list(dict.fromkeys(combined))

    try:
        issue.update(fields={"components": [{"name": n} for n in new_names]})
        print("Updated JIRA %s components to: %s" % (issue.key, ", ".join(new_names)))
    except Exception as e:
        print_error("Failed to update components on JIRA %s: %s" % (issue.key, e))
662756

663757

664758
def get_jira_issue(prompt, default_jira_id=""):
@@ -684,14 +778,16 @@ def get_jira_issue(prompt, default_jira_id=""):
684778
return get_jira_issue("Enter the revised JIRA ID again or leave blank to skip")
685779

686780

687-
def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
781+
def resolve_jira_issue(merge_branches, comment, default_jira_id="", title_components=()):
688782
issue = get_jira_issue("Enter a JIRA id", default_jira_id)
689783
if issue is None:
690784
return
691785

692786
if issue.fields.assignee is None:
693787
choose_jira_assignee(issue)
694788

789+
reconcile_jira_components(issue, title_components)
790+
695791
versions = asf_jira.project_versions("SPARK")
696792
# Consider only x.y.z, unreleased, unarchived versions
697793
versions = [
@@ -830,13 +926,13 @@ def assign_issue(issue: int, assignee: str) -> bool:
830926
return True
831927

832928

833-
def resolve_jira_issues(title, merge_branches, comment, title_components=()):
    """Run ``resolve_jira_issue`` for each SPARK JIRA id found in ``title``.

    Ids are text matching ``SPARK-`` followed by four or five digits. When the
    title contains none, ``resolve_jira_issue`` is called once without a
    default id. ``merge_branches``, ``comment`` and ``title_components`` are
    forwarded unchanged on every call.
    """
    jira_ids = re.findall("SPARK-[0-9]{4,5}", title)

    if not jira_ids:
        resolve_jira_issue(merge_branches, comment, title_components=title_components)
        return

    for jira_id in jira_ids:
        resolve_jira_issue(merge_branches, comment, jira_id, title_components=title_components)
840936

841937

842938
class Component:
@@ -879,6 +975,28 @@ def find(cls, token):
879975
return c
880976
return None
881977

978+
@classmethod
def find_by_jira_name(cls, name):
    """Look up a Component by its canonical JIRA name.

    Returns the first entry of ``COMPONENTS`` whose ``jira_name`` equals
    ``name`` exactly, or None when ``name`` is empty or nothing matches.

    >>> Component.find_by_jira_name("Spark Core").tag
    'CORE'
    >>> Component.find_by_jira_name("SQL").primary
    True
    >>> Component.find_by_jira_name("Tests").primary
    False
    >>> Component.find_by_jira_name("Not A Component") is None
    True
    >>> Component.find_by_jira_name("") is None
    True
    """
    # An empty name must never match an entry whose jira_name is itself empty.
    if not name:
        return None
    matches = (candidate for candidate in COMPONENTS if candidate.jira_name == name)
    return next(matches, None)
999+
8821000

8831001
# Full SPARK JIRA component list (sorted alphabetically by tag), followed
8841002
# by status markers. Keep in sync with the components in JIRA — fetch the
@@ -1248,6 +1366,10 @@ def main():
12481366
# e.g. 'Reapply "[SPARK-56357][BUILD] Upgrade sbt to 1.12.8"'
12491367
is_reapply_pr = title.startswith('Reapply "') and title.endswith('"')
12501368

1369+
# Normalized PR-title component tags, used later to reconcile JIRA components. Empty for
1370+
# Revert/Reapply PRs, whose titles are kept verbatim and not parsed for components.
1371+
title_components: List[str] = []
1372+
12511373
# Revert and Reapply PRs keep their title verbatim.
12521374
if not (is_revert_pr or is_reapply_pr):
12531375
# Parse; fail on a malformed title.
@@ -1290,6 +1412,7 @@ def main():
12901412
print_error("Title has unknown tag(s): %s" % ", ".join("[%s]" % t for t in unknown))
12911413

12921414
parsed.components = components
1415+
title_components = list(parsed.components)
12931416
title = str(parsed)
12941417
if title != pr["title"]:
12951418
print("Normalized title: %s" % title)
@@ -1451,7 +1574,7 @@ def main():
14511574
GITHUB_BASE,
14521575
pr_num,
14531576
)
1454-
resolve_jira_issues(title, merged_refs, jira_comment)
1577+
resolve_jira_issues(title, merged_refs, jira_comment, title_components)
14551578

14561579

14571580
if __name__ == "__main__":

0 commit comments

Comments
 (0)