-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Expand file tree
/
Copy pathgenerate_module_notes.py
More file actions
278 lines (235 loc) · 9.24 KB
/
generate_module_notes.py
File metadata and controls
278 lines (235 loc) · 9.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
import argparse
import re
import subprocess
import sys
def run_cmd(cmd, cwd=None):
    """Run *cmd* (a list of argv tokens) and return its captured stdout.

    On a non-zero exit status, echo the failing command line and its stderr
    to this process's stderr, then exit with the command's return code.
    """
    completed = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd)
    if completed.returncode == 0:
        return completed.stdout
    # Surface the failure loudly and propagate the child's exit status.
    print(f"Error running command: {' '.join(cmd)}", file=sys.stderr)
    print(completed.stderr, file=sys.stderr)
    sys.exit(completed.returncode)
# Conventional-commit titles we include in the notes, e.g. "feat(spanner)!: ..."
# group(1) = prefix, group(2) = optional "(scope)", group(3) = optional "!".
_PREFIX_REGEX = re.compile(r"^(feat|fix|deps|docs)(\([^)]+\))?(!)?:")


def _find_target_and_prev_commits(module, target_version):
    """Scan versions.txt history for the release commits bounding this version.

    Walks first-parent history of commits that modified the module's line in
    versions.txt (newest first), looking for the commit where the released
    version equals *target_version*, then for the nearest older commit whose
    released/current versions are both non-snapshot.

    Returns:
        (target_commit, prev_commit) — either may be None if not found.
    """
    log_cmd = [
        "git",
        "log",
        "--oneline",
        "--first-parent",
        f"-G^{module}:",
        "--",
        "versions.txt",
    ]
    log_output = run_cmd(log_cmd)
    commits = [line.split()[0] for line in log_output.splitlines() if line]
    # versions.txt lines look like "module:released:current".
    # re.escape guards against regex metacharacters in the module name.
    pattern = re.compile(rf"^{re.escape(module)}:([^:]+):([^:]+)$")
    target_commit = None
    prev_commit = None
    for commit in commits:
        show_cmd = ["git", "show", f"{commit}:versions.txt"]
        try:
            content = run_cmd(show_cmd)
        except SystemExit:
            continue  # Ignore errors if file couldn't be read
        for line in content.splitlines():
            match = pattern.match(line)
            if not match:
                continue
            released_ver = match.group(1)
            current_ver = match.group(2)
            if released_ver == target_version and not target_commit:
                target_commit = commit
                print(f"Found target version {target_version} at {commit}")
            # Previous version: must come after the target (history is
            # newest-first) and be a real, non-snapshot release.
            elif (
                target_commit
                and released_ver != target_version
                and "-SNAPSHOT" not in released_ver
                and "-SNAPSHOT" not in current_ver
            ):
                prev_commit = commit
                print(f"Found previous version {released_ver} at {commit}")
            break  # Only the first matching line per commit matters.
        if prev_commit:
            break
    return target_commit, prev_commit


def _first_commit_for_directory(directory):
    """Fallback base for an initial release: oldest first-parent commit
    touching *directory*. Exits with status 1 if the directory has no history.
    """
    first_commit_cmd = [
        "git",
        "log",
        "--reverse",
        "--oneline",
        "--first-parent",
        "--",
        directory,
    ]
    try:
        first_commit_output = run_cmd(first_commit_cmd)
    except SystemExit:
        sys.exit(1)
    if first_commit_output:
        first_commit = first_commit_output.splitlines()[0].split()[0]
        print(f"Using first commit affecting directory as base: {first_commit}")
        return first_commit
    print(f"No history found for directory {directory}.")
    sys.exit(1)


def _parse_override(override_content, commit_hash, short_name, emit):
    """Extract note items from a BEGIN/END_COMMIT_OVERRIDE block.

    Each item starts on a line matching ``_PREFIX_REGEX``; subsequent lines
    are continuations joined with spaces, skipping release-tooling metadata.
    When *short_name* is set, only items tagged ``[short_name]`` are emitted
    via the *emit* callback; otherwise every item is emitted.
    """
    current_item = []
    in_module_item = False
    for line in override_content.splitlines():
        line_stripped = line.strip()
        if not line_stripped:
            continue
        if _PREFIX_REGEX.match(line_stripped):
            # A new item begins; flush the one in progress, if any.
            if in_module_item and current_item:
                emit(commit_hash, " ".join(current_item))
            current_item = []
            in_module_item = False
            # Include when no module filter is set, or the tag matches.
            if not short_name or f"[{short_name}]" in line_stripped:
                in_module_item = True
                current_item.append(line_stripped)
        elif in_module_item:
            # Continuation line — drop tooling metadata and nested markers.
            if line_stripped.startswith(("PiperOrigin-RevId:", "Source Link:")):
                continue
            if line_stripped in ("END_NESTED_COMMIT", "BEGIN_NESTED_COMMIT"):
                continue
            current_item.append(line_stripped)
    # Save the last item if we were in one.
    if in_module_item and current_item:
        emit(commit_hash, " ".join(current_item))


def _categorize_notes(notes_output, short_name):
    """Parse formatted ``git log`` output into categorized note lines.

    *notes_output* is the output of a log formatted as
    ``%H %s%n%b%n--END_OF_COMMIT--`` per commit. Commit-message overrides
    (BEGIN_COMMIT_OVERRIDE blocks) replace the commit title entirely.

    Returns:
        dict mapping category key ("breaking", "feat", "fix", "deps",
        "docs") to a list of "shorthash title" strings.
    """
    categories = {"breaking": [], "feat": [], "fix": [], "deps": [], "docs": []}

    def categorize_and_append(commit_hash, text):
        # Route an entry to its category; "!" marks a breaking change.
        match = _PREFIX_REGEX.match(text)
        if not match:
            return
        entry = f"{commit_hash[:11]} {text}"
        if match.group(3) == "!":
            categories["breaking"].append(entry)
        else:
            categories[match.group(1)].append(entry)

    for commit_data in notes_output.split("--END_OF_COMMIT--"):
        commit_data = commit_data.strip()
        if not commit_data:
            continue
        lines = commit_data.splitlines()
        if not lines:
            continue
        header_parts = lines[0].split(" ", 1)
        commit_hash = header_parts[0]
        subject = header_parts[1] if len(header_parts) > 1 else ""
        body = "\n".join(lines[1:])
        if "BEGIN_COMMIT_OVERRIDE" in body or "BEGIN_COMMIT_OVERRIDE" in subject:
            match = re.search(
                r"BEGIN_COMMIT_OVERRIDE(.*?)END_COMMIT_OVERRIDE",
                commit_data,
                re.DOTALL,
            )
            if match:
                _parse_override(
                    match.group(1), commit_hash, short_name, categorize_and_append
                )
            # Ignore the title since there was an override (even when the
            # closing marker is missing and nothing could be extracted).
            continue
        # Fallback to the title when no override is present.
        if _PREFIX_REGEX.match(subject):
            categorize_and_append(commit_hash, subject)
    return categories


def _print_notes(categories):
    """Print the release notes, one markdown section per non-empty category."""
    print("\nRelease Notes:")
    sections = [
        ("breaking", "### ⚠ BREAKING CHANGES\n"),
        ("feat", "### Features\n"),
        ("fix", "### Bug Fixes\n"),
        ("deps", "### Dependencies\n"),
        ("docs", "### Documentation\n"),
    ]
    for key, heading in sections:
        items = categories[key]
        if not items:
            continue
        print(heading)
        for item in items:
            print(f"* {item}")
        print()


def main():
    """Generate release notes for a module from monorepo git history.

    Steps: locate the commits bounding the target release in versions.txt
    history, fall back to the directory's first commit for initial releases,
    collect the conventional commits in that range touching the module's
    directory, and print them as categorized markdown sections.
    """
    parser = argparse.ArgumentParser(
        description="Generate release notes based on commit history for a specific module."
    )
    parser.add_argument(
        "--module", required=True, help="Module name as specified in versions.txt"
    )
    parser.add_argument(
        "--directory", required=True, help="Path in the monorepo where the module has code"
    )
    parser.add_argument("--version", required=True, help="Target version")
    parser.add_argument(
        "--short-name", help="Module short-name used in commit overrides (e.g., aiplatform). Omit for repo-wide generation."
    )
    args = parser.parse_args()
    module = args.module
    directory = args.directory
    target_version = args.version

    # 1. Scan backwards through git history of versions.txt for the range.
    target_commit, prev_commit = _find_target_and_prev_commits(module, target_version)
    if not target_commit:
        print(
            f"Target version {target_version} not found in history for module {module}."
        )
        sys.exit(1)
    # Fallback for initial version if no previous version found.
    if not prev_commit:
        print(
            f"Previous version not found in history for module {module}."
        )
        prev_commit = _first_commit_for_directory(directory)

    print(
        f"Generating notes between {prev_commit} and {target_commit} for directory {directory}"
    )
    # 2. Generate commit history in that range affecting that directory.
    # --first-parent ignores merge noise; the format carries hash, subject,
    # and body with an explicit per-commit terminator for safe splitting.
    notes_cmd = [
        "git",
        "log",
        "--format=%H %s%n%b%n--END_OF_COMMIT--",
        "--first-parent",
        f"{prev_commit}..{target_commit}",
        "--",
        directory,
    ]
    notes_output = run_cmd(notes_cmd)
    _print_notes(_categorize_notes(notes_output, args.short_name))
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()