Skip to content

Commit c75faac

Browse files
committed
Refactor azpysdk apistub command. Extract metadata from API.md.
1 parent 7f8eeb5 commit c75faac

8 files changed

Lines changed: 227 additions & 27 deletions

File tree

.github/skills/generate-api-markdown/SKILL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@ description: Generate an API markdown file and token file using ApiView. Use thi
1919
1. Navigate to the desired package directory
2020
2. Run the command:
2121
```bash
22-
azpysdk apistub --md .
22+
azpysdk apistub --md --extract-metadata .
2323
3. The command outputs the location of the generated markdown file. Provide this file to the user for review.

.github/workflows/src/api-md-consistency/api-md-consistency.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ function formatIssueSection(title, apiFiles) {
4444
const packageName = path.basename(packageDir);
4545
lines.push(`- ${packageDir}`);
4646
lines.push(` API.md: ${apiFile}`);
47-
lines.push(` Regenerate: azpysdk apistub --md ${packageName}`);
47+
lines.push(` Regenerate: azpysdk apistub --md --extract-metadata ${packageName}`);
4848
}
4949
lines.push("");
5050
return lines.join("\n");
@@ -88,6 +88,7 @@ module.exports = async function apiMdConsistency({ core }) {
8888
if (issueCount > 0) {
8989
const messageParts = [
9090
"Generated API.md does not match committed API.md, or API.md is missing, for one or more affected packages.",
91+
"API.metadata.yml is informational only (for troubleshooting API drift, e.g., parser/runtime differences) and is not part of pass/fail gating.",
9192
"",
9293
formatIssueSection("Mismatched packages:", mismatches),
9394
formatIssueSection("Missing API.md packages:", missing),
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
# Copyright (c) Microsoft Corporation. All rights reserved.
2+
# Licensed under the MIT License. See License.txt in the project root for license information.
3+
4+
<#
5+
.SYNOPSIS
6+
Extracts Python APIView metadata from API markdown and writes API.metadata.yml.
7+
8+
.DESCRIPTION
9+
Reads an API markdown file, extracts parser and Python runtime versions from the
10+
Python APIView metadata header, removes that header from the markdown, trims leading
11+
blank lines from the markdown body, and writes API.metadata.yml (parser version, Python version, and a SHA-256 hash of the normalized markdown) beside the markdown file.
12+
13+
.PARAMETER ApiMarkdownPath
14+
Optional. Path to API markdown file. If omitted, a markdown file will be resolved
15+
from OutputPath (prefers API.md, then api.md).
16+
17+
.PARAMETER OutputPath
18+
Optional. Directory containing API markdown output. Defaults to current directory.
19+
20+
.EXAMPLE
21+
./Extract-APIViewMetadata-Python.ps1 -OutputPath ./sdk/template/azure-template
22+
23+
.EXAMPLE
24+
./Extract-APIViewMetadata-Python.ps1 -ApiMarkdownPath ./sdk/template/azure-template/API.md
25+
#>
26+
27+
[CmdletBinding()]
28+
param(
29+
[Parameter(Mandatory = $false)]
30+
[string]$ApiMarkdownPath,
31+
32+
[Parameter(Mandatory = $false)]
33+
[string]$OutputPath = "."
34+
)
35+
36+
Set-StrictMode -Version 3
37+
$ErrorActionPreference = 'Stop'
38+
39+
# Picks the API markdown file this script should operate on.
#   -ProvidedPath     Explicit markdown path. When non-empty it is returned as-is
#                     (no existence check happens here).
#   -OutputDirectory  Directory to search when no explicit path is given.
# Returns the path to API.md if present in the directory, otherwise api.md.
# Throws when the directory cannot be resolved or contains neither file.
function Resolve-ApiMarkdownPath {
    param(
        [string]$ProvidedPath,
        [string]$OutputDirectory
    )

    # An explicit path always wins over directory discovery.
    if (-not [string]::IsNullOrEmpty($ProvidedPath)) {
        return $ProvidedPath
    }

    $directory = (Resolve-Path -LiteralPath $OutputDirectory -ErrorAction Stop).Path

    # Candidates are probed in preference order: upper-case name first.
    foreach ($fileName in 'API.md', 'api.md') {
        $candidate = Join-Path $directory $fileName
        if (Test-Path -LiteralPath $candidate -PathType Leaf) {
            return $candidate
        }
    }

    throw "Could not find API markdown file in '$OutputDirectory'. Expected API.md or api.md."
}
62+
63+
# Drops the run of null/empty/whitespace-only entries at the start of -Lines.
# Returns the input unchanged when the first entry already has content (or the
# input is empty), an empty array when every entry is blank, and otherwise the
# remaining entries starting at the first one with content.
function Trim-LeadingBlankLines {
    param([string[]]$Lines)

    # Index of the first entry with content; stays at Count when none has any.
    $firstContent = $Lines.Count
    for ($index = 0; $index -lt $Lines.Count; $index++) {
        if (-not [string]::IsNullOrWhiteSpace($Lines[$index])) {
            $firstContent = $index
            break
        }
    }

    # Nothing to trim (this also covers an empty input, where Count is 0).
    if ($firstContent -eq 0) {
        return $Lines
    }

    # Every entry was blank.
    if ($firstContent -ge $Lines.Count) {
        return @()
    }

    $lastIndex = $Lines.Count - 1
    return $Lines[$firstContent..$lastIndex]
}
81+
82+
# Returns the SHA-256 digest of -Text (encoded as UTF-8) as a lower-case
# hexadecimal string with no separators.
function Get-Sha256Hex {
    param([string]$Text)

    $hasher = [System.Security.Cryptography.SHA256]::Create()
    try {
        $digest = $hasher.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($Text))
    }
    finally {
        # The hash object is disposable; release it even if hashing throws.
        $hasher.Dispose()
    }

    # 'x2' formats each byte as exactly two lower-case hex digits.
    $hex = -join ($digest | ForEach-Object { $_.ToString('x2') })
    return $hex
}
95+
96+
# ---------------------------------------------------------------------------
# Script body: strip the APIView metadata header from the markdown file and
# write the extracted values (plus a content hash) to API.metadata.yml.
# ---------------------------------------------------------------------------

$resolvedApiPath = Resolve-ApiMarkdownPath -ProvidedPath $ApiMarkdownPath -OutputDirectory $OutputPath
if (-not (Test-Path -LiteralPath $resolvedApiPath -PathType Leaf)) {
    throw "API markdown file not found: $resolvedApiPath"
}

# Normalize to an absolute path before anything is written. A bare file name
# such as "API.md" has an empty parent, which Join-Path rejects when the
# metadata path is built below -- and by then the markdown would already have
# been rewritten.
$resolvedApiPath = (Resolve-Path -LiteralPath $resolvedApiPath).Path

# Capture group 1 is the apiview-stub-generator version, group 2 the Python version.
$metadataPattern = '^# Package is parsed using apiview-stub-generator\(version:([^\)]+)\), Python version:\s*([^\s]+)\s*$'

$fileText = Get-Content -LiteralPath $resolvedApiPath -Raw
# Reuse the file's own line-ending style for everything written back out.
$lineEnding = if ($fileText -match "`r`n") { "`r`n" } else { "`n" }
$lines = $fileText -split '\r?\n'

$metadata = [ordered]@{}
$filtered = [System.Collections.Generic.List[string]]::new()

# Lines matching the metadata header are recorded and dropped; everything else is kept.
foreach ($line in $lines) {
    $match = [regex]::Match($line, $metadataPattern)
    if ($match.Success) {
        $metadata['parserVersion'] = $match.Groups[1].Value
        $metadata['pythonVersion'] = $match.Groups[2].Value
        continue
    }

    $filtered.Add($line)
}

# Remove blank lines after an opening code fence (or, when there is no fence, at
# the top of the file) so the markdown body starts at its first line of content.
# Both cases append through the same loop, so an empty or single-line result
# from Trim-LeadingBlankLines is handled the same way as a multi-line one.
$rewritten = [System.Collections.Generic.List[string]]::new()
if ($filtered.Count -gt 0 -and $filtered[0].StartsWith('```')) {
    $rewritten.Add($filtered[0])
    $remaining = @($filtered | Select-Object -Skip 1)
}
else {
    $remaining = @($filtered)
}
foreach ($line in @(Trim-LeadingBlankLines -Lines $remaining)) {
    $rewritten.Add($line)
}
$filtered = $rewritten

# The hash is taken over a normalized form (trailing whitespace stripped from
# each line, lines joined with LF), independent of the line endings on disk.
$normalizedLinesForHash = @($filtered | ForEach-Object { $_.TrimEnd() })
$newlineForHash = [string][char]10
$normalizedTextForHash = $normalizedLinesForHash -join $newlineForHash
$metadata['apiMdSha256'] = Get-Sha256Hex -Text $normalizedTextForHash

# -NoNewline: the joined text already ends the way the input did (a trailing
# newline survives the split/join as a final empty element).
Set-Content -LiteralPath $resolvedApiPath -Value ($filtered -join $lineEnding) -NoNewline -Encoding utf8
Write-Host "Updated markdown: $resolvedApiPath"

# apiMdSha256 is always present, so the metadata file is rewritten on every run
# and cannot go stale. When no metadata header was found in the markdown, the
# file contains only the hash.
$metadataPath = Join-Path (Split-Path -Parent $resolvedApiPath) "API.metadata.yml"
$yamlLines = [System.Collections.Generic.List[string]]::new()
# Keys are emitted in sorted order so the output is stable between runs.
foreach ($key in ($metadata.Keys | Sort-Object)) {
    $yamlLines.Add(("{0}: {1}" -f $key, $metadata[$key]))
}

Set-Content -LiteralPath $metadataPath -Value ($yamlLines -join $lineEnding) -Encoding utf8
Write-Host "Generated metadata: $metadataPath"

eng/tools/azure-sdk-tools/azpysdk/apistub.py

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,30 @@ def register(
6969
action="store_true",
7070
help="Generate api.md from the JSON token file using Export-APIViewMarkdown.ps1. Output directory for api.md is the same as the generated token file.",
7171
)
72+
p.add_argument(
73+
"--extract-metadata",
74+
dest="extract_metadata",
75+
default=False,
76+
action="store_true",
77+
help="Extract language-specific metadata from generated api.md into API.metadata.yml and remove metadata header from api.md.",
78+
)
79+
p.add_argument(
80+
"--install-deps",
81+
dest="install_deps",
82+
default=False,
83+
action="store_true",
84+
help="Install dev requirements and apiview dependencies before running. Skipped by default for faster local iteration.",
85+
)
7286
p.set_defaults(func=self.run)
7387

7488
def run(self, args: argparse.Namespace) -> int:
7589
"""Run the apistub check command."""
7690
logger.info("Running apistub check...")
7791

92+
if getattr(args, "extract_metadata", False) and not getattr(args, "generate_md", False):
93+
logger.error("--extract-metadata requires --md.")
94+
return 1
95+
7896
set_envvar_defaults()
7997
targeted = self.get_targeted_directories(args)
8098

@@ -94,22 +112,23 @@ def run(self, args: argparse.Namespace) -> int:
94112
)
95113
logger.info(f"Processing {package_name} for apistub check")
96114

97-
# install dependencies
98-
self.install_dev_reqs(executable, args, package_dir)
99-
100-
try:
101-
install_into_venv(
102-
executable,
103-
[
104-
"-r",
105-
os.path.join(REPO_ROOT, "eng", "apiview_reqs.txt"),
106-
"--index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/",
107-
],
108-
package_dir,
109-
)
110-
except CalledProcessError as e:
111-
logger.error(f"Failed to install dependencies: {e}")
112-
return e.returncode
115+
if getattr(args, "install_deps", False):
116+
# install dependencies
117+
self.install_dev_reqs(executable, args, package_dir)
118+
119+
try:
120+
install_into_venv(
121+
executable,
122+
[
123+
"-r",
124+
os.path.join(REPO_ROOT, "eng", "apiview_reqs.txt"),
125+
"--index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/",
126+
],
127+
package_dir,
128+
)
129+
except CalledProcessError as e:
130+
logger.error(f"Failed to install dependencies: {e}")
131+
return e.returncode
113132

114133
if not os.getenv("PREBUILT_WHEEL_DIR"):
115134
create_package_and_install(
@@ -124,14 +143,15 @@ def run(self, args: argparse.Namespace) -> int:
124143
python_executable=executable,
125144
)
126145

127-
self.pip_freeze(executable)
146+
if getattr(args, "install_deps", False):
147+
self.pip_freeze(executable)
128148

129149
pkg_path = get_package_wheel_path(package_dir)
130150
pkg_path = os.path.abspath(pkg_path)
131151

132152
dest_dir = getattr(args, "dest_dir", None)
133153
if dest_dir:
134-
out_token_path = os.path.join(os.path.abspath(dest_dir), package_name)
154+
out_token_path = os.path.abspath(dest_dir)
135155
os.makedirs(out_token_path, exist_ok=True)
136156
else:
137157
out_token_path = os.path.abspath(staging_directory)
@@ -157,6 +177,9 @@ def run(self, args: argparse.Namespace) -> int:
157177
if getattr(args, "generate_md", False):
158178
token_json_path = os.path.join(out_token_path, f"{package_name}_python.json")
159179
md_script = os.path.join(REPO_ROOT, "eng", "common", "scripts", "Export-APIViewMarkdown.ps1")
180+
metadata_script = os.path.join(
181+
REPO_ROOT, "eng", "scripts", "Extract-APIViewMetadata-Python.ps1"
182+
)
160183
logger.info(f"Generating api.md for {package_name}")
161184
try:
162185
result = run(
@@ -168,11 +191,22 @@ def run(self, args: argparse.Namespace) -> int:
168191
# pwsh script logs the api.md location
169192
if result.stdout:
170193
logger.info(result.stdout)
194+
195+
if getattr(args, "extract_metadata", False):
196+
logger.info(f"Extracting API metadata for {package_name}")
197+
metadata_result = run(
198+
["pwsh", metadata_script, "-OutputPath", out_token_path],
199+
check=True,
200+
capture_output=True,
201+
text=True,
202+
)
203+
if metadata_result.stdout:
204+
logger.info(metadata_result.stdout)
171205
except FileNotFoundError:
172206
logger.error("Failed to generate api.md: pwsh (PowerShell) is not installed or not on PATH.")
173207
results.append(1)
174208
except CalledProcessError as e:
175-
logger.error(f"Failed to generate api.md (exit code {e.returncode}):")
209+
logger.error(f"Failed to generate api.md or extract metadata (exit code {e.returncode}):")
176210
if e.stderr:
177211
logger.error(e.stderr)
178212
if e.stdout:

eng/tools/azure-sdk-tools/tests/test_apistub.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def _make_args(self, dest_dir=None, generate_md=False):
9494
def test_dest_dir_creates_package_subfolder(
9595
self, _env, _install, _create, _get_whl, _get_mapping, tmp_path, monkeypatch
9696
):
97-
"""When --dest-dir is given, output should go to <dest_dir>/<package_name>/."""
97+
"""When --dest-dir is given, output should go directly to <dest_dir>/."""
9898
monkeypatch.chdir(os.getcwd())
9999
dest = tmp_path / "output"
100100
dest.mkdir()
@@ -130,7 +130,7 @@ def fake_pwsh(cmd, **kwargs):
130130

131131
stub.run(self._make_args(dest_dir=str(dest), generate_md=True))
132132

133-
expected_out = os.path.join(str(dest), "azure-core")
133+
expected_out = str(dest)
134134
assert os.path.isdir(expected_out)
135135
assert os.path.exists(os.path.join(expected_out, "api.md"))
136136
assert os.path.exists(os.path.join(expected_out, "azure-core_python.json"))

scripts/api_md_workflow/README.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ This folder contains the helper scripts used by the GitHub Actions workflows tha
55
## Purpose
66

77
The workflow validates that when a pull request changes one or more SDK packages, the committed `API.md` files are still up to date.
8+
Only `API.md` is diff-gated by this workflow; `API.metadata.yml` is intentionally excluded from mismatch checks.
9+
Use `API.metadata.yml` as diagnostic context when `API.md` drifts (for example, parser/runtime version differences), but it does not affect pass/fail.
810

911
The logic is split between GitHub workflow YAML files and helper scripts in Python and JavaScript.
1012

@@ -20,7 +22,7 @@ It runs on pull requests for changes under `sdk/**`.
2022
- Regenerates `API.md` for those packages.
2123
- Fails if the generated files differ from the committed files.
2224
- Fails if an affected package does not have a committed `API.md`.
23-
- Prints the mismatched or missing packages and the `azpysdk apistub --md` command needed to regenerate each `API.md` file.
25+
- Prints the mismatched or missing packages and the `azpysdk apistub --md --extract-metadata` command needed to regenerate each `API.md` file.
2426

2527
## Script Layout
2628

@@ -47,7 +49,7 @@ Also writes `count=<n>` to `GITHUB_OUTPUT`.
4749

4850
### `regenerate.js`
4951

50-
Reads package directories from `API_MD_PACKAGES_FILE` and runs `azpysdk apistub --md <package-name>` for each package.
52+
Reads package directories from `API_MD_PACKAGES_FILE` and runs `azpysdk apistub --md --extract-metadata <package-name>` for each package.
5153

5254
This script is used by the consistency check.
5355

@@ -60,6 +62,8 @@ Reads package directories from `API_MD_PACKAGES_FILE`, checks whether `<package>
6062

6163
Also writes `mismatch_count=<n>`, `missing_count=<n>`, and `issue_count=<n>` to `GITHUB_OUTPUT`.
6264

65+
`API.metadata.yml` is not part of this diff check.
66+
6367
### `create_api_review_pr.js` and adapters
6468

6569
API review PR creation now uses a shared JavaScript orchestrator with a language adapter boundary:
@@ -96,4 +100,4 @@ Common variables include:
96100
3. `find_affected.js` determines which packages were touched.
97101
4. `regenerate.js` rebuilds `API.md` for those packages.
98102
5. `find_mismatches.js` records any `API.md` drift, including missing or untracked `API.md` files.
99-
6. If drift is found, the workflow fails and prints the affected packages plus the `azpysdk apistub --md <package-name>` command to regenerate each `API.md` file locally.
103+
6. If drift is found, the workflow fails and prints the affected packages plus the `azpysdk apistub --md --extract-metadata <package-name>` command to regenerate each `API.md` file locally.

scripts/api_md_workflow/adapters/python.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ function generateApiForPackage({
114114
activeLogger.info(`--- Generating API.md on ${refLabel} ---`);
115115
}
116116

117-
run("azpysdk", ["apistub", "--md", packageName], {
117+
run("azpysdk", ["apistub", "--md", "--extract-metadata", packageName], {
118118
cwd: repoRoot,
119119
check: true,
120120
logger: activeLogger,

scripts/api_md_workflow/find_mismatches.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ async function main() {
1313
const mismatches = [];
1414
const missing = [];
1515
for (const pkgDir of packages) {
16+
// Deliberately scope consistency gating to API.md only.
17+
// API.metadata.yml is generated sidecar metadata and is not diff-gated here.
1618
const apiFile = `${pkgDir}/API.md`;
1719

1820
// Enforce that each affected package has a committed API.md file.

0 commit comments

Comments
 (0)