Skip to content

Commit da2c1d1

Browse files
authored
Automate service coverage data update for Azure (#474)
1 parent 1285a25 commit da2c1d1

File tree

3 files changed

+314
-0
lines changed

3 files changed

+314
-0
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Regenerates the Azure service coverage JSON data from the latest
# LocalStack Pro parity metrics artifact and opens a PR with any changes.
name: Update Azure Coverage Data

on:
  schedule:
    # Weekly refresh, Mondays at 05:00 UTC.
    - cron: "0 5 * * MON"
  workflow_dispatch:
    inputs:
      targetBranch:
        required: true
        type: string
        description: "Branch to checkout and compare against (e.g. harshmishra/doc-91)"

jobs:
  update-azure-coverage:
    name: Update Azure coverage data
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout docs
        uses: actions/checkout@v4
        with:
          # Full history so the branch diffs in "Check for changes" can
          # resolve remote refs.
          fetch-depth: 0
          path: docs
          ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || 'main' }}

      - name: Set up system wide dependencies
        run: |
          # Refresh the package index and install non-interactively: a bare
          # `apt-get install` may prompt and abort when stdin is not a TTY.
          sudo apt-get update
          sudo apt-get install -y jq wget

      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Download Azure implementation metrics artifact
        working-directory: docs
        run: bash ./scripts/get_latest_github_metrics.sh ./target main
        env:
          GITHUB_TOKEN: ${{ secrets.PRO_ACCESS_TOKEN }}
          REPOSITORY_NAME: localstack-pro
          ARTIFACT_ID: implemented_features_python-amd64
          WORKFLOW: "Az / Build, Test, Push"

      - name: Generate Azure coverage JSON data
        working-directory: docs
        run: |
          python3 scripts/create_azure_coverage.py -i target/implemented_features.csv -o target/updated_azure_coverage
          # Fail loudly when the generator produced nothing, instead of
          # silently opening an empty PR.
          if ls target/updated_azure_coverage/*.json > /dev/null 2>&1; then
            mv -f target/updated_azure_coverage/*.json src/data/azure-coverage/
          else
            echo "No JSON files generated in target/updated_azure_coverage."
            exit 1
          fi

      - name: Check for changes
        id: check-for-changes
        working-directory: docs
        env:
          TARGET_BRANCH: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.targetBranch || 'main' }}
        run: |
          mkdir -p resources
          # Prefer diffing against the long-lived automation branch; fall back
          # to the target branch when the automation branch does not exist yet.
          (git diff --name-only origin/automated-azure-coverage-updates src/data/azure-coverage/ 2>/dev/null || git diff --name-only "origin/$TARGET_BRANCH" src/data/azure-coverage/ 2>/dev/null) | tee -a resources/diff-check.log
          echo "diff-count=$(cat resources/diff-check.log | wc -l)" >> "$GITHUB_OUTPUT"
          cat resources/diff-check.log

      - name: Create PR
        uses: peter-evans/create-pull-request@v7
        # Only open a PR when the diff check actually reported changed files.
        if: ${{ success() && steps.check-for-changes.outputs.diff-count != '0' && steps.check-for-changes.outputs.diff-count != '' }}
        with:
          path: docs
          title: "Update Azure coverage data"
          body: "Update generated Azure coverage JSON data from the latest LocalStack Pro parity metrics artifact."
          branch: "automated-azure-coverage-updates"
          author: "LocalStack Bot <localstack-bot@users.noreply.github.com>"
          committer: "LocalStack Bot <localstack-bot@users.noreply.github.com>"
          commit-message: "update generated azure coverage data"
          token: ${{ secrets.PRO_ACCESS_TOKEN }}

scripts/create_azure_coverage.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
"""
2+
Generate Azure coverage JSON files from implementation CSV data.
3+
"""
4+
5+
import argparse
6+
import csv
7+
import json
8+
from pathlib import Path
9+
from typing import Any
10+
11+
12+
def _as_bool(value: Any, default: bool = True) -> bool:
13+
if value is None:
14+
return default
15+
if isinstance(value, bool):
16+
return value
17+
return str(value).strip().lower() in {"1", "true", "yes", "y"}
18+
19+
20+
def _group_name(service_name: str, category: str) -> str:
21+
service_name = (service_name or "").strip()
22+
category = (category or "").strip()
23+
if not category:
24+
return service_name
25+
if category.lower() in {"none", "null", "n/a"}:
26+
return service_name
27+
if category == service_name:
28+
return service_name
29+
return f"{service_name} ({category})"
30+
31+
32+
def _normalize_provider(value: str) -> str:
33+
return (value or "").strip().replace("_", ".")
34+
35+
36+
def _resolve_input_csv(path: Path) -> Path:
37+
if path.exists():
38+
if path.is_file():
39+
return path
40+
# Support passing a directory that contains the extracted artifact.
41+
nested_csv = path / "implemented_features.csv"
42+
if nested_csv.exists():
43+
return nested_csv
44+
matches = sorted(path.rglob("implemented_features.csv"))
45+
if matches:
46+
return matches[0]
47+
raise FileNotFoundError(f"No implemented_features.csv found under: {path}")
48+
49+
# Backward-compatible fallback for target/implemented_features.csv.
50+
if path.name == "implemented_features.csv" and path.parent.exists():
51+
matches = sorted(path.parent.rglob("implemented_features.csv"))
52+
if matches:
53+
return matches[0]
54+
55+
raise FileNotFoundError(f"Input CSV not found: {path}")
56+
57+
58+
def _load_csv(path: Path) -> dict[str, dict[str, dict[str, dict[str, Any]]]]:
    """Parse the implementation CSV into a nested coverage mapping.

    Returns ``{provider: {group: {feature: {"implemented": bool, "pro": bool}}}}``.
    Rows without a resource provider or a feature name are skipped.

    Raises ValueError when the CSV lacks the expected headers or yields no
    records at all (e.g. the artifact contained an error payload).
    """
    csv_path = _resolve_input_csv(path)

    coverage: dict[str, dict[str, dict[str, dict[str, Any]]]] = {}
    with csv_path.open(mode="r", encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        if not reader.fieldnames:
            raise ValueError("Input CSV has no headers.")
        required_headers = {"resource_provider", "service", "feature"}
        if required_headers - set(reader.fieldnames):
            raise ValueError(
                "Unexpected CSV schema. Expected headers including "
                f"{sorted(required_headers)}, got {reader.fieldnames}. "
                "The downloaded artifact may contain an error payload instead of CSV data."
            )

        for record in reader:
            provider = _normalize_provider(record.get("resource_provider", ""))
            feature = (record.get("feature") or record.get("operation") or "").strip()
            if not provider or not feature:
                # Rows lacking a provider or feature carry no coverage info.
                continue

            group = _group_name(record.get("service", ""), record.get("category", "")) or "General"

            # Flag columns appear under several historical names; missing
            # values default to True.
            impl_raw = record.get("implemented", record.get("is_implemented", record.get("isImplemented")))
            pro_raw = record.get("pro", record.get("is_pro", record.get("isPro")))
            entry = {
                "implemented": _as_bool(impl_raw, default=True),
                "pro": _as_bool(pro_raw, default=True),
            }
            coverage.setdefault(provider, {}).setdefault(group, {})[feature] = entry

    if not coverage:
        raise ValueError(
            "No Azure coverage records were parsed from the input CSV. "
            "Please verify the artifact content is valid and non-empty."
        )

    return coverage
107+
108+
109+
def _sorted_details(details: dict[str, dict[str, dict[str, Any]]]) -> dict[str, dict[str, dict[str, Any]]]:
110+
sorted_details: dict[str, dict[str, dict[str, Any]]] = {}
111+
for group_name in sorted(details.keys()):
112+
operations = details[group_name]
113+
sorted_details[group_name] = dict(sorted(operations.items(), key=lambda item: item[0]))
114+
return sorted_details
115+
116+
117+
def write_coverage_files(coverage: dict[str, dict[str, dict[str, dict[str, Any]]]], output_dir: Path) -> None:
    """Write one ``<provider>.json`` coverage file per provider into *output_dir*.

    Each payload carries the provider name, an empty ``operations`` list
    (kept for schema compatibility), and the deterministically sorted
    details mapping. The directory is created when missing.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    for provider in sorted(coverage):
        document = {
            "service": provider,
            "operations": [],
            "details": _sorted_details(coverage[provider]),
        }
        destination = output_dir / f"{provider}.json"
        with destination.open(mode="w", encoding="utf-8") as handle:
            json.dump(document, handle, indent=2)
            handle.write("\n")
129+
130+
131+
def main() -> None:
    """CLI entry point: parse arguments and generate the coverage JSON files."""
    parser = argparse.ArgumentParser(description="Generate Azure coverage JSON data.")
    parser.add_argument(
        "-i",
        "--implementation-details",
        required=True,
        help="Path to implementation details CSV.",
    )
    parser.add_argument(
        "-o",
        "--output-dir",
        required=True,
        help="Directory where generated JSON files will be written.",
    )
    options = parser.parse_args()

    data = _load_csv(Path(options.implementation_details))
    write_coverage_files(data, Path(options.output_dir))


if __name__ == "__main__":
    main()
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/bin/bash
# Download the most recent implementation-metrics artifact from a GitHub
# Actions workflow run and place it under the target folder.
set -euo pipefail

# input params
PARENT_FOLDER=${1:-target}
METRICS_ARTIFACTS_BRANCH=${2:-main}

# env vars
REPOSITORY_NAME=${REPOSITORY_NAME:-localstack-pro}
ARTIFACT_ID=${ARTIFACT_ID:-implemented_features_python-amd64}
WORKFLOW=${WORKFLOW:-"Az / Build, Test, Push"}
PREFIX_ARTIFACT=${PREFIX_ARTIFACT:-}
FILTER_SUCCESS=${FILTER_SUCCESS:-1}
LIMIT=${LIMIT:-20}

RESOURCE_FOLDER=${RESOURCE_FOLDER:-}
REPOSITORY_OWNER=${REPOSITORY_OWNER:-localstack}
TARGET_FOLDER="$PARENT_FOLDER/$RESOURCE_FOLDER"

# Stage the download in a scratch folder so a partial fetch never pollutes
# the target folder.
TMP_FOLDER="$PARENT_FOLDER/tmp_download"
mkdir -p "$TMP_FOLDER"

echo "Searching for artifact '$ARTIFACT_ID' in workflow '$WORKFLOW' on branch '$METRICS_ARTIFACTS_BRANCH' in repo '$REPOSITORY_OWNER/$REPOSITORY_NAME'."

# Build the jq selector that picks acceptable runs.
if [[ "$FILTER_SUCCESS" == "1" ]]; then
  echo "Filtering runs by conclusion=success"
  SELECTOR='.[] | select(.conclusion=="success")'
else
  echo "Filtering runs by completed status (success/failure)"
  SELECTOR='.[] | select(.status=="completed" and (.conclusion=="failure" or .conclusion=="success"))'
fi

# Collect candidate run ids, newest first.
mapfile -t RUN_IDS < <(
  gh run list \
    --limit "$LIMIT" \
    --branch "$METRICS_ARTIFACTS_BRANCH" \
    --repo "$REPOSITORY_OWNER/$REPOSITORY_NAME" \
    --workflow "$WORKFLOW" \
    --json databaseId,conclusion,status \
    --jq "$SELECTOR | .databaseId"
)

if [[ "${#RUN_IDS[@]}" -eq 0 ]]; then
  echo "No matching workflow runs found."
  exit 1
fi

# Walk the candidates until one of them yields the artifact; not every run
# uploads it, so individual download failures are tolerated.
for candidate in "${RUN_IDS[@]}"; do
  if [[ -z "$candidate" || "$candidate" == "null" ]]; then
    continue
  fi
  echo "Trying run id: $candidate"

  gh run download "$candidate" --repo "$REPOSITORY_OWNER/$REPOSITORY_NAME" -p "$ARTIFACT_ID" -D "$TMP_FOLDER" || true

  if [[ "$(ls -1 "$TMP_FOLDER" 2>/dev/null | wc -l)" -gt 0 ]]; then
    echo "Downloaded artifact successfully."
    break
  fi
done

if [[ "$(ls -1 "$TMP_FOLDER" 2>/dev/null | wc -l)" -eq 0 ]]; then
  echo "Failed to download artifact '$ARTIFACT_ID' from the checked workflow runs."
  exit 1
fi

echo "Moving artifact to $TARGET_FOLDER"
mkdir -p "$TARGET_FOLDER"
if [[ -z "${PREFIX_ARTIFACT}" ]]; then
  # No prefix requested: copy the artifact contents verbatim.
  cp -R "$TMP_FOLDER"/. "$TARGET_FOLDER"/
else
  # Prefix each CSV file name so multiple artifacts can coexist.
  while IFS= read -r csv_file; do
    mv -- "$csv_file" "$TARGET_FOLDER/$PREFIX_ARTIFACT-$(basename "$csv_file")"
  done < <(find "$TMP_FOLDER" -type f -name "*.csv")
fi

rm -rf "$TMP_FOLDER"
echo "Contents of $TARGET_FOLDER:"
ls -la "$TARGET_FOLDER"

0 commit comments

Comments
 (0)