Skip to content

Commit bc3ed0e

Browse files
arash77 and Copilot authored
Remove uninstallable revisions from lock files (#929)
* Add workflow to fix outdated tools and implement script for uninstallable revisions * Handle errors by exiting the script in fix_outdated.py * Remove ephemeris subproject reference * Implement retry logic with exponential backoff for repository fetch operations * Remove comment step from PR update workflow in fix-outdated-tools.yml * Refactor tool removal logic to ensure revisions are updated correctly and improve logging clarity * reverse over installable to get the latest revisions * Fix: Add missing github-token to uv installation step * Fix: Refactor fix_uninstallable function for improved error handling and path management * Fix: Update upload artifact step to include uninstallable revisions and ignore if no files found * Fix: Only write uninstallable revisions file if there are removed revisions * Add scheduled trigger for the fix outdated tools workflow * Rename uninstallable revisions to not-installable revisions in workflow and script * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Refactor retry_with_backoff to use a local max_retries variable instead of a global constant --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 02bbb84 commit bc3ed0e

2 files changed

Lines changed: 289 additions & 0 deletions

File tree

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# Removes tool revisions that are no longer installable from every
# `*.yaml.lock` file and opens (or updates) a single pull request with the
# result. One matrix job runs per lock file; each uploads its result as an
# artifact, and the final job merges the artifacts into one working tree.
name: Fix Outdated Tools

on:
  workflow_dispatch:
  schedule:
    # 09:00 on the first day of every month.
    - cron: '0 9 1 * *'

jobs:
  get-lockfiles:
    runs-on: ubuntu-latest
    outputs:
      lockfiles: ${{ steps.set-matrix.outputs.lockfiles }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Get all lock files
        id: set-matrix
        run: |
          # Emit the lock file names as a compact JSON array for the matrix.
          lockfiles=$(ls *.yaml.lock | jq -R -s -c 'split("\n")[:-1]')
          echo "lockfiles=$lockfiles" >> "$GITHUB_OUTPUT"

  fix-outdated:
    needs: get-lockfiles
    runs-on: ubuntu-latest
    strategy:
      matrix:
        lockfile: ${{ fromJson(needs.get-lockfiles.outputs.lockfiles) }}
      # One failing lock file must not cancel the others.
      fail-fast: false
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install dependencies
        run: uv pip install --system -r requirements.txt

      - name: Fix ${{ matrix.lockfile }}
        # Pass the file name through the environment instead of expanding the
        # expression inside the script, so an unusual file name cannot be
        # interpreted as shell syntax.
        env:
          LOCKFILE: ${{ matrix.lockfile }}
        run: python scripts/fix_outdated.py "$LOCKFILE"

      - name: Upload changes
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: ${{ matrix.lockfile }}
          path: |
            ${{ matrix.lockfile }}
            *.not-installable-revisions.yaml
          if-no-files-found: ignore

  create-pr:
    needs: fix-outdated
    # Run even when some matrix jobs failed so successful fixes still land.
    if: always()
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          fetch-depth: 0

      - name: Download all artifacts
        uses: actions/download-artifact@v5
        with:
          merge-multiple: true

      - name: Check for changes
        id: check_changes
        run: |
          if [[ -n $(git status --porcelain) ]]; then
            echo "changes=true" >> "$GITHUB_OUTPUT"
            echo "Changes detected in lock files"
          else
            echo "changes=false" >> "$GITHUB_OUTPUT"
            echo "No changes detected"
          fi

      - name: Create or update Pull Request
        id: cpr
        if: steps.check_changes.outputs.changes == 'true'
        uses: peter-evans/create-pull-request@v7
        with:
          branch: fix-outdated-tools
          commit-message: Remove not-installable tool revisions
          title: 'Remove not-installable tool revisions'
          body: |
            This PR was automatically generated by the `fix-outdated-tools` workflow.
            Workflow run: [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
          delete-branch: true

scripts/fix_outdated.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
import argparse
2+
import logging
3+
import sys
4+
import time
5+
from pathlib import Path
6+
from concurrent.futures import ThreadPoolExecutor, as_completed
7+
from collections import defaultdict
8+
9+
import yaml
10+
from bioblend import toolshed
11+
12+
# Script-wide logging: timestamped INFO-level records on the root logger,
# plus a module-level logger shared by every function in this file.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)
16+
17+
18+
def retry_with_backoff(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying failures that look transient.

    A failure counts as transient when the exception text contains one of the
    markers below (HTTP 502/503/504, timeouts, connection errors). Up to five
    attempts are made; the wait between attempts starts at 2 seconds and
    doubles each time, capped at 60 seconds.

    Returns:
        Whatever ``func`` returns on the first successful attempt.

    Raises:
        Exception: the original exception, immediately when it is not
            transient, or after the final attempt when it is.
    """
    max_retries = 5
    transient_markers = ("502", "503", "504", "timed out", "timeout", "Connection")
    backoff = 2

    attempt = 0
    while attempt < max_retries:
        try:
            return func(*args, **kwargs)
        except Exception as e:
            error_msg = str(e)
            looks_transient = any(marker in error_msg for marker in transient_markers)
            out_of_attempts = attempt >= max_retries - 1
            # Give up right away on permanent errors or on the last attempt.
            if not looks_transient or out_of_attempts:
                raise e
            logger.warning(
                f"Attempt {attempt + 1}/{max_retries} failed: {error_msg}. Retrying in {backoff}s..."
            )
            time.sleep(backoff)
            backoff = min(backoff * 2, 60)
        attempt += 1
    # Defensive: every loop iteration above either returns or raises.
    raise Exception("Retry failed after max attempts")
40+
41+
42+
def get_tool_versions(ts, name, owner, revision):
    """Return the ``(tool id, tool version)`` pairs provided by one revision.

    The install info for ``name``/``owner`` at ``revision`` is requested
    through ``retry_with_backoff``. When the response is a list with more than
    one element, the ``valid_tools`` entries of its second element are read;
    entries lacking ``id`` or ``version`` are ignored.

    Returns:
        set: ``(id, version)`` tuples; empty when the response has another
        shape or lists no valid tools.

    Any exception while fetching or parsing is logged and terminates the
    process with exit status 1.
    """
    try:
        install_info = retry_with_backoff(
            ts.repositories.get_repository_revision_install_info, name, owner, revision
        )
        has_metadata = isinstance(install_info, list) and len(install_info) > 1
        if not has_metadata:
            return set()
        return {
            (entry["id"], entry["version"])
            for entry in install_info[1].get("valid_tools", [])
            if "id" in entry and "version" in entry
        }
    except Exception as e:
        logger.warning(f"{name},{owner}: failed to fetch {revision} ({e})")
        sys.exit(1)
57+
58+
59+
def fetch_versions_parallel(ts, name, owner, revisions, max_workers=10):
    """Fetch tool versions for several revisions of one repository concurrently.

    Each revision is handed to ``get_tool_versions`` on a thread pool of
    ``max_workers`` threads.

    Returns:
        dict: revision -> set of ``(tool id, tool version)`` tuples.

    An exception raised by a worker is logged and terminates the process with
    exit status 1.
    """
    results = {}
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Remember which revision each future belongs to.
        pending = {}
        for revision in revisions:
            task = pool.submit(get_tool_versions, ts, name, owner, revision)
            pending[task] = revision

        for finished in as_completed(pending):
            rev = pending[finished]
            try:
                versions = finished.result()
            except Exception as e:
                logger.warning(f"{name},{owner}: error fetching {rev} ({e})")
                sys.exit(1)
            results[rev] = versions
    return results
74+
75+
76+
def _not_installable_path(lockfile_path):
    """Return the sidecar path that records revisions removed from a lock file.

    ``foo.yaml.lock`` maps to ``foo.not-installable-revisions.yaml`` in the
    same directory.
    """
    suffix = ".not-installable-revisions.yaml"
    sidecar_name = lockfile_path.name.replace(".yaml.lock", suffix)
    if sidecar_name == lockfile_path.name:
        # The name contains no ".yaml.lock", so replace() changed nothing and
        # the sidecar would be the lock file itself: it would be parsed as the
        # removal history and then overwritten by the report. Use a distinct
        # name instead.
        sidecar_name = lockfile_path.name + suffix
    return lockfile_path.with_name(sidecar_name)


def fix_uninstallable(lockfile_name, toolshed_url):
    """Replace not-installable revisions in a lock file with installable ones.

    For every tool in the lock file, the Tool Shed at ``toolshed_url`` is asked
    for the tool's ordered installable revisions. Each locked revision missing
    from that list is matched, by its set of ``(tool id, tool version)``
    pairs, to an installable revision providing the same set; the installable
    revision is added if absent and the old one is removed.

    The lock file is rewritten in place. Removed revisions are accumulated,
    together with those already recorded by earlier runs, in a sibling
    ``*.not-installable-revisions.yaml`` file, which is only written when
    there is something to record.

    Args:
        lockfile_name: Path of the lock file to process.
        toolshed_url: Base URL of the Tool Shed to query.

    The process exits with status 1 when a revision's versions cannot be
    fetched, or when a revision with known versions has no installable
    revision with the same versions. Nothing is written in that case.
    """
    ts = toolshed.ToolShedInstance(url=toolshed_url)
    lockfile_path = Path(lockfile_name)
    with open(lockfile_path) as f:
        lockfile = yaml.safe_load(f) or {}
    # ``or []`` also covers a key that is present but explicitly null.
    locked_tools = lockfile.get("tools") or []
    total = len(locked_tools)

    not_installable_file = _not_installable_path(lockfile_path)

    # (name, owner) -> revisions removed so far, seeded from earlier runs.
    removed_map = defaultdict(set)
    try:
        with open(not_installable_file) as f:
            not_installable_data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        not_installable_data = {}
    for t in not_installable_data.get("tools") or []:
        # update() rather than assignment so duplicate entries are merged.
        removed_map[(t["name"], t["owner"])].update(t.get("revisions") or [])

    logger.info(f"Processing {total} tools from {lockfile_path.name}...")
    changed, skipped = 0, 0

    for i, tool in enumerate(locked_tools):
        if i % 10 == 0:
            logger.info(
                f"Progress: {i}/{total} tools ({skipped} skipped, {changed} changed)"
            )

        name, owner = tool.get("name"), tool.get("owner")
        current_revisions = set(tool.get("revisions") or [])
        try:
            installable_list = retry_with_backoff(
                ts.repositories.get_ordered_installable_revisions, name, owner
            )
        except Exception as e:
            # Best effort: leave this tool untouched and move on.
            logger.warning(f"{name},{owner}: could not get installable revisions ({e})")
            continue

        uninstallable = current_revisions - set(installable_list)
        if not uninstallable:
            skipped += 1
            continue

        all_revs = list(uninstallable) + installable_list
        version_cache = fetch_versions_parallel(ts, name, owner, all_revs)

        # Version signature -> installable revision. Later list entries
        # overwrite earlier ones, so the last revision with a given signature
        # wins.
        installable_signatures = {}
        for rev in installable_list:
            sig = frozenset(version_cache.get(rev, []))
            if sig:
                installable_signatures[sig] = rev

        to_remove = set()
        for cur in uninstallable:
            cur_sig = frozenset(version_cache.get(cur, []))
            if not cur_sig:
                # No version information for this revision, so it cannot be
                # matched. It is dropped only when at least one installable
                # revision exists.
                # NOTE(review): the message says "keeping {nxt}", but nxt is
                # not added to tool["revisions"] here - confirm whether it
                # should be appended when missing.
                if installable_list:
                    nxt = installable_list[-1]
                    logger.info(f"{name},{owner}: unverifiable {cur}, keeping {nxt}")
                    to_remove.add(cur)
                continue

            nxt = installable_signatures.get(cur_sig)
            if not nxt:
                logger.warning(
                    f"{name},{owner}: no matching installable revision for {cur}"
                )
                sys.exit(1)

            logger.info(f"{name},{owner}: removing {cur} in favor of {nxt}")
            if nxt not in current_revisions:
                tool["revisions"].append(nxt)
            to_remove.add(cur)

        if to_remove:
            changed += 1
            tool["revisions"] = sorted(set(tool["revisions"]) - to_remove)
            removed_map[(name, owner)].update(to_remove)

    logger.info(
        f"Completed: {total} tools processed, {skipped} skipped, {changed} changed"
    )

    with open(lockfile_path, "w") as f:
        yaml.dump(lockfile, f, sort_keys=False, default_flow_style=False)

    if removed_map:
        not_installable_output = {
            "tools": [
                {"name": n, "owner": o, "revisions": sorted(revs)}
                for (n, o), revs in removed_map.items()
            ]
        }
        with open(not_installable_file, "w") as f:
            yaml.dump(
                not_installable_output, f, sort_keys=False, default_flow_style=False
            )
176+
177+
178+
if __name__ == "__main__":
    # Command-line entry point: one positional lock file path plus an
    # optional Tool Shed base URL.
    cli = argparse.ArgumentParser()
    cli.add_argument("lockfile", help="Tool.yaml.lock file path")
    cli.add_argument(
        "--toolshed",
        help="Toolshed base URL",
        default="https://toolshed.g2.bx.psu.edu",
    )
    options = cli.parse_args()

    fix_uninstallable(options.lockfile, options.toolshed)

0 commit comments

Comments
 (0)