diff --git a/.github/workflows/update-iwc-tools.yml b/.github/workflows/update-iwc-tools.yml new file mode 100644 index 0000000..36fe4da --- /dev/null +++ b/.github/workflows/update-iwc-tools.yml @@ -0,0 +1,45 @@ +name: Install and update IWC tools + +on: + schedule: + # At 10:00 every day + - cron: "0 10 * * *" + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + update-repos: + permissions: + contents: write + pull-requests: write + runs-on: ubuntu-latest + strategy: + matrix: + toolset: [galaxy-qa1.galaxy.cloud.e-infra.cz] + steps: + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + architecture: 'x64' + - name: Checkout + uses: actions/checkout@v4 + - name: Install dependencies + run: pip install -r requirements.txt + - name: Clone IWC repo + run: git clone https://github.com/galaxyproject/iwc /tmp/iwc + - name: Run update script + run: python scripts/get_iwc_tools.py -w /tmp/iwc -s ${{ matrix.toolset }} -u uncategorized.yml + - name: Get current date + id: date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + - name: Create Pull Request + uses: peter-evans/create-pull-request@v7 + with: + branch: iwc-update-${{ matrix.toolset }} + committer: CESNETbot + title: Install IWC tools for ${{ matrix.toolset }} ${{ steps.date.outputs.date }} + commit-message: "output of get_iwc_tools.py" + labels: automated + assignees: martenson + reviewers: martenson diff --git a/requirements.txt b/requirements.txt index 4fa8b3a..45bbfc2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -bioblend -ephemeris -pykwalify -PyYAML +bioblend>=1.2.0 +ephemeris>=0.10.10 +pykwalify>=1.6.1 +PyYAML>=4.2 +setuptools>=76.1.0 diff --git a/scripts/get_iwc_tools.py b/scripts/get_iwc_tools.py new file mode 100644 index 0000000..5fcb270 --- /dev/null +++ b/scripts/get_iwc_tools.py @@ -0,0 +1,122 @@ +import argparse +import glob +import json +import os +from collections import defaultdict + +import yaml + 
from ephemeris.generate_tool_list_from_ga_workflow_files import (
    generate_repo_list_from_workflow,
)
from steal_sections import steal_section
from fix_lockfile import update_file as fix_lockfile
from update_tool import update_file

GALAXY_URL = "https://usegalaxy.eu"


def find_workflows(workflow_path):
    """Return the paths of all ``.ga`` files found below *workflow_path*.

    The tree is walked recursively with ``os.walk``; every returned path is
    the containing directory joined with the file name.
    """
    workflow_files = []
    for dirpath, _, filenames in os.walk(workflow_path):
        workflow_files.extend(
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.endswith(".ga")
        )
    return workflow_files


def _union_revisions(*revision_lists):
    """Return the sorted union of several revision lists (``None`` counts as empty)."""
    merged = set()
    for revisions in revision_lists:
        merged.update(revisions or ())
    return sorted(merged)


def _dedupe_tools(tools):
    """Collapse entries sharing (owner, name) into their first occurrence.

    Revisions of later duplicates are unioned into the first entry; the
    relative order of first occurrences is preserved (dicts keep insertion
    order). Entries without duplicates are returned untouched.
    """
    unique = {}
    for tool in tools:
        key = (tool["owner"], tool["name"])
        if key in unique:
            first = unique[key]
            first["revisions"] = _union_revisions(
                first.get("revisions"), tool.get("revisions")
            )
        else:
            unique[key] = tool
    return list(unique.values())


def add_repos(workflow_path, toolset, uncategorized_file):
    """Add the tool repositories used by workflows to a toolset directory.

    Steps, in order:

    1. Collect every ``.ga`` file below *workflow_path* and turn them into a
       repository list via ``generate_repo_list_from_workflow``.
    2. Call ``steal_section`` so repositories get sorted into section files
       of *toolset*, with unmatched ones written to
       ``<toolset>/<uncategorized_file>``.
    3. Run ``fix_lockfile`` and ``update_file`` on every ``<toolset>/*.yml``.
    4. Union the revisions requested by the workflows into the matching
       entry of the ``*.yml.lock`` files, de-duplicate the entries of each
       lock file, and write the lock files back.

    Returns ``None``; the result is the set of files written under *toolset*.
    """
    workflow_paths = find_workflows(workflow_path)
    repo_list = generate_repo_list_from_workflow(workflow_paths, "Uncategorized")
    steal_section(
        {"tools": repo_list},
        toolset,
        leftovers_file=os.path.join(toolset, uncategorized_file),
        galaxy_url=GALAXY_URL,
        verbose=True,
    )

    for section_file in glob.glob(f"{toolset}/*.yml"):
        fix_lockfile(
            section_file,
            install_repository_dependencies=False,
            install_resolver_dependencies=False,
        )
        # NOTE(review): the meaning of without=True is defined in update_tool,
        # which is not visible from this file.
        update_file(section_file, without=True)

    lock_file_contents = {}
    # (owner, name) -> first lock-file entry for that repository, taking lock
    # files in glob order and entries in file order.
    first_entry = {}
    for lock_file in glob.glob(f"{toolset}/*.yml.lock"):
        with open(lock_file, encoding="utf-8") as lock_file_fh:
            lock_contents = yaml.safe_load(lock_file_fh)
        tools = lock_contents.get("tools") if isinstance(lock_contents, dict) else None
        if tools is None:
            # An empty or malformed lock file has nothing to merge into;
            # leave it untouched instead of crashing on it.
            print(f"Skipping lock file without a tools list: {lock_file}")
            continue
        lock_file_contents[lock_file] = lock_contents
        for repo in tools:
            first_entry.setdefault((repo["owner"], repo["name"]), repo)

    # Add the revisions the workflows ask for to the entry that already
    # tracks the repository; repositories unknown to every lock file are skipped.
    for workflow_repo in repo_list:
        repo = first_entry.get((workflow_repo["owner"], workflow_repo["name"]))
        if repo is not None:
            repo["revisions"] = _union_revisions(
                repo.get("revisions"), workflow_repo.get("revisions")
            )

    for lock_file, entries in lock_file_contents.items():
        entries["tools"] = _dedupe_tools(entries["tools"])
        with open(lock_file, "w", encoding="utf-8") as lock_file_fh:
            # NOTE(review): the JSON round-trip presumably reduces the data to
            # plain, unshared containers so the dump carries no YAML
            # anchors/aliases - confirm before removing it.
            yaml.safe_dump(json.loads(json.dumps(entries)), stream=lock_file_fh)


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="")
    # Required: without it os.walk() would receive None and raise TypeError.
    parser.add_argument("-w", "--workflow-path", required=True, help="Path to directory with workflows")
    parser.add_argument("-s", "--toolset", default="usegalaxy.org", help="The toolset dir to add versions to")
    parser.add_argument("-u", "--uncategorized-file", default="leftovers.yaml", help="The file to store leftover (uninstalled) repos in.")

    args = parser.parse_args()

    add_repos(workflow_path=args.workflow_path, toolset=args.toolset, uncategorized_file=args.uncategorized_file)
#!/usr/bin/env python
#
# given an input yaml and a toolset, find tools in sections on another server and add to toolset

import yaml
import glob
import os
import string
import argparse
import requests

# Characters kept verbatim when a section label is turned into a section id.
_SECTION_ID_CHARS = frozenset(string.ascii_letters + string.digits)
# Seconds to wait on the remote Galaxy server (connect / between reads)
# before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 300


def _section_id(section_label):
    """Derive a section id from a panel section label.

    Every character that is not an ASCII letter or digit becomes ``_`` and
    the result is lower-cased.
    """
    return ''.join(c if c in _SECTION_ID_CHARS else '_' for c in section_label).lower()


def _load_yaml(path):
    """Parse the YAML document at *path*, closing the file afterwards."""
    with open(path, 'r', encoding='utf-8') as handle:
        return yaml.safe_load(handle)


def steal_section(repo_dict, toolset: str, leftovers_file: str, galaxy_url: str, verbose: bool = False):
    """Sort repositories into toolset section files using another server's panel.

    The tool list of *galaxy_url* is fetched from ``/api/tools?in_panel=false``.
    Each repository in ``repo_dict['tools']`` is then handled as follows:

    * already listed in any ``<toolset>/*.yml`` file: skipped;
    * known to the remote server: appended (name and owner only) to
      ``<toolset>/<section id>.yml``, which is created if missing;
    * otherwise: written (name and owner only) to *leftovers_file*.

    Args:
        repo_dict: mapping with a ``'tools'`` list of dicts that have at least
            ``'name'`` and ``'owner'`` keys.
        toolset: directory holding the section ``.yml`` files.
        leftovers_file: path of the file receiving unmatched repositories.
        galaxy_url: base URL of the Galaxy server to query.
        verbose: print progress messages when true.

    Raises:
        requests.HTTPError: the remote server answered with an error status.
    """
    section_files = glob.glob(os.path.join(toolset, "*.yml"))

    other_tools = {}
    other_labels = {}

    url = f"{galaxy_url}/api/tools?in_panel=false"
    if verbose:
        print(f"Loading tools from: {url}")
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # Fail with the HTTP error rather than a confusing JSON decoding error.
    response.raise_for_status()
    for tool in response.json():
        if 'tool_shed_repository' not in tool:
            continue
        section_label = tool.get('panel_section_name')
        if not section_label:
            # No panel section to copy from this tool.
            continue
        # this overwrites multi-tool repos but that's not a biggie
        repo = tool['tool_shed_repository']
        tool_key = (repo['name'], repo['owner'])
        section_id = _section_id(section_label)
        other_tools[tool_key] = section_id
        other_labels[section_id] = section_label

    existing = {}
    leftover_tools = []
    # section id -> insertion-ordered set of (name, owner); a dict is used so
    # a repository referenced by several workflows is recorded only once.
    new = {}

    for section_file in section_files:
        if verbose:
            print(f"Reading section file: {section_file}")
        a = _load_yaml(section_file)
        for tool in (a or {}).get('tools') or []:
            existing[(tool['name'], tool['owner'])] = section_file

    for tool in repo_dict['tools']:
        tool_key = (tool['name'], tool['owner'])
        if tool_key in existing:
            if verbose:
                print(f"Skipping existing tool: {tool['owner']}/{tool['name']}")
        elif tool_key in other_tools:
            new.setdefault(other_tools[tool_key], {})[tool_key] = None
        else:
            leftover_tools.append(tool)

    # Report repositories, not sections: len(new) would count section files.
    matched = sum(len(repos) for repos in new.values())
    print(f"Found sections for {matched} tools ({len(leftover_tools)} left over)")

    for section, repos in new.items():
        section_file = os.path.join(toolset, section + ".yml")
        if not os.path.exists(section_file):
            a = {'tool_panel_section_label': other_labels[section], 'tools': []}
            if verbose:
                print(f"Adding to new section file: {section_file}")
        else:
            a = _load_yaml(section_file)
            if verbose:
                print(f"Adding to existing section file: {section_file}")
        tools = a['tools']
        # Only add tools that don't already exist in this section file
        existing_tools = {(tool['name'], tool['owner']) for tool in tools}
        tools.extend(
            {"name": name, "owner": owner}
            for name, owner in repos
            if (name, owner) not in existing_tools
        )

        with open(section_file, 'w', encoding='utf-8') as out:
            yaml.dump(a, out, default_flow_style=False)

    if leftover_tools:
        kept = []
        # When the leftovers file is itself one of the section files scanned
        # above, its entries were treated as "existing" and are therefore not
        # in leftover_tools; carry them over so the rewrite does not drop them.
        scanned = {os.path.abspath(path) for path in section_files}
        if os.path.abspath(leftovers_file) in scanned:
            kept = list((_load_yaml(leftovers_file) or {}).get('tools') or [])
        seen = {(tool['name'], tool['owner']) for tool in kept}
        for tool in leftover_tools:
            tool_key = (tool['name'], tool['owner'])
            if tool_key in seen:
                continue
            seen.add(tool_key)
            # Keep only name and owner fields to match the standard .yml format
            kept.append({'name': tool['name'], 'owner': tool['owner']})

        with open(leftovers_file, 'w', encoding='utf-8') as out:
            yaml.dump({'tool_panel_section_label': 'Uncategorized', 'tools': kept}, out, default_flow_style=False)


def main():
    """Command-line entry point: load the tools YAML and run ``steal_section``."""

    VERSION = 0.1

    parser = argparse.ArgumentParser(description="")
    parser.add_argument("-t", "--tools", default="tools.yaml", help="Input tools.yaml")
    parser.add_argument("-s", "--toolset", default="usegalaxy.org", help="The toolset dir to add versions to")
    parser.add_argument("-l", "--leftovers-file", default="leftovers.yaml", help="The file to store leftover (unmatched) repos in.")
    parser.add_argument("-g", "--galaxy-url", default="https://usegalaxy.eu", help="The Galaxy server to steal from")
    parser.add_argument("--version", action='store_true')
    parser.add_argument("--verbose", action='store_true')

    args = parser.parse_args()

    if args.version:
        # Report this script's own name (the message used to name merge_versions.py).
        print("steal_sections.py version: %.1f" % VERSION)
        return

    with open(args.tools, encoding='utf-8') as fh:
        repo_dict = yaml.safe_load(fh)
    toolset = args.toolset
    steal_section(repo_dict=repo_dict, toolset=toolset, leftovers_file=args.leftovers_file, galaxy_url=args.galaxy_url, verbose=args.verbose)


if __name__ == "__main__":
    main()