|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# purldb is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/aboutcode-org/purldb for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | +import shutil |
| 10 | +import subprocess |
| 11 | +from urllib.parse import urlparse |
| 12 | + |
| 13 | +""" |
| 14 | +Clone the Swift Index repo (https://github.com/SwiftPackageIndex/PackageList) and the Minecode Pipelines Swift repo. |
| 15 | +Read the packages.json file from the Swift Index repo to get a list of Git repositories. |
| 16 | +Fetch the tags for each repo using the git ls-remote command, |
| 17 | +then create package URLs for each repo with its version and store them in the Minecode Pipelines Swift repo. |
| 18 | +""" |
| 19 | + |
| 20 | + |
def is_safe_repo_url(repo_url: str) -> bool:
    """
    Return True if ``repo_url`` is an HTTPS URL on github.com whose path either
    ends with ".git" or contains at least two "/" separators.

    The scheme and host checks apply to both accepted path forms. Without
    explicit grouping, ``a and b and c or d`` is evaluated as
    ``(a and b and c) or d``, which would accept any string whose path merely
    contains two slashes (for example ``file:///etc/passwd`` or an
    option-looking string such as ``--upload-pack=/x/y``).

    Return False when the URL cannot be parsed.
    """
    try:
        parsed = urlparse(repo_url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. unbalanced brackets
        # in the host part); such a URL is never safe.
        return False

    # The scheme and host requirements are mandatory for every URL.
    if parsed.scheme != "https" or parsed.netloc != "github.com":
        return False

    path = parsed.path
    return path.endswith(".git") or path.count("/") >= 2
| 30 | + |
| 31 | + |
def fetch_git_tags_raw(repo_url: str, timeout: int = 60, logger=None) -> str | None:
    """
    Run ``git ls-remote`` against ``repo_url`` and return its stripped standard
    output, or None on error.

    ``timeout`` is the maximum number of seconds to wait for the git command.
    ``logger`` is an optional callable accepting a single message string; when
    it is None, messages are discarded.

    Return None when the git executable is not found in PATH, when git exits
    with a non-zero status, or when the command times out.
    Raise a ValueError when ``repo_url`` is rejected by ``is_safe_repo_url``.
    """

    def log(message):
        # ``logger`` defaults to None: calling it unconditionally would raise a
        # TypeError on every error path instead of returning None.
        if logger is not None:
            logger(message)

    git_executable = shutil.which("git")
    if git_executable is None:
        log("Git executable not found in PATH")
        return None

    if not is_safe_repo_url(repo_url):
        raise ValueError(f"Unsafe repo URL: {repo_url}")

    try:
        result = subprocess.run(
            # "--" ends option parsing so that the URL can never be
            # interpreted by git as a command line option.
            [git_executable, "ls-remote", "--", repo_url],
            capture_output=True,
            text=True,
            check=True,
            timeout=timeout,
        )
    except subprocess.CalledProcessError as e:
        log(f"Failed to fetch tags for {repo_url}: {e}")
        return None
    except subprocess.TimeoutExpired:
        log(f"Timeout fetching tags for {repo_url}")
        return None

    return result.stdout.strip()
| 56 | + |
| 57 | + |
| 58 | +# FIXME duplicated with miners github |
def split_org_repo(url_like):
    """
    Return an (org, name) tuple built from the last two non-empty "/"-separated
    segments of ``url_like``, which is either a URL-like string pointing to a
    GitHub repo or a repo name as in org/name. A trailing ".git" is removed
    from the name.

    Raise a ValueError if there are fewer than two non-empty segments.

    For example:
    >>> split_org_repo('foo/bar')
    ('foo', 'bar')
    >>> split_org_repo('https://api.github.com/repos/foo/bar/')
    ('foo', 'bar')
    >>> split_org_repo('github.com/foo/bar/')
    ('foo', 'bar')
    >>> split_org_repo('git://github.com/foo/bar.git')
    ('foo', 'bar')
    """
    stripped = (piece.strip() for piece in url_like.split("/"))
    # Drop the empty pieces produced by leading, trailing or doubled slashes.
    parts = [piece for piece in stripped if piece]
    if len(parts) < 2:
        raise ValueError(f"Not a GitHub-like URL: {url_like}")

    # Only the last two segments matter; anything before is scheme/host/prefix.
    *_, org, name = parts
    return org, name.removesuffix(".git")
| 82 | + |
| 83 | + |
| 84 | +# FIXME duplicated with purl2vcs.find_source_repo.get_tags_and_commits_from_git_output |
def get_tags_and_commits_from_git_output(git_ls_remote):
    """
    Yield tuples of (tag, commit), given a ``git ls-remote`` output string.

    Each line is expected to be tab-separated as in
    ``<commit>\\trefs/tags/<tag>``. Lines without a tab and lines whose ref
    does not start with "refs/tags/" are skipped. Nothing is yielded when
    ``git_ls_remote`` is None or empty.

    Only the leading "refs/tags/" prefix is removed from the ref: a tag whose
    own name contains that substring is yielded unchanged. No other filtering
    or normalization is applied to the tag names.
    """
    if not git_ls_remote:
        return

    tag_prefix = "refs/tags/"
    for line in git_ls_remote.split("\n"):
        # line: kjwfgeklngelkfjofjeo123	refs/tags/1.2.3
        fields = line.split("\t")
        # fields: ["kjwfgeklngelkfjofjeo123", "refs/tags/1.2.3"]
        if len(fields) < 2:
            continue
        commit, ref = fields[0], fields[1]
        if not ref.startswith(tag_prefix):
            continue
        # Strip the prefix only: str.replace would also remove any later
        # occurrence of "refs/tags/" inside the tag name itself.
        yield ref.removeprefix(tag_prefix), commit
0 commit comments