diff --git a/.gitignore b/.gitignore
index 7858c187..c02e1e33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,5 +24,7 @@ repos.txt
!package_neors.json
!package_npm.json
!test_data/api_responses/*.json
+!**/test_data/api_responses/codeberg/*.json
+!**/test_data/api_responses/bitbucket/*.json
uv.lock
.python-version
diff --git a/README.md b/README.md
index 68cad3d1..380dcdb1 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ A command line interface for automatically extracting relevant metadata from cod
## Features
-Given a readme file (or a GitHub/Gitlab repository) SOMEF will extract the following categories (if present), listed in alphabetical order:
+Given a readme file (or a GitHub/Gitlab/Codeberg/Bitbucket repository) SOMEF will extract the following categories (if present), listed in alphabetical order:
- **Acknowledgement**: Text acknowledging funding sources or contributors
- **Application domain**: The application domain of the repository. Current supported domains include: Astrophysics, Audio, Computer vision, Graphs, Natural language processing, Reinforcement learning, Semantc web, Sequential. Domains are not mutually exclusive. These domains have been extracted from [awesome lists](https://github.com/topics/awesome-list) and [Papers with code](https://paperswithcode.com/). Find more information in our [documentation](https://somef.readthedocs.io/en/latest/)
@@ -38,7 +38,7 @@ We recognize the following properties:
- Year: Year of publication
- Pages: Page range in the journal
- **Code of conduct**: Link to the code of conduct of the project
-- **Code repository**: Link to the GitHub/GitLab repository used for the extraction
+- **Code repository**: Link to the GitHub/GitLab/Codeberg/Bitbucket repository used for the extraction
- **Contact**: Contact person responsible for maintaining a software component
- **Continuous integration**: Link to continuous integration service(s)
- **Contribution guidelines**: Text indicating how to contribute to this code repository
@@ -72,7 +72,7 @@ We recognize the following properties:
- **Package files**: Links to package files used to wrap the project in a package.
- **Programming languages**: Languages used in the repository
- **Related papers**: URL to possible related papers within the repository stated within the readme file (from Arxiv)
-- **Releases** (GitHub only): Pointer to the available versions of a software component. For each release, somef will track the following properties:
+- **Releases**: Pointer to the available versions of a software component. For each release, somef will track the following properties:
- Description: Release notes
- Author: Agent responsible of creating the release
- Name: Name of the release
@@ -93,7 +93,7 @@ We recognize the following properties:
- **Usage examples**: Assumptions and considerations recorded by the authors when executing a software component, or examples on how to use it
- **Workflows**: URL and path to the computational workflow files present in the repository
-We use different supervised classifiers, header analysis, regular expressions, the GitHub/Gitlab API to retrieve all these fields (more than one technique may be used for each field) and language specific metadata parsers (e.g., for package files). Each extraction records its provenance, with the confidence and technique used on each step. For more information check the [output format description](https://somef.readthedocs.io/en/latest/output/)
+We use different supervised classifiers, header analysis, regular expressions, the GitHub/Gitlab/Codeberg/Bitbucket APIs to retrieve all these fields (more than one technique may be used for each field) and language specific metadata parsers (e.g., for package files). Each extraction records its provenance, with the confidence and technique used on each step. For more information check the [output format description](https://somef.readthedocs.io/en/latest/output/)
## Documentation
@@ -297,10 +297,10 @@ Usage: somef describe [OPTIONS]
Options:
-t, --threshold FLOAT Threshold to classify the text [required]
Input: [mutually_exclusive, required]
- -r, --repo_url URL Github/Gitlab Repository URL
+ -r, --repo_url URL Github/Gitlab/Codeberg/Bitbucket Repository URL
-d, --doc_src PATH Path to the README file source
-i, --in_file PATH A file of newline separated links to GitHub/
- Gitlab repositories
+ Gitlab/Codeberg/Bitbucket repositories
-l, --local_repo PATH Path to the local repository source. No APIs will be used
Output: [required_any]
diff --git a/docs/index.md b/docs/index.md
index e3ac76e7..00fe5ed8 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -46,7 +46,7 @@ We recognize the following properties:
- Year: Year of publication
- Pages: Page range in the journal
- **Code of conduct**: Link to the code of conduct of the project
-- **Code repository**: Link to the GitHub/GitLab repository used for the extraction
+- **Code repository**: Link to the GitHub/GitLab/Codeberg/Bitbucket repository used for the extraction
- **Contact**: Contact person responsible for maintaining a software component
- **Continuous integration**: Link to continuous integration service(s)
- **Contribution guidelines**: Text indicating how to contribute to this code repository
@@ -80,7 +80,7 @@ We recognize the following properties:
- **Package files**: Links to package files used to wrap the project in a package.
- **Programming languages**: Languages used in the repository
- **Related papers**: URL to possible related papers within the repository stated within the readme file (from Arxiv)
-- **Releases** (GitHub and Gitlab): Pointer to the available versions of a software component. For each release, somef will track the following properties:
+- **Releases** (GitHub, Gitlab, Codeberg and Bitbucket): Pointer to the available versions of a software component. For each release, somef will track the following properties:
- Assets: files attached to the release
- Description: Release notes
- Author: Agent responsible of creating the release
@@ -102,7 +102,7 @@ We recognize the following properties:
- **Usage examples**: Assumptions and considerations recorded by the authors when executing a software component, or examples on how to use it
- **Workflows**: URL and path to the computational workflow files present in the repository
-We use different supervised classifiers, header analysis, regular expressions, the GitHub/Gitlab API to retrieve all these fields (more than one technique may be used for each field) and language specific metadata parsers (e.g., for package files). Each extraction records its provenance, with the confidence and technique used on each step. For more information check the [output format description](https://somef.readthedocs.io/en/latest/output/)
+We use different supervised classifiers, header analysis, regular expressions, the GitHub/Gitlab/Codeberg/Bitbucket API to retrieve all these fields (more than one technique may be used for each field) and language specific metadata parsers (e.g., for package files). Each extraction records its provenance, with the confidence and technique used on each step. For more information check the [output format description](https://somef.readthedocs.io/en/latest/output/)
1 The available application domains currently are:
diff --git a/docs/output.md b/docs/output.md
index fcaf96ea..973c61ca 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -73,7 +73,7 @@ SOMEF aims to recognize the following categories (in alphabetical order):
- `code_of_conduct`: Link to the code of conduct file of the project
- `code_repository`: Link to the source code (typically the repository where the readme can be found)
- `contact`: Contact person responsible for maintaining a software component.
-- `continuous_integration`: Link to continuous integration service, supported on GitHub as well as in GitLab.
+- `continuous_integration`: Link to continuous integration service, supported on GitHub as well as in GitLab, Codeberg and Bitbucket.
- `contributing guidelines`: Guidelines indicating how to contribute to a software component.
- `contributor`: Contributors to this software. Note: Contributor metadata is exported from metadata files (e.g., CodeMeta, CONTRIBUTORS, etc.) not from git logs.
- `copyright_holder`: Entity or individual owning the rights to the software. The year is also extracted, if available.
@@ -167,7 +167,7 @@ Depending on the `type` of the result, additional properties may be found.
The following object `types` are currently supported:
-- `Release`: software releases of the current code repository, as available from GitHub.
+- `Release`: software releases of the current code repository, as available from GitHub, GitLab, Codeberg and Bitbucket (where they are derived from the repository tags).
- `Programming_language`: Programming language used in the repository.
- `License`: object representing all the metadata SOMEF extracts from a license.
- `Agent`: user (typically, a person) or organization responsible for authoring a software release or a paper.
@@ -317,6 +317,8 @@ The techniques can be of several types:
- `file_exploration`: the result comes from an exploration of the files in the repository
- `GitHub_API`: the result was obtained from the GitHub API.
- `GitLab_API`: the result was obtained from the GitLab API.
+- `Codeberg_API`: the result was obtained from the Codeberg API.
+- `Bitbucket_API`: the result was obtained from the Bitbucket API.
- `regular_expression`: the result was obtained after performing regular expressions on the files in the repository.
- `software_type_heuristics`: the result was obtained from analysis of the repository based on various heuristics from the README, code and extension analysis.
- `supervised_classification`: the results were obtained after running text classifiers trained for detecting that type of header.
@@ -405,6 +407,58 @@ A more detailed explanation is provided in the [wiki](https://github.com/oeg-upm
```
As shown in the Turtle snippet above, SOMEF represents the software as an entity, its relationship with each release (software version), the license found in the repository and the Person who owns it.
-->
+## Codeberg API Crosswalk
+
+When analyzing a Codeberg repository, SOMEF uses the [Codeberg API](https://codeberg.org/api/v1/swagger)
+(`GET /api/v1/repos/{owner}/{repo}`) to retrieve metadata. The table below shows how Codeberg API
+fields map to SOMEF categories:
+
+| SOMEF category | Codeberg API field | Notes |
+|---|---|---|
+| `name` | `name` | |
+| `description` | `description` | |
+| `code_repository` | `html_url` | |
+| `owner` | `owner.login` | |
+| `date_created` | `created_at` | |
+| `date_updated` | `updated_at` | |
+| `stars` | `stars_count` | In GitHub this field is `stargazers_count` |
+| `forks_count` | `forks_count` | |
+| `homepage` | `website` | In GitHub this field is `homepage` |
+| `keywords` | `topics` | |
+| `issue_tracker` | *(constructed)* | Built as `{html_url}/issues` |
+| `license` | *(not available)* | Codeberg API does not return license information |
+| `programming_languages` | `languages_url` | Additional GET request to the languages endpoint |
+| `releases` | `/repos/{owner}/{repo}/releases` | Additional GET request |
+
+For releases, the field mapping is identical to GitHub. The only differences are that Codeberg
+uses `attachments` instead of `assets` for release files, and it does not provide
+`author.type` (`AGENT_TYPE`) for release authors.
+
+
+## Bitbucket API Crosswalk
+
+When analyzing a Bitbucket repository, SOMEF uses the [Bitbucket Cloud API](https://developer.atlassian.com/cloud/bitbucket/rest/api-group-repositories/)
+(`GET /2.0/repositories/{workspace}/{repo_slug}`) to retrieve metadata. The table below shows how Bitbucket API
+fields map to SOMEF categories:
+
+| SOMEF category | Bitbucket API field | Notes |
+|---|---|---|
+| `name` | `slug` | |
+| `description` | `description` | |
+| `full_name` | `full_name` | Format: `{workspace}/{slug}` |
+| `code_repository` | `links.html.href` | |
+| `owner` | `owner.nickname` | Falls back to `owner.username` for team workspaces |
+| `date_created` | `created_on` | |
+| `date_updated` | `updated_on` | |
+| `homepage` | `website` | |
+| `forks_url` | `links.forks.href` | |
+| `download_url` | *(constructed)* | Built as `{html_url}/downloads` |
+| `issue_tracker` | *(constructed)* | Built as `{html_url}/issues` when `has_issues` is true |
+| `programming_languages` | `language` | Single string, not a dictionary with sizes |
+| `releases` | `/refs/tags` | Bitbucket has no dedicated releases endpoint; uses the tags endpoint |
+| `stars` | *(not available)* | Bitbucket does not have a stargazers feature |
+| `forks_count` | *(not available)* | Bitbucket does not expose fork counts in its API |
+
## Citation Reconciliation
diff --git a/src/somef/process_files.py b/src/somef/process_files.py
index 2b80ad0f..ae3daa6e 100644
--- a/src/somef/process_files.py
+++ b/src/somef/process_files.py
@@ -328,6 +328,7 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
# if repo_type == constants.RepositoryType.GITLAB:
if filename.endswith(".yml"):
+ category = None
if repo_type == constants.RepositoryType.GITLAB:
analysis = extract_workflows.is_file_continuous_integration_gitlab(os.path.join(repo_dir, file_path))
if analysis:
@@ -345,26 +346,29 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
{
constants.PROP_VALUE: workflow_url,
constants.PROP_TYPE: constants.URL
- }, 1, constants.TECHNIQUE_FILE_EXPLORATION)
+ }, 1, constants.TECHNIQUE_FILE_EXPLORATION)
+ elif repo_type == constants.RepositoryType.CODEBERG:
+ if (file_path.startswith(".forgejo/workflows/") or file_path.startswith(".gitea/workflows/")):
+ category = constants.CAT_CONTINUOUS_INTEGRATION
+ else:
+ category = None
+ elif repo_type == constants.RepositoryType.BITBUCKET:
+ if os.path.basename(file_path) == "bitbucket-pipelines.yml":
+ category = constants.CAT_CONTINUOUS_INTEGRATION
+ else:
+ category = None
elif repo_type == constants.RepositoryType.GITHUB:
- # if file_path.startswith(".github/workflows/"):
- # category = constants.CAT_WORKFLOWS
- # elif filename in [".travis.yml", "azure-pipelines.yml", "jenkinsfile"] or file_path.startswith(".circleci/"):
- # category = constants.CAT_CONTINUOUS_INTEGRATION
- # else:
- # category = None
if file_path.startswith(".github/workflows/"):
category = constants.CAT_CONTINUOUS_INTEGRATION
else:
category = None
- if category:
- workflow_url = get_file_link(repo_type, file_path, owner, repo_name, repo_default_branch,
- repo_dir, repo_relative_path, filename)
- metadata_result.add_result(category,
- {constants.PROP_VALUE: workflow_url, constants.PROP_TYPE: constants.URL},
- 1, constants.TECHNIQUE_FILE_EXPLORATION)
-
+ if category:
+ workflow_url = get_file_link(repo_type, file_path, owner, repo_name, repo_default_branch,
+ repo_dir, repo_relative_path, filename)
+ metadata_result.add_result(category,
+ {constants.PROP_VALUE: workflow_url, constants.PROP_TYPE: constants.URL},
+ 1, constants.TECHNIQUE_FILE_EXPLORATION)
if filename.endswith(".ga") or filename.endswith(".cwl") or filename.endswith(".nf") or (
filename.endswith(".snake") or filename.endswith(
".smk") or "Snakefile" == filename_no_ext) or filename.endswith(".knwf") or filename.endswith(
@@ -413,6 +417,10 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner
docs_url = f"https://github.com/{owner}/{repo_name}/tree/{urllib.parse.quote(repo_default_branch)}/{docs_path}"
elif repo_type == constants.RepositoryType.GITLAB:
docs_url = f"https://{domain_gitlab}/{owner}/{repo_name}/-/tree/{urllib.parse.quote(repo_default_branch)}/{docs_path}"
+ elif repo_type == constants.RepositoryType.CODEBERG:
+ docs_url = f"https://codeberg.org/{owner}/{repo_name}/src/branch/{urllib.parse.quote(repo_default_branch)}/{docs_path}"
+ elif repo_type == constants.RepositoryType.BITBUCKET:
+ docs_url = f"https://bitbucket.org/{owner}/{repo_name}/src/{urllib.parse.quote(repo_default_branch)}/{docs_path}"
else:
docs_url = os.path.join(repo_dir, docs_path)
# docs.append(docs_url)
@@ -452,6 +460,10 @@ def get_file_link(repo_type, file_path, owner, repo_name, repo_default_branch, r
return convert_to_raw_user_content_github(file_path, owner, repo_name, repo_default_branch)
elif repo_type == constants.RepositoryType.GITLAB:
return convert_to_raw_user_content_gitlab(file_path, owner, repo_name, repo_default_branch)
+ elif repo_type == constants.RepositoryType.CODEBERG:
+ return convert_to_raw_user_content_codeberg(file_path, owner, repo_name, repo_default_branch)
+ elif repo_type == constants.RepositoryType.BITBUCKET:
+ return convert_to_raw_user_content_bitbucket(file_path, owner, repo_name, repo_default_branch)
else:
return os.path.join(repo_dir, repo_relative_path, filename)
@@ -695,6 +707,24 @@ def convert_to_raw_user_content_github(partial, owner, repo_name, repo_ref):
return f"https://raw.githubusercontent.com/{owner}/{repo_name}/{repo_ref}/{urllib.parse.quote(partial)}"
+def convert_to_raw_user_content_codeberg(partial, owner, repo_name, repo_ref):
+ """Converts Codeberg paths into raw content URLs"""
+ if partial.startswith("./"):
+ partial = partial.replace("./", "")
+ if partial.startswith(".\\"):
+ partial = partial.replace(".\\", "")
+ return f"https://codeberg.org/{owner}/{repo_name}/raw/branch/{repo_ref}/{urllib.parse.quote(partial)}"
+
+
+def convert_to_raw_user_content_bitbucket(partial, owner, repo_name, repo_ref):
+ """Converts Bitbucket paths into raw content URLs"""
+ if partial.startswith("./"):
+ partial = partial.replace("./", "")
+ if partial.startswith(".\\"):
+ partial = partial.replace(".\\", "")
+ return f"https://bitbucket.org/{owner}/{repo_name}/raw/{repo_ref}/{urllib.parse.quote(partial)}"
+
+
def convert_to_raw_user_content_gitlab(partial, owner, repo_name, repo_ref):
"""Converts GitLab paths into raw.githubuser content URLs, accessible by users"""
if partial.startswith("./"):
diff --git a/src/somef/process_repository.py b/src/somef/process_repository.py
index 6cbd6e06..4d83396c 100644
--- a/src/somef/process_repository.py
+++ b/src/somef/process_repository.py
@@ -85,20 +85,23 @@ def rate_limit_get(*args, backoff_rate=2, initial_backoff=1, size_limit_mb=const
stream=use_stream,
**kwargs
)
- # Detect invalid or insufficient GitHub token
+ # Detect invalid or insufficient token
if response.status_code == 401:
- raise Exception("Invalid GitHub token. Run `somef configure` to set a valid token.")
+ raise Exception("Invalid token. Run `somef configure` to set a valid token.")
if response.status_code == 403:
- raise Exception("GitHub token lacks required permissions or scopes.")
+ raise Exception("Token lacks required permissions or scopes.")
date = response.headers.get("Date", "")
# Show rate limit information if available
if "X-RateLimit-Remaining" in response.headers:
rate_limit_remaining = response.headers["X-RateLimit-Remaining"]
epochtime = int(response.headers["X-RateLimit-Reset"])
+ if epochtime < 1000000000:
+ epochtime = int(time.time()) + epochtime
+
date_reset = datetime.fromtimestamp(epochtime)
logging.info(
- "Remaining GitHub API requests: " + rate_limit_remaining + " ### Next rate limit reset at: " + str(
+ "Remaining repository API requests: " + rate_limit_remaining + " ### Next rate limit reset at: " + str(
date_reset))
if not use_stream:
@@ -483,6 +486,12 @@ def download_readme(owner, repo_name, default_branch, repo_type, authorization,
elif repo_type is constants.RepositoryType.GITHUB:
primary_url = f"https://raw.githubusercontent.com/{owner}/{repo_name}/{default_branch}/README.md"
secondary_url = f"https://raw.githubusercontent.com/{owner}/{repo_name}/master/README.md"
+ elif repo_type is constants.RepositoryType.CODEBERG:
+ primary_url = f"https://codeberg.org/{owner}/{repo_name}/raw/branch/{default_branch}/README.md"
+ secondary_url = f"https://codeberg.org/{owner}/{repo_name}/raw/branch/master/README.md"
+ elif repo_type is constants.RepositoryType.BITBUCKET:
+ primary_url = f"https://bitbucket.org/{owner}/{repo_name}/raw/{default_branch}/README.md"
+ secondary_url = f"https://bitbucket.org/{owner}/{repo_name}/raw/master/README.md"
else:
logging.error("Repository type not supported")
return None
@@ -531,6 +540,10 @@ def load_online_repository_metadata(repository_metadata: Result, repository_url,
"""
if repo_type == constants.RepositoryType.GITLAB:
return load_gitlab_repository_metadata(repository_metadata, repository_url)
+ elif repo_type == constants.RepositoryType.CODEBERG:
+ return load_codeberg_repository_metadata(repository_metadata, repository_url, authorization)
+ elif repo_type == constants.RepositoryType.BITBUCKET:
+ return load_bitbucket_repository_metadata(repository_metadata, repository_url, authorization)
elif repo_type == constants.RepositoryType.LOCAL:
logging.warning("Trying to download metadata from a local repository")
return None
@@ -768,7 +781,7 @@ def do_crosswalk(data, crosswalk_table):
if value is not None:
output[somef_key] = value
else:
- logging.error(f"Error: key {path} not present in github repository")
+ logging.error(f"Error: key {path} not present in repository")
return output
@@ -796,11 +809,14 @@ def download_repository_files(owner, repo_name, default_branch, repo_type, targe
return download_github_files(target_dir, owner, repo_name, default_branch, authorization)
elif repo_type == constants.RepositoryType.GITLAB:
return download_gitlab_files(target_dir, owner, repo_name, default_branch, repo_ref)
+ elif repo_type == constants.RepositoryType.CODEBERG:
+ return download_codeberg_files(target_dir, owner, repo_name, default_branch, authorization)
+ elif repo_type == constants.RepositoryType.BITBUCKET:
+ return download_bitbucket_files(target_dir, owner, repo_name, default_branch, authorization)
else:
logging.error("Cannot download files from a local repository!")
return None
-
# def download_github_files(directory, owner, repo_name, repo_ref, authorization):
# """
# Download all repository files from a GitHub repository
@@ -1143,3 +1159,308 @@ def get_all_paginated_results(base_url, headers, per_page=100):
return all_results
+
+def load_codeberg_repository_metadata(repo_metadata: Result, repository_url, authorization=None):  # Loads Codeberg API metadata into repo_metadata; returns (repo_metadata, owner, repo_name, default_branch, path), with "" for the last four values on failure
+logging.info(f"Loading Repository {repository_url} Information....")
+# NOTE(review): file_paths is not read again anywhere in this function.
+file_paths = configuration.get_configuration_file()
+headers = codeberg_header_template(authorization)
+# Drop one trailing slash before parsing the URL.
+if repository_url[-1] == '/':
+repository_url = repository_url[:-1]
+url = urlparse(repository_url)
+# The first two non-empty path components are taken as owner and repository name.
+path_components = [p for p in url.path.split('/') if p]
+if len(path_components) < 2:
+logging.error("Codeberg link is not correct. Expected https://codeberg.org//")  # NOTE(review): the URL hint looks truncated (owner/repo placeholders missing?)
+return repo_metadata, "", "", "", ""
+
+owner = path_components[0]
+repo_name = path_components[1]
+default_branch = None
+# A branch given as /<owner>/<repo>/tree/<branch> takes precedence over the API default. NOTE(review): confirm Codeberg branch URLs really use a "tree" segment.
+if len(path_components) >= 4 and path_components[2] == "tree":
+default_branch = path_components[3]
+# Main repository request; any status other than 200 aborts with empty identifiers.
+repo_api_url = f"{constants.CODEBERG_API}/{owner}/{repo_name}"
+# resp = requests.get(repo_api_url)
+resp, _ = rate_limit_get(repo_api_url, headers=headers)
+if resp.status_code != 200:
+logging.error(f"Error fetching Codeberg repository: {resp.status_code}")
+return repo_metadata, "", "", "", ""
+general_resp = resp.json()
+# Without a branch in the URL, use the API's default_branch ('main' if the field is absent).
+if default_branch is None:
+default_branch = general_resp.get('default_branch', 'main')
+# Map API fields onto SOMEF keys using the Codeberg crosswalk table.
+filtered_resp = do_crosswalk(general_resp, constants.codeberg_crosswalk_table)
+if 'html_url' in general_resp:  # the issue tracker URL is derived from html_url
+filtered_resp[constants.CAT_ISSUE_TRACKER] = f"{general_resp['html_url']}/issues"
+# The download URL always points at the repository's releases page.
+filtered_resp[constants.CAT_DOWNLOAD_URL] = f"https://codeberg.org/{owner}/{repo_name}/releases"
+# Each value defaults to a plain string; categories in constants.all_categories may get a more specific type below.
+for category, value in filtered_resp.items():
+value_type = constants.STRING
+if category in constants.all_categories:
+if category == constants.CAT_ISSUE_TRACKER:
+value = value.replace("{/number}", "") if isinstance(value, str) else value
+if category == constants.CAT_OWNER:
+value_type = "User"
+if category == constants.CAT_KEYWORDS:  # joined into one comma-separated string; assumes value is a list of strings — TODO confirm
+value = '%s,' % (', '.join(value))
+value = value.rstrip(',')
+if category in [constants.CAT_CODE_REPOSITORY, constants.CAT_ISSUE_TRACKER,
+constants.CAT_DOWNLOAD_URL, constants.CAT_HOMEPAGE]:
+value_type = constants.URL
+if category in [constants.CAT_DATE_CREATED, constants.CAT_DATE_UPDATED]:
+value_type = constants.DATE
+if category in [constants.CAT_FORK_COUNTS, constants.CAT_STARS]:
+value_type = constants.NUMBER
+# CAT_LICENSE is skipped because the Codeberg API does not return it
+
+result = {
+constants.PROP_VALUE: value,
+constants.PROP_TYPE: value_type
+}
+if result['value']:  # falsy values are not recorded
+repo_metadata.add_result(category, result, 1, constants.TECHNIQUE_CODEBERG_API)
+# NOTE(review): this branch needs the crosswalk to keep a 'languages_url' key — verify against constants.codeberg_crosswalk_table.
+if 'languages_url' in filtered_resp:
+lang_resp, _ = rate_limit_get(filtered_resp['languages_url'], headers=headers)
+if lang_resp.status_code == 200:
+languages = lang_resp.json()
+for l, s in languages.items():  # each key is stored as the language name, each value as PROP_SIZE
+result = {
+constants.PROP_VALUE: l,
+constants.PROP_NAME: l,
+constants.PROP_TYPE: constants.LANGUAGE,
+constants.PROP_SIZE: s,
+}
+repo_metadata.add_result(constants.CAT_PROGRAMMING_LANGUAGES, result, 1,
+constants.TECHNIQUE_CODEBERG_API)
+# Releases come from a separate request; each release is crosswalked and stored as a RELEASE result.
+releases_url = f"{constants.CODEBERG_API}/{owner}/{repo_name}/releases"
+releases_resp, _ = rate_limit_get(releases_url, headers=headers)
+if releases_resp.status_code == 200:
+releases_list = releases_resp.json()
+release_list_filtered = [do_crosswalk(r, constants.release_codeberg_crosswalk_table)
+for r in releases_list]
+for release in release_list_filtered:
+release_obj = {
+constants.PROP_TYPE: constants.RELEASE,
+constants.PROP_VALUE: release.get(constants.PROP_URL, "")
+}
+for category, value in release.items():
+if category == constants.PROP_AUTHOR:  # wrap the author as {name, type}; type defaults to "Person"
+value = {
+constants.PROP_NAME: value,
+constants.PROP_TYPE: release.get(constants.AGENT_TYPE, "Person")
+}
+if value:
+release_obj[category] = value
+if category == constants.CAT_ASSETS and isinstance(value, list):  # asset keys are renamed via key_mapping below
+assets_filtered = [do_crosswalk(a, constants.release_assets_codeberg) for a in value]
+key_mapping = {
+constants.PROP_BROWSER_URL: constants.PROP_CONTENT_URL,
+constants.PROP_SIZE: constants.PROP_CONTENT_SIZE,
+constants.PROP_CONTENT_TYPE: constants.PROP_ENCODING_FORMAT,
+constants.PROP_DATE_CREATED_AT: constants.PROP_UPLOAD_DATE
+}
+assets_filtered = [{key_mapping.get(k, k): v for k, v in a.items()} for a in assets_filtered]
+release_obj[category] = assets_filtered
+repo_metadata.add_result(constants.CAT_RELEASES, release_obj, 1, constants.TECHNIQUE_CODEBERG_API)
+
+logging.info("Repository information successfully loaded.\n")
+return repo_metadata, owner, repo_name, default_branch, "/".join(path_components)
+
+
+def download_codeberg_files(directory, owner, repo_name, repo_branch,authorization=None):
+ """
+ Download all repository files from a Codeberg repository.
+ """
+ repo_archive_url = f"https://codeberg.org/{owner}/{repo_name}/archive/{repo_branch}.zip"
+ logging.info(f"Downloading {repo_archive_url}")
+
+ headers = codeberg_header_template(authorization)
+
+ repo_download, _ = rate_limit_get(repo_archive_url, headers=headers)
+ if repo_download.status_code != 200:
+ logging.error(f"Error downloading Codeberg archive: HTTP {repo_download.status_code}")
+ return None
+
+ repo_zip = repo_download.content
+
+ repo_name_full = owner + "_" + repo_name
+ repo_zip_file = os.path.join(directory, repo_name_full + ".zip")
+ repo_extract_dir = os.path.join(directory, repo_name_full)
+
+ with open(repo_zip_file, "wb") as f:
+ f.write(repo_zip)
+
+ try:
+ with zipfile.ZipFile(repo_zip_file, "r") as zip_ref:
+ zip_ref.extractall(repo_extract_dir)
+ except zipfile.BadZipFile:
+ logging.error("Downloaded archive is not a valid zip")
+ return None
+
+ repo_folders = os.listdir(repo_extract_dir)
+ if not repo_folders:
+ logging.warning("Repository archive is empty")
+ return None
+
+ repo_dir = os.path.join(repo_extract_dir, repo_folders[0])
+ return repo_dir
+
+
+def codeberg_header_template(authorization=None):
+ header = {}
+ file_paths = configuration.get_configuration_file()
+ if authorization is not None:
+ header["Authorization"] = authorization
+ elif constants.CONF_CODEBERG_AUTHORIZATION in file_paths:
+ header["Authorization"] = file_paths[constants.CONF_CODEBERG_AUTHORIZATION]
+ return header
+
+
+def bitbucket_header_template(authorization=None):
+ header = {}
+ file_paths = configuration.get_configuration_file()
+ if authorization is not None:
+ header["Authorization"] = authorization
+ elif constants.CONF_BITBUCKET_AUTHORIZATION in file_paths:
+ header["Authorization"] = file_paths[constants.CONF_BITBUCKET_AUTHORIZATION]
+ return header
+
+
+def load_bitbucket_repository_metadata(repo_metadata: Result, repository_url, authorization=None):  # Loads Bitbucket API metadata into repo_metadata; returns (repo_metadata, owner, repo_name, default_branch, path), with "" for the last four values on failure
+logging.info(f"Loading Repository {repository_url} Information....")
+if repository_url[-1] == '/':  # drop one trailing slash before parsing the URL
+repository_url = repository_url[:-1]
+url = urlparse(repository_url)
+# The first two non-empty path components are taken as owner and repository name.
+path_components = [p for p in url.path.split('/') if p]
+if len(path_components) < 2:
+logging.error("Bitbucket link is not correct. Expected https://bitbucket.org//")  # NOTE(review): the URL hint looks truncated (owner/repo placeholders missing?)
+return repo_metadata, "", "", "", ""
+
+owner = path_components[0]
+repo_name = path_components[1]
+default_branch = None
+# A /tree/<branch> URL segment overrides the API's main branch. NOTE(review): confirm Bitbucket branch URLs really use a "tree" segment.
+if len(path_components) >= 4 and path_components[2] == "tree":
+default_branch = path_components[3]
+
+# API call
+repo_api_url = f"{constants.BITBUCKET_API}/{owner}/{repo_name}"
+headers = bitbucket_header_template(authorization)
+resp, _ = rate_limit_get(repo_api_url, headers=headers)
+if resp.status_code != 200:
+logging.error(f"Error fetching Bitbucket repository: {resp.status_code}")
+return repo_metadata, "", "", "", ""
+general_resp = resp.json()
+# Without a branch in the URL, use mainbranch.name ('main' if absent). NOTE(review): raises AttributeError if 'mainbranch' is present but null.
+if default_branch is None:
+default_branch = general_resp.get('mainbranch', {}).get('name', 'main')
+# Map API fields onto SOMEF keys using the Bitbucket crosswalk table.
+filtered_resp = do_crosswalk(general_resp, constants.bitbucket_crosswalk_table)
+# If the crosswalk produced no owner, fall back to owner.nickname, then owner.username.
+if constants.CAT_OWNER not in filtered_resp or not filtered_resp[constants.CAT_OWNER]:
+owner_obj = general_resp.get('owner', {})
+owner_val = owner_obj.get('nickname') or owner_obj.get('username')
+if owner_val:
+filtered_resp[constants.CAT_OWNER] = owner_val
+
+# Issue tracker: only set when has_issues is truthy and an HTML link is present
+if general_resp.get('has_issues', False) and 'links' in general_resp and 'html' in general_resp['links']:
+html_url = general_resp['links']['html']['href']
+filtered_resp[constants.CAT_ISSUE_TRACKER] = f"{html_url}/issues"
+
+# The single 'language' string is recorded directly, without a size property.
+if 'language' in general_resp and general_resp['language']:
+lang_value = general_resp['language']
+result = {
+constants.PROP_VALUE: lang_value,
+constants.PROP_NAME: lang_value,
+constants.PROP_TYPE: constants.LANGUAGE,
+}
+repo_metadata.add_result(constants.CAT_PROGRAMMING_LANGUAGES, result, 1,
+constants.TECHNIQUE_BITBUCKET_API)
+
+# The download URL is built from the repository's HTML link.
+if 'links' in general_resp and 'html' in general_resp['links']:
+filtered_resp[constants.CAT_DOWNLOAD_URL] = f"{general_resp['links']['html']['href']}/downloads"
+# Each value defaults to a plain string; categories in constants.all_categories may get a more specific type below.
+for category, value in filtered_resp.items():
+value_type = constants.STRING
+if category in constants.all_categories:
+if category == constants.CAT_OWNER:
+value_type = "User"
+if category in [constants.CAT_CODE_REPOSITORY, constants.CAT_ISSUE_TRACKER,
+constants.CAT_DOWNLOAD_URL, constants.CAT_HOMEPAGE, constants.CAT_FORKS_URLS]:
+value_type = constants.URL
+if category in [constants.CAT_DATE_CREATED, constants.CAT_DATE_UPDATED]:
+value_type = constants.DATE
+if category == constants.CAT_PROGRAMMING_LANGUAGES:
+value_type = constants.LANGUAGE
+
+result = {
+constants.PROP_VALUE: value,
+constants.PROP_TYPE: value_type
+}
+if result['value']:  # falsy values are not recorded
+repo_metadata.add_result(category, result, 1, constants.TECHNIQUE_BITBUCKET_API)
+
+# Releases from /refs/tags. NOTE(review): only 'values' of this one response is read; further pages, if the API paginates, are not fetched.
+tags_url = f"{constants.BITBUCKET_API}/{owner}/{repo_name}/refs/tags"
+tags_resp, _ = rate_limit_get(tags_url, headers=headers)
+if tags_resp.status_code == 200:
+tags_data = tags_resp.json()
+tags_list = tags_data.get('values', [])
+for tag in tags_list:
+release_obj = do_crosswalk(tag, constants.release_bitbucket_crosswalk_table)
+release_obj[constants.PROP_TYPE] = constants.RELEASE
+release_obj[constants.PROP_VALUE] = tag.get('name', '')  # the tag name is used as the release value
+repo_metadata.add_result(constants.CAT_RELEASES, release_obj, 1,
+constants.TECHNIQUE_BITBUCKET_API)
+
+logging.info("Repository information successfully loaded.\n")
+return repo_metadata, owner, repo_name, default_branch, "/".join(path_components)
+
+
+def download_bitbucket_files(directory, owner, repo_name, repo_branch, authorization=None):
+ repo_archive_url = f"https://bitbucket.org/{owner}/{repo_name}/get/{repo_branch}.zip"
+ logging.info(f"Downloading {repo_archive_url}")
+
+ headers = bitbucket_header_template(authorization)
+ repo_download, _ = rate_limit_get(repo_archive_url, headers=headers)
+ if repo_download is None:
+ logging.warning(f"Repository archive skipped due to size limit: {constants.SIZE_DOWNLOAD_LIMIT_MB} MB or no content-length")
+ return None
+ if repo_download.status_code != 200:
+ logging.error(f"Error downloading Bitbucket archive: HTTP {repo_download.status_code}")
+ return None
+
+ repo_zip = repo_download.content
+
+ repo_name_full = owner + "_" + repo_name
+ repo_zip_file = os.path.join(directory, repo_name_full + ".zip")
+ repo_extract_dir = os.path.join(directory, repo_name_full)
+
+ with open(repo_zip_file, "wb") as f:
+ f.write(repo_zip)
+
+ try:
+ with zipfile.ZipFile(repo_zip_file, "r") as zip_ref:
+ zip_ref.extractall(repo_extract_dir)
+ except zipfile.BadZipFile:
+ logging.error("Downloaded archive is not a valid zip")
+ return None
+
+ repo_folders = os.listdir(repo_extract_dir)
+ if not repo_folders:
+ logging.warning("Repository archive is empty")
+ return None
+
+ repo_dir = os.path.join(repo_extract_dir, repo_folders[0])
+ return repo_dir
\ No newline at end of file
diff --git a/src/somef/somef_cli.py b/src/somef/somef_cli.py
index 371489bb..dff53b03 100644
--- a/src/somef/somef_cli.py
+++ b/src/somef/somef_cli.py
@@ -78,16 +78,24 @@ def cli_get_data(threshold, ignore_classifiers, repo_url=None, doc_src=None, loc
url = urlparse(repo_url)
servidor = url.netloc
bGitLab = False
+ bCodeberg = False
+ bBitbucket = False
if process_repository.is_gitlab(servidor):
logging.info(f"{servidor} is GitLab.")
+ repo_type = constants.RepositoryType.GITLAB
bGitLab = True
- # if reconcile_authors:
- # logging.info("Author enrichment disabled: GitLab repositories are not supported for GitHub user enrichment.")
- # reconcile_authors = False
+ logging.info(f"DEBUG: {servidor} is_gitlab = {bGitLab}")
+ elif servidor == constants.CODEBERG_DOMAIN:
+ repo_type = constants.RepositoryType.CODEBERG
+ bCodeberg = True
+ logging.info(f"DEBUG: {servidor} is_codeberg = {bCodeberg}")
+ elif "bitbucket.org" in servidor:
+ repo_type = constants.RepositoryType.BITBUCKET
+ bBitbucket = True
+
- logging.info(f"DEBUG: {servidor} is_gitlab = {bGitLab}")
- if bGitLab:
- repo_type = constants.RepositoryType.GITLAB
+ # if bGitLab:
+ # repo_type = constants.RepositoryType.GITLAB
logging.info("Processing repository metadata.")
repository_metadata, owner, repo_name, def_branch, project_path = process_repository.load_online_repository_metadata(
diff --git a/src/somef/test/test_bitbucket.py b/src/somef/test/test_bitbucket.py
new file mode 100644
index 00000000..ef6bcbb4
--- /dev/null
+++ b/src/somef/test/test_bitbucket.py
@@ -0,0 +1,134 @@
+import os
+import unittest
+import json
+from pathlib import Path
+from .. import somef_cli
+from ..utils import constants
+from .. import process_repository
+from ..process_results import Result
+from unittest.mock import patch, MagicMock
+
+test_data_path = str(Path(__file__).parent / "test_data") + os.path.sep
+
+
+def _make_mock_response(status_code, content=b""):
+    """Build a MagicMock standing in for a requests.Response with the given status and body."""
+    mock_response = MagicMock(status_code=status_code, content=content, headers={})
+    try:
+        payload = json.loads(content)
+    except json.JSONDecodeError:
+        # Body is not JSON (e.g. the default b""): json() stays a default MagicMock attribute.
+        pass
+    else:
+        mock_response.json.return_value = payload
+    return mock_response
+
+
+class TestBitbucketRepository(unittest.TestCase):
+    """Tests for the Bitbucket support: mocked API responses plus one CLI run on a real repository."""
+
+    @patch("somef.process_repository.requests.get")
+    def test_load_bitbucket_metadata(self, mock_get):
+        """Load Bitbucket metadata from the saved API responses through a mocked requests.get."""
+        # Path.read_text closes the fixture files; json.load(open(...)) would leave them open.
+        fixtures = Path(test_data_path) / "api_responses" / "bitbucket"
+        repo_json = json.loads((fixtures / "bitbucket_response.json").read_text(encoding="utf-8"))
+        tags_json = json.loads((fixtures / "bitbucket_tags.json").read_text(encoding="utf-8"))
+
+        mock_get.side_effect = [
+            _make_mock_response(200, json.dumps(repo_json).encode()),
+            _make_mock_response(200, json.dumps(tags_json).encode())
+        ]
+
+        result, owner, repo_name, branch, path = \
+            process_repository.load_bitbucket_repository_metadata(
+                Result(), "https://bitbucket.org/bitbucketpipelines/pipelines-guide-python"
+            )
+
+        self.assertIn(constants.CAT_NAME, result.results)
+        self.assertIn(constants.CAT_DESCRIPTION, result.results)
+        self.assertIn(constants.CAT_CODE_REPOSITORY, result.results)
+        self.assertIn(constants.CAT_FULL_NAME, result.results)
+        self.assertIn(constants.CAT_FORKS_URLS, result.results)
+        self.assertIn(constants.CAT_PROGRAMMING_LANGUAGES, result.results)
+        # this repo has no issues (the saved response has "has_issues": false)
+        self.assertNotIn(constants.CAT_ISSUE_TRACKER, result.results)
+        # this repo has no releases (the saved tags response is empty)
+        self.assertNotIn(constants.CAT_RELEASES, result.results)
+        self.assertEqual(owner, "bitbucketpipelines")
+        self.assertEqual(repo_name, "pipelines-guide-python")
+        self.assertEqual(branch, "master")
+
+    @patch("somef.process_repository.requests.get")
+    def test_bitbucket_api_error_returns_empty(self, mock_get):
+        """An HTTP error must yield an empty owner and no name entry instead of raising."""
+        mock_get.return_value = _make_mock_response(404)
+        result, owner, repo_name, branch, path = \
+            process_repository.load_bitbucket_repository_metadata(
+                Result(), "https://bitbucket.org/nonexistent/repo"
+            )
+        self.assertEqual(owner, "")
+        self.assertNotIn(constants.CAT_NAME, result.results)
+
+    @unittest.skipIf(os.getenv("CI") == "true", "Skipped in CI because it is already verified locally")
+    def test_bitbucket_integration_cli(self):
+        """End-to-end CLI test against a real Bitbucket repository."""
+        output_file = test_data_path + "test-bitbucket-integration.json"
+        somef_cli.run_cli(
+            threshold=0.8,
+            ignore_classifiers=False,
+            repo_url="https://bitbucket.org/bitbucketpipelines/pipelines-guide-python",
+            local_repo=None,
+            doc_src=None,
+            in_file=None,
+            output=output_file,
+            graph_out=None,
+            graph_format="turtle",
+            codemeta_out=None,
+            pretty=True,
+            missing=True,
+            readme_only=False,
+            reconcile_authors=False
+        )
+        # Registered as a cleanup so the output file is removed even when an assertion below fails.
+        self.addCleanup(os.remove, output_file)
+
+        with open(output_file, "r") as f:
+            json_content = json.load(f)
+
+        name_entries = json_content.get(constants.CAT_NAME, [])
+        self.assertTrue(
+            any(e["technique"] == constants.TECHNIQUE_BITBUCKET_API and e["result"]["value"] == "pipelines-guide-python"
+                for e in name_entries),
+            "Name from Bitbucket_API should be 'pipelines-guide-python'"
+        )
+
+        code_repo_entries = json_content.get(constants.CAT_CODE_REPOSITORY, [])
+        self.assertTrue(
+            any(e["technique"] == constants.TECHNIQUE_BITBUCKET_API and e["result"]["value"] == "https://bitbucket.org/bitbucketpipelines/pipelines-guide-python"
+                for e in code_repo_entries)
+        )
+
+        full_name_entries = json_content.get(constants.CAT_FULL_NAME, [])
+        self.assertTrue(
+            any(e["technique"] == constants.TECHNIQUE_BITBUCKET_API and e["result"]["value"] == "bitbucketpipelines/pipelines-guide-python"
+                for e in full_name_entries)
+        )
+
+        pl_entries = json_content.get(constants.CAT_PROGRAMMING_LANGUAGES, [])
+        self.assertTrue(
+            any(e["technique"] == constants.TECHNIQUE_BITBUCKET_API and e["result"]["value"] == "python"
+                for e in pl_entries)
+        )
+
+        ft_entries = json_content.get(constants.CAT_FULL_TITLE, [])
+        self.assertTrue(
+            any(e.get("result", {}).get("value") == "Pipelines Python"
+                for e in ft_entries)
+        )
+
+        self.assertTrue(any(e["technique"] == constants.TECHNIQUE_BITBUCKET_API
+                            for e in json_content.get(constants.CAT_FORKS_URLS, [])))
+        self.assertTrue(any(e["technique"] == "file_exploration"
+                            for e in json_content.get(constants.CAT_HAS_BUILD_FILE, [])))
+
diff --git a/src/somef/test/test_codeberg.py b/src/somef/test/test_codeberg.py
new file mode 100644
index 00000000..4d36880d
--- /dev/null
+++ b/src/somef/test/test_codeberg.py
@@ -0,0 +1,128 @@
+import os
+import unittest
+import json
+from pathlib import Path
+from .. import somef_cli
+from ..utils import constants
+from .. import process_repository
+from ..process_results import Result
+from unittest.mock import patch, MagicMock
+
+test_data_path = str(Path(__file__).parent / "test_data") + os.path.sep
+
+
+def _make_mock_response(status_code, content=b""):
+    """Build a MagicMock standing in for a requests.Response with the given status and body."""
+    mock_response = MagicMock(status_code=status_code, content=content, headers={})
+    try:
+        payload = json.loads(content)
+    except json.JSONDecodeError:
+        # Body is not JSON (e.g. the default b""): json() stays a default MagicMock attribute.
+        pass
+    else:
+        mock_response.json.return_value = payload
+    return mock_response
+
+class TestCodebergRepository(unittest.TestCase):
+    """Tests for the Codeberg support: mocked API responses plus one CLI run on a real repository."""
+
+    @patch("somef.process_repository.requests.get")
+    def test_load_codeberg_metadata(self, mock_get):
+        """Load Codeberg repository metadata via mocked API, using the API responses saved locally."""
+        # Path.read_text closes the fixture files; json.load(open(...)) would leave them open.
+        fixtures = Path(test_data_path) / "api_responses" / "codeberg"
+        repo_json = json.loads((fixtures / "codeberg_forgejo.json").read_text(encoding="utf-8"))
+        lang_json = json.loads((fixtures / "codeberg_forgejo_languages.json").read_text(encoding="utf-8"))
+        releases_json = json.loads((fixtures / "codeberg_forgejo_releases.json").read_text(encoding="utf-8"))
+
+        mock_get.side_effect = [
+            _make_mock_response(200, json.dumps(repo_json).encode()),
+            _make_mock_response(200, json.dumps(lang_json).encode()),
+            _make_mock_response(200, json.dumps(releases_json).encode())
+        ]
+
+        result, owner, repo_name, branch, path = process_repository.load_codeberg_repository_metadata(
+            Result(), "https://codeberg.org/forgejo/forgejo"
+        )
+
+        self.assertIn(constants.CAT_NAME, result.results)
+        self.assertEqual(result.results[constants.CAT_NAME][0]["result"]["value"], "forgejo")
+        self.assertIn(constants.CAT_STARS, result.results)
+        self.assertIn(constants.CAT_DESCRIPTION, result.results)
+        self.assertIn(constants.CAT_ISSUE_TRACKER, result.results)
+        self.assertEqual(owner, "forgejo")
+        self.assertEqual(repo_name, "forgejo")
+        # language
+        self.assertIn(constants.CAT_PROGRAMMING_LANGUAGES, result.results)
+        # releases
+        self.assertIn(constants.CAT_RELEASES, result.results)
+
+    @patch("somef.process_repository.requests.get")
+    def test_codeberg_api_error_returns_empty(self, mock_get):
+        """An HTTP error must yield an empty owner and no name entry instead of raising."""
+        mock_get.return_value = _make_mock_response(404)
+        result, owner, repo_name, branch, path = \
+            process_repository.load_codeberg_repository_metadata(
+                Result(), "https://codeberg.org/nonexistent/repo"
+            )
+        self.assertEqual(owner, "")
+        self.assertNotIn(constants.CAT_NAME, result.results)
+
+    @unittest.skipIf(os.getenv("CI") == "true", "Skipped in CI because it is already verified locally")
+    def test_codeberg_integration_cli(self):
+        """End-to-end CLI test against a real Codeberg repository."""
+        output_file = test_data_path + "test-codeberg-integration.json"
+        somef_cli.run_cli(
+            threshold=0.8,
+            ignore_classifiers=False,
+            repo_url="https://codeberg.org/forgejo/forgejo",
+            local_repo=None,
+            doc_src=None,
+            in_file=None,
+            output=output_file,
+            graph_out=None,
+            graph_format="turtle",
+            codemeta_out=None,
+            pretty=True,
+            missing=True,
+            readme_only=False,
+            reconcile_authors=False
+        )
+        # Registered as a cleanup so the output file is removed even when an assertion below fails.
+        self.addCleanup(os.remove, output_file)
+
+        with open(output_file, "r") as f:
+            json_content = json.load(f)
+
+        name_entries = json_content.get(constants.CAT_NAME, [])
+        self.assertTrue(
+            any(e["technique"] == constants.TECHNIQUE_CODEBERG_API and e["result"]["value"] == "forgejo"
+                for e in name_entries),
+            "Name from Codeberg_API should be 'forgejo'"
+        )
+
+        desc_entries = json_content.get(constants.CAT_DESCRIPTION, [])
+        self.assertTrue(
+            any(e["technique"] == constants.TECHNIQUE_CODEBERG_API and e["result"]["value"] == "Beyond coding. We forge."
+                for e in desc_entries),
+            "Description from Codeberg_API should match"
+        )
+
+        code_repo_entries = json_content.get(constants.CAT_CODE_REPOSITORY, [])
+        self.assertTrue(
+            any(e["technique"] == constants.TECHNIQUE_CODEBERG_API and
+                e["result"]["value"] == "https://codeberg.org/forgejo/forgejo"
+                for e in code_repo_entries)
+        )
+
+        homepage_entries = json_content.get(constants.CAT_HOMEPAGE, [])
+        self.assertTrue(
+            any(e["technique"] == constants.TECHNIQUE_CODEBERG_API and
+                e["result"]["value"] == "https://forgejo.org"
+                for e in homepage_entries)
+        )
+        self.assertIn(constants.CAT_STARS, json_content)
+        self.assertIn(constants.CAT_FORK_COUNTS, json_content)
+
+        ci_entries = json_content.get(constants.CAT_CONTINUOUS_INTEGRATION, [])
+        self.assertGreaterEqual(len(ci_entries), 10, f"Expected at least 10 CI workflows, got {len(ci_entries)}")
\ No newline at end of file
diff --git a/src/somef/test/test_data/api_responses/bitbucket/bitbucket_response.json b/src/somef/test/test_data/api_responses/bitbucket/bitbucket_response.json
new file mode 100644
index 00000000..6d4f1d31
--- /dev/null
+++ b/src/somef/test/test_data/api_responses/bitbucket/bitbucket_response.json
@@ -0,0 +1,127 @@
+{
+ "type": "repository",
+ "full_name": "bitbucketpipelines/pipelines-guide-python",
+ "links": {
+ "self": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python"
+ },
+ "html": {
+ "href": "https://bitbucket.org/bitbucketpipelines/pipelines-guide-python"
+ },
+ "avatar": {
+ "href": "https://bytebucket.org/ravatar/%7Bcfe0b566-69b6-44a1-a3cf-c124a7ce0f24%7D?ts=python"
+ },
+ "pullrequests": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/pullrequests"
+ },
+ "commits": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/commits"
+ },
+ "forks": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/forks"
+ },
+ "watchers": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/watchers"
+ },
+ "branches": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/refs/branches"
+ },
+ "tags": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/refs/tags"
+ },
+ "downloads": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/downloads"
+ },
+ "source": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/src"
+ },
+ "clone": [
+ {
+ "name": "https",
+ "href": "https://bitbucket.org/bitbucketpipelines/pipelines-guide-python.git"
+ },
+ {
+ "name": "ssh",
+ "href": "git@bitbucket.org:bitbucketpipelines/pipelines-guide-python.git"
+ }
+ ],
+ "hooks": {
+ "href": "https://api.bitbucket.org/2.0/repositories/bitbucketpipelines/pipelines-guide-python/hooks"
+ }
+ },
+ "name": "pipelines-guide-python",
+ "slug": "pipelines-guide-python",
+ "description": "This is an example repo showing pipelines with python",
+ "scm": "git",
+ "website": "",
+ "owner": {
+ "display_name": "Bitbucket Pipelines",
+ "links": {
+ "self": {
+ "href": "https://api.bitbucket.org/2.0/workspaces/%7Bb2919ebc-eb34-4395-b60f-cd937aaa001c%7D"
+ },
+ "avatar": {
+ "href": "https://bitbucket.org/account/bitbucketpipelines/avatar/"
+ },
+ "html": {
+ "href": "https://bitbucket.org/%7Bb2919ebc-eb34-4395-b60f-cd937aaa001c%7D/"
+ }
+ },
+ "type": "team",
+ "uuid": "{b2919ebc-eb34-4395-b60f-cd937aaa001c}",
+ "username": "bitbucketpipelines"
+ },
+ "workspace": {
+ "type": "workspace",
+ "uuid": "{b2919ebc-eb34-4395-b60f-cd937aaa001c}",
+ "name": "Bitbucket Pipelines",
+ "slug": "bitbucketpipelines",
+ "links": {
+ "avatar": {
+ "href": "https://bitbucket.org/workspaces/bitbucketpipelines/avatar/?ts=1569518620"
+ },
+ "html": {
+ "href": "https://bitbucket.org/bitbucketpipelines/"
+ },
+ "self": {
+ "href": "https://api.bitbucket.org/2.0/workspaces/bitbucketpipelines"
+ }
+ }
+ },
+ "is_private": false,
+ "project": {
+ "type": "project",
+ "key": "DOC",
+ "uuid": "{92800567-05cc-4bbd-8485-114b0e689717}",
+ "name": "documentation",
+ "links": {
+ "self": {
+ "href": "https://api.bitbucket.org/2.0/workspaces/bitbucketpipelines/projects/DOC"
+ },
+ "html": {
+ "href": "https://bitbucket.org/bitbucketpipelines/workspace/projects/DOC"
+ },
+ "avatar": {
+ "href": "https://bitbucket.org/bitbucketpipelines/workspace/projects/DOC/avatar/32?ts=1506484642"
+ }
+ }
+ },
+ "fork_policy": "allow_forks",
+ "created_on": "2018-10-05T04:53:15.008411+00:00",
+ "updated_on": "2026-01-29T16:48:15.149268+00:00",
+ "size": 1373327,
+ "language": "python",
+ "uuid": "{cfe0b566-69b6-44a1-a3cf-c124a7ce0f24}",
+ "mainbranch": {
+ "name": "master",
+ "type": "branch"
+ },
+ "override_settings": {
+ "default_merge_strategy": false,
+ "branching_model": false
+ },
+ "parent": null,
+ "enforced_signed_commits": null,
+ "has_issues": false,
+ "has_wiki": false
+}
\ No newline at end of file
diff --git a/src/somef/test/test_data/api_responses/bitbucket/bitbucket_tags.json b/src/somef/test/test_data/api_responses/bitbucket/bitbucket_tags.json
new file mode 100644
index 00000000..3ba8febb
--- /dev/null
+++ b/src/somef/test/test_data/api_responses/bitbucket/bitbucket_tags.json
@@ -0,0 +1 @@
+{"values": [], "pagelen": 10, "size": 0, "page": 1}
\ No newline at end of file
diff --git a/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo.json b/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo.json
new file mode 100644
index 00000000..20610ef3
--- /dev/null
+++ b/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo.json
@@ -0,0 +1,104 @@
+{
+ "id": 73144,
+ "owner": {
+ "id": 70422,
+ "login": "forgejo",
+ "login_name": "",
+ "source_id": 0,
+ "full_name": "Forgejo",
+ "email": "forgejo@noreply.codeberg.org",
+ "avatar_url": "https://codeberg.org/avatars/dae8ab126a96f6fbd6942cf08ab92382",
+ "html_url": "https://codeberg.org/forgejo",
+ "language": "",
+ "is_admin": false,
+ "last_login": "0001-01-01T00:00:00Z",
+ "created": "2022-11-06T07:18:11+01:00",
+ "restricted": false,
+ "active": false,
+ "prohibit_login": false,
+ "location": "",
+ "pronouns": "",
+ "website": "https://forgejo.org",
+ "description": "Beyond coding. We forge.",
+ "visibility": "public",
+ "followers_count": 519,
+ "following_count": 0,
+ "starred_repos_count": 0,
+ "username": "forgejo"
+ },
+ "name": "forgejo",
+ "full_name": "forgejo/forgejo",
+ "description": "Beyond coding. We forge.",
+ "empty": false,
+ "private": false,
+ "fork": false,
+ "template": false,
+ "parent": null,
+ "mirror": false,
+ "size": 328744,
+ "language": "Go",
+ "languages_url": "https://codeberg.org/api/v1/repos/forgejo/forgejo/languages",
+ "html_url": "https://codeberg.org/forgejo/forgejo",
+ "url": "https://codeberg.org/api/v1/repos/forgejo/forgejo",
+ "link": "",
+ "ssh_url": "ssh://git@codeberg.org/forgejo/forgejo.git",
+ "clone_url": "https://codeberg.org/forgejo/forgejo.git",
+ "original_url": "https://github.com/go-gitea/gitea",
+ "website": "https://forgejo.org",
+ "stars_count": 4804,
+ "forks_count": 827,
+ "watchers_count": 118,
+ "open_issues_count": 1320,
+ "open_pr_counter": 139,
+ "release_counter": 105,
+ "default_branch": "forgejo",
+ "archived": false,
+ "created_at": "2022-11-06T07:24:57+01:00",
+ "updated_at": "2026-06-02T01:13:56+02:00",
+ "archived_at": "1970-01-01T01:00:00+01:00",
+ "permissions": {
+ "admin": false,
+ "push": false,
+ "pull": true
+ },
+ "has_issues": true,
+ "internal_tracker": {
+ "enable_time_tracker": false,
+ "allow_only_contributors_to_track_time": true,
+ "enable_issue_dependencies": true
+ },
+ "has_wiki": false,
+ "has_wiki_contents": false,
+ "wiki_branch": "master",
+ "wiki_ssh_url": "ssh://git@codeberg.org/forgejo/forgejo.wiki.git",
+ "wiki_clone_url": "https://codeberg.org/forgejo/forgejo.wiki.git",
+ "globally_editable_wiki": false,
+ "has_pull_requests": true,
+ "has_projects": true,
+ "has_releases": true,
+ "has_packages": true,
+ "has_actions": true,
+ "ignore_whitespace_conflicts": false,
+ "allow_merge_commits": true,
+ "allow_rebase": true,
+ "allow_rebase_explicit": true,
+ "allow_squash_merge": true,
+ "allow_fast_forward_only_merge": false,
+ "allow_rebase_update": true,
+ "default_delete_branch_after_merge": true,
+ "default_merge_style": "squash",
+ "default_allow_maintainer_edit": true,
+ "default_update_style": "merge",
+ "avatar_url": "https://codeberg.org/repo-avatars/73144-c883a242dec5299fbc06bbe3ee71d8c6",
+ "internal": false,
+ "mirror_interval": "",
+ "object_format_name": "sha1",
+ "mirror_updated": "0001-01-01T00:00:00Z",
+ "repo_transfer": null,
+ "topics": [
+ "forge",
+ "forgejo",
+ "git",
+ "self-hosted"
+ ]
+}
\ No newline at end of file
diff --git a/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo_languages.json b/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo_languages.json
new file mode 100644
index 00000000..02acc451
--- /dev/null
+++ b/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo_languages.json
@@ -0,0 +1,16 @@
+{
+ "Go": 16886294,
+ "go-html-template": 2256939,
+ "JavaScript": 563991,
+ "CSS": 395828,
+ "TypeScript": 377005,
+ "Vue": 136433,
+ "Roff": 58206,
+ "Makefile": 46509,
+ "Shell": 44058,
+ "Jsonnet": 15443,
+ "Dockerfile": 7283,
+ "Less": 5467,
+ "Scheme": 2253,
+ "Nix": 705
+}
\ No newline at end of file
diff --git a/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo_releases.json b/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo_releases.json
new file mode 100644
index 00000000..7bc17202
--- /dev/null
+++ b/src/somef/test/test_data/api_responses/codeberg/codeberg_forgejo_releases.json
@@ -0,0 +1,261 @@
+[
+ {
+ "id": 9279765,
+ "tag_name": "v15.0.2",
+ "target_commitish": "",
+ "name": "v15.0.2",
+ "body": "See https://codeberg.org/forgejo/forgejo/src/branch/forgejo/release-notes-published/15.0.2.md",
+ "url": "https://codeberg.org/api/v1/repos/forgejo/forgejo/releases/9279765",
+ "html_url": "https://codeberg.org/forgejo/forgejo/releases/tag/v15.0.2",
+ "tarball_url": "https://codeberg.org/forgejo/forgejo/archive/v15.0.2.tar.gz",
+ "zipball_url": "https://codeberg.org/forgejo/forgejo/archive/v15.0.2.zip",
+ "hide_archive_links": false,
+ "upload_url": "https://codeberg.org/api/v1/repos/forgejo/forgejo/releases/9279765/assets",
+ "draft": false,
+ "prerelease": false,
+ "created_at": "2026-05-12T12:09:16+02:00",
+ "published_at": "2026-05-12T12:09:16+02:00",
+ "author": {
+ "id": 70541,
+ "login": "release-team",
+ "login_name": "",
+ "source_id": 0,
+ "full_name": "Forgejo Release Team",
+ "email": "release-team@noreply.codeberg.org",
+ "avatar_url": "https://codeberg.org/avatars/83623afe083e82955f79b2dfeea58bd9f34897fae0f6fd001bc282d3a40efc8b",
+ "html_url": "https://codeberg.org/release-team",
+ "language": "",
+ "is_admin": false,
+ "last_login": "0001-01-01T00:00:00Z",
+ "created": "2022-11-07T10:25:48+01:00",
+ "restricted": false,
+ "active": false,
+ "prohibit_login": false,
+ "location": "",
+ "pronouns": "",
+ "website": "https://codeberg.org/forgejo/forgejo",
+ "description": "Account for publishing Forgejo releases using the Forgejo CI",
+ "visibility": "public",
+ "followers_count": 16,
+ "following_count": 0,
+ "starred_repos_count": 0,
+ "username": "release-team"
+ },
+ "assets": [
+ {
+ "id": 1404560,
+ "name": "forgejo-15.0.2-linux-amd64",
+ "size": 0,
+ "download_count": 0,
+ "created_at": "2026-06-03T14:03:49+02:00",
+ "uuid": "4ba17d97-2885-4c38-a5cc-7c9a7f8be084",
+ "browser_download_url": "https://code.forgejo.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-amd64",
+ "type": "external"
+ },
+ {
+ "id": 1291356,
+ "name": "forgejo-15.0.2-linux-amd64.asc",
+ "size": 228,
+ "download_count": 1261,
+ "created_at": "2026-05-12T12:09:19+02:00",
+ "uuid": "4c7724d9-d9d4-41fc-9758-e2abf194fd61",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-amd64.asc",
+ "type": "attachment"
+ },
+ {
+ "id": 1291359,
+ "name": "forgejo-15.0.2-linux-amd64.sha256",
+ "size": 93,
+ "download_count": 6882,
+ "created_at": "2026-05-12T12:09:20+02:00",
+ "uuid": "814194b6-26ee-4ce8-aa58-e9b854f0ef40",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-amd64.sha256",
+ "type": "attachment"
+ },
+ {
+ "id": 1404536,
+ "name": "forgejo-15.0.2-linux-amd64.xz",
+ "size": 33843308,
+ "download_count": 68,
+ "created_at": "2026-06-03T13:57:40+02:00",
+ "uuid": "de77717e-973e-44aa-847f-37b5bdc9c0dc",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-amd64.xz",
+ "type": "attachment"
+ },
+ {
+ "id": 1291365,
+ "name": "forgejo-15.0.2-linux-amd64.xz.asc",
+ "size": 228,
+ "download_count": 158,
+ "created_at": "2026-05-12T12:09:21+02:00",
+ "uuid": "9d6858ef-6b0b-44c3-931f-e643448d71e2",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-amd64.xz.asc",
+ "type": "attachment"
+ },
+ {
+ "id": 1291368,
+ "name": "forgejo-15.0.2-linux-amd64.xz.sha256",
+ "size": 96,
+ "download_count": 251,
+ "created_at": "2026-05-12T12:09:22+02:00",
+ "uuid": "7e553e73-d979-4bd6-9b08-d1f0c14446c4",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-amd64.xz.sha256",
+ "type": "attachment"
+ },
+ {
+ "id": 1404557,
+ "name": "forgejo-15.0.2-linux-arm-6",
+ "size": 0,
+ "download_count": 0,
+ "created_at": "2026-06-03T14:03:49+02:00",
+ "uuid": "5fd81cd6-8afe-470b-9705-53a2b19908b2",
+ "browser_download_url": "https://code.forgejo.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm-6",
+ "type": "external"
+ },
+ {
+ "id": 1291374,
+ "name": "forgejo-15.0.2-linux-arm-6.asc",
+ "size": 228,
+ "download_count": 18,
+ "created_at": "2026-05-12T12:09:25+02:00",
+ "uuid": "c5cf70db-8b76-4ddb-aebf-9a08164a7d88",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm-6.asc",
+ "type": "attachment"
+ },
+ {
+ "id": 1291377,
+ "name": "forgejo-15.0.2-linux-arm-6.sha256",
+ "size": 93,
+ "download_count": 21,
+ "created_at": "2026-05-12T12:09:25+02:00",
+ "uuid": "51a849fa-543a-456b-81f0-70d2b54c08ac",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm-6.sha256",
+ "type": "attachment"
+ },
+ {
+ "id": 1404542,
+ "name": "forgejo-15.0.2-linux-arm-6.xz",
+ "size": 30797300,
+ "download_count": 1,
+ "created_at": "2026-06-03T13:59:10+02:00",
+ "uuid": "4bfb8fad-8795-4da2-acb3-121bbec2aa8c",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm-6.xz",
+ "type": "attachment"
+ },
+ {
+ "id": 1291383,
+ "name": "forgejo-15.0.2-linux-arm-6.xz.asc",
+ "size": 228,
+ "download_count": 16,
+ "created_at": "2026-05-12T12:09:26+02:00",
+ "uuid": "2f3e578d-6fef-4335-9ca0-e5c61432d836",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm-6.xz.asc",
+ "type": "attachment"
+ },
+ {
+ "id": 1291386,
+ "name": "forgejo-15.0.2-linux-arm-6.xz.sha256",
+ "size": 96,
+ "download_count": 19,
+ "created_at": "2026-05-12T12:09:27+02:00",
+ "uuid": "4ef9e8fc-8a72-4f0c-8133-2ce6042c0159",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm-6.xz.sha256",
+ "type": "attachment"
+ },
+ {
+ "id": 1404563,
+ "name": "forgejo-15.0.2-linux-arm64",
+ "size": 0,
+ "download_count": 0,
+ "created_at": "2026-06-03T14:03:49+02:00",
+ "uuid": "c3e8726e-087a-415d-bc73-4a9630f83c19",
+ "browser_download_url": "https://code.forgejo.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm64",
+ "type": "external"
+ },
+ {
+ "id": 1291392,
+ "name": "forgejo-15.0.2-linux-arm64.asc",
+ "size": 228,
+ "download_count": 118,
+ "created_at": "2026-05-12T12:09:30+02:00",
+ "uuid": "ce472907-e213-41a1-bc58-c9998737cce4",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm64.asc",
+ "type": "attachment"
+ },
+ {
+ "id": 1291395,
+ "name": "forgejo-15.0.2-linux-arm64.sha256",
+ "size": 93,
+ "download_count": 165,
+ "created_at": "2026-05-12T12:09:30+02:00",
+ "uuid": "df1a91a9-1c94-4545-bd35-710f87e695ad",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm64.sha256",
+ "type": "attachment"
+ },
+ {
+ "id": 1404539,
+ "name": "forgejo-15.0.2-linux-arm64.xz",
+ "size": 30275260,
+ "download_count": 6,
+ "created_at": "2026-06-03T13:59:10+02:00",
+ "uuid": "e7ef71c6-93a2-44ff-8af0-c07ba4d12a01",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm64.xz",
+ "type": "attachment"
+ },
+ {
+ "id": 1291401,
+ "name": "forgejo-15.0.2-linux-arm64.xz.asc",
+ "size": 228,
+ "download_count": 44,
+ "created_at": "2026-05-12T12:09:32+02:00",
+ "uuid": "6f0b2d62-1a06-4bc2-bb4d-2d632a6b68e4",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm64.xz.asc",
+ "type": "attachment"
+ },
+ {
+ "id": 1291404,
+ "name": "forgejo-15.0.2-linux-arm64.xz.sha256",
+ "size": 96,
+ "download_count": 84,
+ "created_at": "2026-05-12T12:09:32+02:00",
+ "uuid": "a6b6e43d-0e40-4ff5-bf8d-f0793658b90a",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-15.0.2-linux-arm64.xz.sha256",
+ "type": "attachment"
+ },
+ {
+ "id": 1404545,
+ "name": "forgejo-src-15.0.2.tar.gz",
+ "size": 49711959,
+ "download_count": 20,
+ "created_at": "2026-06-03T14:00:54+02:00",
+ "uuid": "45465223-70f5-4705-9289-c2d5fe7d269c",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-src-15.0.2.tar.gz",
+ "type": "attachment"
+ },
+ {
+ "id": 1291410,
+ "name": "forgejo-src-15.0.2.tar.gz.asc",
+ "size": 228,
+ "download_count": 42,
+ "created_at": "2026-05-12T12:09:34+02:00",
+ "uuid": "6db70858-d902-4d68-8bf2-10359c325599",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-src-15.0.2.tar.gz.asc",
+ "type": "attachment"
+ },
+ {
+ "id": 1291413,
+ "name": "forgejo-src-15.0.2.tar.gz.sha256",
+ "size": 92,
+ "download_count": 36,
+ "created_at": "2026-05-12T12:09:34+02:00",
+ "uuid": "5e710ccf-62f2-499e-96e9-f4833eaa9c72",
+ "browser_download_url": "https://codeberg.org/forgejo/forgejo/releases/download/v15.0.2/forgejo-src-15.0.2.tar.gz.sha256",
+ "type": "attachment"
+ }
+ ],
+ "archive_download_count": {
+ "zip": 224,
+ "tar_gz": 363
+ }
+ }
+]
\ No newline at end of file
diff --git a/src/somef/test/test_data/expected/runtime_platform_repo.yaml b/src/somef/test/test_data/expected/runtime_platform_repo.yaml
index d0f08f14..6de7c6bb 100644
--- a/src/somef/test/test_data/expected/runtime_platform_repo.yaml
+++ b/src/somef/test/test_data/expected/runtime_platform_repo.yaml
@@ -6,4 +6,4 @@ CAT_PROGRAMMING_LANGUAGES:
name: Java
value: Java
version: "1.8"
- type: Language
+ type: Programming_language
diff --git a/src/somef/utils/constants.py b/src/somef/utils/constants.py
index bec49c19..fb2a1438 100644
--- a/src/somef/utils/constants.py
+++ b/src/somef/utils/constants.py
@@ -311,6 +311,8 @@
TECHNIQUE_CODE_CONFIG_PARSER = "code_parser"
TECHNIQUE_GITHUB_API = "GitHub_API"
TECHNIQUE_GITLAB_API = "GitLab_API"
+TECHNIQUE_CODEBERG_API = "Codeberg_API"
+TECHNIQUE_BITBUCKET_API = "Bitbucket_API"
TECHNIQUE_HEURISTICS = "software_type_heuristics"
# GitHub properties
@@ -318,6 +320,19 @@
GITHUB_ACCEPT_HEADER = "application/vnd.github.v3+json"
GITHUB_API = "https://api.github.com/repos"
+# Codeberg properties: host name and base URL of the repository API
+CODEBERG_DOMAIN = "codeberg.org"
+CODEBERG_API = "https://codeberg.org/api/v1/repos"
+
+# Bitbucket properties: host name and base URL of the repository API
+BITBUCKET_DOMAIN = "bitbucket.org"
+BITBUCKET_API = "https://api.bitbucket.org/2.0/repositories"
+
+# Key naming the Codeberg authorization token
+CONF_CODEBERG_AUTHORIZATION = "codeberg_authorization"
+# Key naming the Bitbucket authorization token
+CONF_BITBUCKET_AUTHORIZATION = "bitbucket_authorization"
+
# Software Heritage
SWH_ROOT = "https://archive.softwareheritage.org/"
REGEXP_SWH = r'\[\!\[SWH\]([^\]]+)\]\(([^)]+)\)'
@@ -349,6 +364,39 @@
CAT_HOMEPAGE: "homepage"
}
+# Crosswalk from SOMEF categories to the fields of interest in the Codeberg repository response
+codeberg_crosswalk_table = {
+ CAT_CODE_REPOSITORY: "html_url",
+ "languages_url": "languages_url",  # plain string key, not a CAT_* category
+ CAT_OWNER: ["owner", "login"],  # presumably a nested key path (owner -> login); confirm in do_crosswalk
+ # AGENT_TYPE: ["owner", "type"],
+ CAT_DATE_CREATED: "created_at",
+ CAT_DATE_UPDATED: "updated_at",
+ # CAT_LICENSE: "license",
+ CAT_DESCRIPTION: "description",
+ CAT_NAME: "name",
+ CAT_FULL_NAME: "full_name",
+ # CAT_ISSUE_TRACKER: "issues_url",
+ CAT_STARS: "stars_count",
+ CAT_KEYWORDS: "topics",
+ CAT_FORK_COUNTS: "forks_count",
+ CAT_HOMEPAGE: "website"
+}
+
+# Crosswalk from SOMEF categories to the fields of interest in the Bitbucket repository response
+bitbucket_crosswalk_table = {
+ CAT_FULL_NAME: "full_name",
+ CAT_NAME: "name",
+ CAT_DESCRIPTION: "description",
+ CAT_DATE_CREATED: "created_on",
+ CAT_DATE_UPDATED: "updated_on",
+ CAT_OWNER: ["owner", "nickname"],  # NOTE(review): owner in the saved bitbucket_response.json has "username"/"display_name" but no "nickname" - confirm
+ CAT_CODE_REPOSITORY: ["links", "html", "href"],
+ CAT_HOMEPAGE: "website",
+ CAT_FORKS_URLS: ["links", "forks", "href"]
+ # CAT_PROGRAMMING_LANGUAGES: "language",
+}
+
# Mapping for releases
release_crosswalk_table = {
PROP_TAG: 'tag_name',
@@ -391,6 +439,36 @@
PROP_DOWNLOAD_COUNT: "download_count"
}
+release_assets_codeberg = {  # release asset properties mapped to Codeberg asset fields
+ PROP_URL: "url",  # NOTE(review): assets in the saved codeberg_forgejo_releases.json have no "url" key - confirm
+ PROP_NAME: "name",
+ PROP_SIZE: "size",
+ PROP_BROWSER_URL: "browser_download_url",
+ PROP_CONTENT_TYPE: "content_type",  # NOTE(review): not present in the saved assets either (they carry "type") - confirm
+ PROP_DATE_CREATED_AT: "created_at",
+ PROP_DOWNLOAD_COUNT: "download_count"
+}
+
+release_codeberg_crosswalk_table = {  # release properties mapped to the fields of a Codeberg release
+ PROP_TAG: 'tag_name',
+ PROP_NAME: 'name',
+ PROP_AUTHOR: ['author', 'login'],
+ # AGENT_TYPE: ['author', 'type'],
+ PROP_DESCRIPTION: 'body',
+ PROP_TARBALL_URL: 'tarball_url',
+ PROP_ZIPBALL_URL: 'zipball_url',
+ PROP_HTML_URL: 'html_url',
+ PROP_URL: 'url',
+ PROP_RELEASE_ID: 'id',
+ PROP_DATE_CREATED: 'created_at',
+ PROP_DATE_PUBLISHED: "published_at",
+ # CAT_ASSETS: "attachments"  (NOTE(review): the saved releases response names this list "assets")
+}
+
+release_bitbucket_crosswalk_table = {  # applied to each Bitbucket tag: its "name" fills both properties
+ PROP_TAG: "name",
+ PROP_NAME: "name",
+}
# Minimum percentage of total bytes a programming language must have to be considered relevant in CodeMeta file.
MINIMUM_PERCENTAGE_LANGUAGE_PROGRAMMING = 10
@@ -423,6 +501,8 @@ class RepositoryType(Enum):
GITHUB = 1
GITLAB = 2
LOCAL = 3
+ CODEBERG = 4
+ BITBUCKET = 5
# Media/script/non-software sets
workflow_extensions=('.ga','.cwl','.nf','.knwf','.t2flow','.dag','.kar','.wdl',".smk",".snake")