diff --git a/airbyte_cdk/cli/airbyte_cdk/_connector.py b/airbyte_cdk/cli/airbyte_cdk/_connector.py index d09ed4540..5c62dc44a 100644 --- a/airbyte_cdk/cli/airbyte_cdk/_connector.py +++ b/airbyte_cdk/cli/airbyte_cdk/_connector.py @@ -174,6 +174,10 @@ def test( ) +from airbyte_cdk.cli.airbyte_cdk._qa import pre_release_check + +connector_cli_group.add_command(pre_release_check) + __all__ = [ "connector_cli_group", ] diff --git a/airbyte_cdk/cli/airbyte_cdk/_qa.py b/airbyte_cdk/cli/airbyte_cdk/_qa.py new file mode 100644 index 000000000..166dea2f3 --- /dev/null +++ b/airbyte_cdk/cli/airbyte_cdk/_qa.py @@ -0,0 +1,94 @@ +"""CLI command for running QA checks on connectors using pytest.""" + +import os +import subprocess +import sys +from pathlib import Path +from typing import List, Optional + +import rich_click as click + +from airbyte_cdk.cli.airbyte_cdk._util import resolve_connector_name_and_directory + + +@click.command(name="pre-release-check") +@click.option( + "-c", + "--check", + "selected_checks", + multiple=True, + help="The name of the check to run. If not provided, all checks will be run.", +) +@click.option( + "--connector-name", + type=str, + help="Name of the connector to check. Ignored if --connector-directory is provided.", +) +@click.option( + "--connector-directory", + type=click.Path(exists=True, file_okay=False, path_type=Path), + help="Path to the connector directory.", +) +@click.option( + "-r", + "--report-path", + "report_path", + type=click.Path(file_okay=True, path_type=Path, writable=True, dir_okay=False), + help="The path to the report file to write the results to as JSON.", +) +def pre_release_check( + selected_checks: List[str], + connector_name: Optional[str] = None, + connector_directory: Optional[Path] = None, + report_path: Optional[Path] = None, +) -> None: + """Run pre-release checks on a connector using pytest. + + This command runs quality assurance checks on a connector to ensure it meets + Airbyte's standards for release. 
The checks include:
+
+    - Documentation checks
+    - Metadata checks
+    - Packaging checks
+    - Security checks
+    - Asset checks
+    - Testing checks
+
+    If no connector name or directory is provided, we will look within the current working
+    directory. If the current working directory is not a connector directory (e.g. starting
+    with 'source-') and no connector name or path is provided, the process will fail.
+    """
+    connector_name, connector_directory = resolve_connector_name_and_directory(
+        connector_name=connector_name,
+        connector_directory=connector_directory,
+    )
+
+    pytest_args = ["-xvs"]
+
+    if connector_name:
+        pytest_args.extend(["--connector-name", connector_name])
+    if connector_directory:
+        pytest_args.extend(["--connector-directory", str(connector_directory)])
+
+    if report_path:
+        pytest_args.extend(["--report-path", str(report_path)])
+
+    if selected_checks:
+        # pytest keeps only the last -k option, so OR all requested names into one expression.
+        pytest_args.extend(["-k", " or ".join(selected_checks)])
+
+    qa_module_path = Path(__file__).parent.parent.parent / "qa"
+    pytest_args.extend(["-p", "airbyte_cdk.qa.pytest_plugin"])
+
+    test_paths = []
+    for root, _, files in os.walk(qa_module_path / "checks"):
+        for file in files:
+            if file.endswith("_test.py"):
+                test_paths.append(os.path.join(root, file))
+
+    cmd = [sys.executable, "-m", "pytest"] + pytest_args + test_paths
+    click.echo(f"Running: {' '.join(cmd)}")
+    result = subprocess.run(cmd)
+
+    if result.returncode != 0:
+        raise click.ClickException(f"Pytest failed with exit code {result.returncode}")
diff --git a/airbyte_cdk/qa/__init__.py b/airbyte_cdk/qa/__init__.py
new file mode 100644
index 000000000..1e2ec0f6d
--- /dev/null
+++ b/airbyte_cdk/qa/__init__.py
@@ -0,0 +1,69 @@
+"""The `airbyte_cdk.qa` module provides quality assurance checks for Airbyte connectors.
+
+This module includes a framework for running pre-release checks on connectors to ensure
+they meet Airbyte's quality standards. The checks are organized into categories and can
+be run individually or as a group.
+ + +The QA module includes the following check categories: + +- **Packaging**: Checks related to connector packaging, including dependency management, + versioning, and licensing. +- **Metadata**: Checks related to connector metadata, including language tags, CDK tags, + and breaking changes deadlines. +- **Security**: Checks related to connector security, including HTTPS usage and base image + requirements. +- **Assets**: Checks related to connector assets, including icons and other visual elements. +- **Documentation**: Checks related to connector documentation, ensuring it exists and is + properly formatted. +- **Testing**: Checks related to connector testing, ensuring acceptance tests are present. + + +Checks can be configured based on various connector attributes: + +- **Connector Language**: Checks can be configured to run only on connectors of specific + languages (Python, Java, Low-Code, Manifest-Only). +- **Connector Type**: Checks can be configured to run only on specific connector types + (source, destination). +- **Support Level**: Checks can be configured to run only on connectors with specific + support levels (certified, community, etc.). +- **Cloud Usage**: Checks can be configured to run only on connectors with specific + cloud usage settings (enabled, disabled, etc.). +- **Internal SL**: Checks can be configured to run only on connectors with specific + internal service level requirements. + + +Checks can be run using the `airbyte-cdk connector pre-release-check` command: + +```bash +airbyte-cdk connector pre-release-check --connector-name source-example + +airbyte-cdk connector pre-release-check --connector-name source-example --check CheckConnectorUsesPoetry --check CheckVersionBump + +airbyte-cdk connector pre-release-check --connector-directory /path/to/connector + +airbyte-cdk connector pre-release-check --connector-name source-example --report-path report.json +``` + + +The QA module is designed to be extensible. 
New checks can be added by creating a new +class that inherits from the `Check` base class and implementing the required methods. + +Example: + +```python +from airbyte_cdk.qa.models import Check, CheckCategory, CheckResult +from airbyte_cdk.qa.connector import Connector + +class MyCustomCheck(Check): + name = "My custom check" + description = "Description of what my check verifies" + category = CheckCategory.TESTING + + def _run(self, connector: Connector) -> CheckResult: + if some_condition: + return self.pass_(connector, "Check passed message") + else: + return self.fail(connector, "Check failed message") +``` +""" diff --git a/airbyte_cdk/qa/checks/__init__.py b/airbyte_cdk/qa/checks/__init__.py new file mode 100644 index 000000000..0f2b7349f --- /dev/null +++ b/airbyte_cdk/qa/checks/__init__.py @@ -0,0 +1,17 @@ +"""QA checks for Airbyte connectors.""" + +from airbyte_cdk.qa.checks.assets import ENABLED_CHECKS as ASSETS_CHECKS +from airbyte_cdk.qa.checks.documentation import ENABLED_CHECKS as DOCUMENTATION_CHECKS +from airbyte_cdk.qa.checks.metadata import ENABLED_CHECKS as METADATA_CORRECTNESS_CHECKS +from airbyte_cdk.qa.checks.packaging import ENABLED_CHECKS as PACKAGING_CHECKS +from airbyte_cdk.qa.checks.security import ENABLED_CHECKS as SECURITY_CHECKS +from airbyte_cdk.qa.checks.testing import ENABLED_CHECKS as TESTING_CHECKS + +ENABLED_CHECKS = ( + DOCUMENTATION_CHECKS + + METADATA_CORRECTNESS_CHECKS + + PACKAGING_CHECKS + + ASSETS_CHECKS + + SECURITY_CHECKS + + TESTING_CHECKS +) diff --git a/airbyte_cdk/qa/checks/assets.py b/airbyte_cdk/qa/checks/assets.py new file mode 100644 index 000000000..414c3848f --- /dev/null +++ b/airbyte_cdk/qa/checks/assets.py @@ -0,0 +1,45 @@ +"""Asset checks for Airbyte connectors.""" + +from airbyte_cdk.qa import consts +from airbyte_cdk.qa.connector import Connector +from airbyte_cdk.qa.models import Check, CheckCategory, CheckResult + + +class AssetCheck(Check): + """Base class for asset checks.""" + + category = 
CheckCategory.ASSETS + + +class CheckConnectorHasIcon(AssetCheck): + """Check that connectors have an icon.""" + + name = "Connectors must have an icon" + description = f"Connectors must have an icon file named `{consts.ICON_FILE_NAME}` in their code directory. This is to ensure that all connectors have a visual representation in the UI." + + def _run(self, connector: Connector) -> CheckResult: + """Run the check. + + Args: + connector: The connector to check + + Returns: + CheckResult: The result of the check + """ + icon_path = connector.code_directory / consts.ICON_FILE_NAME + if not icon_path.exists(): + return self.create_check_result( + connector=connector, + passed=False, + message=f"Icon file {consts.ICON_FILE_NAME} does not exist", + ) + return self.create_check_result( + connector=connector, + passed=True, + message=f"Icon file {consts.ICON_FILE_NAME} exists", + ) + + +ENABLED_CHECKS = [ + CheckConnectorHasIcon(), +] diff --git a/airbyte_cdk/qa/checks/documentation/__init__.py b/airbyte_cdk/qa/checks/documentation/__init__.py new file mode 100644 index 000000000..ef50b0dcf --- /dev/null +++ b/airbyte_cdk/qa/checks/documentation/__init__.py @@ -0,0 +1,9 @@ +"""Documentation checks for Airbyte connectors.""" + +from airbyte_cdk.qa.checks.documentation.documentation import ( + CheckDocumentationExists, +) + +ENABLED_CHECKS = [ + CheckDocumentationExists(), +] diff --git a/airbyte_cdk/qa/checks/documentation/documentation.py b/airbyte_cdk/qa/checks/documentation/documentation.py new file mode 100644 index 000000000..70ee0a5d4 --- /dev/null +++ b/airbyte_cdk/qa/checks/documentation/documentation.py @@ -0,0 +1,54 @@ +"""Documentation checks for Airbyte connectors.""" + +from pathlib import Path + +from airbyte_cdk.qa.connector import Connector +from airbyte_cdk.qa.models import Check, CheckCategory, CheckResult + + +class DocumentationCheck(Check): + """Base class for documentation checks.""" + + category = CheckCategory.DOCUMENTATION + + +class 
CheckDocumentationExists(DocumentationCheck):
+    """Check that connectors have documentation."""
+
+    name = "Connectors must have documentation"
+    description = (
+        "Connectors must have documentation to ensure that users can understand how to use them."
+    )
+
+    def _run(self, connector: Connector) -> CheckResult:
+        """Run the check.
+
+        Args:
+            connector: The connector to check
+
+        Returns:
+            CheckResult: The result of the check
+        """
+        # Locate the docs tree in an ancestor directory instead of one machine's absolute path.
+        ancestors = connector.code_directory.resolve().parents
+        repo_roots = [p for p in ancestors if (p / "docs" / "integrations").is_dir()]
+        docs_dir = (repo_roots[0] if repo_roots else Path.cwd()) / "docs" / "integrations"
+        doc_name = connector.technical_name.replace("source-", "").replace("destination-", "")
+        doc_file = docs_dir / (connector.connector_type + "s") / f"{doc_name}.md"
+
+        if not doc_file.exists():
+            return self.create_check_result(
+                connector=connector,
+                passed=False,
+                message=f"Documentation file {doc_file} does not exist",
+            )
+        return self.create_check_result(
+            connector=connector,
+            passed=True,
+            message=f"Documentation file {doc_file} exists",
+        )
+
+
+ENABLED_CHECKS = [
+    CheckDocumentationExists(),
+]
diff --git a/airbyte_cdk/qa/checks/metadata.py b/airbyte_cdk/qa/checks/metadata.py
new file mode 100644
index 000000000..a6d4e30bd
--- /dev/null
+++ b/airbyte_cdk/qa/checks/metadata.py
@@ -0,0 +1,316 @@
+"""Metadata checks for Airbyte connectors."""
+
+import os
+from datetime import datetime, timedelta
+
+import toml
+import yaml
+
+from airbyte_cdk.qa import consts
+from airbyte_cdk.qa.connector import Connector, ConnectorLanguage
+from airbyte_cdk.qa.models import Check, CheckCategory, CheckResult
+
+
+class MetadataCheck(Check):
+    """Base class for metadata checks."""
+
+    category = CheckCategory.METADATA
+
+
+class ValidateMetadata(MetadataCheck):
+    """Check that connectors have a valid metadata.yaml file."""
+
+    name = f"Connectors must have valid {consts.METADATA_FILE_NAME} file"
+    description = f"Connectors must have a `{consts.METADATA_FILE_NAME}` file at the root of their directory. 
This file is used to build our connector registry. Its structure must follow our metadata schema." + + def _run(self, connector: Connector) -> CheckResult: + """Run the check. + + Args: + connector: The connector to check + + Returns: + CheckResult: The result of the check + """ + if not connector.metadata_file_path.exists(): + return self.fail( + connector=connector, + message=f"Metadata file {consts.METADATA_FILE_NAME} does not exist", + ) + + try: + with open(connector.metadata_file_path, "r") as f: + yaml.safe_load(f) + except yaml.YAMLError as e: + return self.fail( + connector=connector, message=f"Metadata file is invalid YAML: {str(e)}" + ) + + return self.pass_( + connector=connector, + message="Metadata file exists and is valid YAML.", + ) + + +class CheckConnectorLanguageTag(MetadataCheck): + """Check that connectors have a language tag in metadata.""" + + name = "Connector must have a language tag in metadata" + description = f"Connectors must have a language tag in their metadata. It must be set in the `tags` field in {consts.METADATA_FILE_NAME}. The values can be `language:python` or `language:java`." + + PYTHON_LANGUAGE_TAG = "language:python" + JAVA_LANGUAGE_TAG = "language:java" + MANIFEST_ONLY_LANGUAGE_TAG = "language:manifest-only" + + def get_expected_language_tag(self, connector: Connector) -> str: + """Get the expected language tag for the connector. 
+ + Args: + connector: The connector to check + + Returns: + str: The expected language tag + + Raises: + ValueError: If the language tag cannot be inferred + """ + if (connector.code_directory / "manifest.yaml").exists(): + return self.MANIFEST_ONLY_LANGUAGE_TAG + if (connector.code_directory / "setup.py").exists() or ( + connector.code_directory / consts.PYPROJECT_FILE_NAME + ).exists(): + return self.PYTHON_LANGUAGE_TAG + elif (connector.code_directory / "build.gradle").exists() or ( + connector.code_directory / "build.gradle.kts" + ).exists(): + return self.JAVA_LANGUAGE_TAG + else: + raise ValueError("Could not infer the language tag from the connector directory") + + def _run(self, connector: Connector) -> CheckResult: + """Run the check. + + Args: + connector: The connector to check + + Returns: + CheckResult: The result of the check + """ + try: + expected_language_tag = self.get_expected_language_tag(connector) + except ValueError: + return self.fail( + connector=connector, + message="Could not infer the language tag from the connector directory", + ) + + current_language_tags = [ + t + for t in (connector.metadata.get("tags", []) if connector.metadata else []) + if t.startswith("language:") + ] + if not current_language_tags: + return self.fail( + connector=connector, + message="Language tag is missing in the metadata file", + ) + if len(current_language_tags) > 1: + return self.fail( + connector=connector, + message=f"Multiple language tags found in the metadata file: {current_language_tags}", + ) + current_language_tag = current_language_tags[0] + if current_language_tag != expected_language_tag: + return self.fail( + connector=connector, + message=f"Expected language tag '{expected_language_tag}' in the {consts.METADATA_FILE_NAME} file, but found '{current_language_tag}'", + ) + return self.pass_( + connector=connector, + message=f"Language tag {expected_language_tag} is present in the metadata file", + ) + + +class CheckConnectorCDKTag(MetadataCheck): 
+    """Check that Python connectors have a CDK tag in metadata."""
+
+    name = "Python connectors must have a CDK tag in metadata"
+    description = f"Python connectors must have a CDK tag in their metadata. It must be set in the `tags` field in {consts.METADATA_FILE_NAME}. The values can be `cdk:low-code`, `cdk:python`, or `cdk:python-file-based`."
+    applies_to_connector_languages = [ConnectorLanguage.PYTHON, ConnectorLanguage.LOW_CODE]
+
+    class CDKTag:
+        """CDK tag values."""
+
+        LOW_CODE = "cdk:low-code"
+        PYTHON = "cdk:python"
+        FILE = "cdk:python-file-based"
+
+    def get_expected_cdk_tag(self, connector: Connector) -> str:
+        """Get the expected CDK tag for the connector.
+
+        A packaged manifest.yaml means low-code; a `file-based` airbyte-cdk extra in
+        pyproject.toml or setup.py means file-based; otherwise the plain Python tag is used.
+
+        Args:
+            connector: The connector to check
+
+        Returns:
+            str: The expected CDK tag
+        """
+        manifest_file = (
+            connector.code_directory / connector.technical_name.replace("-", "_") / "manifest.yaml"
+        )
+        pyproject_file = connector.code_directory / consts.PYPROJECT_FILE_NAME
+        setup_py_file = connector.code_directory / "setup.py"
+        if manifest_file.exists():
+            return self.CDKTag.LOW_CODE
+        if pyproject_file.exists():
+            pyproject = toml.load(pyproject_file)
+            # Tolerate a pyproject.toml with no [tool.poetry.dependencies] table (no KeyError).
+            poetry_config = pyproject.get("tool", {}).get("poetry", {})
+            cdk_deps = poetry_config.get("dependencies", {}).get("airbyte-cdk")
+            if isinstance(cdk_deps, dict) and "file-based" in cdk_deps.get("extras", []):
+                return self.CDKTag.FILE
+        if setup_py_file.exists() and "airbyte-cdk[file-based]" in setup_py_file.read_text():
+            return self.CDKTag.FILE
+        return self.CDKTag.PYTHON
+
+    def _run(self, connector: Connector) -> CheckResult:
+        """Run the check. 
+
+        Args:
+            connector: The connector to check
+
+        Returns:
+            CheckResult: The result of the check
+        """
+        current_cdk_tags = [
+            t
+            for t in ((connector.metadata or {}).get("tags") or [])  # missing or null tags -> []
+            if t.startswith("cdk:")
+        ]
+        expected_cdk_tag = self.get_expected_cdk_tag(connector)  # reads project files: call once
+        if not current_cdk_tags:
+            return self.fail(
+                connector=connector,
+                message="CDK tag is missing in the metadata file",
+            )
+        if len(current_cdk_tags) > 1:
+            return self.fail(
+                connector=connector,
+                message=f"Multiple CDK tags found in the metadata file: {current_cdk_tags}",
+            )
+        if current_cdk_tags[0] != expected_cdk_tag:
+            return self.fail(
+                connector=connector,
+                message=f"Expected CDK tag '{expected_cdk_tag}' in the {consts.METADATA_FILE_NAME} file, but found '{current_cdk_tags[0]}'",
+            )
+        return self.pass_(
+            connector=connector,
+            message=f"CDK tag {expected_cdk_tag} is present in the metadata file",
+        )
+
+
+class ValidateBreakingChangesDeadlines(MetadataCheck):
+    """Check that breaking change deadlines are at least a week in the future."""
+
+    name = "Breaking change deadline should be a week in the future"
+    description = "If the connector version has a breaking change, the deadline field must be set to at least a week in the future."
+    runs_on_released_connectors = False
+    minimum_days_until_deadline = 7
+
+    def _run(self, connector: Connector) -> CheckResult:
+        """Run the check. 
+
+        Args:
+            connector: The connector to check
+
+        Returns:
+            CheckResult: The result of the check
+        """
+        current_version = connector.version
+        if current_version is None:
+            return self.fail(
+                connector=connector,
+                message="Can't verify breaking changes deadline: connector version is not defined.",
+            )
+
+        breaking_changes = (
+            (connector.metadata or {}).get("releases") or {}  # missing or null releases -> {}
+        ).get("breakingChanges")
+
+        if not breaking_changes:
+            return self.pass_(
+                connector=connector,
+                message="No breaking changes found on this connector.",
+            )
+
+        current_version_breaking_changes = breaking_changes.get(current_version)
+
+        if not current_version_breaking_changes:
+            return self.pass_(
+                connector=connector,
+                message="No breaking changes found for the current version.",
+            )
+
+        upgrade_deadline = current_version_breaking_changes.get("upgradeDeadline")
+
+        if not upgrade_deadline:
+            return self.fail(
+                connector=connector,
+                message=f"No upgrade deadline found for the breaking changes in {current_version}.",
+            )
+        # An unquoted YAML date would arrive as a date object, not str; str() accepts both.
+        upgrade_deadline_datetime = datetime.strptime(str(upgrade_deadline), "%Y-%m-%d")
+        one_week_from_now = datetime.utcnow() + timedelta(days=self.minimum_days_until_deadline)
+
+        if upgrade_deadline_datetime <= one_week_from_now:
+            return self.fail(
+                connector=connector,
+                message=f"The upgrade deadline for the breaking changes in {current_version} is less than {self.minimum_days_until_deadline} days from today. Please extend the deadline",
+            )
+
+        return self.pass_(
+            connector=connector,
+            message="The upgrade deadline is set to at least a week in the future",
+        )
+
+
+class CheckConnectorMaxSecondsBetweenMessagesValue(MetadataCheck):
+    """Check that certified source connectors have a maxSecondsBetweenMessages value."""
+
+    name = "Certified source connector must have a value filled out for maxSecondsBetweenMessages in metadata"
+    description = "Certified source connectors must have a value filled out for `maxSecondsBetweenMessages` in metadata. 
This value represents the maximum number of seconds we could expect between messages for API connectors." + applies_to_connector_types = ["source"] + applies_to_connector_support_levels = ["certified"] + + def _run(self, connector: Connector) -> CheckResult: + """Run the check. + + Args: + connector: The connector to check + + Returns: + CheckResult: The result of the check + """ + max_seconds_between_messages = ( + connector.metadata.get("maxSecondsBetweenMessages") if connector.metadata else None + ) + if not max_seconds_between_messages: + return self.fail( + connector=connector, + message="Missing required for certified connectors field 'maxSecondsBetweenMessages'", + ) + return self.pass_( + connector=connector, + message="Value for maxSecondsBetweenMessages is set", + ) + + +ENABLED_CHECKS = [ + ValidateMetadata(), + CheckConnectorLanguageTag(), + CheckConnectorCDKTag(), + ValidateBreakingChangesDeadlines(), + CheckConnectorMaxSecondsBetweenMessagesValue(), +] diff --git a/airbyte_cdk/qa/checks/metadata_test.py b/airbyte_cdk/qa/checks/metadata_test.py new file mode 100644 index 000000000..f7d3c80e5 --- /dev/null +++ b/airbyte_cdk/qa/checks/metadata_test.py @@ -0,0 +1,175 @@ +"""Metadata checks for Airbyte connectors implemented as pytest tests.""" + +import os +from datetime import datetime, timedelta + +import pytest +import toml +import yaml + +from airbyte_cdk.qa import consts +from airbyte_cdk.qa.connector import Connector, ConnectorLanguage + + +@pytest.mark.qa_check +@pytest.mark.check_category("metadata") +@pytest.mark.requires_metadata +class TestMetadata: + """Test class for metadata checks.""" + + @pytest.mark.parametrize("connector_fixture", ["connector"], indirect=True) + def test_validate_metadata(self, connector: Connector) -> None: + """Check that connectors have a valid metadata.yaml file. 
+ + Args: + connector: The connector to check + """ + assert connector.metadata_file_path.exists(), f"Metadata file {consts.METADATA_FILE_NAME} does not exist" + + try: + with open(connector.metadata_file_path, "r") as f: + yaml.safe_load(f) + except yaml.YAMLError as e: + pytest.fail(f"Metadata file is invalid YAML: {str(e)}") + + + @pytest.mark.parametrize("connector_fixture", ["connector"], indirect=True) + def test_connector_language_tag(self, connector: Connector) -> None: + """Check that connectors have a language tag in metadata. + + Args: + connector: The connector to check + """ + PYTHON_LANGUAGE_TAG = "language:python" + JAVA_LANGUAGE_TAG = "language:java" + MANIFEST_ONLY_LANGUAGE_TAG = "language:manifest-only" + + def get_expected_language_tag(connector: Connector) -> str: + """Get the expected language tag for the connector. + + Args: + connector: The connector to check + + Returns: + str: The expected language tag + + Raises: + ValueError: If the language tag cannot be inferred + """ + if (connector.code_directory / "manifest.yaml").exists(): + return MANIFEST_ONLY_LANGUAGE_TAG + if (connector.code_directory / "setup.py").exists() or ( + connector.code_directory / consts.PYPROJECT_FILE_NAME + ).exists(): + return PYTHON_LANGUAGE_TAG + elif (connector.code_directory / "build.gradle").exists() or ( + connector.code_directory / "build.gradle.kts" + ).exists(): + return JAVA_LANGUAGE_TAG + else: + raise ValueError("Could not infer the language tag from the connector directory") + + try: + expected_language_tag = get_expected_language_tag(connector) + except ValueError: + pytest.fail("Could not infer the language tag from the connector directory") + + current_language_tags = [t for t in (connector.metadata.get("tags", []) if connector.metadata else []) if t.startswith("language:")] + assert current_language_tags, "Language tag is missing in the metadata file" + assert len(current_language_tags) == 1, f"Multiple language tags found in the metadata file: 
{current_language_tags}" + + current_language_tag = current_language_tags[0] + assert current_language_tag == expected_language_tag, f"Expected language tag '{expected_language_tag}' in the {consts.METADATA_FILE_NAME} file, but found '{current_language_tag}'" + + + @pytest.mark.parametrize("connector_fixture", ["connector"], indirect=True) + @pytest.mark.connector_language(["python", "low-code"]) + def test_connector_cdk_tag(self, connector: Connector) -> None: + """Check that Python connectors have a CDK tag in metadata. + + Args: + connector: The connector to check + """ + class CDKTag: + """CDK tag values.""" + + LOW_CODE = "cdk:low-code" + PYTHON = "cdk:python" + FILE = "cdk:python-file-based" + + def get_expected_cdk_tag(connector: Connector) -> str: + """Get the expected CDK tag for the connector. + + Args: + connector: The connector to check + + Returns: + str: The expected CDK tag + """ + manifest_file = connector.code_directory / connector.technical_name.replace("-", "_") / "manifest.yaml" + pyproject_file = connector.code_directory / consts.PYPROJECT_FILE_NAME + setup_py_file = connector.code_directory / "setup.py" + if manifest_file.exists(): + return CDKTag.LOW_CODE + if pyproject_file.exists(): + pyproject = toml.load((connector.code_directory / consts.PYPROJECT_FILE_NAME)) + cdk_deps = pyproject["tool"]["poetry"]["dependencies"].get("airbyte-cdk", None) + if cdk_deps and isinstance(cdk_deps, dict) and "file-based" in cdk_deps.get("extras", []): + return CDKTag.FILE + if setup_py_file.exists(): + if "airbyte-cdk[file-based]" in (connector.code_directory / "setup.py").read_text(): + return CDKTag.FILE + return CDKTag.PYTHON + + current_cdk_tags = [t for t in (connector.metadata.get("tags", []) if connector.metadata else []) if t.startswith("cdk:")] + expected_cdk_tag = get_expected_cdk_tag(connector) + + assert current_cdk_tags, "CDK tag is missing in the metadata file" + assert len(current_cdk_tags) == 1, f"Multiple CDK tags found in the metadata file: 
{current_cdk_tags}" + assert current_cdk_tags[0] == expected_cdk_tag, f"Expected CDK tag '{get_expected_cdk_tag(connector)}' in the {consts.METADATA_FILE_NAME} file, but found '{current_cdk_tags[0]}'" + + + @pytest.mark.parametrize("connector_fixture", ["connector"], indirect=True) + @pytest.mark.runs_on_released_connectors(False) + def test_breaking_changes_deadlines(self, connector: Connector) -> None: + """Check that breaking change deadlines are at least a week in the future. + + Args: + connector: The connector to check + """ + minimum_days_until_deadline = 7 + + current_version = connector.version + assert current_version is not None, "Can't verify breaking changes deadline: connector version is not defined." + + breaking_changes = (connector.metadata.get("releases", {}) if connector.metadata else {}).get("breakingChanges") + + if not breaking_changes: + pytest.skip("No breaking changes found on this connector.") + + current_version_breaking_changes = breaking_changes.get(current_version) + + if not current_version_breaking_changes: + pytest.skip("No breaking changes found for the current version.") + + upgrade_deadline = current_version_breaking_changes.get("upgradeDeadline") + + assert upgrade_deadline, f"No upgrade deadline found for the breaking changes in {current_version}." + + upgrade_deadline_datetime = datetime.strptime(upgrade_deadline, "%Y-%m-%d") + one_week_from_now = datetime.utcnow() + timedelta(days=minimum_days_until_deadline) + + assert upgrade_deadline_datetime > one_week_from_now, f"The upgrade deadline for the breaking changes in {current_version} is less than {minimum_days_until_deadline} days from today. 
Please extend the deadline" + + + @pytest.mark.parametrize("connector_fixture", ["connector"], indirect=True) + @pytest.mark.connector_type(["source"]) + @pytest.mark.connector_support_level(["certified"]) + def test_connector_max_seconds_between_messages_value(self, connector: Connector) -> None: + """Check that certified source connectors have a maxSecondsBetweenMessages value. + + Args: + connector: The connector to check + """ + max_seconds_between_messages = connector.metadata.get("maxSecondsBetweenMessages") if connector.metadata else None + assert max_seconds_between_messages, "Missing required for certified connectors field 'maxSecondsBetweenMessages'" diff --git a/airbyte_cdk/qa/checks/packaging.py b/airbyte_cdk/qa/checks/packaging.py new file mode 100644 index 000000000..4cce96be7 --- /dev/null +++ b/airbyte_cdk/qa/checks/packaging.py @@ -0,0 +1,360 @@ +"""Packaging checks for Airbyte connectors.""" + +import semver +import toml +from pydash.objects import get + +from airbyte_cdk.qa import consts +from airbyte_cdk.qa.connector import Connector, ConnectorLanguage +from airbyte_cdk.qa.models import Check, CheckCategory, CheckResult + + +class PackagingCheck(Check): + """Base class for packaging checks.""" + + category = CheckCategory.PACKAGING + + +class CheckConnectorUsesPoetry(PackagingCheck): + """Check that connectors use Poetry for dependency management.""" + + name = "Connectors must use Poetry for dependency management" + description = "Connectors must use [Poetry](https://python-poetry.org/) for dependency management. This is to ensure that all connectors use a dependency management tool which locks dependencies and ensures reproducible installs." + requires_metadata = False + runs_on_released_connectors = False + applies_to_connector_languages = [ + ConnectorLanguage.PYTHON, + ConnectorLanguage.LOW_CODE, + ] + + def _run(self, connector: Connector) -> CheckResult: + """Run the check. 
+ + Args: + connector: The connector to check + + Returns: + CheckResult: The result of the check + """ + if not (connector.code_directory / consts.PYPROJECT_FILE_NAME).exists(): + return self.create_check_result( + connector=connector, + passed=False, + message=f"{consts.PYPROJECT_FILE_NAME} file is missing", + ) + if not (connector.code_directory / consts.POETRY_LOCK_FILE_NAME).exists(): + return self.fail( + connector=connector, message=f"{consts.POETRY_LOCK_FILE_NAME} file is missing" + ) + if (connector.code_directory / "setup.py").exists(): + return self.fail( + connector=connector, + message=f"setup.py file exists. Please remove it and use {consts.PYPROJECT_FILE_NAME} instead", + ) + return self.pass_( + connector=connector, + message="Poetry is used for dependency management", + ) + + +class CheckPublishToPyPiIsDeclared(PackagingCheck): + """Check that Python connectors have PyPi publishing declared.""" + + name = "Python connectors must have PyPi publishing declared." + description = f"Python connectors must have [PyPi](https://pypi.org/) publishing enabled in their `{consts.METADATA_FILE_NAME}` file. This is declared by setting `remoteRegistries.pypi.enabled` to `true` in {consts.METADATA_FILE_NAME}. This is to ensure that all connectors can be published to PyPi and can be used in `PyAirbyte`." + applies_to_connector_languages = [ + ConnectorLanguage.PYTHON, + ConnectorLanguage.LOW_CODE, + ] + applies_to_connector_types = ["source"] + + def _run(self, connector: Connector) -> CheckResult: + """Run the check. + + Args: + connector: The connector to check + + Returns: + CheckResult: The result of the check + """ + publish_to_pypi_is_enabled = get(connector.metadata, "remoteRegistries.pypi.enabled") + if publish_to_pypi_is_enabled is None: + return self.create_check_result( + connector=connector, + passed=False, + message=f"PyPi publishing is not declared. 
Please set it in the {consts.METADATA_FILE_NAME} file", + ) + return self.create_check_result( + connector=connector, passed=True, message="PyPi publishing is declared" + ) + + +class CheckManifestOnlyConnectorBaseImage(PackagingCheck): + """Check that manifest-only connectors use the correct base image.""" + + name = "Manifest-only connectors must use `source-declarative-manifest` as their base image" + description = "Manifest-only connectors must use `airbyte/source-declarative-manifest` as their base image." + applies_to_connector_languages = [ConnectorLanguage.MANIFEST_ONLY] + + def _run(self, connector: Connector) -> CheckResult: + """Run the check. + + Args: + connector: The connector to check + + Returns: + CheckResult: The result of the check + """ + base_image = get(connector.metadata, "connectorBuildOptions.baseImage") + base_image_name = base_image.split(":")[0] if base_image else None + + if base_image_name != "docker.io/airbyte/source-declarative-manifest": + return self.create_check_result( + connector=connector, + passed=False, + message=f"A manifest-only connector must use `source-declarative-manifest` base image. Replace the base image in {consts.METADATA_FILE_NAME} file", + ) + return self.create_check_result( + connector=connector, + passed=True, + message="Connector uses source-declarative-manifest base image", + ) + + +class CheckConnectorLicense(PackagingCheck): + """Check that connectors are licensed under MIT or Elv2.""" + + name = "Connectors must be licensed under MIT or Elv2" + description = "Connectors must be licensed under the MIT or Elv2 license. This is to ensure that all connectors are licensed under a permissive license." + + def _run(self, connector: Connector) -> CheckResult: + """Run the check. 
class CheckConnectorLicenseMatchInPyproject(PackagingCheck):
    """Verify that the metadata file and pyproject file declare the same license."""

    name = f"Connector license in {consts.METADATA_FILE_NAME} and {consts.PYPROJECT_FILE_NAME} file must match"
    description = f"Connectors license in {consts.METADATA_FILE_NAME} and {consts.PYPROJECT_FILE_NAME} file must match. This is to ensure that all connectors are consistently licensed."
    applies_to_connector_languages = [
        ConnectorLanguage.PYTHON,
        ConnectorLanguage.LOW_CODE,
    ]

    def _run(self, connector: Connector) -> CheckResult:
        """Compare the metadata license with `tool.poetry.license`, ignoring case.

        The check fails early, in this order, when: the metadata has no license,
        the pyproject file does not exist, the pyproject file is not valid TOML,
        or the pyproject file has no `tool.poetry.license` entry.

        Args:
            connector: The connector whose two license declarations are compared

        Returns:
            CheckResult: Passed when both licenses are equal case-insensitively
        """
        license_in_metadata = get(connector.metadata, "license")
        if license_in_metadata is None:
            return self.fail(
                connector=connector,
                message=f"License is missing in the {consts.METADATA_FILE_NAME} file",
            )

        pyproject_path = connector.code_directory / consts.PYPROJECT_FILE_NAME
        if not pyproject_path.exists():
            return self.fail(
                connector=connector,
                message=f"{consts.PYPROJECT_FILE_NAME} file is missing",
            )

        try:
            pyproject_content = toml.load(pyproject_path)
        except toml.TomlDecodeError:
            return self.fail(
                connector=connector,
                message=f"{consts.PYPROJECT_FILE_NAME} is invalid toml file",
            )

        license_in_pyproject = get(pyproject_content, "tool.poetry.license")
        if license_in_pyproject is None:
            return self.fail(
                connector=connector,
                message=f"Connector is missing license in {consts.PYPROJECT_FILE_NAME}. Please add it",
            )

        # Case differences (e.g. "MIT" vs "mit") are not treated as a mismatch.
        licenses_agree = license_in_pyproject.lower() == license_in_metadata.lower()
        if licenses_agree:
            return self.pass_(
                connector=connector,
                message=f"License in {consts.METADATA_FILE_NAME} and {consts.PYPROJECT_FILE_NAME} file match",
            )
        return self.fail(
            connector=connector,
            message=f"Connector is licensed under {license_in_pyproject} in {consts.PYPROJECT_FILE_NAME}, but licensed under {license_in_metadata} in {consts.METADATA_FILE_NAME}. These two files have to be consistent",
        )
class CheckVersionFollowsSemver(PackagingCheck):
    """Verify that the connector's `dockerImageTag` is a valid semantic version."""

    name = "Connector version must follow Semantic Versioning"
    description = "Connector version must follow the Semantic Versioning scheme. This is to ensure that all connectors follow a consistent versioning scheme."

    def _run(self, connector: Connector) -> CheckResult:
        """Parse the metadata `dockerImageTag` with `semver` and report the outcome.

        Args:
            connector: The connector whose version tag is validated

        Returns:
            CheckResult: Failed when the tag is absent or cannot be parsed as a
                semantic version, passed otherwise
        """
        metadata = connector.metadata
        if metadata and "dockerImageTag" in metadata:
            image_tag = metadata["dockerImageTag"]
            try:
                # The tag is stringified before parsing, whatever type the metadata holds.
                semver.Version.parse(str(image_tag))
            except ValueError:
                is_valid = False
                outcome_message = (
                    f"Connector version {image_tag} does not follow semantic versioning"
                )
            else:
                is_valid = True
                outcome_message = "Connector version follows semantic versioning"
        else:
            is_valid = False
            outcome_message = f"dockerImageTag is missing in {consts.METADATA_FILE_NAME}"

        return self.create_check_result(
            connector=connector,
            passed=is_valid,
            message=outcome_message,
        )
@pytest.mark.qa_check
@pytest.mark.check_category("packaging")
class TestPackaging:
    """Packaging checks for connectors, expressed as pytest tests."""

    @staticmethod
    def _load_pyproject(connector: Connector) -> dict:
        """Parse the connector's pyproject file, failing the test if it cannot be read.

        Args:
            connector: The connector whose pyproject file is parsed

        Returns:
            dict: The parsed TOML document
        """
        try:
            # TOML documents are UTF-8 by specification; do not depend on the locale.
            with open(connector.pyproject_file_path, "r", encoding="utf-8") as f:
                return toml.load(f)
        except Exception as e:
            # Any read/parse problem is reported as a test failure, not an error.
            pytest.fail(f"Failed to parse {consts.PYPROJECT_FILE_NAME}: {str(e)}")

    @pytest.mark.parametrize("connector_fixture", ["connector"], indirect=True)
    @pytest.mark.connector_language(["python"])
    def test_connector_uses_poetry(self, connector: Connector) -> None:
        """Check that Python connectors use Poetry for dependency management.

        The pyproject file must exist, be parseable, and contain a
        `[tool.poetry]` section.

        Args:
            connector: The connector to check
        """
        assert connector.pyproject_file_path.exists(), f"Python connector must use Poetry. {consts.PYPROJECT_FILE_NAME} file not found."

        pyproject = self._load_pyproject(connector)

        assert "tool" in pyproject, f"{consts.PYPROJECT_FILE_NAME} must have a [tool] section"
        assert "poetry" in pyproject["tool"], f"{consts.PYPROJECT_FILE_NAME} must have a [tool.poetry] section"

    @pytest.mark.parametrize("connector_fixture", ["connector"], indirect=True)
    @pytest.mark.connector_language(["python"])
    def test_version_bump(self, connector: Connector) -> None:
        """Check that the connector version has been bumped.

        The current version is compared with the `data.dockerImageTag` value of
        `metadata.yaml` as of the previous commit (`HEAD~1`). The test is skipped
        when the previous version cannot be obtained; it fails when the version
        was not increased.

        Args:
            connector: The connector to check
        """
        # Local import: subprocess is only needed by this test.
        import subprocess

        current_version = connector.version
        assert current_version is not None, "Could not determine current version from metadata"

        # Only the retrieval of the previous metadata is best-effort. The
        # assertions below must stay outside any broad exception handler:
        # AssertionError derives from Exception, so catching Exception around
        # them would turn every "not bumped" failure into a skip.
        try:
            # The "./" prefix makes git resolve the path relative to `cwd`
            # instead of the repository root.
            cmd = ["git", "show", "HEAD~1:./metadata.yaml"]
            process = subprocess.run(
                cmd, cwd=connector.code_directory, capture_output=True, text=True
            )
        except (OSError, UnicodeError) as e:
            pytest.skip(f"Could not check version bump: {str(e)}")

        if process.returncode != 0:
            pytest.skip("Could not get previous version from git history")

        try:
            previous_metadata = yaml.safe_load(process.stdout)
        except yaml.YAMLError as e:
            pytest.skip(f"Could not check version bump: {str(e)}")

        previous_version = None
        if isinstance(previous_metadata, dict):
            previous_data = previous_metadata.get("data")
            if isinstance(previous_data, dict):
                previous_version = previous_data.get("dockerImageTag")

        if not previous_version:
            pytest.skip("Could not determine previous version from git history")

        try:
            # Stringify first so a tag that YAML parsed as a number is still handled.
            current_semver = semver.Version.parse(str(current_version))
            previous_semver = semver.Version.parse(str(previous_version))
        except ValueError:
            # Not valid semver: fall back to requiring that the tag changed at all.
            assert str(current_version) != str(previous_version), f"Version must be bumped. Current: {current_version}, Previous: {previous_version}"
        else:
            assert current_semver > previous_semver, f"Version must be bumped. Current: {current_version}, Previous: {previous_version}"

    @pytest.mark.parametrize("connector_fixture", ["connector"], indirect=True)
    @pytest.mark.connector_language(["python"])
    def test_license_in_pyproject(self, connector: Connector) -> None:
        """Check that Python connectors declare an accepted license in pyproject.toml.

        The `tool.poetry.license` field must exist and, compared
        case-insensitively, be one of "mit", "elv2" or "elastic license v2".

        Args:
            connector: The connector to check
        """
        assert connector.pyproject_file_path.exists(), f"{consts.PYPROJECT_FILE_NAME} file not found"

        pyproject = self._load_pyproject(connector)

        assert "tool" in pyproject, f"{consts.PYPROJECT_FILE_NAME} must have a [tool] section"
        assert "poetry" in pyproject["tool"], f"{consts.PYPROJECT_FILE_NAME} must have a [tool.poetry] section"
        assert "license" in pyproject["tool"]["poetry"], f"{consts.PYPROJECT_FILE_NAME} must have a license field"

        license_value = pyproject["tool"]["poetry"]["license"]
        assert license_value.lower() in ["mit", "elv2", "elastic license v2"], f"License must be MIT, ELv2, or Elastic License v2, got {license_value}"
HTTPS only" + description = "Connectors must use HTTPS only when making requests to external services." + requires_metadata = False + runs_on_released_connectors = False + + ignore_comment = "# ignore-https-check" # Define the ignore comment pattern + + ignored_directories_for_https_checks = { + ".venv", + "tests", + "unit_tests", + "integration_tests", + "test-integration", + "build", + "source-file", + ".pytest_cache", + "acceptance_tests_logs", + ".hypothesis", + ".ruff_cache", + "htmlcov", + } + + ignored_file_name_pattern_for_https_checks = { + "*Test.java", + "*.jar", + "*.pyc", + "*.gz", + "*.svg", + "expected_records.jsonl", + "expected_records.json", + } + + ignored_url_prefixes = { + "http://json-schema.org", + "http://localhost", + } + + @staticmethod + def _read_all_files_in_directory( + directory: Path, + ignored_directories: Optional[Set[str]] = None, + ignored_filename_patterns: Optional[Set[str]] = None, + ) -> Iterable[Tuple[Path, str]]: + """Read all files in a directory, ignoring specified directories and filename patterns. 
+ + Args: + directory: The directory to read files from + ignored_directories: Directories to ignore + ignored_filename_patterns: Filename patterns to ignore + + Yields: + Tuple[Path, str]: The file path and line content + """ + ignored_directories = ignored_directories if ignored_directories is not None else set() + ignored_filename_patterns = ( + ignored_filename_patterns if ignored_filename_patterns is not None else set() + ) + + for path in directory.rglob("*"): + ignore_directory = any( + [ignored_directory in path.parts for ignored_directory in ignored_directories] + ) + ignore_filename = any( + [ + path.match(ignored_filename_pattern) + for ignored_filename_pattern in ignored_filename_patterns + ] + ) + ignore = ignore_directory or ignore_filename + if path.is_file() and not ignore: + try: + for line in open(path, "r"): + yield path, line + except UnicodeDecodeError: + continue + + @staticmethod + def _line_is_comment(line: str, file_path: Path) -> bool: + """Check if a line is a comment. + + Args: + line: The line to check + file_path: The file path + + Returns: + bool: Whether the line is a comment + """ + language_comments = { + ".py": "#", + ".yml": "#", + ".yaml": "#", + ".java": "//", + ".md": "