diff --git a/bin/generate-component-manifest-dagger.sh b/bin/generate-component-manifest-dagger.sh deleted file mode 100755 index f920ff727..000000000 --- a/bin/generate-component-manifest-dagger.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -# We need to run this script in a docker container because we need to use a -# specific version of datamodel-codegen that generates pydantic v1 models (correctly). -# The newer datamodel-codegen's "pydantic v1" models are different than those v1 models -# generated by the older version of datamodel-codegen. - -set -e - -pip install dagger-io==0.13.3 -python bin/generate_component_manifest_files.py diff --git a/bin/generate_component_manifest_files.py b/bin/generate_component_manifest_files.py index 2041c6810..82bbaf441 100755 --- a/bin/generate_component_manifest_files.py +++ b/bin/generate_component_manifest_files.py @@ -1,25 +1,33 @@ +#!/usr/bin/env python3 +# +# Usage: +# > uv run bin/generate_component_manifest_files.py +# +# /// script +# dependencies = [ +# "datamodel-code-generator==0.26.3", +# "PyYAML>=6.0.1", +# ] +# /// + # Copyright (c) 2024 Airbyte, Inc., all rights reserved. import json +import os import re +import shutil +import subprocess import sys +import tempfile from glob import glob from pathlib import Path -import anyio -import dagger import yaml -PYTHON_IMAGE = "python:3.10" LOCAL_YAML_DIR_PATH = "airbyte_cdk/sources/declarative" LOCAL_OUTPUT_DIR_PATH = "airbyte_cdk/sources/declarative/models" -PIP_DEPENDENCIES = [ - "datamodel_code_generator==0.26.3", -] - - def get_all_yaml_files_without_ext() -> list[str]: return [Path(f).stem for f in glob(f"{LOCAL_YAML_DIR_PATH}/*.yaml")] @@ -119,15 +127,18 @@ def replace_base_model_for_classes_with_deprecated_fields(post_processed_content return post_processed_content -async def post_process_codegen(codegen_container: dagger.Container): - codegen_container = codegen_container.with_exec( - ["mkdir", "/generated_post_processed"], use_entrypoint=True - ) - for generated_file in await codegen_container.directory("/generated").entries(): +def post_process_codegen(generated_dir: str, post_processed_dir: str): + """Post-process generated files to fix pydantic imports and deprecated fields.""" + os.makedirs(post_processed_dir, exist_ok=True) + + for generated_file in os.listdir(generated_dir): if generated_file.endswith(".py"): - original_content = await codegen_container.file( - f"/generated/{generated_file}" - ).contents() + input_path = os.path.join(generated_dir, generated_file) + output_path = os.path.join(post_processed_dir, generated_file) + + with open(input_path, "r") as f: + original_content = f.read() + # the space before _parameters is intentional to avoid replacing things like `request_parameters:` with `requestparameters:` post_processed_content = original_content.replace( " _parameters:", " parameters:" @@ -137,56 +148,65 @@ async def post_process_codegen(codegen_container: dagger.Container): post_processed_content ) - codegen_container = codegen_container.with_new_file( - f"/generated_post_processed/{generated_file}", contents=post_processed_content - ) - return codegen_container + with open(output_path, "w") as f: + f.write(post_processed_content) -async def main(): +def main(): generate_json_schema() init_module_content = generate_init_module_content() - async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client: - codegen_container = ( - dagger_client.container() - .from_(PYTHON_IMAGE) - .with_exec(["mkdir", "/generated"], use_entrypoint=True) - .with_exec(["pip", "install", " ".join(PIP_DEPENDENCIES)], use_entrypoint=True) - .with_mounted_directory( - "/yaml", dagger_client.host().directory(LOCAL_YAML_DIR_PATH, include=["*.yaml"]) - ) - .with_new_file("/generated/__init__.py", contents=init_module_content) - ) - for yaml_file in get_all_yaml_files_without_ext(): - codegen_container = codegen_container.with_exec( - [ - "datamodel-codegen", - "--input", - f"/yaml/{yaml_file}.yaml", - "--output", - f"/generated/{yaml_file}.py", - "--disable-timestamp", - "--enum-field-as-literal", - "one", - "--set-default-enum-member", - "--use-double-quotes", - "--remove-special-field-name-prefix", - # allow usage of the extra key such as `deprecated`, etc. - "--field-extra-keys", - # account the `deprecated` flag provided for the field. - "deprecated", - # account the `deprecation_message` provided for the field. - "deprecation_message", - ], - use_entrypoint=True, - ) + with tempfile.TemporaryDirectory() as temp_dir: + generated_dir = os.path.join(temp_dir, "generated") + post_processed_dir = os.path.join(temp_dir, "generated_post_processed") - await ( - (await post_process_codegen(codegen_container)) - .directory("/generated_post_processed") - .export(LOCAL_OUTPUT_DIR_PATH) - ) + os.makedirs(generated_dir, exist_ok=True) + init_file_path = os.path.join(generated_dir, "__init__.py") + with open(init_file_path, "w") as f: + f.write(init_module_content) -anyio.run(main) + for yaml_file in get_all_yaml_files_without_ext(): + input_yaml = os.path.join(LOCAL_YAML_DIR_PATH, f"{yaml_file}.yaml") + output_py = os.path.join(generated_dir, f"{yaml_file}.py") + + cmd = [ + "datamodel-codegen", + "--input", + input_yaml, + "--output", + output_py, + "--disable-timestamp", + "--enum-field-as-literal", + "one", + "--set-default-enum-member", + "--use-double-quotes", + "--remove-special-field-name-prefix", + # allow usage of the extra key such as `deprecated`, etc. + "--field-extra-keys", + # account the `deprecated` flag provided for the field. + "deprecated", + # account the `deprecation_message` provided for the field. + "deprecation_message", + ] + + try: + result = subprocess.run(cmd, check=True, capture_output=True, text=True) + print(f"Generated {output_py}") + except subprocess.CalledProcessError as e: + print(f"Error generating {output_py}: {e}") + print(f"stdout: {e.stdout}") + print(f"stderr: {e.stderr}") + sys.exit(1) + + post_process_codegen(generated_dir, post_processed_dir) + + if os.path.exists(LOCAL_OUTPUT_DIR_PATH): + shutil.rmtree(LOCAL_OUTPUT_DIR_PATH) + shutil.copytree(post_processed_dir, LOCAL_OUTPUT_DIR_PATH) + + print(f"Generated models exported to {LOCAL_OUTPUT_DIR_PATH}") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 9d6e13902..a54c747d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -138,7 +138,7 @@ lock = { shell = "poetry lock", help = "Lock all dependencies." } pre-commit = {cmd = "poetry run pre-commit run --all-files", help = "Run all pre-commit hooks on all files."} # Build tasks -assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} +assemble = {cmd = "uv run bin/generate_component_manifest_files.py", help = "Generate component manifest files."} build-package = {cmd = "poetry build", help = "Build the python package: source and wheels archives."} build = {sequence = ["assemble", "build-package"], help = "Run all tasks to build the package."}