Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions bin/generate-component-manifest-dagger.sh
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file was just a shim and is deleted. Replaced in poe-tasks assemble reference with simply uv run bin/generate_component_manifest_files.py.

This file was deleted.

142 changes: 81 additions & 61 deletions bin/generate_component_manifest_files.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,33 @@
#!/usr/bin/env python3
#
# Usage:
# > uv run bin/generate_component_manifest_files.py
#
# /// script
Comment thread
aaronsteers marked this conversation as resolved.
# dependencies = [
# "datamodel-code-generator==0.26.3",
# "PyYAML>=6.0.1",
# ]
# ///

# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from glob import glob
from pathlib import Path

import anyio
import dagger
import yaml

PYTHON_IMAGE = "python:3.10"
LOCAL_YAML_DIR_PATH = "airbyte_cdk/sources/declarative"
LOCAL_OUTPUT_DIR_PATH = "airbyte_cdk/sources/declarative/models"


PIP_DEPENDENCIES = [
"datamodel_code_generator==0.26.3",
]


def get_all_yaml_files_without_ext() -> list[str]:
return [Path(f).stem for f in glob(f"{LOCAL_YAML_DIR_PATH}/*.yaml")]

Expand Down Expand Up @@ -119,15 +127,18 @@ def replace_base_model_for_classes_with_deprecated_fields(post_processed_content
return post_processed_content


async def post_process_codegen(codegen_container: dagger.Container):
codegen_container = codegen_container.with_exec(
["mkdir", "/generated_post_processed"], use_entrypoint=True
)
for generated_file in await codegen_container.directory("/generated").entries():
def post_process_codegen(generated_dir: str, post_processed_dir: str):
"""Post-process generated files to fix pydantic imports and deprecated fields."""
os.makedirs(post_processed_dir, exist_ok=True)

for generated_file in os.listdir(generated_dir):
if generated_file.endswith(".py"):
original_content = await codegen_container.file(
f"/generated/{generated_file}"
).contents()
input_path = os.path.join(generated_dir, generated_file)
output_path = os.path.join(post_processed_dir, generated_file)

with open(input_path, "r") as f:
original_content = f.read()

# the space before _parameters is intentional to avoid replacing things like `request_parameters:` with `requestparameters:`
post_processed_content = original_content.replace(
" _parameters:", " parameters:"
Expand All @@ -137,56 +148,65 @@ async def post_process_codegen(codegen_container: dagger.Container):
post_processed_content
)

codegen_container = codegen_container.with_new_file(
f"/generated_post_processed/{generated_file}", contents=post_processed_content
)
return codegen_container
with open(output_path, "w") as f:
f.write(post_processed_content)


async def main():
def main():
generate_json_schema()
init_module_content = generate_init_module_content()

async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client:
codegen_container = (
dagger_client.container()
.from_(PYTHON_IMAGE)
.with_exec(["mkdir", "/generated"], use_entrypoint=True)
.with_exec(["pip", "install", " ".join(PIP_DEPENDENCIES)], use_entrypoint=True)
.with_mounted_directory(
"/yaml", dagger_client.host().directory(LOCAL_YAML_DIR_PATH, include=["*.yaml"])
)
.with_new_file("/generated/__init__.py", contents=init_module_content)
)
for yaml_file in get_all_yaml_files_without_ext():
codegen_container = codegen_container.with_exec(
[
"datamodel-codegen",
"--input",
f"/yaml/{yaml_file}.yaml",
"--output",
f"/generated/{yaml_file}.py",
"--disable-timestamp",
"--enum-field-as-literal",
"one",
"--set-default-enum-member",
"--use-double-quotes",
"--remove-special-field-name-prefix",
# allow usage of the extra key such as `deprecated`, etc.
"--field-extra-keys",
# account the `deprecated` flag provided for the field.
"deprecated",
# account the `deprecation_message` provided for the field.
"deprecation_message",
],
use_entrypoint=True,
)
with tempfile.TemporaryDirectory() as temp_dir:
generated_dir = os.path.join(temp_dir, "generated")
post_processed_dir = os.path.join(temp_dir, "generated_post_processed")

await (
(await post_process_codegen(codegen_container))
.directory("/generated_post_processed")
.export(LOCAL_OUTPUT_DIR_PATH)
)
os.makedirs(generated_dir, exist_ok=True)

init_file_path = os.path.join(generated_dir, "__init__.py")
with open(init_file_path, "w") as f:
f.write(init_module_content)

anyio.run(main)
for yaml_file in get_all_yaml_files_without_ext():
input_yaml = os.path.join(LOCAL_YAML_DIR_PATH, f"{yaml_file}.yaml")
output_py = os.path.join(generated_dir, f"{yaml_file}.py")

cmd = [
"datamodel-codegen",
"--input",
input_yaml,
"--output",
output_py,
"--disable-timestamp",
"--enum-field-as-literal",
"one",
"--set-default-enum-member",
"--use-double-quotes",
"--remove-special-field-name-prefix",
# allow usage of the extra key such as `deprecated`, etc.
"--field-extra-keys",
# account the `deprecated` flag provided for the field.
"deprecated",
# account the `deprecation_message` provided for the field.
"deprecation_message",
]

try:
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
print(f"Generated {output_py}")
except subprocess.CalledProcessError as e:
print(f"Error generating {output_py}: {e}")
print(f"stdout: {e.stdout}")
print(f"stderr: {e.stderr}")
sys.exit(1)

post_process_codegen(generated_dir, post_processed_dir)

if os.path.exists(LOCAL_OUTPUT_DIR_PATH):
shutil.rmtree(LOCAL_OUTPUT_DIR_PATH)
shutil.copytree(post_processed_dir, LOCAL_OUTPUT_DIR_PATH)

print(f"Generated models exported to {LOCAL_OUTPUT_DIR_PATH}")


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ lock = { shell = "poetry lock", help = "Lock all dependencies." }
pre-commit = {cmd = "poetry run pre-commit run --all-files", help = "Run all pre-commit hooks on all files."}

# Build tasks
assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."}
assemble = {cmd = "uv run bin/generate_component_manifest_files.py", help = "Generate component manifest files."}
build-package = {cmd = "poetry build", help = "Build the python package: source and wheels archives."}
build = {sequence = ["assemble", "build-package"], help = "Run all tasks to build the package."}

Expand Down
Loading