-
Notifications
You must be signed in to change notification settings - Fork 44
Expand file tree
/
Copy pathgenerate_component_manifest_files.py
More file actions
executable file
·212 lines (167 loc) · 7.65 KB
/
generate_component_manifest_files.py
File metadata and controls
executable file
·212 lines (167 loc) · 7.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/usr/bin/env python3
#
# Usage:
# > uv run bin/generate_component_manifest_files.py
#
# /// script
# dependencies = [
# "datamodel-code-generator==0.26.3",
# "PyYAML>=6.0.1",
# ]
# ///
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from glob import glob
from pathlib import Path
import yaml
# Directory containing the source YAML schema files.
LOCAL_YAML_DIR_PATH = "airbyte_cdk/sources/declarative"
# Directory where the generated Pydantic model modules are exported.
LOCAL_OUTPUT_DIR_PATH = "airbyte_cdk/sources/declarative/models"


def get_all_yaml_files_without_ext() -> list[str]:
    """Return the stem (filename without extension) of every YAML file in the schema directory."""
    yaml_paths = glob(f"{LOCAL_YAML_DIR_PATH}/*.yaml")
    return [Path(yaml_path).stem for yaml_path in yaml_paths]
def generate_init_module_content() -> str:
    """Build the ``__init__.py`` content: a generated-file banner plus one star-import per module."""
    lines = ["# generated by bin/generate_component_manifest_files.py"]
    lines.extend(f"from .{stem} import *" for stem in get_all_yaml_files_without_ext())
    # Every line (including the last) is newline-terminated.
    return "\n".join(lines) + "\n"
def generate_json_schema():
    """Generate JSON schema from the YAML file for schemastore.org registration.

    When registered with schemastore.org, a number of IDEs and libraries
    automatically apply the JSON Schema validation features such as:

    - auto-complete for keys and enums
    - hover-tooltips for descriptions and examples
    - linting squiggles for validation errors
    """
    yaml_file_path = f"{LOCAL_YAML_DIR_PATH}/declarative_component_schema.yaml"
    json_file_path = f"{LOCAL_YAML_DIR_PATH}/generated/declarative_component_schema.json"

    with open(yaml_file_path, "r") as yaml_file:
        schema_data = yaml.safe_load(yaml_file)

    class DateTimeEncoder(json.JSONEncoder):
        """JSON encoder that serializes date/datetime-like objects as ISO-8601 strings."""

        def default(self, obj):
            # Any object exposing `isoformat` (date, datetime, time) becomes a string;
            # everything else falls through to the base encoder (which raises TypeError).
            if hasattr(obj, "isoformat"):
                return obj.isoformat()
            return super().default(obj)

    # NOTE: `os` is imported at module level; the previous redundant local
    # `import os` has been removed.
    os.makedirs(os.path.dirname(json_file_path), exist_ok=True)
    with open(json_file_path, "w") as json_file:
        json.dump(schema_data, json_file, indent=2, cls=DateTimeEncoder)
    print(f"Generated JSON schema: {json_file_path}")
def replace_base_model_for_classes_with_deprecated_fields(post_processed_content: str) -> str:
    """
    Replace the base model for classes with deprecated fields.

    Scans the generated source for ``class X(BaseModel):`` definitions whose body
    contains a field marked ``deprecated=True`` and rebases those classes onto
    ``BaseModelWithDeprecations``, inserting the required import when needed.
    """
    # Collect the names of classes whose body declares at least one deprecated field.
    deprecated_classes: set = set()
    for match in re.finditer(r"class (\w+)\(BaseModel\):", post_processed_content):
        class_name = match.group(1)
        body_start = match.start() + len(match.group(0))
        # The class body extends to the next class definition, or to end of file.
        following = re.search(r"class \w+\(", post_processed_content[body_start:])
        body_end = (
            len(post_processed_content)
            if following is None
            else body_start + following.start()
        )
        if re.search(r"deprecated\s*=\s*True", post_processed_content[match.start() : body_end]):
            deprecated_classes.add(class_name)

    # Add the import for the deprecation-aware base model, but only when at
    # least one class actually needs it.
    if deprecated_classes:
        # Insert right after the pydantic import block (first blank line following it).
        insert_at = post_processed_content.find(
            "\n\n",
            post_processed_content.find("from pydantic.v1 import"),
        )
        if insert_at > 0:
            import_stmt = (
                "from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import (\n"
                " BaseModelWithDeprecations,\n"
                ")"
            )
            post_processed_content = (
                post_processed_content[:insert_at]
                + "\n\n"
                + import_stmt
                + post_processed_content[insert_at:]
            )

    # Rebase every flagged class onto `BaseModelWithDeprecations`.
    for class_name in deprecated_classes:
        post_processed_content = re.sub(
            rf"class {class_name}\(BaseModel\):",
            f"class {class_name}(BaseModelWithDeprecations):",
            post_processed_content,
        )
    return post_processed_content
def post_process_codegen(generated_dir: str, post_processed_dir: str):
    """Post-process generated files to fix pydantic imports and deprecated fields."""
    os.makedirs(post_processed_dir, exist_ok=True)
    python_files = [name for name in os.listdir(generated_dir) if name.endswith(".py")]
    for file_name in python_files:
        source_path = os.path.join(generated_dir, file_name)
        target_path = os.path.join(post_processed_dir, file_name)
        with open(source_path, "r") as source_file:
            content = source_file.read()
        # The space before _parameters is intentional to avoid replacing things
        # like `request_parameters:` with `requestparameters:`.
        content = content.replace(" _parameters:", " parameters:")
        # Route all pydantic imports through the v1 compatibility layer.
        content = content.replace("from pydantic", "from pydantic.v1")
        content = replace_base_model_for_classes_with_deprecated_fields(content)
        with open(target_path, "w") as target_file:
            target_file.write(content)
def main():
    """Regenerate the Pydantic model modules and the JSON schema from the YAML manifests."""
    generate_json_schema()
    init_module_content = generate_init_module_content()

    with tempfile.TemporaryDirectory() as temp_dir:
        generated_dir = os.path.join(temp_dir, "generated")
        post_processed_dir = os.path.join(temp_dir, "generated_post_processed")
        os.makedirs(generated_dir, exist_ok=True)

        # Write the package __init__.py so the generated modules are importable.
        init_file_path = os.path.join(generated_dir, "__init__.py")
        with open(init_file_path, "w") as f:
            f.write(init_module_content)

        # Run datamodel-codegen once per YAML schema file.
        for yaml_file in get_all_yaml_files_without_ext():
            input_yaml = os.path.join(LOCAL_YAML_DIR_PATH, f"{yaml_file}.yaml")
            output_py = os.path.join(generated_dir, f"{yaml_file}.py")
            cmd = [
                "datamodel-codegen",
                "--input",
                input_yaml,
                "--output",
                output_py,
                "--disable-timestamp",
                "--enum-field-as-literal",
                "one",
                "--set-default-enum-member",
                "--use-double-quotes",
                "--remove-special-field-name-prefix",
                # allow usage of the extra key such as `deprecated`, etc.
                "--field-extra-keys",
                # account the `deprecated` flag provided for the field.
                "deprecated",
                # account the `deprecation_message` provided for the field.
                "deprecation_message",
            ]
            try:
                # check=True raises CalledProcessError on non-zero exit; the
                # CompletedProcess return value is unused, so it is not bound.
                subprocess.run(cmd, check=True, capture_output=True, text=True)
                print(f"Generated {output_py}")
            except subprocess.CalledProcessError as e:
                print(f"Error generating {output_py}: {e}")
                print(f"stdout: {e.stdout}")
                print(f"stderr: {e.stderr}")
                sys.exit(1)

        post_process_codegen(generated_dir, post_processed_dir)

        # Replace the checked-in models with the freshly generated ones.
        if os.path.exists(LOCAL_OUTPUT_DIR_PATH):
            shutil.rmtree(LOCAL_OUTPUT_DIR_PATH)
        shutil.copytree(post_processed_dir, LOCAL_OUTPUT_DIR_PATH)
        print(f"Generated models exported to {LOCAL_OUTPUT_DIR_PATH}")


if __name__ == "__main__":
    main()