|
14 | 14 | # See the License for the specific language governing permissions and |
15 | 15 | # limitations under the License. |
16 | 16 | # |
| 17 | +import logging |
| 18 | +import re |
17 | 19 | from typing import Union |
18 | 20 |
|
19 | 21 | from vendor.dbt_artifacts_parser.parsers.catalog.catalog_v1 import CatalogV1 |
|
41 | 43 | from vendor.dbt_artifacts_parser.parsers.utils import get_dbt_schema_version |
42 | 44 | from vendor.dbt_artifacts_parser.parsers.version_map import ArtifactTypes |
43 | 45 |
|
logger = logging.getLogger(__name__)

# Top-level manifest keys with strict discriminated unions that break on dbt
# schema changes but are not consumed by downstream wrappers.
# Frozen so this shared module-level constant cannot be mutated by accident.
_UNUSED_STRICT_FIELDS = frozenset({"disabled"})

# Regex to extract manifest version number from schema URL
_MANIFEST_VERSION_RE = re.compile(r"https://schemas\.getdbt\.com/dbt/manifest/v(\d+)\.json")

# The latest manifest class we support, used as fallback for unknown versions
_LATEST_MANIFEST_CLASS = ManifestV12
| 57 | + |
44 | 58 |
|
45 | 59 | # |
46 | 60 | # catalog |
@@ -71,6 +85,27 @@ def parse_catalog_v1(catalog: dict) -> CatalogV1: |
71 | 85 | # |
72 | 86 | # manifest |
73 | 87 | # |
def _strip_unused_fields(manifest: dict) -> dict:
    """Return a shallow copy of ``manifest`` without the unused strict fields.

    Keys listed in ``_UNUSED_STRICT_FIELDS`` (e.g. ``disabled``) are dropped;
    every other key/value pair is carried over in its original order. The
    input dict is not modified.

    These fields use complex Pydantic unions that break when dbt Cloud
    changes its schema, but our wrappers never read them.
    """
    kept = {}
    for key, value in manifest.items():
        if key in _UNUSED_STRICT_FIELDS:
            continue
        kept[key] = value
    return kept
| 95 | + |
| 96 | + |
def _try_parse_manifest(manifest: dict, model_class):
    """Parse ``manifest`` with ``model_class``, retrying once without unused fields.

    The manifest is first passed unchanged to ``model_class(**manifest)``.
    If that raises, the keys removed by ``_strip_unused_fields`` are dropped
    and the parse is attempted a second time.

    Args:
        manifest: The raw manifest as a dict; it is not modified.
        model_class: Callable (normally a manifest model class) invoked with
            the manifest entries as keyword arguments.

    Returns:
        Whatever ``model_class`` returns for the full or the stripped manifest.

    Raises:
        Exception: The original parse error when stripping removes nothing,
            otherwise the error raised by the second attempt (with the first
            error attached as its context).
    """
    try:
        return model_class(**manifest)
    except Exception as first_error:
        # Broad on purpose: the concrete validation error type depends on the
        # model class and is not known here.
        stripped = _strip_unused_fields(manifest)
        removed = sorted(manifest.keys() - stripped.keys())
        if not removed:
            # Nothing was stripped, so a retry would repeat the exact same
            # parse and fail the same way; surface the original error.
            raise
        logger.warning(
            "Parsing manifest with %s failed (%s); retrying without unused fields %s",
            getattr(model_class, "__name__", repr(model_class)),
            type(first_error).__name__,
            removed,
        )
        return model_class(**stripped)
| 107 | + |
| 108 | + |
74 | 109 | def parse_manifest( |
75 | 110 | manifest: dict, |
76 | 111 | ) -> Union[ |
@@ -100,31 +135,36 @@ def parse_manifest( |
100 | 135 | ] |
101 | 136 | """ |
102 | 137 | dbt_schema_version = get_dbt_schema_version(artifact_json=manifest) |
103 | | - if dbt_schema_version == ArtifactTypes.MANIFEST_V1.value.dbt_schema_version: |
104 | | - return ManifestV1(**manifest) |
105 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V2.value.dbt_schema_version: |
106 | | - return ManifestV2(**manifest) |
107 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V3.value.dbt_schema_version: |
108 | | - return ManifestV3(**manifest) |
109 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V4.value.dbt_schema_version: |
110 | | - return ManifestV4(**manifest) |
111 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V5.value.dbt_schema_version: |
112 | | - return ManifestV5(**manifest) |
113 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V6.value.dbt_schema_version: |
114 | | - return ManifestV6(**manifest) |
115 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V7.value.dbt_schema_version: |
116 | | - return ManifestV7(**manifest) |
117 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V8.value.dbt_schema_version: |
118 | | - return ManifestV8(**manifest) |
119 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V9.value.dbt_schema_version: |
120 | | - return ManifestV9(**manifest) |
121 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V10.value.dbt_schema_version: |
122 | | - return ManifestV10(**manifest) |
123 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V11.value.dbt_schema_version: |
124 | | - return ManifestV11(**manifest) |
125 | | - elif dbt_schema_version == ArtifactTypes.MANIFEST_V12.value.dbt_schema_version: |
126 | | - return ManifestV12(**manifest) |
127 | | - raise ValueError("Not a manifest.json") |
| 138 | + |
| 139 | + version_to_class = { |
| 140 | + ArtifactTypes.MANIFEST_V1.value.dbt_schema_version: ManifestV1, |
| 141 | + ArtifactTypes.MANIFEST_V2.value.dbt_schema_version: ManifestV2, |
| 142 | + ArtifactTypes.MANIFEST_V3.value.dbt_schema_version: ManifestV3, |
| 143 | + ArtifactTypes.MANIFEST_V4.value.dbt_schema_version: ManifestV4, |
| 144 | + ArtifactTypes.MANIFEST_V5.value.dbt_schema_version: ManifestV5, |
| 145 | + ArtifactTypes.MANIFEST_V6.value.dbt_schema_version: ManifestV6, |
| 146 | + ArtifactTypes.MANIFEST_V7.value.dbt_schema_version: ManifestV7, |
| 147 | + ArtifactTypes.MANIFEST_V8.value.dbt_schema_version: ManifestV8, |
| 148 | + ArtifactTypes.MANIFEST_V9.value.dbt_schema_version: ManifestV9, |
| 149 | + ArtifactTypes.MANIFEST_V10.value.dbt_schema_version: ManifestV10, |
| 150 | + ArtifactTypes.MANIFEST_V11.value.dbt_schema_version: ManifestV11, |
| 151 | + ArtifactTypes.MANIFEST_V12.value.dbt_schema_version: ManifestV12, |
| 152 | + } |
| 153 | + |
| 154 | + model_class = version_to_class.get(dbt_schema_version) |
| 155 | + if model_class: |
| 156 | + return _try_parse_manifest(manifest, model_class) |
| 157 | + |
| 158 | + # Forward-compatibility: unknown manifest version — try latest known class |
| 159 | + match = _MANIFEST_VERSION_RE.match(dbt_schema_version) |
| 160 | + if match: |
| 161 | + logger.warning( |
| 162 | + "Unknown manifest schema version %s, attempting parse with latest known class", |
| 163 | + dbt_schema_version, |
| 164 | + ) |
| 165 | + return _try_parse_manifest(manifest, _LATEST_MANIFEST_CLASS) |
| 166 | + |
| 167 | + raise ValueError(f"Not a manifest.json (schema version: {dbt_schema_version})") |
128 | 168 |
|
129 | 169 |
|
130 | 170 | def parse_manifest_v1(manifest: dict) -> ManifestV1: |
|
0 commit comments