diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9943468..9ea00b9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,18 @@
 
 Recent and upcoming changes to dbt2looker
 
+## Unreleased
+### Added
+- support ephemeral models (#57)
+- warnings if there is a discrepancy between manifest and catalog (#5)
+- more descriptive error message when a column's data type can't be inferred due to not being in the catalog
+
+### Changed
+- only non-ephemeral models _selected by tag logic_ are checked to ensure the model files are not empty (instead of all models) (#57)
+
+### Fixed
+- now supports `pydantic` v2 (#97)
+
 ## 0.11.0
 ### Added
 - support label and hidden fields (#49)
diff --git a/dbt2looker/models.py b/dbt2looker/models.py
index 3430eaf..a0d4746 100644
--- a/dbt2looker/models.py
+++ b/dbt2looker/models.py
@@ -1,16 +1,17 @@
 from enum import Enum
-from typing import Union, Dict, List, Optional
+from typing import Any, Union, Dict, List, Optional
 try:
     from typing import Literal
 except ImportError:
     from typing_extensions import Literal
-from pydantic import BaseModel, Field, PydanticValueError, validator
+from pydantic import BaseModel, Field, validator
 
 
 # dbt2looker utility types
-class UnsupportedDbtAdapterError(PydanticValueError):
-    code = 'unsupported_dbt_adapter'
-    msg_template = '{wrong_value} is not a supported dbt adapter'
+class UnsupportedDbtAdapterError(ValueError):
+    def __init__(self, wrong_value: str):
+        msg = f'{wrong_value} is not a supported dbt adapter'
+        super().__init__(msg)
 
 
 class SupportedDbtAdapters(str, Enum):
@@ -82,12 +83,12 @@ class LookerHiddenType(str, Enum):
 class Dbt2LookerMeasure(BaseModel):
     type: LookerMeasureType
     filters: Optional[List[Dict[str, str]]] = []
-    description: Optional[str]
-    sql: Optional[str]
-    value_format_name: Optional[LookerValueFormatName]
-    group_label: Optional[str]
-    label: Optional[str]
-    hidden: Optional[LookerHiddenType]
+    description: Optional[str] = None
+    sql: Optional[str] = None
+    value_format_name: Optional[LookerValueFormatName] = None
+    group_label: Optional[str] = None
+    label: Optional[str] = None
+    hidden: Optional[LookerHiddenType] = None
 
     @validator('filters')
     def filters_are_singular_dicts(cls, v: List[Dict[str, str]]):
@@ -100,10 +101,10 @@ def filters_are_singular_dicts(cls, v: List[Dict[str, str]]):
 
 class Dbt2LookerDimension(BaseModel):
     enabled: Optional[bool] = True
-    name: Optional[str]
-    sql: Optional[str]
-    description: Optional[str]
-    value_format_name: Optional[LookerValueFormatName]
+    name: Optional[str] = None
+    sql: Optional[str] = None
+    description: Optional[str] = None
+    value_format_name: Optional[LookerValueFormatName] = None
 
 
 class Dbt2LookerMeta(BaseModel):
@@ -137,13 +138,14 @@ class DbtModelColumnMeta(Dbt2LookerMeta):
 class DbtModelColumn(BaseModel):
     name: str
     description: str
-    data_type: Optional[str]
+    data_type: Optional[str] = None
     meta: DbtModelColumnMeta
 
 
 class DbtNode(BaseModel):
     unique_id: str
     resource_type: str
+    config: Dict[str, Any]
 
 
 class Dbt2LookerExploreJoin(BaseModel):
@@ -200,13 +202,13 @@ class DbtCatalogNodeMetadata(BaseModel):
     type: str
     db_schema: str = Field(..., alias='schema')
     name: str
-    comment: Optional[str]
-    owner: Optional[str]
+    comment: Optional[str] = None
+    owner: Optional[str] = None
 
 
 class DbtCatalogNodeColumn(BaseModel):
     type: str
-    comment: Optional[str]
+    comment: Optional[str] = None
     index: int
     name: str
 
@@ -224,4 +226,4 @@ def case_insensitive_column_names(cls, v: Dict[str, DbtCatalogNodeColumn]):
 
 
 class DbtCatalog(BaseModel):
-    nodes: Dict[str, DbtCatalogNode]
\ No newline at end of file
+    nodes: Dict[str, DbtCatalogNode]
diff --git a/dbt2looker/parser.py b/dbt2looker/parser.py
index ed310f3..9e29d5b 100644
--- a/dbt2looker/parser.py
+++ b/dbt2looker/parser.py
@@ -31,21 +31,24 @@ def tags_match(query_tag: str, model: models.DbtModel) -> bool:
 
 def parse_models(raw_manifest: dict, tag=None) -> List[models.DbtModel]:
     manifest = models.DbtManifest(**raw_manifest)
-    all_models: List[models.DbtModel] = [
+    materialized_models: List[models.DbtModel] = [
         node
         for node in manifest.nodes.values()
-        if node.resource_type == 'model'
+        if node.resource_type == 'model' and node.config['materialized'] != 'ephemeral'
     ]
 
+    if tag is None:
+        selected_models = materialized_models
+    else:
+        selected_models = [model for model in materialized_models if tags_match(tag, model)]
+
     # Empty model files have many missing parameters
-    for model in all_models:
+    for model in selected_models:
         if not hasattr(model, 'name'):
             logging.error('Cannot parse model with id: "%s" - is the model file empty?', model.unique_id)
             raise SystemExit('Failed')
 
-    if tag is None:
-        return all_models
-    return [model for model in all_models if tags_match(tag, model)]
+    return selected_models
 
 
 def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel]):
@@ -54,6 +57,33 @@ def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel
             logging.debug('Model %s has no typed columns, no dimensions will be generated. %s', model.unique_id, model)
 
 
+def compare_model_vs_node_columns(model: models.DbtModel, node: models.DbtCatalogNode):
+    model_columns = set(model.columns.keys())  # as defined in YML config
+    catalogued_columns = set(node.columns.keys())  # as defined in SQL
+
+    # if the YML and SQL columns exactly match, return early
+    if not model_columns.symmetric_difference(catalogued_columns):
+        return
+
+    if model_columns.issubset(catalogued_columns):
+        for undocumented_column in sorted(catalogued_columns.difference(model_columns)):
+            logging.warning(
+                f'Column {model.unique_id}.{undocumented_column} has not been documented in YML, '
+                'but is present in the catalog. You should add it to your YML config, '
+                'or (if it is not required) remove it from the model SQL file, run the model, '
+                'and run `dbt docs generate` again')
+        # after warning the user, return early
+        return
+
+    # otherwise, there are columns defined in YML that don't match what's defined in SQL
+    for missing_column in sorted(model_columns.difference(catalogued_columns)):
+        logging.warning(
+            f'Column {model.unique_id}.{missing_column} documented in YML, '
+            'but is not defined in the DBT catalog. Check the model SQL file '
+            'and ensure you have run the model and `dbt docs generate`')
+    return  # final return explicitly included for clarity
+
+
 def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str] = None):
     catalog_nodes = parse_catalog_nodes(raw_catalog)
     dbt_models = parse_models(raw_manifest, tag=tag)
@@ -74,6 +104,11 @@ def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str]
             logging.warning(
                 f'Model {model.unique_id} not found in catalog. No looker view will be generated. '
                 f'Check if model has materialized in {adapter_type} at {model.relation_name}')
+        else:
+            # we know that the model is included in the catalog - extract it
+            corresponding_catalog_node = catalog_nodes[model.unique_id]
+            # issue warnings if the catalog columns (defined via SQL) don't match what's documented in YML
+            compare_model_vs_node_columns(model, corresponding_catalog_node)
 
     # Update dbt models with data types from catalog
     dbt_typed_models = [
@@ -92,7 +127,18 @@ def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str]
     return dbt_typed_models
 
 
+class ColumnNotInCatalogError(Exception):
+    def __init__(self, model_id: str, column_name: str):
+        super().__init__(
+            f'Column {column_name} not found in catalog for model {model_id}, '
+            'cannot find a data type for Looker. Is the column selected in the model SQL file, '
+            'and have you run the model since adding the column to it?')
+
+
 def get_column_type_from_catalog(catalog_nodes: Dict[str, models.DbtCatalogNode], model_id: str, column_name: str):
     node = catalog_nodes.get(model_id)
     column = None if node is None else node.columns.get(column_name)
-    return None if column is None else column.type
+    if column:
+        return column.type
+    # otherwise this will fail later when we try to map the data type to a Looker type
+    raise ColumnNotInCatalogError(model_id, column_name)