diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 74e5d0724..0f00f7452 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -26,16 +26,6 @@ on: type: string required: false description: dbt's version to test with - should-run-tests: - type: boolean - required: false - default: true - description: Whether to run E2E tests - clear-tests: - type: boolean - required: false - default: true - description: Whether to clean test environment generate-data: type: boolean required: false @@ -47,10 +37,6 @@ on: warehouse-type: type: string required: true - should-run-tests: - type: boolean - required: false - default: true elementary-ref: type: string required: false @@ -60,10 +46,6 @@ on: dbt-version: type: string required: false - clear-tests: - type: boolean - required: false - default: true generate-data: type: boolean required: false @@ -71,8 +53,9 @@ on: env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - DBT_PKG_INTEG_TESTS_DIR: ${{ github.workspace }}/dbt-data-reliability/integration_tests/deprecated_tests - ELMENTARY_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project + ELEMENTARY_DBT_PACKAGE_PATH: ${{ github.workspace }}/dbt-data-reliability + CLI_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project + E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary/tests/e2e_dbt_project jobs: # PRs from forks require approval, specifically with the "pull_request_target" event as it contains repo secrets. 
@@ -117,12 +100,12 @@ jobs: - name: Start Postgres if: inputs.warehouse-type == 'postgres' - working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }} + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} run: docker compose up -d postgres # - name: Start Clickhouse # if: inputs.warehouse-type == 'clickhouse' - # working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }} + # working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} # run: docker compose up -d clickhouse - name: Setup Python @@ -170,12 +153,46 @@ jobs: rm -rf "$DBT_PKGS_PATH/elementary" ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary" - - name: Run dbt package integration tests - if: github.event_name != 'workflow_dispatch' || inputs.should-run-tests - working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }} + - name: Run deps for E2E dbt project + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} + env: + ELEMENTARY_DBT_PACKAGE_PATH: ${{ env.ELEMENTARY_DBT_PACKAGE_PATH }} run: | dbt deps - python run_e2e_tests.py -t "${{ inputs.warehouse-type }}" -g "${{ inputs.warehouse-type == 'postgres' || inputs.generate-data }}" --clear-tests "${{ inputs.clear-tests }}" + + - name: Seed e2e dbt project + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} + if: inputs.warehouse-type == 'postgres' || inputs.generate-data + run: | + python generate_data.py + dbt seed -f --target "${{ inputs.warehouse-type }}" + + - name: Run e2e dbt project + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} + run: | + dbt run --target "${{ inputs.warehouse-type }}" || true + + # Validate run_results.json: only error_model should be non-success + jq -e ' + [.results[] | select(.status != "success") | .unique_id] + | length == 1 and .[0] == "model.elementary_integration_tests.error_model" + ' target/run_results.json > /dev/null && jq_exit=0 || jq_exit=$? + # NOTE: status must be captured inline — the Actions default "bash -e" shell would abort on a failing jq before a separate $? check + + if [ $jq_exit -eq 0 ]; then + echo "✅ Validation passed: only error_model failed." + else + echo "❌ Validation failed. 
Unexpected failures:" + jq '[.results[] | select(.status != "success") | .unique_id] | join(", ")' target/run_results.json + fi + + exit $jq_exit + + - name: Test e2e dbt project + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} + continue-on-error: true + run: | + dbt test --target "${{ inputs.warehouse-type }}" - name: Run help run: edr --help @@ -187,12 +204,12 @@ jobs: edr monitor -t "${{ inputs.warehouse-type }}" --group-by table - --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}" + --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" --project-profile-target "${{ inputs.warehouse-type }}" --slack-webhook "$SLACK_WEBHOOK" - name: Validate alerts statuses were updated - working-directory: ${{ env.ELMENTARY_INTERNAL_DBT_PKG_DIR }} + working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }} run: | dbt deps dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}" @@ -201,7 +218,7 @@ jobs: run: > edr monitor report -t "${{ inputs.warehouse-type }}" - --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}" + --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" --project-profile-target "${{ inputs.warehouse-type }}" - name: Set report artifact name @@ -230,7 +247,7 @@ jobs: run: > edr monitor send-report -t "${{ inputs.warehouse-type }}" - --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}" + --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" --project-profile-target "${{ inputs.warehouse-type }}" --slack-file-name "report_${{ inputs.warehouse-type }}_${{ env.BRANCH_NAME }}.html" --slack-token "$SLACK_TOKEN" @@ -259,5 +276,4 @@ jobs: path: elementary/edr_target/edr.log - name: Run Python package e2e tests - if: github.event_name != 'workflow_dispatch' || inputs.should-run-tests run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }} diff --git a/.gitignore b/.gitignore index d37765858..30d4f2d51 100644 --- a/.gitignore +++ b/.gitignore @@ -96,4 +96,4 @@ venv/ # elementary outputs edr_target/ -tests/tests_with_db/dbt_project/dbt_packages/ 
+**/dbt_packages/ diff --git a/tests/e2e_dbt_project/README.md b/tests/e2e_dbt_project/README.md new file mode 100644 index 000000000..353d624a2 --- /dev/null +++ b/tests/e2e_dbt_project/README.md @@ -0,0 +1,2 @@ +This dbt project is intended for our CLI e2e. +The "edr report" and "edr monitor" commands in `test-warehouse.yml` will run on the data generated by this project. diff --git a/tests/e2e_dbt_project/data/training/any_type_column_anomalies_training.csv b/tests/e2e_dbt_project/data/training/any_type_column_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/training/backfill_days_column_anomalies_training.csv b/tests/e2e_dbt_project/data/training/backfill_days_column_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/training/dimension_anomalies_training.csv b/tests/e2e_dbt_project/data/training/dimension_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/training/groups_training.csv b/tests/e2e_dbt_project/data/training/groups_training.csv new file mode 100644 index 000000000..ddef977cd --- /dev/null +++ b/tests/e2e_dbt_project/data/training/groups_training.csv @@ -0,0 +1,5 @@ +group_a,group_b,group_c,group_d +Poland,Netherlands,Spain,Ukraine +Greece,Denmark,Italy,Sweden +Russia,Germany,Ireland,France +Czech Republic,Portugal,Croatia,England diff --git a/tests/e2e_dbt_project/data/training/numeric_column_anomalies_training.csv b/tests/e2e_dbt_project/data/training/numeric_column_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/training/stats_players_training.csv b/tests/e2e_dbt_project/data/training/stats_players_training.csv new file mode 100644 index 000000000..906cf0c95 --- /dev/null +++ b/tests/e2e_dbt_project/data/training/stats_players_training.csv @@ -0,0 +1,180 @@ 
+Player,Team,Goals,shots_on_target,Passes,Shots_without_scoring,Crosses,Tackles,Interceptions,Goals_conceded,Saves_made,Offsides,Yellow_cards +Darijo Srna,Croatia,,,,,16,13,,,,, +Gordon Schildenfeld,Croatia,,,,,,,,,,, +Ivan Perisic,Croatia,,,,,,,,,,, +Ivan Rakitic,Croatia,,,,,,,,,,, +Ivan Strinic,Croatia,,,,,14,,,,,, +Luka Modric,Croatia,,,,,,,,,,, +Mario Mandzukic,Croatia,3,4,,,,,,,,, +Nikica Jelavic,Croatia,,,,,,,,,,, +Stipe Pletikosa,Croatia,,,,,,,,3,13,, +Vedran Corluka,Croatia,,,,,,,,,,, +David Limbersky,Czech Republic,,,,,,,,,,,2 +Jaroslav Plasil,Czech Republic,,,,,,,,,,, +Michal Kadlec,Czech Republic,,,,,,,,,,, +Milan Baros,Czech Republic,,,,,,,,,,3, +Petr Cech,Czech Republic,,,,,,,,6,9,, +Petr Jiracek,Czech Republic,2,,,,,,,,,, +Theodor Gebre Selassie,Czech Republic,,,,,,,,,,, +Tomás Hübschman,Czech Republic,,,,,,,12,,,, +Tomas Pekhart,Czech Republic,,,,,,,,,,, +Tomas Rosicky,Czech Republic,,,,,,,,,,, +Tomas Sivok,Czech Republic,,,,,,,,,,, +Vaclav Pilar,Czech Republic,2,4,,,,,,,,, +Daniel Agger,Denmark,,,,,,,21,,,, +Dennis Rommedahl,Denmark,,,,,,,,,,3, +Jakob Poulsen,Denmark,,,,,,,,,,, +Michael Krohn-Dehli,Denmark,2,,,,,,,,,, +Nicklas Bendtner,Denmark,2,,,,,,,,,, +Niki Zimling,Denmark,,,,,,,,,,, +Simon Kjaer,Denmark,,,,,,,,,,, +Stephan Andersen,Denmark,,,,,,,,5,10,, +Tobias Mikkelsen,Denmark,,,,,,,,,,, +William Kvist,Denmark,,,,,,,12,,,, +Ashley Young,England,,,,,,,,,,, +Danny Welbeck,England,,,,,,,,,,, +Glen Johnson,England,,,,,,,,,,, +James Milner,England,,,,,17,,,,,, +Joe Hart,England,,,,,,,,3,22,, +John Terry,England,,,,,,,14,,,, +Joleon Lescott,England,,,,,,,,,,, +Scott Parker,England,,,,,,13,,,,, +Steven Gerrard,England,,,,,,18,,,,, +Theo Walcott,England,,,,,,,,,,, +Wayne Rooney,England,,,,,,,,,,, +Alou Diarra,France,,,,,,15,,,,, +Franck Ribéry,France,,,,6,14,,,,,, +Hugo Lloris,France,,,,,,,,5,6,, +Jérémy Menez,France,,4,,,,,,,,,2 +Karim Benzema,France,,6,,19,,,,,,3, +Mathieu Debuchy,France,,,,,,,,,,, +Olivier Giroud,France,,,,,,,,,,, +Patrice 
Evra,France,,,,,,,,,,, +Philippe Mexes,France,,,,,,,,,,,2 +Samir Nasri,France,,,,,,,,,,, +Yann M'Vila,France,,,,,,,,,,, +Yohan Cabaye,France,,,,,,,,,,, +Andre Schürrle,Germany,,,,7,,,,,,, +Bastian Schweinsteiger,Germany,,,415,,,13,,,,, +Holger Badstuber,Germany,,,,,,,,,,, +Jerome Boateng,Germany,,,,,17,,,,,, +Manuel Neuer,Germany,,,,,,,,6,10,, +Marco Reus,Germany,,4,,,,,,,,, +Mario Gomez,Germany,3,,,,,,,,,3, +Mats Hummels,Germany,,,,,,18,19,,,, +Mesut Özil,Germany,,5,319,,17,,,,,, +Miroslav Klose,Germany,,,,,,,,,,5, +Philipp Lahm,Germany,,,316,,,,,,,, +Sami Khedira,Germany,,,310,,,,,,,, +Thomas Müller,Germany,,,,,14,,,,,, +Toni Kroos,Germany,,,,,,,,,,, +Dimitris Salpingidis,Greece,2,,,,,,,,,5, +Georgios Samaras,Greece,,,,,,,,,,, +Georgios Tzavellas,Greece,,,,,,,,,,, +Giorgios Karagounis,Greece,,,,,,,,,,,2 +Jose Holebas,Greece,,,,,,,,,,,2 +Konstantinos Chalkias,Greece,,,,,,,,3,,, +Konstantinos Katsouranis,Greece,,,,,,,12,,,, +Kyriakos Papadopoulos,Greece,,,,,,,,,,, +Michalis Sifakis,Greece,,,,,,,,4,11,, +Sokratis Papastathopoulos,Greece,,,,,,,,,,, +Theofanis Gekas,Greece,,,,,,,,,,3, +Vassilas Torosidis,Greece,,,,,,,12,,,, +Alessandro Diamanti,Italy,,,,8,,,,,,, +Andrea Pirlo,Italy,,,423,,,,13,,,, +Antonio Cassano,Italy,,7,,,14,,,,,3, +Antonio Di Natale,Italy,,,,,,,,,,6, +Christian Maggio,Italy,,,,,,,,,,,2 +Claudio Marchisio,Italy,,4,,11,,14,,,,, +Daniele De Rossi,Italy,,,345,9,,,22,,,, +Gianluigi Buffon,Italy,,,,,,,,7,20,, +Giorgio Chiellini,Italy,,,,,,,16,,,, +Leonardo Bonucci,Italy,,,,,,,15,,,, +Mario Balotelli,Italy,3,10,,,,,,,,, +Riccardo Montolivo,Italy,,,,7,,,13,,,, +Thiago Motta,Italy,,,,,,,,,,,2 +Arjen Robben,Netherlands,,,,11,,,,,,, +Ibrahim Afellay,Netherlands,,,,,,,,,,, +Jetro Willems,Netherlands,,,,,,,,,,,2 +Johnny Heitinga,Netherlands,,,,,,,,,,, +Joris Mathijsen,Netherlands,,,,,,,,,,, +Maarten Stekelenburg,Netherlands,,,,,,,,5,12,, +Mark van Bommel,Netherlands,,,,,,,,,,, +Nigel de Jong,Netherlands,,,,,,,,,,, +Rafael van der Vaart,Netherlands,,,,,,,,,,, 
+Robin van Persie,Netherlands,,5,,,,,,,,, +Ron Vlaar,Netherlands,,,,,,,,,,, +Wesley Sneijder,Netherlands,,,,8,17,,,,,, +Damien Perquis,Poland,,,,,,,,,,, +Eugen Polanski,Poland,,,,,,16,,,,,2 +Jakub Blaszczykowski,Poland,,,,,,,,,,, +Ludovic Obraniak,Poland,,,,,19,,,,,, +Marcin Wasilewski,Poland,,,,,,,,,,, +Przemyslaw Tyton,Poland,,,,,,,,,6,, +Robert Lewandowski,Poland,,,,,,,,,,, +Wojciech Szczesny,Poland,,,,,,,,,,, +Bruno Alves,Portugal,,,,,,,,,,, +Cristiano Ronaldo,Portugal,3,9,,,,,,,,, +Fábio Coentrão,Portugal,,,,,18,14,,,,,2 +Helder Postiga,Portugal,,,,,,,,,,3, +Hugo Almeida,Portugal,,,,,,,,,,5, +João Moutinho,Portugal,,,,,,,15,,,, +João Pereira,Portugal,,,,,,17,,,,,2 +Miguel Veloso,Portugal,,,,,,,,,,,2 +Nani,Portugal,,,,9,19,,,,,, +Nélson Oliveira,Portugal,,,,,,,,,,, +Pepe,Portugal,,,,,,,16,,,, +Raul Meireles,Portugal,,,,,,,11,,,, +Rolando,Portugal,,,,,,,,,,, +Rui Patricio,Portugal,,,,,,,,4,10,, +Silvestre Varela,Portugal,,,,,,,,,,, +Damien Duff,Republic of Ireland,,,,,16,,,,,, +Glenn Whelan,Republic of Ireland,,,,,,,,,,, +Jonathan Walters,Republic of Ireland,,,,,,,,,,, +Keiren Westwood,Republic of Ireland,,,,,,,,2,,, +Keith Andrews,Republic of Ireland,,,,12,,,,,,, +Richard Dunne,Republic of Ireland,,,,,,,,,,, +Robbie Keane,Republic of Ireland,,,,,,,,,,7, +Sean St Ledger,Republic of Ireland,,,,,,,,,,,2 +Shay Given,Republic of Ireland,,,,,,,,9,17,, +Alan Dzagoev,Russia,3,,,,,,,,,, +Alexander Kerzhakov,Russia,,,,14,,,,,,, +Alexei Berezoutski,Russia,,,,,,,,,,, +Andrey Arshavin,Russia,,,,,15,,,,,, +Igor Denisov,Russia,,,303,,,,,,,, +Konstantin Zyryanov,Russia,,,,,,,,,,, +Roman Pavlyuchenko,Russia,,,,,,,,,,, +Roman Shirokov,Russia,,,,,,,,,,, +Sergei Ignashevitch,Russia,,,,,,,,,,, +Vyacheslav Malafeev,Russia,,,,,,,,3,10,, +Yuri Zhirkov,Russia,,,,,,,,,,, +Álvaro Arbeloa,Spain,,,319,,,20,,,,3, +Andrés Iniesta,Spain,,9,446,20,,,,,,, +David Silva,Spain,2,8,283,,,,,,,3, +Fernando Torres,Spain,3,5,,,,,,,,5, +Francesc Fábregas,Spain,2,,,,,,,,,, +Gerard 
Piqué,Spain,,,312,,,19,,,,, +Iker Casillas,Spain,,,,,,,,,15,, +Jesús Navas,Spain,,,,,16,,,,,, +Jordi Alba,Spain,,,410,,,,,,,, +Pedro,Spain,,,,,,,,,,, +Sergio Busquets,Spain,,,455,,,16,18,,,, +Sergio Ramos,Spain,,,394,7,,16,,,,,2 +Xabi Alonso,Spain,2,,585,,,16,,,,, +Xavi,Spain,,,592,12,13,,,,,, +Anders Svensson,Sweden,,,,,,,,,,,2 +Andreas Isaksson,Sweden,,,,,,,,5,8,, +Christian Wilhelmsson,Sweden,,,,,,,,,,, +Kim Källström,Sweden,,,,,,,,,,, +Ola Toivonen,Sweden,,,,,,,,,,3, +Olof Mellberg,Sweden,,,,,,,,,,, +Samuel Holmén,Sweden,,,,,,,,,,, +Sebastian Larsson,Sweden,,4,,,,,,,,, +Zlatan Ibrahimovic,Sweden,2,7,,,,,,,,, +Anatoliy Tymoshchuk,Ukraine,,,,,,14,,,,,2 +Andriy Pyatov,Ukraine,,,,,,,,4,13,, +Andriy Shevchenko,Ukraine,2,,,,,,,,,, +Andriy Yarmolenko,Ukraine,,,,,,,,,,, +Yevgen Selin,Ukraine,,,,,,,,,,, +Yevhen Khacheridi,Ukraine,,,,,,,,,,, +Yevhen Konoplyanka,Ukraine,,,,13,,,,,,, diff --git a/tests/e2e_dbt_project/data/training/stats_team_training.csv b/tests/e2e_dbt_project/data/training/stats_team_training.csv new file mode 100644 index 000000000..0413c75d5 --- /dev/null +++ b/tests/e2e_dbt_project/data/training/stats_team_training.csv @@ -0,0 +1,17 @@ +Team,Goals,Shots_on_target,Shots_off_target,Total_shots,Hit_Woodwork,Penalty_goals,Penalties_not_scored,Headed_goals,Passes,Passes_completed,Touches,Crosses,Dribbles,Corners_Taken,Tackles,Clearances,Interceptions,Clearances_off_line,Clean_Sheets,Blocks,Goals_conceded,Saves_made,Fouls_Won,Fouls_Conceded,Offsides,Yellow_Cards,Red_Cards,Subs_on,Subs_off,Players_Used +Croatia,4,13,12,32,0,0,0,2,1076,828,1706,60,42,14,49,83,56,,0,10,3,13,41,62,2,9,0,9,9,16 +Czech Republic,4,13,18,39,0,0,0,0,1565,1223,2358,46,68,21,62,98,37,2,1,10,6,9,53,73,8,7,0,11,11,19 +Denmark,4,10,10,27,1,0,0,3,1298,1082,1873,43,32,16,40,61,59,0,1,10,5,10,25,38,8,4,0,7,7,15 +England,5,11,18,40,0,0,0,3,1488,1200,2440,58,60,16,86,106,72,1,2,29,3,22,43,45,6,5,0,11,11,16 
+France,3,22,24,65,1,0,0,0,2066,1803,2909,55,76,28,71,76,58,0,1,7,5,6,36,51,5,6,0,11,11,19 +Germany,10,32,32,80,2,1,0,2,2774,2427,3761,101,60,35,91,73,69,0,1,11,6,10,63,49,12,4,0,15,15,17 +Greece,5,8,18,32,1,1,1,0,1187,911,2016,52,53,10,65,123,87,0,1,23,7,13,67,48,12,9,1,12,12,20 +Italy,6,34,45,110,2,0,0,2,3016,2531,4363,75,75,30,98,137,136,1,2,18,7,20,101,89,16,16,0,18,18,19 +Netherlands,2,12,36,60,2,0,0,0,1556,1381,2163,50,49,22,34,41,41,0,0,9,5,12,35,30,3,5,0,7,7,15 +Poland,2,15,23,48,0,0,0,1,1059,852,1724,55,39,14,67,87,62,0,0,8,3,6,48,56,3,7,1,7,7,17 +Portugal,6,22,42,82,6,0,0,2,1891,1461,2958,91,64,41,78,92,86,0,2,11,4,10,73,90,10,12,0,14,14,16 +Republic of Ireland,1,7,12,28,0,0,0,1,851,606,1433,43,18,8,45,78,43,1,0,23,9,17,43,51,11,6,1,10,10,17 +Russia,5,9,31,59,2,0,0,1,1602,1345,2278,40,40,21,65,74,58,0,0,8,3,10,34,43,4,6,0,7,7,16 +Spain,12,42,33,100,0,1,0,2,4317,3820,5585,69,106,44,122,102,79,0,5,8,1,15,102,83,19,11,0,17,17,18 +Sweden,5,17,19,39,3,0,0,1,1192,965,1806,44,29,7,56,54,45,0,1,12,5,8,35,51,7,7,0,9,9,18 +Ukraine,2,7,26,38,0,0,0,2,1276,1043,1894,33,26,18,65,97,29,0,0,4,4,13,48,31,4,5,0,9,9,18 diff --git a/tests/e2e_dbt_project/data/training/string_column_anomalies_training.csv b/tests/e2e_dbt_project/data/training/string_column_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/any_type_column_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/any_type_column_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/backfill_days_column_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/backfill_days_column_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/dimension_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/dimension_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb 
diff --git a/tests/e2e_dbt_project/data/validation/groups_validation.csv b/tests/e2e_dbt_project/data/validation/groups_validation.csv new file mode 100644 index 000000000..1bf171a69 --- /dev/null +++ b/tests/e2e_dbt_project/data/validation/groups_validation.csv @@ -0,0 +1,5 @@ +group_b,group_c,group_d +Netherlands,Spain,Ukraine +Denmark,Italy,Sweden +Germany,Ireland,France +Portugal,Croatia,England diff --git a/tests/e2e_dbt_project/data/validation/numeric_column_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/numeric_column_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/stats_players_validation.csv b/tests/e2e_dbt_project/data/validation/stats_players_validation.csv new file mode 100644 index 000000000..53806a69e --- /dev/null +++ b/tests/e2e_dbt_project/data/validation/stats_players_validation.csv @@ -0,0 +1,2 @@ +key_crosses,red_cards,Player,Team,Goals,shots_on_target,Passes,Shots_without_scoring,Crosses,Tackles,Interceptions,Goals_conceded,Saves_made,Yellow_cards +bla,bla,Darijo Srna,Croatia,2,4,5,7,16,13,4,4,5,1 diff --git a/tests/e2e_dbt_project/data/validation/stats_team_validation.csv b/tests/e2e_dbt_project/data/validation/stats_team_validation.csv new file mode 100644 index 000000000..9d8dea2ff --- /dev/null +++ b/tests/e2e_dbt_project/data/validation/stats_team_validation.csv @@ -0,0 +1,17 @@ +Team,Goals,Shots_on_target,Shots_off_target,Total_shots,Hit_Woodwork,Penalty_goals,Penalties_not_scored,Headed_goals,Passes,Passes_completed,Touches,Crosses,Dribbles,Corners_Taken,Tackles,Clearances,Interceptions,Clearances_off_line,Clean_Sheets,Blocks,Goals_conceded,Saves_made,Fouls_Won,Fouls_Conceded,Offsides,Yellow_Cards,Red_Cards,Subs_on,Subs_off,Players_Used +Croatia,bla,13,12,32,0,0,0,2,1076,828,1706,60,42,14,49,83,56,,0,10,3,13,41,62,2,9,0,9,9,16 +Czech,bla,13,18,39,0,0,0,0,1565,1223,2358,46,68,21,62,98,37,2,1,10,6,9,53,73,8,7,0,11,11,19 
+Denmark,bla,10,10,27,1,0,0,3,1298,1082,1873,43,32,16,40,61,59,0,1,10,5,10,25,38,8,4,0,7,7,15 +England,bla,11,18,40,0,0,0,3,1488,1200,2440,58,60,16,86,106,72,1,2,29,3,22,43,45,6,5,0,11,11,16 +France,bla,22,24,65,1,0,0,0,2066,1803,2909,55,76,28,71,76,58,0,1,7,5,6,36,51,5,6,0,11,11,19 +Germany,bla0,32,32,80,2,1,0,2,2774,2427,3761,101,60,35,91,73,69,0,1,11,6,10,63,49,12,4,0,15,15,17 +Greece,bla,8,18,32,1,1,1,0,1187,911,2016,52,53,10,65,123,87,0,1,23,7,13,67,48,12,9,1,12,12,20 +Italy,bla,34,45,110,2,0,0,2,3016,2531,4363,75,75,30,98,137,136,1,2,18,7,20,101,89,16,16,0,18,18,19 +Netherlands,bla,12,36,60,2,0,0,0,1556,1381,2163,50,49,22,34,41,41,0,0,9,5,12,35,30,3,5,0,7,7,15 +Poland,bla,15,23,48,0,0,0,1,1059,852,1724,55,39,14,67,87,62,0,0,8,3,6,48,56,3,7,1,7,7,17 +Portugal,bla,22,42,82,6,0,0,2,1891,1461,2958,91,64,41,78,92,86,0,2,11,4,10,73,90,10,12,0,14,14,16 +Republic,bla,7,12,28,0,0,0,1,851,606,1433,43,18,8,45,78,43,1,0,23,9,17,43,51,11,6,1,10,10,17 +Russia,bla,9,31,59,2,0,0,1,1602,1345,2278,40,40,21,65,74,58,0,0,8,3,10,34,43,4,6,0,7,7,16 +Spain,bla2,42,33,100,0,1,0,2,4317,3820,5585,69,106,44,122,102,79,0,5,8,1,15,102,83,19,11,0,17,17,18 +Sweden,bla,17,19,39,3,0,0,1,1192,965,1806,44,29,7,56,54,45,0,1,12,5,8,35,51,7,7,0,9,9,18 +Ukraine,bla,7,26,38,0,0,0,2,1276,1043,1894,33,26,18,65,97,29,0,0,4,4,13,48,31,4,5,0,9,9,18 diff --git a/tests/e2e_dbt_project/data/validation/string_column_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/string_column_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/dbt_project.yml b/tests/e2e_dbt_project/dbt_project.yml new file mode 100644 index 000000000..4c677dcfb --- /dev/null +++ b/tests/e2e_dbt_project/dbt_project.yml @@ -0,0 +1,29 @@ +name: "elementary_integration_tests" +version: "1.0.0" +config-version: 2 +profile: "elementary_tests" + +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] 
+snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + - "dbt_modules" + +vars: + days_back: 30 + debug_logs: "{{ env_var('DBT_EDR_DEBUG', False) }}" + custom_run_started_at: "{{ modules.datetime.datetime.utcfromtimestamp(0) }}" + +seeds: + +schema: test_seeds + +models: + elementary: + +schema: elementary diff --git a/tests/e2e_dbt_project/debug.sh b/tests/e2e_dbt_project/debug.sh new file mode 100644 index 000000000..fa26f9077 --- /dev/null +++ b/tests/e2e_dbt_project/debug.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +export DBT_EDR_DEBUG=1 +export DBT_MACRO_DEBUGGING=1 + diff --git a/tests/e2e_dbt_project/docker-compose.yml b/tests/e2e_dbt_project/docker-compose.yml new file mode 100644 index 000000000..6bdc21ad5 --- /dev/null +++ b/tests/e2e_dbt_project/docker-compose.yml @@ -0,0 +1,33 @@ +version: "3.8" + +services: + postgres: + image: postgres + ports: + - "127.0.0.1:5432:5432" + command: postgres -c max_connections=500 + environment: + POSTGRES_USER: admin + POSTGRES_PASSWORD: admin + volumes: + - postgres:/var/lib/postgresql/data + + clickhouse: + image: clickhouse/clickhouse-server:latest + container_name: clickhouse + ports: + - "8123:8123" + - "9000:9000" + volumes: + - ./clickhouse-data:/var/lib/clickhouse + environment: + CLICKHOUSE_DB: default + CLICKHOUSE_USER: default + CLICKHOUSE_PASSWORD: "default" + ulimits: + nofile: + soft: 262144 + hard: 262144 + +volumes: + postgres: diff --git a/tests/e2e_dbt_project/generate_data.py b/tests/e2e_dbt_project/generate_data.py new file mode 100644 index 000000000..e6b3f8faf --- /dev/null +++ b/tests/e2e_dbt_project/generate_data.py @@ -0,0 +1,456 @@ +import csv +import os +import random +import string +from datetime import datetime, timedelta +from pathlib import Path +from typing import List + +FILE_DIR = os.path.dirname(os.path.realpath(__file__)) + +EPOCH = 
datetime.utcfromtimestamp(0) +DATE_FORMAT = "%Y-%m-%d %H:%M:%S" + + +def generate_fake_data(): + generate_string_anomalies_training_and_validation_files() + generate_numeric_anomalies_training_and_validation_files() + generate_any_type_anomalies_training_and_validation_files() + generate_dimension_anomalies_training_and_validation_files() + generate_backfill_days_training_and_validation_files() + generate_seasonality_volume_anomalies_files() + + +def generate_rows_timestamps(base_date, period="days", count=1, days_back=30): + min_date = base_date - timedelta(days=days_back) + dates = [] + while base_date > min_date: + dates.append(base_date) + base_date = base_date - timedelta(**{period: count}) + return dates + + +def write_rows_to_csv(csv_path, rows, header): + # Creates the csv file directories if needed. + directory_path = Path(csv_path).parent.resolve() + Path(directory_path).mkdir(parents=True, exist_ok=True) + + with open(csv_path, "w") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=header) + writer.writeheader() + writer.writerows(rows) + + +def generate_rows(rows_count_per_day, dates, get_row_callback): + rows = [] + for date in dates: + for i in range(0, rows_count_per_day): + row = get_row_callback(date, i, rows_count_per_day) + rows.append(row) + return rows + + +def generate_string_anomalies_training_and_validation_files(rows_count_per_day=100): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=1)).strftime(DATE_FORMAT), + "min_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 10)) + ), + "max_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 10)) + ), + "average_length": "".join(random.choices(string.ascii_lowercase, k=5)), + "missing_count": "" + if row_index < (3 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "missing_percent": "" + if 
random.randint(1, rows_count) <= (20 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + } + + def get_validation_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=7)).strftime(DATE_FORMAT), + "min_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(1, 10)) + ), + "max_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 15)) + ), + "average_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 8)) + ), + "missing_count": "" + if row_index < (20 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "missing_percent": "" + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + } + + string_columns = [ + "updated_at", + "occurred_at", + "min_length", + "max_length", + "average_length", + "missing_count", + "missing_percent", + ] + dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=2)) + training_rows = generate_rows(rows_count_per_day, dates, get_training_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "training", "string_column_anomalies_training.csv" + ), + training_rows, + string_columns, + ) + + validation_date = EPOCH - timedelta(days=1) + validation_rows = generate_rows( + rows_count_per_day, [validation_date], get_validation_row + ) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "validation", "string_column_anomalies_validation.csv" + ), + validation_rows, + string_columns, + ) + + +def generate_numeric_anomalies_training_and_validation_files(rows_count_per_day=200): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=1)).strftime(DATE_FORMAT), + "min": random.randint(100, 200), + "max": random.randint(100, 200), + "zero_count": 0 + if row_index < 
(3 / 100 * rows_count) + else random.randint(100, 200), + "zero_percent": 0 + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else random.randint(100, 200), + "average": random.randint(99, 101), + "standard_deviation": random.randint(99, 101), + "variance": random.randint(99, 101), + "sum": random.randint(100, 200), + } + + def get_validation_row(date, row_index, rows_count): + row_index += -(rows_count / 2) + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=7)).strftime(DATE_FORMAT), + "min": random.randint(10, 200), + "max": random.randint(100, 300), + "zero_count": 0 + if row_index < (80 / 100 * rows_count) + else random.randint(100, 200), + "zero_percent": 0 + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else random.randint(100, 200), + "average": random.randint(101, 110), + "standard_deviation": random.randint(80, 120), + "variance": random.randint(80, 120), + "sum": random.randint(300, 400), + } + + numeric_columns = [ + "updated_at", + "occurred_at", + "min", + "max", + "zero_count", + "zero_percent", + "average", + "standard_deviation", + "variance", + "sum", + ] + dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=2)) + training_rows = generate_rows(rows_count_per_day, dates, get_training_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "training", "numeric_column_anomalies_training.csv" + ), + training_rows, + numeric_columns, + ) + + validation_date = EPOCH - timedelta(days=1) + validation_rows = generate_rows( + rows_count_per_day, [validation_date], get_validation_row + ) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "validation", "numeric_column_anomalies_validation.csv" + ), + validation_rows, + numeric_columns, + ) + + +def generate_any_type_anomalies_training_and_validation_files(rows_count_per_day=300): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - 
timedelta(hours=1)).strftime(DATE_FORMAT), + "null_count_str": None + if row_index < (3 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "null_percent_str": None + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "null_count_float": None + if row_index < (3 / 100 * rows_count) + else random.uniform(1.2, 8.9), + "null_percent_float": None + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else random.uniform(1.2, 8.9), + "null_count_int": None + if row_index < (3 / 100 * rows_count) + else random.randint(100, 200), + "null_percent_int": None + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else random.randint(100, 200), + "null_count_bool": None + if row_index < (3 / 100 * rows_count) + else bool(random.getrandbits(1)), + "null_percent_bool": None + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else bool(random.getrandbits(1)), + } + + def get_validation_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=7)).strftime(DATE_FORMAT), + "null_count_str": None + if row_index < (80 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "null_percent_str": None + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "null_count_float": None + if row_index < (80 / 100 * rows_count) + else random.uniform(1.2, 8.9), + "null_percent_float": None + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else random.uniform(1.2, 8.9), + "null_count_int": None + if row_index < (80 / 100 * rows_count) + else random.randint(100, 200), + "null_percent_int": None + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else random.randint(100, 200), + "null_count_bool": None + if row_index < (80 / 100 * rows_count) + else 
bool(random.getrandbits(1)), + "null_percent_bool": None + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else bool(random.getrandbits(1)), + } + + any_type_columns = [ + "updated_at", + "occurred_at", + "null_count_str", + "null_percent_str", + "null_count_float", + "null_percent_float", + "null_count_int", + "null_percent_int", + "null_count_bool", + "null_percent_bool", + ] + dates = generate_rows_timestamps( + base_date=EPOCH - timedelta(days=2), period="hours", count=4 + ) + training_rows = generate_rows(rows_count_per_day, dates, get_training_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "training", "any_type_column_anomalies_training.csv" + ), + training_rows, + any_type_columns, + ) + + validation_date = EPOCH - timedelta(days=1) + validation_rows = generate_rows( + rows_count_per_day, [validation_date], get_validation_row + ) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "validation", "any_type_column_anomalies_validation.csv" + ), + validation_rows, + any_type_columns, + ) + + +def generate_dimension_anomalies_training_and_validation_files(): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "platform": "android" if row_index < (rows_count - 20) else "ios", + "version": row_index % 3, + "user_id": random.randint(1, rows_count), + } + + def get_validation_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "platform": "android" if row_index < (rows_count - 1) else "ios", + "version": row_index % 3, + "user_id": random.randint(1, rows_count), + } + + dimension_columns = ["updated_at", "platform", "version", "user_id"] + dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=2)) + training_rows = generate_rows(1020, dates, get_training_row) + write_rows_to_csv( + os.path.join(FILE_DIR, "data", "training", "dimension_anomalies_training.csv"), + training_rows, + dimension_columns, + ) + + validation_date = 
EPOCH - timedelta(days=1) + validation_rows = generate_rows(1001, [validation_date], get_validation_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "validation", "dimension_anomalies_validation.csv" + ), + validation_rows, + dimension_columns, + ) + + +def generate_seasonal_data_files( + table_name: str, training_dates: List[datetime], validation_dates: List[datetime] +): + columns = ["updated_at", "user_id"] + training_rows = [] + for date in training_dates: + training_rows.extend( + [ + { + "updated_at": date.strftime(DATE_FORMAT), + "user_id": random.randint(1000, 9999), + } + for _ in range(700) + ] + ) + write_rows_to_csv( + csv_path=os.path.join( + FILE_DIR, "data", "training", f"{table_name}_training.csv" + ), + rows=training_rows, + header=columns, + ) + + validation_rows = [] + for date in validation_dates: + validation_rows.extend( + [ + { + "updated_at": date.strftime(DATE_FORMAT), + "user_id": random.randint(1000, 9999), + } + for _ in range(100) + ] + ) + write_rows_to_csv( + csv_path=os.path.join( + FILE_DIR, + "data", + "validation", + f"{table_name}_validation.csv", + ), + rows=validation_rows, + header=columns, + ) + + +def generate_day_of_week_data(): + training_dates = generate_rows_timestamps( + base_date=EPOCH - timedelta(days=1), period="weeks", days_back=(7 * 30) + ) + validation_dates = generate_rows_timestamps(base_date=EPOCH, days_back=1) + generate_seasonal_data_files( + "users_per_day_weekly_seasonal", training_dates, validation_dates + ) + + +def generate_hour_of_day_data(): + training_dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=1)) + validation_dates = generate_rows_timestamps(base_date=EPOCH, days_back=1) + generate_seasonal_data_files( + "users_per_hour_daily_seasonal", training_dates, validation_dates + ) + + +def generate_seasonality_volume_anomalies_files(): + generate_day_of_week_data() + generate_hour_of_day_data() + + +def 
generate_backfill_days_training_and_validation_files(rows_count_per_day=100): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=1)).strftime(DATE_FORMAT), + "min_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 10)) + ), + } + + def get_validation_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=7)).strftime(DATE_FORMAT), + "min_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(1, 10)) + ), + } + + string_columns = ["updated_at", "occurred_at", "min_length"] + dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=1)) + training_rows = generate_rows(rows_count_per_day, dates, get_training_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "training", "backfill_days_column_anomalies_training.csv" + ), + training_rows, + string_columns, + ) + + validation_date = EPOCH - timedelta(days=5) + validation_rows = generate_rows( + rows_count_per_day, [validation_date], get_validation_row + ) + write_rows_to_csv( + os.path.join( + FILE_DIR, + "data", + "validation", + "backfill_days_column_anomalies_validation.csv", + ), + validation_rows, + string_columns, + ) + + +def main(): + print("Generating fake data!") + generate_fake_data() + print("Done. 
Please run 'dbt seed -f' to load the data into your database.") + + +if __name__ == "__main__": + main() diff --git a/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_column.sql b/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_column.sql new file mode 100644 index 000000000..fa80e67a8 --- /dev/null +++ b/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_column.sql @@ -0,0 +1,7 @@ +{%- test generic_test_on_column(model, column_name) -%} + {% set query_with_rows %} + with nothing as (select 1 as num) + select * from nothing where num = 1 + {%- endset -%} + {{ query_with_rows }} +{%- endtest -%} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_model.sql b/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_model.sql new file mode 100644 index 000000000..a78366532 --- /dev/null +++ b/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_model.sql @@ -0,0 +1,7 @@ +{%- test generic_test_on_model(model) -%} + {% set query_with_rows %} + with nothing as (select 1 as num) + select * from nothing where num = 1 + {%- endset -%} + {{ query_with_rows }} +{%- endtest -%} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/system/dbg.sql b/tests/e2e_dbt_project/macros/system/dbg.sql new file mode 100644 index 000000000..5dcd88dfe --- /dev/null +++ b/tests/e2e_dbt_project/macros/system/dbg.sql @@ -0,0 +1,3 @@ +{% macro dbg() %} + {% do debug() %} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/system/generate_schema_name.sql b/tests/e2e_dbt_project/macros/system/generate_schema_name.sql new file mode 100644 index 000000000..e30a0e706 --- /dev/null +++ b/tests/e2e_dbt_project/macros/system/generate_schema_name.sql @@ -0,0 +1,12 @@ +{% macro generate_schema_name(custom_schema_name, node) -%} + {%- set default_schema = target.schema -%} + {% if not custom_schema_name %} + {% do return(default_schema) %} + {% endif %} + + {% if node.resource_type == "seed" %} + {% 
do return(custom_schema_name) %} + {% endif %} + + {% do return("{}_{}".format(default_schema, custom_schema_name)) %} +{%- endmacro %} diff --git a/tests/e2e_dbt_project/macros/system/materializations.sql b/tests/e2e_dbt_project/macros/system/materializations.sql new file mode 100644 index 000000000..89de786c2 --- /dev/null +++ b/tests/e2e_dbt_project/macros/system/materializations.sql @@ -0,0 +1,7 @@ +{% materialization test, default %} + {% do return(elementary.materialization_test_default()) %} +{% endmaterialization %} + +{% materialization test, adapter="snowflake" %} + {% do return(elementary.materialization_test_snowflake()) %} +{% endmaterialization %} diff --git a/tests/e2e_dbt_project/models/any_type_column_anomalies.sql b/tests/e2e_dbt_project/models/any_type_column_anomalies.sql new file mode 100644 index 000000000..2714303fa --- /dev/null +++ b/tests/e2e_dbt_project/models/any_type_column_anomalies.sql @@ -0,0 +1,30 @@ +with training as ( + select * from {{ ref('any_type_column_anomalies_training') }} +), + +validation as ( + select * from {{ ref('any_type_column_anomalies_validation') }} +), + +source as ( + select * from training + union all + select * from validation +), + + final as ( + select + updated_at, + occurred_at, + null_count_str, + null_percent_str, + null_count_float, + null_percent_float, + null_count_int, + null_percent_int, + null_count_bool, + null_percent_bool + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql b/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql new file mode 100644 index 000000000..56cfb2357 --- /dev/null +++ b/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql @@ -0,0 +1,23 @@ +with training as ( + select * from {{ ref('backfill_days_column_anomalies_training') }} +), + +validation as ( + select * from {{ ref('backfill_days_column_anomalies_validation') }} +), + +source as ( + select * from training + union all + select * 
from validation +), + +final as ( + select + updated_at, + occurred_at, + min_length + from source +) + +select * from final diff --git a/tests/e2e_dbt_project/models/config_levels_project.sql b/tests/e2e_dbt_project/models/config_levels_project.sql new file mode 100644 index 000000000..3ed516051 --- /dev/null +++ b/tests/e2e_dbt_project/models/config_levels_project.sql @@ -0,0 +1 @@ +select * from {{ ref('any_type_column_anomalies_validation') }} \ No newline at end of file diff --git a/tests/e2e_dbt_project/models/config_levels_test_and_model.sql b/tests/e2e_dbt_project/models/config_levels_test_and_model.sql new file mode 100644 index 000000000..3ed516051 --- /dev/null +++ b/tests/e2e_dbt_project/models/config_levels_test_and_model.sql @@ -0,0 +1 @@ +select * from {{ ref('any_type_column_anomalies_validation') }} \ No newline at end of file diff --git a/tests/e2e_dbt_project/models/copy_numeric_column_anomalies.sql b/tests/e2e_dbt_project/models/copy_numeric_column_anomalies.sql new file mode 100644 index 000000000..b0db44fcc --- /dev/null +++ b/tests/e2e_dbt_project/models/copy_numeric_column_anomalies.sql @@ -0,0 +1 @@ +select * from {{ ref("numeric_column_anomalies") }} diff --git a/tests/e2e_dbt_project/models/dimension_anomalies.sql b/tests/e2e_dbt_project/models/dimension_anomalies.sql new file mode 100644 index 000000000..cee7364db --- /dev/null +++ b/tests/e2e_dbt_project/models/dimension_anomalies.sql @@ -0,0 +1,24 @@ +with training as ( + select * from {{ ref('dimension_anomalies_training') }} +), + +validation as ( + select * from {{ ref('dimension_anomalies_validation') }} +), + +source as ( + select * from training + union all + select * from validation +), + + final as ( + select + updated_at, + platform, + version, + user_id + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/ephemeral_model.sql b/tests/e2e_dbt_project/models/ephemeral_model.sql new file mode 100644 index 000000000..a4c2b477b --- /dev/null +++ 
b/tests/e2e_dbt_project/models/ephemeral_model.sql @@ -0,0 +1,7 @@ +{{ + config( + materialized='ephemeral' + ) +}} + +select * from {{ ref('any_type_column_anomalies_training') }} \ No newline at end of file diff --git a/tests/e2e_dbt_project/models/error_model.sql b/tests/e2e_dbt_project/models/error_model.sql new file mode 100644 index 000000000..e76eb266d --- /dev/null +++ b/tests/e2e_dbt_project/models/error_model.sql @@ -0,0 +1 @@ +select 'a's as string diff --git a/tests/e2e_dbt_project/models/groups.sql b/tests/e2e_dbt_project/models/groups.sql new file mode 100644 index 000000000..bfda126dd --- /dev/null +++ b/tests/e2e_dbt_project/models/groups.sql @@ -0,0 +1 @@ +select * from {{ ref('groups_validation') }} diff --git a/tests/e2e_dbt_project/models/nested/models/tree/nested.sql b/tests/e2e_dbt_project/models/nested/models/tree/nested.sql new file mode 100644 index 000000000..ec17f8541 --- /dev/null +++ b/tests/e2e_dbt_project/models/nested/models/tree/nested.sql @@ -0,0 +1 @@ +select 1 as one diff --git a/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql b/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql new file mode 100644 index 000000000..2714303fa --- /dev/null +++ b/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql @@ -0,0 +1,30 @@ +with training as ( + select * from {{ ref('any_type_column_anomalies_training') }} +), + +validation as ( + select * from {{ ref('any_type_column_anomalies_validation') }} +), + +source as ( + select * from training + union all + select * from validation +), + + final as ( + select + updated_at, + occurred_at, + null_count_str, + null_percent_str, + null_count_float, + null_percent_float, + null_count_int, + null_percent_int, + null_count_bool, + null_percent_bool + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/non_dbt_model.sql b/tests/e2e_dbt_project/models/non_dbt_model.sql new file mode 100644 index 000000000..4a7301931 --- /dev/null +++ 
b/tests/e2e_dbt_project/models/non_dbt_model.sql @@ -0,0 +1,3 @@ +{{ config(materialized='non_dbt') }} + SELECT 1 +-- depends_on: {{ ref('one') }} \ No newline at end of file diff --git a/tests/e2e_dbt_project/models/numeric_column_anomalies.sql b/tests/e2e_dbt_project/models/numeric_column_anomalies.sql new file mode 100644 index 000000000..8eebe37d8 --- /dev/null +++ b/tests/e2e_dbt_project/models/numeric_column_anomalies.sql @@ -0,0 +1,30 @@ +with training as ( + select * from {{ ref('numeric_column_anomalies_training') }} +), + +validation as ( + select * from {{ ref('numeric_column_anomalies_validation') }} +), + +source as ( + select * from training + union all + select * from validation +), + + final as ( + select + updated_at, + occurred_at, + min, + max, + zero_count, + zero_percent, + average, + standard_deviation, + variance, + sum + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/one.sql b/tests/e2e_dbt_project/models/one.sql new file mode 100644 index 000000000..ec17f8541 --- /dev/null +++ b/tests/e2e_dbt_project/models/one.sql @@ -0,0 +1 @@ +select 1 as one diff --git a/tests/e2e_dbt_project/models/schema.yml b/tests/e2e_dbt_project/models/schema.yml new file mode 100644 index 000000000..31f376284 --- /dev/null +++ b/tests/e2e_dbt_project/models/schema.yml @@ -0,0 +1,734 @@ +version: 2 + +models: + - name: one + config: + tags: "{{ var('one_tags', []) }}" + meta: + owner: "{{ var('one_owner', none) }}" + columns: + - name: one + tests: + - accepted_values: + meta: + owner: "@elon" + values: [2, 3] + + - name: any_type_column_anomalies + meta: + owner: ["@edr"] + subscribers: "@egk" + description: > + This is a very weird description + with breaklines + and comma, + and even a string like this 'wow'. You know, these $##$34#@#!^ can also be helpful + WDYT? 
+ config: + elementary: + timestamp_column: updated_at + tests: + - elementary.volume_anomalies: + time_bucket: + period: hour + count: 4 + meta: + description: > + This is a very weird description + with breaklines + and comma, + and even a string like this 'wow'. You know, these $##$34#@#!^ can also be helpful + WDYT? + config: + severity: warn + tags: ["table_anomalies"] + - elementary.volume_anomalies: + time_bucket: + period: week + count: 1 + config: + severity: warn + where: 1=1 + tags: ["table_anomalies"] + - elementary.all_columns_anomalies: + tags: ["all_any_type_columns_anomalies", "column_anomalies"] + #This here is to simulate a long test name as test params are part of the test name + exclude_regexp: ".*column1|column2|column3|column4|column5|column6|column7|column8|column9|column10|column11|column12|column13|column14|column15|column16|column17.*" + - generic_test_on_model: + tags: ["regular_tests"] + - elementary.all_columns_anomalies: + anomaly_direction: "drop" + where: 1=1 + tags: ["directional_anomalies", "drop"] + - elementary.all_columns_anomalies: + anomaly_direction: "spike" + tags: ["directional_anomalies", "spike"] + + - name: no_timestamp_anomalies + meta: + owner: "elon@elementary-data.com, or@elementary-data.com" + subscribers: ["elon@elementary-data.com"] + description: This is a description. 
+ description: We use this model to test anomalies when there is no timestamp column + tests: + - elementary.volume_anomalies: + tags: ["no_timestamp"] + + columns: + - name: "null_count_str" + tests: + - elementary.column_anomalies: + tags: ["no_timestamp"] + where: 1=1 + column_anomalies: + - null_count + + - name: dimension_anomalies + meta: + owner: "egk" + subscribers: "elon, egk" + description: We use this model to test dimension anomalies + tests: + - elementary.dimension_anomalies: + tags: ["dimension_anomalies", "should_fail"] + alias: "dimension_anomalies_platform" + timestamp_column: updated_at + where: 1=1 + dimensions: + - platform + - elementary.dimension_anomalies: + alias: "dimension_anomalies_platform_where_expression" + tags: ["dimension_anomalies"] + timestamp_column: updated_at + dimensions: + - platform + where_expression: "platform = 'android'" + - elementary.dimension_anomalies: + alias: "dimension_anomalies_platform_new_dimension" + tags: ["dimension_anomalies"] + timestamp_column: updated_at + dimensions: + - platform + where_expression: "platform = 'windows'" + - elementary.dimension_anomalies: + alias: "dimension_anomalies_platform_new_dimension_no_timestamp" + tags: ["dimension_anomalies"] + dimensions: + - platform + where_expression: "platform = 'windows'" + - elementary.dimension_anomalies: + tags: ["dimension_anomalies", "should_fail"] + alias: "dimension_anomalies_platform_version" + timestamp_column: updated_at + dimensions: + - platform + - version + - elementary.dimension_anomalies: + anomaly_direction: "spike" + tags: ["directional_anomalies", "spike"] + timestamp_column: updated_at + dimensions: + - platform + - elementary.dimension_anomalies: + anomaly_direction: "drop" + tags: ["directional_anomalies", "drop"] + timestamp_column: updated_at + dimensions: + - platform + - elementary.dimension_anomalies: + dimensions: + - platform + tags: ["dimension_anomalies"] + alias: "dimension_anomalies_no_timestamp" + + - name: 
error_model + description: We use this model to create error runs and tests + meta: + owner: ["elon@elementary-data.com", "@elon", "egk"] + config: + tags: ["error_model"] + columns: + - name: "missing_column" + tests: + - uniques: + tags: ["error_test", "regular_tests"] + + - name: backfill_days_column_anomalies + config: + elementary: + timestamp_column: updated_at + columns: + - name: "min_length" + tests: + - elementary.column_anomalies: + column_anomalies: + - min_length + - max_length + tags: ["backfill_days"] + - elementary.column_anomalies: + backfill_days: 7 + column_anomalies: + - min_length + - max_length + tags: ["backfill_days"] + + - name: string_column_anomalies + meta: + owner: "@or" + tags: ["marketing"] + config: + elementary: + timestamp_column: updated_at + tests: + - elementary.freshness_anomalies: + tags: ["table_anomalies"] + - elementary.event_freshness_anomalies: + tags: ["event_freshness_anomalies"] + event_timestamp_column: occurred_at + update_timestamp_column: updated_at + - elementary.all_columns_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + - elementary.schema_changes: + where: 1=1 + tags: ["schema_changes"] + columns: + - name: "min_length" + tests: + - relationships: + tags: ["regular_tests"] + to: source('training', 'string_column_anomalies_training') + field: max_length + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + column_anomalies: + - min_length + - max_length + - missing_count + - name: max_length + tests: + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + - name: average_length + tests: + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + column_anomalies: + - average_length + - null_count + - name: missing_count + tests: + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + - name: missing_percent + tests: + - elementary.column_anomalies: + tags: 
["string_column_anomalies", "column_anomalies"] + - name: updated_at + tests: + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + + - name: numeric_column_anomalies + config: + elementary: + timestamp_column: updated_at + tests: + - elementary.volume_anomalies: + tags: ["table_anomalies"] + - elementary.volume_anomalies: + anomaly_direction: "drop" + tags: ["directional_anomalies", "drop"] + - elementary.volume_anomalies: + anomaly_direction: "spike" + tags: ["directional_anomalies", "spike"] + - elementary.freshness_anomalies: + tags: ["table_anomalies"] + - elementary.event_freshness_anomalies: + tags: ["event_freshness_anomalies"] + event_timestamp_column: occurred_at + update_timestamp_column: updated_at + - elementary.schema_changes: + tags: ["schema_changes"] + - elementary.all_columns_anomalies: + tags: ["all_numeric_columns_anomalies"] + column_anomalies: + - average_length + - null_count + columns: + - name: min + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - min + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - max + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - average + - name: max + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - min + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - max + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - average + - elementary.column_anomalies: + column_anomalies: + - average + anomaly_direction: "spike" + tags: ["directional_anomalies", "spike"] + - elementary.column_anomalies: + column_anomalies: + - average + anomaly_direction: "drop" + tags: ["directional_anomalies", "drop"] + - 
name: average + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - min + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - max + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - average + - name: zero_count + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: zero_percent + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: updated_at + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: variance + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: standard_deviation + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: sum + tests: + - elementary.column_anomalies: + column_anomalies: + - sum + tags: ["numeric_column_anomalies", "column_anomalies"] + + - name: copy_numeric_column_anomalies + config: + elementary: + timestamp_column: updated_at + tests: + - elementary.all_columns_anomalies: + column_anomalies: + - zero_count + tags: ["numeric_column_anomalies", "column_anomalies"] + + - name: groups + columns: + - name: group_a + data_type: "{{ 'strIng' if (target.type == 'bigquery' or target.type == 'databricks' or target.type == 'athena') else 'CHArACTER varying' if target.type == 'redshift' else 'teXt' }}" + - name: group_b + data_type: double + - name: group_c + tests: + - elementary.schema_changes: + tags: ["schema_changes"] + - elementary.schema_changes_from_baseline: + fail_on_added: true + where: 1=1 + tags: ["schema_changes_from_baseline"] + - elementary.schema_changes_from_baseline: + tags: ["schema_changes_from_baseline", "error_test"] + enforce_types: true + + - name: stats_players + 
columns: + - name: player + data_type: "{{ 'STRING' if (target.type == 'bigquery' or target.type == 'databricks' or target.type == 'athena') else 'character varying' if target.type == 'redshift' else 'TEXT' }}" + - name: goals + data_type: BOOLEAN + - name: coffee_cups_consumed + data_type: INTEGER + tests: + - elementary.schema_changes: + tags: ["schema_changes"] + - elementary.schema_changes_from_baseline: + tags: ["schema_changes_from_baseline"] + - elementary.schema_changes_from_baseline: + tags: ["schema_changes_from_baseline"] + enforce_types: true + + - name: stats_team + tests: + - elementary.schema_changes: + tags: ["schema_changes"] + + - name: users_per_day_weekly_seasonal + config: + elementary: + backfill_days: 14 + tests: + - elementary.volume_anomalies: + alias: day_of_week_volume_anomalies_no_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + - elementary.volume_anomalies: + alias: day_of_week_volume_anomalies_with_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + seasonality: day_of_week + - elementary.volume_anomalies: + alias: hour_of_week_volume_anomalies_no_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + time_bucket: + period: hour + count: 1 + - elementary.volume_anomalies: + alias: hour_of_week_volume_anomalies_with_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + time_bucket: + period: hour + count: 1 + seasonality: hour_of_week + + - name: users_per_hour_daily_seasonal + tests: + - elementary.volume_anomalies: + alias: hour_of_day_volume_anomalies_no_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + time_bucket: + period: hour + count: 1 + - elementary.volume_anomalies: + alias: hour_of_day_volume_anomalies_with_seasonality 
+ timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + time_bucket: + period: hour + count: 1 + seasonality: hour_of_day + + - name: ephemeral_model + config: + elementary: + timestamp_column: updated_at + tests: + - elementary.volume_anomalies: + tags: ["ephemeral_model", "error_test"] + - elementary.all_columns_anomalies: + tags: ["ephemeral_model", "error_test"] + - elementary.freshness_anomalies: + where: 1=1 + tags: ["ephemeral_model", "error_test"] + - elementary.schema_changes: + tags: ["ephemeral_model", "error_test"] + + - name: config_levels_test_and_model + tags: ["config_levels"] + config: + elementary: + min_training_set_size: 22 + days_back: 100 + backfill_days: 10 + anomaly_direction: "drop" + anomaly_sensitivity: 4 + where_expression: "true" + timestamp_column: "updated_at" + time_bucket: + period: hour + count: 4 + tests: + - config_levels: + tags: ["config_levels"] + alias: "test_level_config" + min_training_set_size: 18 + days_back: 5 + backfill_days: 5 + seasonality: "day_of_week" + anomaly_direction: "spike" + anomaly_sensitivity: 5 + where_expression: "1=1" + timestamp_column: "occurred_at" + time_bucket: + period: day + count: 1 + expected_config: ## Test level expected config + seasonality: "day_of_week" + min_training_set_size: 18 + days_back: 35 ## *7 because of seasonality + backfill_days: 5 + anomaly_direction: "spike" + anomaly_sensitivity: 5 + where_expression: "1=1" + timestamp_column: "occurred_at" + time_bucket: + period: day + count: 1 + - config_levels: + tags: ["config_levels"] + alias: "model_level_config" + expected_config: ## Model level expected config + min_training_set_size: 22 + seasonality: null + days_back: 100 + backfill_days: 10 + anomaly_direction: "drop" + anomaly_sensitivity: 4 + where_expression: "true" + timestamp_column: "updated_at" + time_bucket: + period: hour + count: 4 + - name: config_levels_project + tags: ["config_levels"] + tests: + - config_levels: + tags: 
["config_levels"] + alias: "project_level_config" + expected_config: ## Project level expected config + min_training_set_size: 14 + seasonality: null + days_back: 30 + backfill_days: 2 + anomaly_direction: both + anomaly_sensitivity: 3 + where_expression: null + timestamp_column: null + time_bucket: + period: day + count: 1 + +sources: + - name: training + schema: test_seeds + tables: + - name: users_per_hour_daily_seasonal_training + columns: + - name: "user_id" + tests: + - relationships: + tags: ["regular_tests"] + to: source('training', 'users_per_day_weekly_seasonal_training') + field: user_id + - name: any_type_column_anomalies_training + meta: + owner: ["@edr", "egk"] + freshness: + error_after: + count: 1 + period: minute + loaded_at_field: updated_at + tests: + - elementary.volume_anomalies: + tags: ["table_anomalies"] + - elementary.freshness_anomalies: + tags: ["table_anomalies", "error_test"] + - elementary.event_freshness_anomalies: + tags: ["event_freshness_anomalies"] + event_timestamp_column: occurred_at + - name: string_column_anomalies_training + meta: + owner: "@edr" + elementary: + timestamp_column: updated_at + freshness: + error_after: + count: 1 + period: minute + loaded_at_field: no_such_column + tests: + - elementary.volume_anomalies: + tags: ["table_anomalies"] + - elementary.freshness_anomalies: + tags: ["table_anomalies"] + - elementary.event_freshness_anomalies: + tags: ["event_freshness_anomalies"] + event_timestamp_column: occurred_at + update_timestamp_column: updated_at + - name: numeric_column_anomalies_training + meta: + elementary: + min_training_set_size: 22 + days_back: 100 + backfill_days: 10 + anomaly_direction: "drop" + anomaly_sensitivity: 4 + where_expression: "true" + timestamp_column: "updated_at" + time_bucket: + period: hour + count: 4 + tests: + - config_levels: + tags: ["config_levels"] + alias: "test_level_config" + min_training_set_size: 18 + days_back: 5 + backfill_days: 5 + seasonality: "day_of_week" + 
anomaly_direction: "spike" + anomaly_sensitivity: 5 + where_expression: "1=1" + timestamp_column: "occurred_at" + time_bucket: + period: day + count: 1 + expected_config: ## Test level expected config + seasonality: "day_of_week" + min_training_set_size: 18 + days_back: 35 ## *7 because of seasonality + backfill_days: 5 + anomaly_direction: "spike" + anomaly_sensitivity: 5 + where_expression: "1=1" + timestamp_column: "occurred_at" + time_bucket: + period: day + count: 1 + - config_levels: + tags: ["config_levels"] + alias: "model_level_config" + expected_config: ## Model level expected config + min_training_set_size: 22 + seasonality: null + days_back: 100 + backfill_days: 10 + anomaly_direction: "drop" + anomaly_sensitivity: 4 + where_expression: "true" + timestamp_column: "updated_at" + time_bucket: + period: hour + count: 4 + + - name: users_per_day_weekly_seasonal_training + - name: validation + schema: test_seeds + tables: + - name: users_per_hour_daily_seasonal_validation + - name: any_type_column_anomalies_validation + meta: + owner: "hello, world" + freshness: + warn_after: + count: 1 + period: minute + loaded_at_field: updated_at + tests: + - elementary.all_columns_anomalies: + tags: ["elementary_source"] + columns: + - name: null_count_int + tests: + - generic_test_on_column: + tags: ["regular_tests"] + - name: users_per_day_weekly_seasonal_validation + +exposures: + - name: elementary_exposure + type: application + maturity: medium + url: https://elementary.not.really + description: > + Keep calm, Elementary tests exposures. + depends_on: + - ref('error_model') + - source('training', 'any_type_column_anomalies_training') + owner: + name: Complete Nonsense + email: fake@fakerson.com + tags: + - marketing + + - name: weekly_jaffle_metrics + type: dashboard + maturity: high + url: https://bi.tool/dashboards/1 + description: > + Did someone say "exponential growth"? 
+ depends_on: + - ref('string_column_anomalies') + - ref('numeric_column_anomalies') + owner: + name: Claire from Data + email: data@jaffleshop.com + tags: + - hack + - the + - planet + meta: + platform: Tableau + workbook: By the Week + path: ByTheWeek/Jaffles + + - name: monthly_jaffle_metrics + type: dashboard + maturity: high + url: https://bi.tool/dashboards/2 + description: > + Did someone say "exponential growth"? + depends_on: + - ref('string_column_anomalies') + - ref('numeric_column_anomalies') + owner: + name: Claire from Data + email: data@jaffleshop.com + tags: + - hack + - the + - planet + meta: + platform: Looker + workbook: By the Month + path: ByTheMonth/Jaffles + + - name: daily_jaffle_metrics + type: dashboard + maturity: high + url: https://bi.tool/dashboards/3 + description: > + Did someone say "exponential growth"? + depends_on: + - ref('string_column_anomalies') + - ref('numeric_column_anomalies') + owner: + name: Claire from Data + email: data@jaffleshop.com + tags: + - hack + - the + - planet + meta: + platform: bi.tool + workbook: By the Day + path: ByTheDay/Jaffles diff --git a/tests/e2e_dbt_project/models/stats_players.sql b/tests/e2e_dbt_project/models/stats_players.sql new file mode 100644 index 000000000..72ed8c603 --- /dev/null +++ b/tests/e2e_dbt_project/models/stats_players.sql @@ -0,0 +1 @@ +select * from {{ ref('stats_players_validation') }} diff --git a/tests/e2e_dbt_project/models/stats_team.sql b/tests/e2e_dbt_project/models/stats_team.sql new file mode 100644 index 000000000..10dc3cb20 --- /dev/null +++ b/tests/e2e_dbt_project/models/stats_team.sql @@ -0,0 +1 @@ +select * from {{ ref('stats_team_validation') }} diff --git a/tests/e2e_dbt_project/models/string_column_anomalies.sql b/tests/e2e_dbt_project/models/string_column_anomalies.sql new file mode 100644 index 000000000..29e8e43fe --- /dev/null +++ b/tests/e2e_dbt_project/models/string_column_anomalies.sql @@ -0,0 +1,27 @@ +with training as ( + select * from {{ 
ref('string_column_anomalies_training') }} +), + +validation as ( + select * from {{ ref('string_column_anomalies_validation') }} +), + +source as ( + select * from training + union all + select * from validation +), + +final as ( + select + updated_at, + occurred_at, + min_length, + max_length, + average_length, + missing_count, + missing_percent + from source +) + +select * from final diff --git a/tests/e2e_dbt_project/models/test_alerts_union.sql b/tests/e2e_dbt_project/models/test_alerts_union.sql new file mode 100644 index 000000000..0567ab927 --- /dev/null +++ b/tests/e2e_dbt_project/models/test_alerts_union.sql @@ -0,0 +1,18 @@ +with dbt as ( + select * from {{ ref('alerts_dbt_tests') }} +), +{%- if target.type != 'databricks' %} +schema_changes as ( + select * from {{ ref('alerts_schema_changes') }} +), +{%- endif %} +anomalies as ( + select * from {{ ref('alerts_anomaly_detection') }} +) +select * from dbt +union all +select * from anomalies +{%- if target.type != 'databricks' %} +union all +select * from schema_changes +{%- endif %} diff --git a/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql b/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql new file mode 100644 index 000000000..f988e4e53 --- /dev/null +++ b/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql @@ -0,0 +1,22 @@ +with training as ( + select * from {{ source('training', 'users_per_day_weekly_seasonal_training') }} +), + +validation as ( + select * from {{ source('validation', 'users_per_day_weekly_seasonal_validation') }} +), + +source as ( + select * from training + union all + select * from validation +), + + final as ( + select + updated_at, + user_id + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql b/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql new file mode 100644 index 000000000..82f550e13 --- /dev/null +++ 
b/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql @@ -0,0 +1,22 @@ +with training as ( + select * from {{ source('training', 'users_per_hour_daily_seasonal_training') }} +), + +validation as ( + select * from {{ source('validation', 'users_per_hour_daily_seasonal_validation') }} +), + +source as ( + select * from training + union all + select * from validation +), + + final as ( + select + updated_at, + user_id + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/packages.yml b/tests/e2e_dbt_project/packages.yml new file mode 100644 index 000000000..ea515e82a --- /dev/null +++ b/tests/e2e_dbt_project/packages.yml @@ -0,0 +1,4 @@ +packages: + - local: "{{ env_var('ELEMENTARY_DBT_PACKAGE_PATH') }}" + - package: dbt-labs/dbt_utils + version: [">=0.8.0", "<2.0.0"] diff --git a/tests/e2e_dbt_project/snapshots/failed_snapshot.sql b/tests/e2e_dbt_project/snapshots/failed_snapshot.sql new file mode 100644 index 000000000..0c92e6afa --- /dev/null +++ b/tests/e2e_dbt_project/snapshots/failed_snapshot.sql @@ -0,0 +1,12 @@ +{% snapshot failed_snapshot() %} + +{{ + config( + target_schema='snapshots', + unique_key='unique_id', + strategy='timestamp', + updated_at='generated_at', + ) +}} + SELECT FAILED_SNAPSHOT +{% endsnapshot %} diff --git a/tests/e2e_dbt_project/tests/singular_test_with_no_ref.sql b/tests/e2e_dbt_project/tests/singular_test_with_no_ref.sql new file mode 100644 index 000000000..f7dfdc589 --- /dev/null +++ b/tests/e2e_dbt_project/tests/singular_test_with_no_ref.sql @@ -0,0 +1,2 @@ +{% set relation = api.Relation.create(database=elementary.target_database(), schema=target.schema, identifier='numeric_column_anomalies') %} +select min from {{ relation }} where min < 100 diff --git a/tests/e2e_dbt_project/tests/singular_test_with_one_ref.sql b/tests/e2e_dbt_project/tests/singular_test_with_one_ref.sql new file mode 100644 index 000000000..22931a7fb --- /dev/null +++ b/tests/e2e_dbt_project/tests/singular_test_with_one_ref.sql 
@@ -0,0 +1 @@ +select min from {{ ref('numeric_column_anomalies') }} where min < 100 \ No newline at end of file diff --git a/tests/e2e_dbt_project/tests/singular_test_with_source_ref.sql b/tests/e2e_dbt_project/tests/singular_test_with_source_ref.sql new file mode 100644 index 000000000..2d61ce9a1 --- /dev/null +++ b/tests/e2e_dbt_project/tests/singular_test_with_source_ref.sql @@ -0,0 +1 @@ +select min from {{ source('training', 'numeric_column_anomalies_training') }} where min < 105 \ No newline at end of file diff --git a/tests/e2e_dbt_project/tests/singular_test_with_two_refs.sql b/tests/e2e_dbt_project/tests/singular_test_with_two_refs.sql new file mode 100644 index 000000000..2524955af --- /dev/null +++ b/tests/e2e_dbt_project/tests/singular_test_with_two_refs.sql @@ -0,0 +1,15 @@ +with min_len_issues as ( + select null_count_int as min_issue from {{ ref('any_type_column_anomalies') }} where null_count_int < 100 +), + +min_issues as ( + select min as min_issue from {{ ref('numeric_column_anomalies') }} where min < 100 +), + +all_issues as ( + select * from min_len_issues + union all + select * from min_issues +) + +select * from all_issues