From b5d8e089ffa9cbf1788ac22f4f19d011c2729489 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 13:54:11 +0300 Subject: [PATCH 01/11] add the former deprecated tests project from the package to the CLI as the e2e project --- .gitignore | 2 +- tests/e2e_dbt_project/README.md | 2 + .../any_type_column_anomalies_training.csv | 0 ...ackfill_days_column_anomalies_training.csv | 0 .../training/dimension_anomalies_training.csv | 0 .../data/training/groups_training.csv | 5 + .../numeric_column_anomalies_training.csv | 0 .../data/training/stats_players_training.csv | 180 +++++ .../data/training/stats_team_training.csv | 17 + .../string_column_anomalies_training.csv | 0 .../any_type_column_anomalies_validation.csv | 0 ...kfill_days_column_anomalies_validation.csv | 0 .../dimension_anomalies_validation.csv | 0 .../data/validation/groups_validation.csv | 5 + .../numeric_column_anomalies_validation.csv | 0 .../validation/stats_players_validation.csv | 2 + .../data/validation/stats_team_validation.csv | 17 + .../string_column_anomalies_validation.csv | 0 tests/e2e_dbt_project/dbt_project.yml | 29 + tests/e2e_dbt_project/debug.sh | 5 + tests/e2e_dbt_project/generate_data.py | 456 +++++++++++ tests/e2e_dbt_project/macros/asserts.sql | 19 + .../macros/e2e_tests/clear_tests.sql | 39 + .../macros/e2e_tests/test_config_levels.sql | 53 ++ .../macros/e2e_tests/tests_validation.sql | 163 ++++ .../e2e_tests/utils/list_assertions.sql | 65 ++ .../e2e_tests/utils/table_assertions.sql | 19 + .../e2e_tests/validate_backfill_days.sql | 10 + .../e2e_tests/validate_column_anomalies.sql | 79 ++ .../e2e_tests/validate_config_levels.sql | 18 + .../validate_dimensions_anomalies.sql | 41 + .../validate_directional_anomalies.sql | 40 + .../validate_freshness_anomalies.sql | 14 + .../e2e_tests/validate_schema_changes.sql | 61 ++ .../validate_seasonal_volume_anomalies.sql | 16 + .../e2e_tests/validate_table_anomalies.sql | 25 + .../generic_tests/generic_test_on_column.sql | 7 + 
.../generic_tests/generic_test_on_model.sql | 7 + tests/e2e_dbt_project/macros/system/dbg.sql | 3 + .../macros/system/generate_schema_name.sql | 12 + .../macros/system/materializations.sql | 7 + .../macros/system/read_table.sql | 18 + .../macros/system/return_config_var.sql | 6 + ...fic_macros_have_default_implementation.sql | 14 + .../models/any_type_column_anomalies.sql | 36 + .../models/backfill_days_column_anomalies.sql | 29 + .../models/config_levels_project.sql | 1 + .../models/config_levels_test_and_model.sql | 1 + .../models/copy_numeric_column_anomalies.sql | 1 + .../models/dimension_anomalies.sql | 30 + .../models/ephemeral_model.sql | 7 + tests/e2e_dbt_project/models/error_model.sql | 1 + tests/e2e_dbt_project/models/groups.sql | 5 + .../models/nested/models/tree/nested.sql | 1 + .../models/no_timestamp_anomalies.sql | 36 + .../e2e_dbt_project/models/non_dbt_model.sql | 3 + .../models/numeric_column_anomalies.sql | 36 + tests/e2e_dbt_project/models/one.sql | 1 + tests/e2e_dbt_project/models/schema.yml | 734 ++++++++++++++++++ .../e2e_dbt_project/models/stats_players.sql | 5 + tests/e2e_dbt_project/models/stats_team.sql | 5 + .../models/string_column_anomalies.sql | 33 + .../models/test_alerts_union.sql | 18 + .../models/users_per_day_weekly_seasonal.sql | 28 + .../models/users_per_hour_daily_seasonal.sql | 28 + tests/e2e_dbt_project/packages.yml | 10 + .../snapshots/failed_snapshot.sql | 12 + .../tests/singular_test_with_no_ref.sql | 2 + .../tests/singular_test_with_one_ref.sql | 1 + .../tests/singular_test_with_source_ref.sql | 1 + .../tests/singular_test_with_two_refs.sql | 15 + 71 files changed, 2535 insertions(+), 1 deletion(-) create mode 100644 tests/e2e_dbt_project/README.md create mode 100644 tests/e2e_dbt_project/data/training/any_type_column_anomalies_training.csv create mode 100644 tests/e2e_dbt_project/data/training/backfill_days_column_anomalies_training.csv create mode 100644 
tests/e2e_dbt_project/data/training/dimension_anomalies_training.csv create mode 100644 tests/e2e_dbt_project/data/training/groups_training.csv create mode 100644 tests/e2e_dbt_project/data/training/numeric_column_anomalies_training.csv create mode 100644 tests/e2e_dbt_project/data/training/stats_players_training.csv create mode 100644 tests/e2e_dbt_project/data/training/stats_team_training.csv create mode 100644 tests/e2e_dbt_project/data/training/string_column_anomalies_training.csv create mode 100644 tests/e2e_dbt_project/data/validation/any_type_column_anomalies_validation.csv create mode 100644 tests/e2e_dbt_project/data/validation/backfill_days_column_anomalies_validation.csv create mode 100644 tests/e2e_dbt_project/data/validation/dimension_anomalies_validation.csv create mode 100644 tests/e2e_dbt_project/data/validation/groups_validation.csv create mode 100644 tests/e2e_dbt_project/data/validation/numeric_column_anomalies_validation.csv create mode 100644 tests/e2e_dbt_project/data/validation/stats_players_validation.csv create mode 100644 tests/e2e_dbt_project/data/validation/stats_team_validation.csv create mode 100644 tests/e2e_dbt_project/data/validation/string_column_anomalies_validation.csv create mode 100644 tests/e2e_dbt_project/dbt_project.yml create mode 100644 tests/e2e_dbt_project/debug.sh create mode 100644 tests/e2e_dbt_project/generate_data.py create mode 100644 tests/e2e_dbt_project/macros/asserts.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/clear_tests.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/test_config_levels.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/tests_validation.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/utils/list_assertions.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/utils/table_assertions.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_backfill_days.sql create mode 100644 
tests/e2e_dbt_project/macros/e2e_tests/validate_column_anomalies.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_config_levels.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_dimensions_anomalies.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_directional_anomalies.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_freshness_anomalies.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_schema_changes.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_seasonal_volume_anomalies.sql create mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_table_anomalies.sql create mode 100644 tests/e2e_dbt_project/macros/generic_tests/generic_test_on_column.sql create mode 100644 tests/e2e_dbt_project/macros/generic_tests/generic_test_on_model.sql create mode 100644 tests/e2e_dbt_project/macros/system/dbg.sql create mode 100644 tests/e2e_dbt_project/macros/system/generate_schema_name.sql create mode 100644 tests/e2e_dbt_project/macros/system/materializations.sql create mode 100644 tests/e2e_dbt_project/macros/system/read_table.sql create mode 100644 tests/e2e_dbt_project/macros/system/return_config_var.sql create mode 100644 tests/e2e_dbt_project/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql create mode 100644 tests/e2e_dbt_project/models/any_type_column_anomalies.sql create mode 100644 tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql create mode 100644 tests/e2e_dbt_project/models/config_levels_project.sql create mode 100644 tests/e2e_dbt_project/models/config_levels_test_and_model.sql create mode 100644 tests/e2e_dbt_project/models/copy_numeric_column_anomalies.sql create mode 100644 tests/e2e_dbt_project/models/dimension_anomalies.sql create mode 100644 tests/e2e_dbt_project/models/ephemeral_model.sql create mode 100644 tests/e2e_dbt_project/models/error_model.sql create mode 100644 
tests/e2e_dbt_project/models/groups.sql create mode 100644 tests/e2e_dbt_project/models/nested/models/tree/nested.sql create mode 100644 tests/e2e_dbt_project/models/no_timestamp_anomalies.sql create mode 100644 tests/e2e_dbt_project/models/non_dbt_model.sql create mode 100644 tests/e2e_dbt_project/models/numeric_column_anomalies.sql create mode 100644 tests/e2e_dbt_project/models/one.sql create mode 100644 tests/e2e_dbt_project/models/schema.yml create mode 100644 tests/e2e_dbt_project/models/stats_players.sql create mode 100644 tests/e2e_dbt_project/models/stats_team.sql create mode 100644 tests/e2e_dbt_project/models/string_column_anomalies.sql create mode 100644 tests/e2e_dbt_project/models/test_alerts_union.sql create mode 100644 tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql create mode 100644 tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql create mode 100644 tests/e2e_dbt_project/packages.yml create mode 100644 tests/e2e_dbt_project/snapshots/failed_snapshot.sql create mode 100644 tests/e2e_dbt_project/tests/singular_test_with_no_ref.sql create mode 100644 tests/e2e_dbt_project/tests/singular_test_with_one_ref.sql create mode 100644 tests/e2e_dbt_project/tests/singular_test_with_source_ref.sql create mode 100644 tests/e2e_dbt_project/tests/singular_test_with_two_refs.sql diff --git a/.gitignore b/.gitignore index d37765858..30d4f2d51 100644 --- a/.gitignore +++ b/.gitignore @@ -96,4 +96,4 @@ venv/ # elementary outputs edr_target/ -tests/tests_with_db/dbt_project/dbt_packages/ +**/dbt_packages/ diff --git a/tests/e2e_dbt_project/README.md b/tests/e2e_dbt_project/README.md new file mode 100644 index 000000000..353d624a2 --- /dev/null +++ b/tests/e2e_dbt_project/README.md @@ -0,0 +1,2 @@ +This dbt project is intended for our CLI e2e. +The "edr report" and "edr monitor" commands in `test-warehouse.yml` will run on the data generated by this project. 
diff --git a/tests/e2e_dbt_project/data/training/any_type_column_anomalies_training.csv b/tests/e2e_dbt_project/data/training/any_type_column_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/training/backfill_days_column_anomalies_training.csv b/tests/e2e_dbt_project/data/training/backfill_days_column_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/training/dimension_anomalies_training.csv b/tests/e2e_dbt_project/data/training/dimension_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/training/groups_training.csv b/tests/e2e_dbt_project/data/training/groups_training.csv new file mode 100644 index 000000000..ddef977cd --- /dev/null +++ b/tests/e2e_dbt_project/data/training/groups_training.csv @@ -0,0 +1,5 @@ +group_a,group_b,group_c,group_d +Poland,Netherlands,Spain,Ukraine +Greece,Denmark,Italy,Sweden +Russia,Germany,Ireland,France +Czech Republic,Portugal,Croatia,England diff --git a/tests/e2e_dbt_project/data/training/numeric_column_anomalies_training.csv b/tests/e2e_dbt_project/data/training/numeric_column_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/training/stats_players_training.csv b/tests/e2e_dbt_project/data/training/stats_players_training.csv new file mode 100644 index 000000000..906cf0c95 --- /dev/null +++ b/tests/e2e_dbt_project/data/training/stats_players_training.csv @@ -0,0 +1,180 @@ +Player,Team,Goals,shots_on_target,Passes,Shots_without_scoring,Crosses,Tackles,Interceptions,Goals_conceded,Saves_made,Offsides,Yellow_cards +Darijo Srna,Croatia,,,,,16,13,,,,, +Gordon Schildenfeld,Croatia,,,,,,,,,,, +Ivan Perisic,Croatia,,,,,,,,,,, +Ivan Rakitic,Croatia,,,,,,,,,,, +Ivan Strinic,Croatia,,,,,14,,,,,, +Luka Modric,Croatia,,,,,,,,,,, +Mario Mandzukic,Croatia,3,4,,,,,,,,, +Nikica Jelavic,Croatia,,,,,,,,,,, 
+Stipe Pletikosa,Croatia,,,,,,,,3,13,, +Vedran Corluka,Croatia,,,,,,,,,,, +David Limbersky,Czech Republic,,,,,,,,,,,2 +Jaroslav Plasil,Czech Republic,,,,,,,,,,, +Michal Kadlec,Czech Republic,,,,,,,,,,, +Milan Baros,Czech Republic,,,,,,,,,,3, +Petr Cech,Czech Republic,,,,,,,,6,9,, +Petr Jiracek,Czech Republic,2,,,,,,,,,, +Theodor Gebre Selassie,Czech Republic,,,,,,,,,,, +Tomás Hübschman,Czech Republic,,,,,,,12,,,, +Tomas Pekhart,Czech Republic,,,,,,,,,,, +Tomas Rosicky,Czech Republic,,,,,,,,,,, +Tomas Sivok,Czech Republic,,,,,,,,,,, +Vaclav Pilar,Czech Republic,2,4,,,,,,,,, +Daniel Agger,Denmark,,,,,,,21,,,, +Dennis Rommedahl,Denmark,,,,,,,,,,3, +Jakob Poulsen,Denmark,,,,,,,,,,, +Michael Krohn-Dehli,Denmark,2,,,,,,,,,, +Nicklas Bendtner,Denmark,2,,,,,,,,,, +Niki Zimling,Denmark,,,,,,,,,,, +Simon Kjaer,Denmark,,,,,,,,,,, +Stephan Andersen,Denmark,,,,,,,,5,10,, +Tobias Mikkelsen,Denmark,,,,,,,,,,, +William Kvist,Denmark,,,,,,,12,,,, +Ashley Young,England,,,,,,,,,,, +Danny Welbeck,England,,,,,,,,,,, +Glen Johnson,England,,,,,,,,,,, +James Milner,England,,,,,17,,,,,, +Joe Hart,England,,,,,,,,3,22,, +John Terry,England,,,,,,,14,,,, +Joleon Lescott,England,,,,,,,,,,, +Scott Parker,England,,,,,,13,,,,, +Steven Gerrard,England,,,,,,18,,,,, +Theo Walcott,England,,,,,,,,,,, +Wayne Rooney,England,,,,,,,,,,, +Alou Diarra,France,,,,,,15,,,,, +Franck Ribéry,France,,,,6,14,,,,,, +Hugo Lloris,France,,,,,,,,5,6,, +Jérémy Menez,France,,4,,,,,,,,,2 +Karim Benzema,France,,6,,19,,,,,,3, +Mathieu Debuchy,France,,,,,,,,,,, +Olivier Giroud,France,,,,,,,,,,, +Patrice Evra,France,,,,,,,,,,, +Philippe Mexes,France,,,,,,,,,,,2 +Samir Nasri,France,,,,,,,,,,, +Yann M'Vila,France,,,,,,,,,,, +Yohan Cabaye,France,,,,,,,,,,, +Andre Schürrle,Germany,,,,7,,,,,,, +Bastian Schweinsteiger,Germany,,,415,,,13,,,,, +Holger Badstuber,Germany,,,,,,,,,,, +Jerome Boateng,Germany,,,,,17,,,,,, +Manuel Neuer,Germany,,,,,,,,6,10,, +Marco Reus,Germany,,4,,,,,,,,, +Mario Gomez,Germany,3,,,,,,,,,3, +Mats 
Hummels,Germany,,,,,,18,19,,,, +Mesut Özil,Germany,,5,319,,17,,,,,, +Miroslav Klose,Germany,,,,,,,,,,5, +Philipp Lahm,Germany,,,316,,,,,,,, +Sami Khedira,Germany,,,310,,,,,,,, +Thomas Müller,Germany,,,,,14,,,,,, +Toni Kroos,Germany,,,,,,,,,,, +Dimitris Salpingidis,Greece,2,,,,,,,,,5, +Georgios Samaras,Greece,,,,,,,,,,, +Georgios Tzavellas,Greece,,,,,,,,,,, +Giorgios Karagounis,Greece,,,,,,,,,,,2 +Jose Holebas,Greece,,,,,,,,,,,2 +Konstantinos Chalkias,Greece,,,,,,,,3,,, +Konstantinos Katsouranis,Greece,,,,,,,12,,,, +Kyriakos Papadopoulos,Greece,,,,,,,,,,, +Michalis Sifakis,Greece,,,,,,,,4,11,, +Sokratis Papastathopoulos,Greece,,,,,,,,,,, +Theofanis Gekas,Greece,,,,,,,,,,3, +Vassilas Torosidis,Greece,,,,,,,12,,,, +Alessandro Diamanti,Italy,,,,8,,,,,,, +Andrea Pirlo,Italy,,,423,,,,13,,,, +Antonio Cassano,Italy,,7,,,14,,,,,3, +Antonio Di Natale,Italy,,,,,,,,,,6, +Christian Maggio,Italy,,,,,,,,,,,2 +Claudio Marchisio,Italy,,4,,11,,14,,,,, +Daniele De Rossi,Italy,,,345,9,,,22,,,, +Gianluigi Buffon,Italy,,,,,,,,7,20,, +Giorgio Chiellini,Italy,,,,,,,16,,,, +Leonardo Bonucci,Italy,,,,,,,15,,,, +Mario Balotelli,Italy,3,10,,,,,,,,, +Riccardo Montolivo,Italy,,,,7,,,13,,,, +Thiago Motta,Italy,,,,,,,,,,,2 +Arjen Robben,Netherlands,,,,11,,,,,,, +Ibrahim Afellay,Netherlands,,,,,,,,,,, +Jetro Willems,Netherlands,,,,,,,,,,,2 +Johnny Heitinga,Netherlands,,,,,,,,,,, +Joris Mathijsen,Netherlands,,,,,,,,,,, +Maarten Stekelenburg,Netherlands,,,,,,,,5,12,, +Mark van Bommel,Netherlands,,,,,,,,,,, +Nigel de Jong,Netherlands,,,,,,,,,,, +Rafael van der Vaart,Netherlands,,,,,,,,,,, +Robin van Persie,Netherlands,,5,,,,,,,,, +Ron Vlaar,Netherlands,,,,,,,,,,, +Wesley Sneijder,Netherlands,,,,8,17,,,,,, +Damien Perquis,Poland,,,,,,,,,,, +Eugen Polanski,Poland,,,,,,16,,,,,2 +Jakub Blaszczykowski,Poland,,,,,,,,,,, +Ludovic Obraniak,Poland,,,,,19,,,,,, +Marcin Wasilewski,Poland,,,,,,,,,,, +Przemyslaw Tyton,Poland,,,,,,,,,6,, +Robert Lewandowski,Poland,,,,,,,,,,, +Wojciech Szczesny,Poland,,,,,,,,,,, 
+Bruno Alves,Portugal,,,,,,,,,,, +Cristiano Ronaldo,Portugal,3,9,,,,,,,,, +Fábio Coentrão,Portugal,,,,,18,14,,,,,2 +Helder Postiga,Portugal,,,,,,,,,,3, +Hugo Almeida,Portugal,,,,,,,,,,5, +João Moutinho,Portugal,,,,,,,15,,,, +João Pereira,Portugal,,,,,,17,,,,,2 +Miguel Veloso,Portugal,,,,,,,,,,,2 +Nani,Portugal,,,,9,19,,,,,, +Nélson Oliveira,Portugal,,,,,,,,,,, +Pepe,Portugal,,,,,,,16,,,, +Raul Meireles,Portugal,,,,,,,11,,,, +Rolando,Portugal,,,,,,,,,,, +Rui Patricio,Portugal,,,,,,,,4,10,, +Silvestre Varela,Portugal,,,,,,,,,,, +Damien Duff,Republic of Ireland,,,,,16,,,,,, +Glenn Whelan,Republic of Ireland,,,,,,,,,,, +Jonathan Walters,Republic of Ireland,,,,,,,,,,, +Keiren Westwood,Republic of Ireland,,,,,,,,2,,, +Keith Andrews,Republic of Ireland,,,,12,,,,,,, +Richard Dunne,Republic of Ireland,,,,,,,,,,, +Robbie Keane,Republic of Ireland,,,,,,,,,,7, +Sean St Ledger,Republic of Ireland,,,,,,,,,,,2 +Shay Given,Republic of Ireland,,,,,,,,9,17,, +Alan Dzagoev,Russia,3,,,,,,,,,, +Alexander Kerzhakov,Russia,,,,14,,,,,,, +Alexei Berezoutski,Russia,,,,,,,,,,, +Andrey Arshavin,Russia,,,,,15,,,,,, +Igor Denisov,Russia,,,303,,,,,,,, +Konstantin Zyryanov,Russia,,,,,,,,,,, +Roman Pavlyuchenko,Russia,,,,,,,,,,, +Roman Shirokov,Russia,,,,,,,,,,, +Sergei Ignashevitch,Russia,,,,,,,,,,, +Vyacheslav Malafeev,Russia,,,,,,,,3,10,, +Yuri Zhirkov,Russia,,,,,,,,,,, +Álvaro Arbeloa,Spain,,,319,,,20,,,,3, +Andrés Iniesta,Spain,,9,446,20,,,,,,, +David Silva,Spain,2,8,283,,,,,,,3, +Fernando Torres,Spain,3,5,,,,,,,,5, +Francesc Fábregas,Spain,2,,,,,,,,,, +Gerard Piqué,Spain,,,312,,,19,,,,, +Iker Casillas,Spain,,,,,,,,,15,, +Jesús Navas,Spain,,,,,16,,,,,, +Jordi Alba,Spain,,,410,,,,,,,, +Pedro,Spain,,,,,,,,,,, +Sergio Busquets,Spain,,,455,,,16,18,,,, +Sergio Ramos,Spain,,,394,7,,16,,,,,2 +Xabi Alonso,Spain,2,,585,,,16,,,,, +Xavi,Spain,,,592,12,13,,,,,, +Anders Svensson,Sweden,,,,,,,,,,,2 +Andreas Isaksson,Sweden,,,,,,,,5,8,, +Christian Wilhelmsson,Sweden,,,,,,,,,,, +Kim 
Källström,Sweden,,,,,,,,,,, +Ola Toivonen,Sweden,,,,,,,,,,3, +Olof Mellberg,Sweden,,,,,,,,,,, +Samuel Holmén,Sweden,,,,,,,,,,, +Sebastian Larsson,Sweden,,4,,,,,,,,, +Zlatan Ibrahimovic,Sweden,2,7,,,,,,,,, +Anatoliy Tymoshchuk,Ukraine,,,,,,14,,,,,2 +Andriy Pyatov,Ukraine,,,,,,,,4,13,, +Andriy Shevchenko,Ukraine,2,,,,,,,,,, +Andriy Yarmolenko,Ukraine,,,,,,,,,,, +Yevgen Selin,Ukraine,,,,,,,,,,, +Yevhen Khacheridi,Ukraine,,,,,,,,,,, +Yevhen Konoplyanka,Ukraine,,,,13,,,,,,, diff --git a/tests/e2e_dbt_project/data/training/stats_team_training.csv b/tests/e2e_dbt_project/data/training/stats_team_training.csv new file mode 100644 index 000000000..0413c75d5 --- /dev/null +++ b/tests/e2e_dbt_project/data/training/stats_team_training.csv @@ -0,0 +1,17 @@ +Team,Goals,Shots_on_target,Shots_off_target,Total_shots,Hit_Woodwork,Penalty_goals,Penalties_not_scored,Headed_goals,Passes,Passes_completed,Touches,Crosses,Dribbles,Corners_Taken,Tackles,Clearances,Interceptions,Clearances_off_line,Clean_Sheets,Blocks,Goals_conceded,Saves_made,Fouls_Won,Fouls_Conceded,Offsides,Yellow_Cards,Red_Cards,Subs_on,Subs_off,Players_Used +Croatia,4,13,12,32,0,0,0,2,1076,828,1706,60,42,14,49,83,56,,0,10,3,13,41,62,2,9,0,9,9,16 +Czech Republic,4,13,18,39,0,0,0,0,1565,1223,2358,46,68,21,62,98,37,2,1,10,6,9,53,73,8,7,0,11,11,19 +Denmark,4,10,10,27,1,0,0,3,1298,1082,1873,43,32,16,40,61,59,0,1,10,5,10,25,38,8,4,0,7,7,15 +England,5,11,18,40,0,0,0,3,1488,1200,2440,58,60,16,86,106,72,1,2,29,3,22,43,45,6,5,0,11,11,16 +France,3,22,24,65,1,0,0,0,2066,1803,2909,55,76,28,71,76,58,0,1,7,5,6,36,51,5,6,0,11,11,19 +Germany,10,32,32,80,2,1,0,2,2774,2427,3761,101,60,35,91,73,69,0,1,11,6,10,63,49,12,4,0,15,15,17 +Greece,5,8,18,32,1,1,1,0,1187,911,2016,52,53,10,65,123,87,0,1,23,7,13,67,48,12,9,1,12,12,20 +Italy,6,34,45,110,2,0,0,2,3016,2531,4363,75,75,30,98,137,136,1,2,18,7,20,101,89,16,16,0,18,18,19 +Netherlands,2,12,36,60,2,0,0,0,1556,1381,2163,50,49,22,34,41,41,0,0,9,5,12,35,30,3,5,0,7,7,15 
+Poland,2,15,23,48,0,0,0,1,1059,852,1724,55,39,14,67,87,62,0,0,8,3,6,48,56,3,7,1,7,7,17 +Portugal,6,22,42,82,6,0,0,2,1891,1461,2958,91,64,41,78,92,86,0,2,11,4,10,73,90,10,12,0,14,14,16 +Republic of Ireland,1,7,12,28,0,0,0,1,851,606,1433,43,18,8,45,78,43,1,0,23,9,17,43,51,11,6,1,10,10,17 +Russia,5,9,31,59,2,0,0,1,1602,1345,2278,40,40,21,65,74,58,0,0,8,3,10,34,43,4,6,0,7,7,16 +Spain,12,42,33,100,0,1,0,2,4317,3820,5585,69,106,44,122,102,79,0,5,8,1,15,102,83,19,11,0,17,17,18 +Sweden,5,17,19,39,3,0,0,1,1192,965,1806,44,29,7,56,54,45,0,1,12,5,8,35,51,7,7,0,9,9,18 +Ukraine,2,7,26,38,0,0,0,2,1276,1043,1894,33,26,18,65,97,29,0,0,4,4,13,48,31,4,5,0,9,9,18 diff --git a/tests/e2e_dbt_project/data/training/string_column_anomalies_training.csv b/tests/e2e_dbt_project/data/training/string_column_anomalies_training.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/any_type_column_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/any_type_column_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/backfill_days_column_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/backfill_days_column_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/dimension_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/dimension_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/groups_validation.csv b/tests/e2e_dbt_project/data/validation/groups_validation.csv new file mode 100644 index 000000000..1bf171a69 --- /dev/null +++ b/tests/e2e_dbt_project/data/validation/groups_validation.csv @@ -0,0 +1,5 @@ +group_b,group_c,group_d +Netherlands,Spain,Ukraine +Denmark,Italy,Sweden +Germany,Ireland,France +Portugal,Croatia,England diff --git 
a/tests/e2e_dbt_project/data/validation/numeric_column_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/numeric_column_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/data/validation/stats_players_validation.csv b/tests/e2e_dbt_project/data/validation/stats_players_validation.csv new file mode 100644 index 000000000..53806a69e --- /dev/null +++ b/tests/e2e_dbt_project/data/validation/stats_players_validation.csv @@ -0,0 +1,2 @@ +key_crosses,red_cards,Player,Team,Goals,shots_on_target,Passes,Shots_without_scoring,Crosses,Tackles,Interceptions,Goals_conceded,Saves_made,Yellow_cards +bla,bla,Darijo Srna,Croatia,2,4,5,7,16,13,4,4,5,1 diff --git a/tests/e2e_dbt_project/data/validation/stats_team_validation.csv b/tests/e2e_dbt_project/data/validation/stats_team_validation.csv new file mode 100644 index 000000000..9d8dea2ff --- /dev/null +++ b/tests/e2e_dbt_project/data/validation/stats_team_validation.csv @@ -0,0 +1,17 @@ +Team,Goals,Shots_on_target,Shots_off_target,Total_shots,Hit_Woodwork,Penalty_goals,Penalties_not_scored,Headed_goals,Passes,Passes_completed,Touches,Crosses,Dribbles,Corners_Taken,Tackles,Clearances,Interceptions,Clearances_off_line,Clean_Sheets,Blocks,Goals_conceded,Saves_made,Fouls_Won,Fouls_Conceded,Offsides,Yellow_Cards,Red_Cards,Subs_on,Subs_off,Players_Used +Croatia,bla,13,12,32,0,0,0,2,1076,828,1706,60,42,14,49,83,56,,0,10,3,13,41,62,2,9,0,9,9,16 +Czech,bla,13,18,39,0,0,0,0,1565,1223,2358,46,68,21,62,98,37,2,1,10,6,9,53,73,8,7,0,11,11,19 +Denmark,bla,10,10,27,1,0,0,3,1298,1082,1873,43,32,16,40,61,59,0,1,10,5,10,25,38,8,4,0,7,7,15 +England,bla,11,18,40,0,0,0,3,1488,1200,2440,58,60,16,86,106,72,1,2,29,3,22,43,45,6,5,0,11,11,16 +France,bla,22,24,65,1,0,0,0,2066,1803,2909,55,76,28,71,76,58,0,1,7,5,6,36,51,5,6,0,11,11,19 +Germany,bla0,32,32,80,2,1,0,2,2774,2427,3761,101,60,35,91,73,69,0,1,11,6,10,63,49,12,4,0,15,15,17 
+Greece,bla,8,18,32,1,1,1,0,1187,911,2016,52,53,10,65,123,87,0,1,23,7,13,67,48,12,9,1,12,12,20 +Italy,bla,34,45,110,2,0,0,2,3016,2531,4363,75,75,30,98,137,136,1,2,18,7,20,101,89,16,16,0,18,18,19 +Netherlands,bla,12,36,60,2,0,0,0,1556,1381,2163,50,49,22,34,41,41,0,0,9,5,12,35,30,3,5,0,7,7,15 +Poland,bla,15,23,48,0,0,0,1,1059,852,1724,55,39,14,67,87,62,0,0,8,3,6,48,56,3,7,1,7,7,17 +Portugal,bla,22,42,82,6,0,0,2,1891,1461,2958,91,64,41,78,92,86,0,2,11,4,10,73,90,10,12,0,14,14,16 +Republic,bla,7,12,28,0,0,0,1,851,606,1433,43,18,8,45,78,43,1,0,23,9,17,43,51,11,6,1,10,10,17 +Russia,bla,9,31,59,2,0,0,1,1602,1345,2278,40,40,21,65,74,58,0,0,8,3,10,34,43,4,6,0,7,7,16 +Spain,bla2,42,33,100,0,1,0,2,4317,3820,5585,69,106,44,122,102,79,0,5,8,1,15,102,83,19,11,0,17,17,18 +Sweden,bla,17,19,39,3,0,0,1,1192,965,1806,44,29,7,56,54,45,0,1,12,5,8,35,51,7,7,0,9,9,18 +Ukraine,bla,7,26,38,0,0,0,2,1276,1043,1894,33,26,18,65,97,29,0,0,4,4,13,48,31,4,5,0,9,9,18 diff --git a/tests/e2e_dbt_project/data/validation/string_column_anomalies_validation.csv b/tests/e2e_dbt_project/data/validation/string_column_anomalies_validation.csv new file mode 100644 index 000000000..e69de29bb diff --git a/tests/e2e_dbt_project/dbt_project.yml b/tests/e2e_dbt_project/dbt_project.yml new file mode 100644 index 000000000..4c677dcfb --- /dev/null +++ b/tests/e2e_dbt_project/dbt_project.yml @@ -0,0 +1,29 @@ +name: "elementary_integration_tests" +version: "1.0.0" +config-version: 2 +profile: "elementary_tests" + +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + - "dbt_modules" + +vars: + days_back: 30 + debug_logs: "{{ env_var('DBT_EDR_DEBUG', False) }}" + custom_run_started_at: "{{ modules.datetime.datetime.utcfromtimestamp(0) }}" + +seeds: + 
+schema: test_seeds + +models: + elementary: + +schema: elementary diff --git a/tests/e2e_dbt_project/debug.sh b/tests/e2e_dbt_project/debug.sh new file mode 100644 index 000000000..fa26f9077 --- /dev/null +++ b/tests/e2e_dbt_project/debug.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +export DBT_EDR_DEBUG=1 +export DBT_MACRO_DEBUGGING=1 + diff --git a/tests/e2e_dbt_project/generate_data.py b/tests/e2e_dbt_project/generate_data.py new file mode 100644 index 000000000..e6b3f8faf --- /dev/null +++ b/tests/e2e_dbt_project/generate_data.py @@ -0,0 +1,456 @@ +import csv +import os +import random +import string +from datetime import datetime, timedelta +from pathlib import Path +from typing import List + +FILE_DIR = os.path.dirname(os.path.realpath(__file__)) + +EPOCH = datetime.utcfromtimestamp(0) +DATE_FORMAT = "%Y-%m-%d %H:%M:%S" + + +def generate_fake_data(): + generate_string_anomalies_training_and_validation_files() + generate_numeric_anomalies_training_and_validation_files() + generate_any_type_anomalies_training_and_validation_files() + generate_dimension_anomalies_training_and_validation_files() + generate_backfill_days_training_and_validation_files() + generate_seasonality_volume_anomalies_files() + + +def generate_rows_timestamps(base_date, period="days", count=1, days_back=30): + min_date = base_date - timedelta(days=days_back) + dates = [] + while base_date > min_date: + dates.append(base_date) + base_date = base_date - timedelta(**{period: count}) + return dates + + +def write_rows_to_csv(csv_path, rows, header): + # Creates the csv file directories if needed. 
+ directory_path = Path(csv_path).parent.resolve() + Path(directory_path).mkdir(parents=True, exist_ok=True) + + with open(csv_path, "w") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=header) + writer.writeheader() + writer.writerows(rows) + + +def generate_rows(rows_count_per_day, dates, get_row_callback): + rows = [] + for date in dates: + for i in range(0, rows_count_per_day): + row = get_row_callback(date, i, rows_count_per_day) + rows.append(row) + return rows + + +def generate_string_anomalies_training_and_validation_files(rows_count_per_day=100): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=1)).strftime(DATE_FORMAT), + "min_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 10)) + ), + "max_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 10)) + ), + "average_length": "".join(random.choices(string.ascii_lowercase, k=5)), + "missing_count": "" + if row_index < (3 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "missing_percent": "" + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + } + + def get_validation_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=7)).strftime(DATE_FORMAT), + "min_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(1, 10)) + ), + "max_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 15)) + ), + "average_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 8)) + ), + "missing_count": "" + if row_index < (20 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "missing_percent": "" + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else 
"".join(random.choices(string.ascii_lowercase, k=5)), + } + + string_columns = [ + "updated_at", + "occurred_at", + "min_length", + "max_length", + "average_length", + "missing_count", + "missing_percent", + ] + dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=2)) + training_rows = generate_rows(rows_count_per_day, dates, get_training_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "training", "string_column_anomalies_training.csv" + ), + training_rows, + string_columns, + ) + + validation_date = EPOCH - timedelta(days=1) + validation_rows = generate_rows( + rows_count_per_day, [validation_date], get_validation_row + ) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "validation", "string_column_anomalies_validation.csv" + ), + validation_rows, + string_columns, + ) + + +def generate_numeric_anomalies_training_and_validation_files(rows_count_per_day=200): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=1)).strftime(DATE_FORMAT), + "min": random.randint(100, 200), + "max": random.randint(100, 200), + "zero_count": 0 + if row_index < (3 / 100 * rows_count) + else random.randint(100, 200), + "zero_percent": 0 + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else random.randint(100, 200), + "average": random.randint(99, 101), + "standard_deviation": random.randint(99, 101), + "variance": random.randint(99, 101), + "sum": random.randint(100, 200), + } + + def get_validation_row(date, row_index, rows_count): + row_index += -(rows_count / 2) + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=7)).strftime(DATE_FORMAT), + "min": random.randint(10, 200), + "max": random.randint(100, 300), + "zero_count": 0 + if row_index < (80 / 100 * rows_count) + else random.randint(100, 200), + "zero_percent": 0 + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else 
random.randint(100, 200), + "average": random.randint(101, 110), + "standard_deviation": random.randint(80, 120), + "variance": random.randint(80, 120), + "sum": random.randint(300, 400), + } + + numeric_columns = [ + "updated_at", + "occurred_at", + "min", + "max", + "zero_count", + "zero_percent", + "average", + "standard_deviation", + "variance", + "sum", + ] + dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=2)) + training_rows = generate_rows(rows_count_per_day, dates, get_training_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "training", "numeric_column_anomalies_training.csv" + ), + training_rows, + numeric_columns, + ) + + validation_date = EPOCH - timedelta(days=1) + validation_rows = generate_rows( + rows_count_per_day, [validation_date], get_validation_row + ) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "validation", "numeric_column_anomalies_validation.csv" + ), + validation_rows, + numeric_columns, + ) + + +def generate_any_type_anomalies_training_and_validation_files(rows_count_per_day=300): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=1)).strftime(DATE_FORMAT), + "null_count_str": None + if row_index < (3 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "null_percent_str": None + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "null_count_float": None + if row_index < (3 / 100 * rows_count) + else random.uniform(1.2, 8.9), + "null_percent_float": None + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else random.uniform(1.2, 8.9), + "null_count_int": None + if row_index < (3 / 100 * rows_count) + else random.randint(100, 200), + "null_percent_int": None + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else random.randint(100, 200), + "null_count_bool": None + if 
row_index < (3 / 100 * rows_count) + else bool(random.getrandbits(1)), + "null_percent_bool": None + if random.randint(1, rows_count) <= (20 / 100 * rows_count) + else bool(random.getrandbits(1)), + } + + def get_validation_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=7)).strftime(DATE_FORMAT), + "null_count_str": None + if row_index < (80 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "null_percent_str": None + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else "".join(random.choices(string.ascii_lowercase, k=5)), + "null_count_float": None + if row_index < (80 / 100 * rows_count) + else random.uniform(1.2, 8.9), + "null_percent_float": None + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else random.uniform(1.2, 8.9), + "null_count_int": None + if row_index < (80 / 100 * rows_count) + else random.randint(100, 200), + "null_percent_int": None + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else random.randint(100, 200), + "null_count_bool": None + if row_index < (80 / 100 * rows_count) + else bool(random.getrandbits(1)), + "null_percent_bool": None + if random.randint(1, rows_count) <= (60 / 100 * rows_count) + else bool(random.getrandbits(1)), + } + + any_type_columns = [ + "updated_at", + "occurred_at", + "null_count_str", + "null_percent_str", + "null_count_float", + "null_percent_float", + "null_count_int", + "null_percent_int", + "null_count_bool", + "null_percent_bool", + ] + dates = generate_rows_timestamps( + base_date=EPOCH - timedelta(days=2), period="hours", count=4 + ) + training_rows = generate_rows(rows_count_per_day, dates, get_training_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "training", "any_type_column_anomalies_training.csv" + ), + training_rows, + any_type_columns, + ) + + validation_date = EPOCH - timedelta(days=1) + validation_rows = generate_rows( + 
rows_count_per_day, [validation_date], get_validation_row + ) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "validation", "any_type_column_anomalies_validation.csv" + ), + validation_rows, + any_type_columns, + ) + + +def generate_dimension_anomalies_training_and_validation_files(): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "platform": "android" if row_index < (rows_count - 20) else "ios", + "version": row_index % 3, + "user_id": random.randint(1, rows_count), + } + + def get_validation_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "platform": "android" if row_index < (rows_count - 1) else "ios", + "version": row_index % 3, + "user_id": random.randint(1, rows_count), + } + + dimension_columns = ["updated_at", "platform", "version", "user_id"] + dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=2)) + training_rows = generate_rows(1020, dates, get_training_row) + write_rows_to_csv( + os.path.join(FILE_DIR, "data", "training", "dimension_anomalies_training.csv"), + training_rows, + dimension_columns, + ) + + validation_date = EPOCH - timedelta(days=1) + validation_rows = generate_rows(1001, [validation_date], get_validation_row) + write_rows_to_csv( + os.path.join( + FILE_DIR, "data", "validation", "dimension_anomalies_validation.csv" + ), + validation_rows, + dimension_columns, + ) + + +def generate_seasonal_data_files( + table_name: str, training_dates: List[datetime], validation_dates: List[datetime] +): + columns = ["updated_at", "user_id"] + training_rows = [] + for date in training_dates: + training_rows.extend( + [ + { + "updated_at": date.strftime(DATE_FORMAT), + "user_id": random.randint(1000, 9999), + } + for _ in range(700) + ] + ) + write_rows_to_csv( + csv_path=os.path.join( + FILE_DIR, "data", "training", f"{table_name}_training.csv" + ), + rows=training_rows, + header=columns, + ) + + validation_rows = [] + for date 
in validation_dates: + validation_rows.extend( + [ + { + "updated_at": date.strftime(DATE_FORMAT), + "user_id": random.randint(1000, 9999), + } + for _ in range(100) + ] + ) + write_rows_to_csv( + csv_path=os.path.join( + FILE_DIR, + "data", + "validation", + f"{table_name}_validation.csv", + ), + rows=validation_rows, + header=columns, + ) + + +def generate_day_of_week_data(): + training_dates = generate_rows_timestamps( + base_date=EPOCH - timedelta(days=1), period="weeks", days_back=(7 * 30) + ) + validation_dates = generate_rows_timestamps(base_date=EPOCH, days_back=1) + generate_seasonal_data_files( + "users_per_day_weekly_seasonal", training_dates, validation_dates + ) + + +def generate_hour_of_day_data(): + training_dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=1)) + validation_dates = generate_rows_timestamps(base_date=EPOCH, days_back=1) + generate_seasonal_data_files( + "users_per_hour_daily_seasonal", training_dates, validation_dates + ) + + +def generate_seasonality_volume_anomalies_files(): + generate_day_of_week_data() + generate_hour_of_day_data() + + +def generate_backfill_days_training_and_validation_files(rows_count_per_day=100): + def get_training_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=1)).strftime(DATE_FORMAT), + "min_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(5, 10)) + ), + } + + def get_validation_row(date, row_index, rows_count): + return { + "updated_at": date.strftime(DATE_FORMAT), + "occurred_at": (date - timedelta(hours=7)).strftime(DATE_FORMAT), + "min_length": "".join( + random.choices(string.ascii_lowercase, k=random.randint(1, 10)) + ), + } + + string_columns = ["updated_at", "occurred_at", "min_length"] + dates = generate_rows_timestamps(base_date=EPOCH - timedelta(days=1)) + training_rows = generate_rows(rows_count_per_day, dates, get_training_row) + write_rows_to_csv( + os.path.join( + 
FILE_DIR, "data", "training", "backfill_days_column_anomalies_training.csv" + ), + training_rows, + string_columns, + ) + + validation_date = EPOCH - timedelta(days=5) + validation_rows = generate_rows( + rows_count_per_day, [validation_date], get_validation_row + ) + write_rows_to_csv( + os.path.join( + FILE_DIR, + "data", + "validation", + "backfill_days_column_anomalies_validation.csv", + ), + validation_rows, + string_columns, + ) + + +def main(): + print("Generating fake data!") + generate_fake_data() + print("Done. Please run 'dbt seed -f' to load the data into your database.") + + +if __name__ == "__main__": + main() diff --git a/tests/e2e_dbt_project/macros/asserts.sql b/tests/e2e_dbt_project/macros/asserts.sql new file mode 100644 index 000000000..e87d1d688 --- /dev/null +++ b/tests/e2e_dbt_project/macros/asserts.sql @@ -0,0 +1,19 @@ +{% macro assert_value(value, expected_value) %} + {% if value != expected_value %} + {% do elementary.edr_log("FAILED: value " ~ value ~ " does not equal to " ~ expected_value) %} + {{ return(1) }} + {% else %} + {% do elementary.edr_log("SUCCESS") %} + {{ return(0) }} + {% endif %} +{% endmacro %} + +{% macro assert_str_in_value(str, value) %} + {% if str not in value %} + {% do elementary.edr_log("FAILED: the string " ~ str ~ " was not found in " ~ value) %} + {{ return(1) }} + {% else %} + {% do elementary.edr_log("SUCCESS") %} + {{ return(0) }} + {% endif %} +{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/clear_tests.sql b/tests/e2e_dbt_project/macros/e2e_tests/clear_tests.sql new file mode 100644 index 000000000..5dc7ebcaf --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/clear_tests.sql @@ -0,0 +1,39 @@ +{% macro clear_tests() %} + {% if execute %} + {% do elementary_integration_tests.edr_drop_schema(elementary.target_database(), target.schema) %} + + {% set database_name, schema_name = elementary.get_package_database_and_schema('elementary') %} + {% do 
elementary_integration_tests.edr_drop_schema(database_name, schema_name) %} + + {% set tests_schema_name = elementary.get_elementary_tests_schema(database_name, schema_name) %} + {% if tests_schema_name != schema_name %} + {% do elementary_integration_tests.edr_drop_schema(database_name, tests_schema_name) %} + {% else %} + {{ elementary.edr_log("Tests schema is the same as the main elementary schema, nothing to drop.") }} + {% endif %} + {% endif %} + {{ return('') }} +{% endmacro %} + +{% macro edr_drop_schema(database_name, schema_name) %} + {% do return(adapter.dispatch('edr_drop_schema','elementary_integration_tests')(database_name, schema_name)) %} +{% endmacro %} + +{% macro default__edr_drop_schema(database_name, schema_name) %} + {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name) %} + {% do dbt.drop_schema(schema_relation) %} + {% do adapter.commit() %} + {% do elementary.edr_log("dropped schema {}".format(schema_relation | string)) %} +{% endmacro %} + +{% macro clickhouse__edr_drop_schema(database_name, schema_name) %} + {% set results = run_query("SELECT name FROM system.tables WHERE database = '" ~ database_name ~ "'") %} + {% if execute %} + {% for row in results %} + {% set table = row[0] %} + {% do run_query("DROP TABLE IF EXISTS " ~ database_name ~ "." 
~ table) %} + {% endfor %} + {% endif %} + {% do adapter.commit() %} + {% do elementary.edr_log("dropped schema {}".format(schema_name)) %} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/test_config_levels.sql b/tests/e2e_dbt_project/macros/e2e_tests/test_config_levels.sql new file mode 100644 index 000000000..b63bda60b --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/test_config_levels.sql @@ -0,0 +1,53 @@ +{% test config_levels(model, expected_config, timestamp_column, time_bucket, where_expression, anomaly_sensitivity, anomaly_direction, days_back, backfill_days, seasonality, min_training_set_size) %} + {%- if execute and elementary.is_test_command() %} + {%- set unexpected_config = [] %} + {%- set model_relation = dbt.load_relation(model) %} + + {% set configuration_dict, metric_properties_dict = + elementary.get_anomalies_test_configuration(model_relation, + mandatory_params, + timestamp_column, + where_expression, + anomaly_sensitivity, + anomaly_direction, + min_training_set_size, + time_bucket, + days_back, + backfill_days, + seasonality) %} + + {%- set configs_to_test = [('timestamp_column', metric_properties_dict.timestamp_column), + ('where_expression', metric_properties_dict.where_expression), + ('time_bucket', configuration_dict.time_bucket), + ('anomaly_sensitivity', configuration_dict.anomaly_sensitivity), + ('anomaly_direction', configuration_dict.anomaly_direction), + ('min_training_set_size', configuration_dict.min_training_set_size), + ('days_back', configuration_dict.days_back), + ('backfill_days', configuration_dict.backfill_days), + ('seasonality', configuration_dict.seasonality) + ] %} + + {%- for config in configs_to_test %} + {%- set config_name, config_value = config %} + {%- set config_check = compare_configs(config_name, config_value, expected_config) %} + {%- if config_check %} + {%- do unexpected_config.append(config_check) -%} + {%- endif %} + {%- endfor %} + + {%- if unexpected_config | length > 0 %} + 
{%- do exceptions.raise_compiler_error('Failure config_levels: ' ~ unexpected_config) -%} + {%- else %} + {#- test must run an sql query -#} + {{ elementary.no_results_query() }} + {%- endif %} + {%- endif %} +{%- endtest %} + +{% macro compare_configs(config_name, config, expected_config) %} + {%- if config != expected_config.get(config_name) %} + {%- set unexpected_message = ('For {0} - got config: {1}, expected config: {2}').format(config_name, config, expected_config.get(config_name) ) %} + {{ return(unexpected_message) }} + {%- endif %} + {{ return(none) }} +{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/tests_validation.sql b/tests/e2e_dbt_project/macros/e2e_tests/tests_validation.sql new file mode 100644 index 000000000..b6b52c85d --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/tests_validation.sql @@ -0,0 +1,163 @@ +{% macro validate_no_timestamp_anomalies() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + + {# Validating row count for no timestamp table anomaly #} + {% set no_timestamp_row_count_validation_query %} + select distinct table_name + from {{ alerts_relation }} + where status in ('fail', 'warn') and sub_type = 'row_count' + and upper(table_name) = 'NO_TIMESTAMP_ANOMALIES' + {% endset %} + {% set results = elementary.result_column_to_list(no_timestamp_row_count_validation_query) %} + {{ assert_lists_contain_same_items(results, ['no_timestamp_anomalies']) }} + + {# Validating any column anomaly with no timestamp #} + {% set no_timestamp_column_validation_alerts %} + select column_name, sub_type + from {{ alerts_relation }} + where status in ('fail', 'warn') and upper(table_name) = 'NO_TIMESTAMP_ANOMALIES' + and column_name is not NULL + group by 1,2 + {% endset %} + {% set alert_rows = run_query(no_timestamp_column_validation_alerts) %} + {% set indexed_columns = {} %} + {% for row in alert_rows %} + {% set column_name = row[0] %} + {% set alert = row[1] %} + {% if 
column_name in indexed_columns %} + {% do indexed_columns[column_name].append(alert) %} + {% else %} + {% do indexed_columns.update({column_name: [alert]}) %} + {% endif %} + {% endfor %} + {% set results = [] %} + {% for column, column_alerts in indexed_columns.items() %} + {% for alert in column_alerts %} + {% if alert | lower in column | lower %} + {% do results.append(column) %} + {% endif %} + {% endfor %} + {% endfor %} + {{ assert_lists_contain_same_items(results, ['null_count_str']) }} +{% endmacro %} + +{% macro validate_error_test() %} + {% set alerts_relation = ref('test_alerts_union') %} + + {# Validating alert for error test was created #} + {% set error_test_validation_query %} + with error_tests as ( + select + distinct test_name, + {{ elementary.contains('tags', 'error_test') }} as error_tag + from {{ alerts_relation }} + where status = 'error' + ) + select + case when error_tag = true then 'error' + else 'error: ' || test_name + end as error_tests + from error_tests + {% endset %} + {% set results = elementary.result_column_to_list(error_test_validation_query) | unique | list %} + {{ assert_lists_contain_same_items(results, ['error']) }} +{% endmacro %} + +{% macro validate_error_model() %} + {% set alerts_relation = ref('alerts_dbt_models') %} + + {% set error_model_validation_query %} + select distinct status + from {{ alerts_relation }} + where status = 'error' and materialization != 'snapshot' + {% endset %} + {% set results = elementary.result_column_to_list(error_model_validation_query) %} + {{ assert_lists_contain_same_items(results, ['error']) }} +{% endmacro %} + +{% macro validate_error_snapshot() %} + {% set alerts_relation = ref('alerts_dbt_models') %} + + {% set error_snapshot_validation_query %} + select distinct status + from {{ alerts_relation }} + where status = 'error' and materialization = 'snapshot' + {% endset %} + {% set results = elementary.result_column_to_list(error_snapshot_validation_query) %} + {{ 
assert_lists_contain_same_items(results, ['error']) }} +{% endmacro %} + +{% macro validate_regular_tests() %} + {% set alerts_relation = ref('alerts_dbt_tests') %} + {% set dbt_test_alerts %} + select table_name, column_name, test_name + from {{ alerts_relation }} + where status in ('fail', 'warn') + group by 1, 2, 3 + {% endset %} + {% set alert_rows = run_query(dbt_test_alerts) %} + {% set found_tables = [] %} + {% set found_columns = [] %} + {% set found_tests = [] %} + {% for row in alert_rows %} + {%- if row[0] -%} + {% do found_tables.append(row[0]) %} + {%- endif -%} + {%- if row[1] -%} + {% do found_columns.append(row[1]) %} + {%- endif -%} + {%- if row[2] -%} + {% do found_tests.append(row[2]) %} + {%- endif -%} + {% endfor %} + {{ assert_list1_in_list2(['string_column_anomalies', 'numeric_column_anomalies', 'any_type_column_anomalies', 'any_type_column_anomalies_validation', 'numeric_column_anomalies_training'], found_tables) }} + {{ assert_list1_in_list2(['min_length', 'null_count_int'], found_columns) }} + {{ assert_list1_in_list2(['relationships', 'singular_test_with_no_ref', 'singular_test_with_one_ref', 'singular_test_with_two_refs', 'singular_test_with_source_ref', 'generic_test_on_model', 'generic_test_on_column'], found_tests) }} + +{% endmacro %} + +{% macro validate_dbt_artifacts() %} + {% set dbt_models_relation = ref('dbt_models') %} + {% set dbt_models_query %} + select distinct name from {{ dbt_models_relation }} + {% endset %} + {% set models = elementary.result_column_to_list(dbt_models_query) %} + {{ assert_value_in_list('any_type_column_anomalies', models, context='dbt_models') }} + {{ assert_value_in_list('numeric_column_anomalies', models, context='dbt_models') }} + {{ assert_value_in_list('string_column_anomalies', models, context='dbt_models') }} + + {% set dbt_sources_relation = ref('dbt_sources') %} + {% set dbt_sources_query %} + select distinct name from {{ dbt_sources_relation }} + {% endset %} + {% set sources = 
elementary.result_column_to_list(dbt_sources_query) %} + {{ assert_value_in_list('any_type_column_anomalies_training', sources, context='dbt_sources') }} + {{ assert_value_in_list('string_column_anomalies_training', sources, context='dbt_sources') }} + {{ assert_value_in_list('any_type_column_anomalies_validation', sources, context='dbt_sources') }} + + {% set dbt_tests_relation = ref('dbt_tests') %} + {% set dbt_tests_query %} + select distinct name from {{ dbt_tests_relation }} + {% endset %} + {% set tests = elementary.result_column_to_list(dbt_tests_query) %} + + {% set dbt_run_results = ref('dbt_run_results') %} + {% set dbt_run_results_query %} + select distinct name from {{ dbt_run_results }} where resource_type in ('model', 'test') + {% endset %} + {% set run_results = elementary.result_column_to_list(dbt_run_results_query) %} + {% set all_executable_nodes = [] %} + {% do all_executable_nodes.extend(models) %} + {% do all_executable_nodes.extend(tests) %} + {{ assert_list1_in_list2(run_results, all_executable_nodes, context='dbt_run_results') }} + + + {% set query %} + select distinct invocations.invocation_id, results.invocation_id from {{ ref("dbt_invocations") }} invocations + full outer join {{ ref("dbt_run_results") }} results + on invocations.invocation_id = results.invocation_id + where invocations.invocation_id is null or results.invocation_id is null + {% endset %} + {% set result = elementary.run_query(query) %} + {% do assert_empty_table(result, "dbt_invocations") %} +{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/utils/list_assertions.sql b/tests/e2e_dbt_project/macros/e2e_tests/utils/list_assertions.sql new file mode 100644 index 000000000..ce98c60b4 --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/utils/list_assertions.sql @@ -0,0 +1,65 @@ +{% macro assert_value_in_list(value, list, context='') %} + {% set upper_value = value | upper %} + {% set lower_value = value | lower %} + {% if 
upper_value in list or lower_value in list %} + {% do elementary.edr_log(context ~ " SUCCESS: " ~ upper_value ~ " in list " ~ list) %} + {{ return(0) }} + {% else %} + {% do elementary.edr_log(context ~ " FAILED: " ~ upper_value ~ " not in list " ~ list) %} + {{ return(1) }} + {% endif %} +{% endmacro %} + +{% macro assert_value_not_in_list(value, list) %} + {% set upper_value = value | upper %} + {% if upper_value not in list %} + {% do elementary.edr_log("SUCCESS: " ~ upper_value ~ " not in list " ~ list) %} + {{ return(0) }} + {% else %} + {% do elementary.edr_log("FAILED: " ~ upper_value ~ " in list " ~ list) %} + {{ return(1) }} + {% endif %} +{% endmacro %} + +{% macro assert_lists_contain_same_items(list1, list2, context='') %} + {% if list1 | length != list2 | length %} + {% do elementary.edr_log(context ~ " FAILED: " ~ list1 ~ " has different length than " ~ list2) %} + {{ return(1) }} + {% endif %} + {% for item1 in list1 %} + {% if item1 is string %} + {% set item1 = item1 | lower %} + {% endif %} + {% if item1 not in list2 %} + {% do elementary.edr_log(context ~ " FAILED: " ~ item1 ~ " not in list " ~ list2) %} + {{ return(1) }} + {% endif %} + {% endfor %} + {% do elementary.edr_log(context ~ " SUCCESS: " ~ list1 ~ " in list " ~ list2) %} + {{ return(0) }} +{% endmacro %} + +{% macro assert_list1_in_list2(list1, list2, context = '') %} + {% set lower_list2 = list2 | lower %} + {% if not list1 or not list2 %} + {% do elementary.edr_log(context ~ " FAILED: list1 is empty or list2 is empty") %} + {{ return(1) }} + {% endif %} + {% for item1 in list1 %} + {% if item1 | lower not in lower_list2 %} + {% do elementary.edr_log(context ~ " FAILED: " ~ item1 ~ " not in list " ~ list2) %} + {{ return(1) }} + {% endif %} + {% endfor %} + {% do elementary.edr_log(context ~ " SUCCESS: " ~ list1 ~ " in list " ~ list2) %} + {{ return(0) }} +{% endmacro %} + +{% macro assert_list_has_expected_length(list, expected_length) %} + {% if list | length != expected_length %} 
+ {% do elementary.edr_log("FAILED: " ~ list ~ " has different length than expected " ~ expected_length) %} + {{ return(1) }} + {% endif %} + {% do elementary.edr_log("SUCCESS: " ~ list ~ " has length " ~ expected_length) %} + {{ return(0) }} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/utils/table_assertions.sql b/tests/e2e_dbt_project/macros/e2e_tests/utils/table_assertions.sql new file mode 100644 index 000000000..b9ef999cb --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/utils/table_assertions.sql @@ -0,0 +1,19 @@ + +{% macro assert_empty_table(table, context='') %} + {% if table | length > 0 %} + {% do elementary.edr_log(context ~ " FAILED: Table not empty.") %} + {% do table.print_table() %} + {{ return(1) }} + {% endif %} + {% do elementary.edr_log(context ~ " SUCCESS: Table is empty.") %} + {{ return(0) }} +{% endmacro %} + +{% macro assert_table_doesnt_exist(model_name) %} + {% if load_relation(ref(model_name)) is none %} + {% do elementary.edr_log(model_name ~ " SUCCESS: Table doesn't exist.") %} + {{ return(0) }} + {% endif %} + {% do elementary.edr_log(context ~ " FAILED: Table exists.") %} + {{ return(1) }} +{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_backfill_days.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_backfill_days.sql new file mode 100644 index 000000000..c67ca248d --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/validate_backfill_days.sql @@ -0,0 +1,10 @@ +{% macro validate_backfill_days() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {% set string_column_alerts %} + select column_name + from {{ alerts_relation }} + where status in ('fail', 'warn') and lower(sub_type) = lower(column_name) and upper(table_name) = 'BACKFILL_DAYS_COLUMN_ANOMALIES' + {% endset %} + {% set results = elementary.result_column_to_list(string_column_alerts) %} + {{ assert_lists_contain_same_items(results, ['min_length']) }} +{% endmacro 
%} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_column_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_column_anomalies.sql new file mode 100644 index 000000000..17e35bf7a --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/validate_column_anomalies.sql @@ -0,0 +1,79 @@ +{% macro validate_column_anomalies() %} + {%- do validate_string_column_anomalies() -%} + {%- do validate_numeric_column_anomalies() -%} + {%- do validate_custom_column_monitors() -%} + {%- do validate_any_type_column_anomalies() -%} +{% endmacro %} + +{% macro validate_string_column_anomalies() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {% set string_column_alerts %} + select distinct column_name + from {{ alerts_relation }} + where status in ('fail', 'warn') and lower(sub_type) = lower(column_name) and upper(table_name) = 'STRING_COLUMN_ANOMALIES' + {% endset %} + {% set results = elementary.result_column_to_list(string_column_alerts) %} + {{ assert_lists_contain_same_items(results, ['min_length', 'max_length', 'average_length', 'missing_count', + 'missing_percent']) }} +{% endmacro %} + +{% macro validate_numeric_column_anomalies() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {% set numeric_column_alerts %} + select distinct column_name + from {{ alerts_relation }} + where status in ('fail', 'warn') and lower(sub_type) = lower(column_name) + and upper(table_name) = 'NUMERIC_COLUMN_ANOMALIES' + {% endset %} + {% set results = elementary.result_column_to_list(numeric_column_alerts) %} + {{ assert_lists_contain_same_items(results, ['min', 'max', 'zero_count', 'zero_percent', 'average', + 'standard_deviation', 'variance', 'sum']) }} +{% endmacro %} + +{% macro validate_custom_column_monitors() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {% set query %} + select distinct sub_type from {{ alerts_relation }} + where status in ('fail', 'warn') and upper(table_name) = 
'COPY_NUMERIC_COLUMN_ANOMALIES' + {% endset %} + {% set results = elementary.result_column_to_list(query) %} + {{ assert_lists_contain_same_items(results, ["zero_count"]) }} +{% endmacro %} + +{% macro validate_any_type_column_anomalies() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {% set any_type_column_alerts %} + select column_name, sub_type + from {{ alerts_relation }} + where status in ('fail', 'warn') and upper(table_name) = 'ANY_TYPE_COLUMN_ANOMALIES' + and column_name is not NULL + group by 1,2 + {% endset %} + {% set alert_rows = run_query(any_type_column_alerts) %} + {% set indexed_columns = {} %} + {% for row in alert_rows %} + {% set column_name = row[0] %} + {% set alert = row[1] %} + {% if column_name in indexed_columns %} + {% do indexed_columns[column_name].append(alert) %} + {% else %} + {% do indexed_columns.update({column_name: [alert]}) %} + {% endif %} + {% endfor %} + {% set results = [] %} + {% for column, column_alerts in indexed_columns.items() %} + {% for alert in column_alerts %} + {% if alert | lower in column | lower %} + {% do results.append(column) %} + {% endif %} + {% endfor %} + {% endfor %} + {{ assert_lists_contain_same_items(results, ['null_count_str', + 'null_percent_str', + 'null_count_float', + 'null_percent_float', + 'null_count_int', + 'null_percent_int', + 'null_count_bool', + 'null_percent_bool']) }} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_config_levels.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_config_levels.sql new file mode 100644 index 000000000..da0b9fa7c --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/validate_config_levels.sql @@ -0,0 +1,18 @@ +{% macro validate_config_levels() %} + {% set alerts_relation = ref('test_alerts_union') %} + + {% set config_levels_validation_query %} + with error_tests as ( + select + table_name, alert_description, + {{ elementary.contains('tags', 'config_levels') }} as is_config_levels_tag + from {{ 
alerts_relation }} + where status = 'error' + ) + select table_name, alert_description + from error_tests + where is_config_levels_tag = true + {% endset %} + {% set results = elementary.run_query(config_levels_validation_query) %} + {{ assert_empty_table(results) }} +{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_dimensions_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_dimensions_anomalies.sql new file mode 100644 index 000000000..0b53fae5c --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/validate_dimensions_anomalies.sql @@ -0,0 +1,41 @@ +{% macro validate_dimension_anomalies() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + + {% set dimension_validation_query %} + select *, + {{ elementary.contains('tags', 'should_fail') }} as should_fail + from {{ alerts_relation }} + where status in ('fail', 'warn', 'error') and tags like '%dimension_anomalies%' + {% endset %} + {% set results = elementary.agate_to_dicts(run_query(dimension_validation_query)) %} + {% set dimensions_with_problems = [] %} + + {%- set should_fail_descriptions = [] %} + {%- set should_fail_names = [] %} + + {% for result in results %} + {%- set should_fail_tag = result.get('should_fail') %} + {%- set test_name = result.get('test_name') %} + {%- set alert_description = result.get('alert_description') %} + {%- if should_fail_tag == True %} + {%- do should_fail_descriptions.append(alert_description) -%} + {%- do should_fail_names.append(test_name) -%} + {%- endif %} + {% endfor %} + + {{ assert_lists_contain_same_items(should_fail_names, ['elementary_dimension_anomalies_dimension_anomalies_platform__updated_at', 'elementary_dimension_anomalies_dimension_anomalies_platform__version__updated_at']) }} +{% endmacro %} + +{% macro create_new_dimension() %} + {% set dimension_validation_data = ref('dimension_anomalies_validation') %} + {%- set insert_dimension_query %} + INSERT INTO {{ 
dimension_validation_data }} values ('1969-12-28 00:00:00.000', 'windows', 1, 318); + {% endset %} +{% endmacro %} + +{% macro delete_new_dimension() %} + {% set dimension_validation_data = ref('dimension_anomalies_validation') %} + {%- set delete_dimension_query %} + DELETE FROM{{ dimension_validation_data }} WHERE platform = 'windows'; + {% endset %} +{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_directional_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_directional_anomalies.sql new file mode 100644 index 000000000..6482951dc --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/validate_directional_anomalies.sql @@ -0,0 +1,40 @@ +{% macro validate_directional_anomalies() %} + {%- do validate_spike_directional_anomalies() -%} + {%- do validate_drop_directional_anomalies() -%} +{% endmacro %} + +{% macro validate_spike_directional_anomalies() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {# Validating alert for correct direction anomalies #} + + {% set row_count_validation_query %} + select distinct table_name + from {{ alerts_relation }} + where status in ('fail', 'warn') and tags like '%directional_anomalies%' and tags like '%spike%'; + {% endset %} + {% set results = elementary.result_column_to_list(row_count_validation_query) %} + -- The result list's purpose is a more readable error messages + {% set results_list = [] %} + {% for result in results %} + {% do results_list.append(result) %} + {% endfor %} + {{ assert_lists_contain_same_items(results_list, ['any_type_column_anomalies', 'numeric_column_anomalies']) }} +{% endmacro %} + +{% macro validate_drop_directional_anomalies() %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {# Validating alert for correct direction anomalies #} + + {% set row_count_validation_query %} + select distinct table_name + from {{ alerts_relation }} + where status in ('fail', 'warn') and tags like 
'%directional_anomalies%' and tags like '%drop%'; + {% endset %} + {% set results = elementary.result_column_to_list(row_count_validation_query) %} + -- The result list's purpose is a more readable error messages + {% set results_list = [] %} + {% for result in results %} + {% do results_list.append(result) %} + {% endfor %} + {{ assert_lists_contain_same_items(results_list, ['any_type_column_anomalies', 'dimension_anomalies']) }} +{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_freshness_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_freshness_anomalies.sql new file mode 100644 index 000000000..9fa2df5ae --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/validate_freshness_anomalies.sql @@ -0,0 +1,14 @@ +{% macro validate_event_freshness_anomalies() %} + {%- set max_bucket_end = elementary.edr_quote(elementary.get_run_started_at().strftime("%Y-%m-%d 00:00:00")) %} + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {% set freshness_validation_query %} + select distinct table_name + from {{ alerts_relation }} + where sub_type = 'event_freshness' and detected_at >= {{elementary.edr_cast_as_timestamp(max_bucket_end) }} + {% endset %} + + {% set results = elementary.result_column_to_list(freshness_validation_query) %} + {{ assert_lists_contain_same_items(results, ['string_column_anomalies', + 'numeric_column_anomalies', + 'string_column_anomalies_training']) }} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_schema_changes.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_schema_changes.sql new file mode 100644 index 000000000..90b0c6933 --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/validate_schema_changes.sql @@ -0,0 +1,61 @@ +{% macro validate_schema_changes() %} + {% set expected_changes = {('schema_changes', 'red_cards'): 'column_added', + ('schema_changes', 'group_a'): 'column_removed', + ('schema_changes', 'goals'): 
'type_changed', + ('schema_changes', 'key_crosses'): 'column_added', + ('schema_changes', 'offsides'): 'column_removed', + ('schema_changes_from_baseline', 'group_b'): 'type_changed', + ('schema_changes_from_baseline', 'group_d'): 'column_added', + ('schema_changes_from_baseline', 'goals'): 'type_changed', + ('schema_changes_from_baseline', 'coffee_cups_consumed'): 'column_removed' + } %} + {% set alerts_relation = ref('alerts_schema_changes') %} + {% set failed_schema_changes_alerts %} + select test_short_name, column_name, sub_type + from {{ alerts_relation }} + where status in ('fail', 'warn') + group by 1,2,3 + {% endset %} + {% set error_schema_changes_alerts %} + select test_short_name, column_name, sub_type + from {{ alerts_relation }} + where status = 'error' + group by 1,2,3 + {% endset %} + {% set error_alert_rows = run_query(error_schema_changes_alerts) %} + {# We should have one error test from schema_changes_from_baseline with enforce_types true #} + {% if error_alert_rows | length != 1 %} + {% do elementary.edr_log("FAILED: for schema_changes_from_baseline with enforce_types true - no error occurred") %} + {{ return(1) }} + {% endif %} + {% set failure_alert_rows = run_query(failed_schema_changes_alerts) %} + {% set found_schema_changes = {} %} + {% for row in failure_alert_rows %} + {% set test_short_name = row[0] | lower %} + {% set column_name = row[1] | lower %} + {% set alert = row[2] | lower %} + {% if (test_short_name, column_name) not in expected_changes %} + {% do elementary.edr_log("FAILED: " ~ test_short_name ~ " - could not find expected alert for " ~ column_name ~ ", " ~ alert) %} + {% endif %} + {% if expected_changes[(test_short_name, column_name)] != alert %} + {% do elementary.edr_log("FAILED: " ~ test_short_name ~ " - for column " ~ column_name ~ " expected alert type " ~ expected_changes[(test_short_name, column_name)] ~ " but got " ~ alert) %} + {{ return(1) }} + {% endif %} + {% do found_schema_changes.update({(test_short_name,
column_name): alert}) %} + {% endfor %} + {% if found_schema_changes %} + {%- set missing_changes = [] %} + {%- for expected_change in expected_changes %} + {%- if expected_change not in found_schema_changes %} + {% do elementary.edr_log("FAILED: for column " ~ expected_change ~ " expected alert " ~ expected_changes[expected_change] ~ " but alert is missing") %} + {%- do missing_changes.append(expected_change) -%} + {%- endif %} + {%- endfor %} + {%- if missing_changes | length == 0 %} + {% do elementary.edr_log("SUCCESS: all expected schema changes were found - " ~ found_schema_changes) %} + {{ return(0) }} + {%- endif %} + {% endif %} + {{ return(0) }} +{% endmacro %} + diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_seasonal_volume_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_seasonal_volume_anomalies.sql new file mode 100644 index 000000000..3e48f7a3b --- /dev/null +++ b/tests/e2e_dbt_project/macros/e2e_tests/validate_seasonal_volume_anomalies.sql @@ -0,0 +1,16 @@ +{% macro validate_seasonal_volume_anomalies() %} + {% set query %} + select test_alias, status + from {{ ref('elementary_test_results') }} + where table_name in ('users_per_day_weekly_seasonal', 'users_per_hour_daily_seasonal') + {% endset %} + {% set results = elementary.run_query(query) %} + {{ assert_lists_contain_same_items(results, [ + ('day_of_week_volume_anomalies_no_seasonality', 'fail'), + ('day_of_week_volume_anomalies_with_seasonality', 'pass'), + ('hour_of_day_volume_anomalies_with_seasonality', 'pass'), + ('hour_of_day_volume_anomalies_no_seasonality', 'fail'), + ('hour_of_week_volume_anomalies_no_seasonality', 'fail'), + ('hour_of_week_volume_anomalies_with_seasonality', 'pass') + ]) }} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_table_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_table_anomalies.sql new file mode 100644 index 000000000..d6efb13c3 --- /dev/null +++ 
b/tests/e2e_dbt_project/macros/e2e_tests/validate_table_anomalies.sql @@ -0,0 +1,25 @@ +{% macro validate_table_anomalies() %} + -- no validation data which means table freshness and volume should alert + {% set alerts_relation = ref('alerts_anomaly_detection') %} + {% set freshness_validation_query %} + select distinct table_name + from {{ alerts_relation }} + where status in ('fail', 'warn') and sub_type = 'freshness' + {% endset %} + {% set results = elementary.result_column_to_list(freshness_validation_query) %} + {{ assert_lists_contain_same_items(results, ['string_column_anomalies', + 'numeric_column_anomalies', + 'string_column_anomalies_training']) }} + {% set row_count_validation_query %} + select distinct table_name + from {{ alerts_relation }} + where status in ('fail', 'warn') and sub_type = 'row_count' + {% endset %} + {% set results = elementary.result_column_to_list(row_count_validation_query) %} + {{ assert_lists_contain_same_items(results, ['users_per_hour_daily_seasonal', + 'users_per_day_weekly_seasonal', + 'any_type_column_anomalies', + 'numeric_column_anomalies', + 'string_column_anomalies_training']) }} + +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_column.sql b/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_column.sql new file mode 100644 index 000000000..fa80e67a8 --- /dev/null +++ b/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_column.sql @@ -0,0 +1,7 @@ +{%- test generic_test_on_column(model, column_name) -%} + {% set query_with_rows %} + with nothing as (select 1 as num) + select * from nothing where num = 1 + {%- endset -%} + {{ query_with_rows }} +{%- endtest -%} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_model.sql b/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_model.sql new file mode 100644 index 000000000..a78366532 --- /dev/null +++ 
b/tests/e2e_dbt_project/macros/generic_tests/generic_test_on_model.sql @@ -0,0 +1,7 @@ +{%- test generic_test_on_model(model) -%} + {% set query_with_rows %} + with nothing as (select 1 as num) + select * from nothing where num = 1 + {%- endset -%} + {{ query_with_rows }} +{%- endtest -%} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/system/dbg.sql b/tests/e2e_dbt_project/macros/system/dbg.sql new file mode 100644 index 000000000..5dcd88dfe --- /dev/null +++ b/tests/e2e_dbt_project/macros/system/dbg.sql @@ -0,0 +1,3 @@ +{% macro dbg() %} + {% do debug() %} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/system/generate_schema_name.sql b/tests/e2e_dbt_project/macros/system/generate_schema_name.sql new file mode 100644 index 000000000..e30a0e706 --- /dev/null +++ b/tests/e2e_dbt_project/macros/system/generate_schema_name.sql @@ -0,0 +1,12 @@ +{% macro generate_schema_name(custom_schema_name, node) -%} + {%- set default_schema = target.schema -%} + {% if not custom_schema_name %} + {% do return(default_schema) %} + {% endif %} + + {% if node.resource_type == "seed" %} + {% do return(custom_schema_name) %} + {% endif %} + + {% do return("{}_{}".format(default_schema, custom_schema_name)) %} +{%- endmacro %} diff --git a/tests/e2e_dbt_project/macros/system/materializations.sql b/tests/e2e_dbt_project/macros/system/materializations.sql new file mode 100644 index 000000000..89de786c2 --- /dev/null +++ b/tests/e2e_dbt_project/macros/system/materializations.sql @@ -0,0 +1,7 @@ +{% materialization test, default %} + {% do return(elementary.materialization_test_default()) %} +{% endmaterialization %} + +{% materialization test, adapter="snowflake" %} + {% do return(elementary.materialization_test_snowflake()) %} +{% endmaterialization %} diff --git a/tests/e2e_dbt_project/macros/system/read_table.sql b/tests/e2e_dbt_project/macros/system/read_table.sql new file mode 100644 index 000000000..7f9f88f54 --- /dev/null +++ 
b/tests/e2e_dbt_project/macros/system/read_table.sql @@ -0,0 +1,18 @@ +{% macro read_table(table, where=none, column_names=none) %} + {% set query %} + select + {% if column_names %} + {{ elementary.escape_select(column_names) }} + {% else %} + * + {% endif %} + from {{ ref(table) }} + {% if where %} + where {{ where }} + {% endif %} + {% endset %} + + {% set results = elementary.run_query(query) %} + {% set results_json = elementary.agate_to_json(results) %} + {% do elementary.edr_log(results_json) %} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/system/return_config_var.sql b/tests/e2e_dbt_project/macros/system/return_config_var.sql new file mode 100644 index 000000000..446001ddd --- /dev/null +++ b/tests/e2e_dbt_project/macros/system/return_config_var.sql @@ -0,0 +1,6 @@ +{# Logging the wanted config var as an elementary log (using elementary.edr_log) #} +{# The dbtRunner catch this log when executed with run_operation #} +{# This is used for accessing the integration tests vars #} +{% macro return_config_var(var_name) %} + {{ elementary.edr_log(elementary.get_config_var(var_name)) }} +{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql b/tests/e2e_dbt_project/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql new file mode 100644 index 000000000..b68b50d5b --- /dev/null +++ b/tests/e2e_dbt_project/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql @@ -0,0 +1,14 @@ +{% macro test_adapter_specific_macros_have_default_implementation() %} + {% set no_default_macros = [] %} + {% set elementary_macros = elementary.keys() %} + {% for macro in elementary_macros %} + {% set parts = macro.split("__") %} + {% if parts | length == 2 %} + {% set adapter, macro_name = parts %} + {% if macro_name not in no_default_macros and "default__{}".format(macro_name) not in elementary_macros %} + {% do no_default_macros.append(macro_name) %} + 
{% endif %} + {% endif %} + {% endfor %} + {{ assert_lists_contain_same_items(no_default_macros, [], "no_default_macros") }} +{% endmacro %} diff --git a/tests/e2e_dbt_project/models/any_type_column_anomalies.sql b/tests/e2e_dbt_project/models/any_type_column_anomalies.sql new file mode 100644 index 000000000..aa0952be7 --- /dev/null +++ b/tests/e2e_dbt_project/models/any_type_column_anomalies.sql @@ -0,0 +1,36 @@ +with training as ( + select * from {{ ref('any_type_column_anomalies_training') }} +), + +{% if var("stage") == "validation" %} + validation as ( + select * from {{ ref('any_type_column_anomalies_validation') }} + ), + + source as ( + select * from training + union all + select * from validation + ), +{% else %} + source as ( + select * from training + ), +{% endif %} + + final as ( + select + updated_at, + occurred_at, + null_count_str, + null_percent_str, + null_count_float, + null_percent_float, + null_count_int, + null_percent_int, + null_count_bool, + null_percent_bool + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql b/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql new file mode 100644 index 000000000..bcaddcd7e --- /dev/null +++ b/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql @@ -0,0 +1,29 @@ +with training as ( + select * from {{ ref('backfill_days_column_anomalies_training') }} +), + + {% if var("stage") == "validation" %} + validation as ( + select * from {{ ref('backfill_days_column_anomalies_validation') }} + ), + + source as ( + select * from training + union all + select * from validation + ), + {% else %} + source as ( + select * from training + ), + {% endif %} + + final as ( + select + updated_at, + occurred_at, + min_length + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/config_levels_project.sql b/tests/e2e_dbt_project/models/config_levels_project.sql new file mode 100644 index 000000000..3ed516051 
--- /dev/null +++ b/tests/e2e_dbt_project/models/config_levels_project.sql @@ -0,0 +1 @@ +select * from {{ ref('any_type_column_anomalies_validation') }} \ No newline at end of file diff --git a/tests/e2e_dbt_project/models/config_levels_test_and_model.sql b/tests/e2e_dbt_project/models/config_levels_test_and_model.sql new file mode 100644 index 000000000..3ed516051 --- /dev/null +++ b/tests/e2e_dbt_project/models/config_levels_test_and_model.sql @@ -0,0 +1 @@ +select * from {{ ref('any_type_column_anomalies_validation') }} \ No newline at end of file diff --git a/tests/e2e_dbt_project/models/copy_numeric_column_anomalies.sql b/tests/e2e_dbt_project/models/copy_numeric_column_anomalies.sql new file mode 100644 index 000000000..b0db44fcc --- /dev/null +++ b/tests/e2e_dbt_project/models/copy_numeric_column_anomalies.sql @@ -0,0 +1 @@ +select * from {{ ref("numeric_column_anomalies") }} diff --git a/tests/e2e_dbt_project/models/dimension_anomalies.sql b/tests/e2e_dbt_project/models/dimension_anomalies.sql new file mode 100644 index 000000000..50a446957 --- /dev/null +++ b/tests/e2e_dbt_project/models/dimension_anomalies.sql @@ -0,0 +1,30 @@ +with training as ( + select * from {{ ref('dimension_anomalies_training') }} +), + +{% if var("stage") == "validation" %} + validation as ( + select * from {{ ref('dimension_anomalies_validation') }} + ), + + source as ( + select * from training + union all + select * from validation + ), +{% else %} + source as ( + select * from training + ), +{% endif %} + + final as ( + select + updated_at, + platform, + version, + user_id + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/ephemeral_model.sql b/tests/e2e_dbt_project/models/ephemeral_model.sql new file mode 100644 index 000000000..a4c2b477b --- /dev/null +++ b/tests/e2e_dbt_project/models/ephemeral_model.sql @@ -0,0 +1,7 @@ +{{ + config( + materialized='ephemeral' + ) +}} + +select * from {{ ref('any_type_column_anomalies_training') }} \ No 
newline at end of file diff --git a/tests/e2e_dbt_project/models/error_model.sql b/tests/e2e_dbt_project/models/error_model.sql new file mode 100644 index 000000000..e76eb266d --- /dev/null +++ b/tests/e2e_dbt_project/models/error_model.sql @@ -0,0 +1 @@ +select 'a's as string diff --git a/tests/e2e_dbt_project/models/groups.sql b/tests/e2e_dbt_project/models/groups.sql new file mode 100644 index 000000000..aa7a61a17 --- /dev/null +++ b/tests/e2e_dbt_project/models/groups.sql @@ -0,0 +1,5 @@ +{% if var("stage") == "training" %} + select * from {{ ref('groups_training') }} +{% elif var("stage") == "validation" %} + select * from {{ ref('groups_validation') }} +{% endif %} diff --git a/tests/e2e_dbt_project/models/nested/models/tree/nested.sql b/tests/e2e_dbt_project/models/nested/models/tree/nested.sql new file mode 100644 index 000000000..ec17f8541 --- /dev/null +++ b/tests/e2e_dbt_project/models/nested/models/tree/nested.sql @@ -0,0 +1 @@ +select 1 as one diff --git a/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql b/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql new file mode 100644 index 000000000..aa0952be7 --- /dev/null +++ b/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql @@ -0,0 +1,36 @@ +with training as ( + select * from {{ ref('any_type_column_anomalies_training') }} +), + +{% if var("stage") == "validation" %} + validation as ( + select * from {{ ref('any_type_column_anomalies_validation') }} + ), + + source as ( + select * from training + union all + select * from validation + ), +{% else %} + source as ( + select * from training + ), +{% endif %} + + final as ( + select + updated_at, + occurred_at, + null_count_str, + null_percent_str, + null_count_float, + null_percent_float, + null_count_int, + null_percent_int, + null_count_bool, + null_percent_bool + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/non_dbt_model.sql b/tests/e2e_dbt_project/models/non_dbt_model.sql new file mode 100644 index 
000000000..4a7301931 --- /dev/null +++ b/tests/e2e_dbt_project/models/non_dbt_model.sql @@ -0,0 +1,3 @@ +{{ config(materialized='non_dbt') }} + SELECT 1 +-- depends_on: {{ ref('one') }} \ No newline at end of file diff --git a/tests/e2e_dbt_project/models/numeric_column_anomalies.sql b/tests/e2e_dbt_project/models/numeric_column_anomalies.sql new file mode 100644 index 000000000..27326fa56 --- /dev/null +++ b/tests/e2e_dbt_project/models/numeric_column_anomalies.sql @@ -0,0 +1,36 @@ +with training as ( + select * from {{ ref('numeric_column_anomalies_training') }} +), + +{% if var("stage") == "validation" %} + validation as ( + select * from {{ ref('numeric_column_anomalies_validation') }} + ), + + source as ( + select * from training + union all + select * from validation + ), +{% else %} + source as ( + select * from training + ), +{% endif %} + + final as ( + select + updated_at, + occurred_at, + min, + max, + zero_count, + zero_percent, + average, + standard_deviation, + variance, + sum + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/one.sql b/tests/e2e_dbt_project/models/one.sql new file mode 100644 index 000000000..ec17f8541 --- /dev/null +++ b/tests/e2e_dbt_project/models/one.sql @@ -0,0 +1 @@ +select 1 as one diff --git a/tests/e2e_dbt_project/models/schema.yml b/tests/e2e_dbt_project/models/schema.yml new file mode 100644 index 000000000..31f376284 --- /dev/null +++ b/tests/e2e_dbt_project/models/schema.yml @@ -0,0 +1,734 @@ +version: 2 + +models: + - name: one + config: + tags: "{{ var('one_tags', []) }}" + meta: + owner: "{{ var('one_owner', none) }}" + columns: + - name: one + tests: + - accepted_values: + meta: + owner: "@elon" + values: [2, 3] + + - name: any_type_column_anomalies + meta: + owner: ["@edr"] + subscribers: "@egk" + description: > + This is a very weird description + with breaklines + and comma, + and even a string like this 'wow'. You know, these $##$34#@#!^ can also be helpful + WDYT? 
+ config: + elementary: + timestamp_column: updated_at + tests: + - elementary.volume_anomalies: + time_bucket: + period: hour + count: 4 + meta: + description: > + This is a very weird description + with breaklines + and comma, + and even a string like this 'wow'. You know, these $##$34#@#!^ can also be helpful + WDYT? + config: + severity: warn + tags: ["table_anomalies"] + - elementary.volume_anomalies: + time_bucket: + period: week + count: 1 + config: + severity: warn + where: 1=1 + tags: ["table_anomalies"] + - elementary.all_columns_anomalies: + tags: ["all_any_type_columns_anomalies", "column_anomalies"] + #This here is to simulate a long test name as test params are part of the test name + exclude_regexp: ".*column1|column2|column3|column4|column5|column6|column7|column8|column9|column10|column11|column12|column13|column14|column15|column16|column17.*" + - generic_test_on_model: + tags: ["regular_tests"] + - elementary.all_columns_anomalies: + anomaly_direction: "drop" + where: 1=1 + tags: ["directional_anomalies", "drop"] + - elementary.all_columns_anomalies: + anomaly_direction: "spike" + tags: ["directional_anomalies", "spike"] + + - name: no_timestamp_anomalies + meta: + owner: "elon@elementary-data.com, or@elementary-data.com" + subscribers: ["elon@elementary-data.com"] + description: This is a description. 
+ description: We use this model to test anomalies when there is no timestamp column + tests: + - elementary.volume_anomalies: + tags: ["no_timestamp"] + + columns: + - name: "null_count_str" + tests: + - elementary.column_anomalies: + tags: ["no_timestamp"] + where: 1=1 + column_anomalies: + - null_count + + - name: dimension_anomalies + meta: + owner: "egk" + subscribers: "elon, egk" + description: We use this model to test dimension anomalies + tests: + - elementary.dimension_anomalies: + tags: ["dimension_anomalies", "should_fail"] + alias: "dimension_anomalies_platform" + timestamp_column: updated_at + where: 1=1 + dimensions: + - platform + - elementary.dimension_anomalies: + alias: "dimension_anomalies_platform_where_expression" + tags: ["dimension_anomalies"] + timestamp_column: updated_at + dimensions: + - platform + where_expression: "platform = 'android'" + - elementary.dimension_anomalies: + alias: "dimension_anomalies_platform_new_dimension" + tags: ["dimension_anomalies"] + timestamp_column: updated_at + dimensions: + - platform + where_expression: "platform = 'windows'" + - elementary.dimension_anomalies: + alias: "dimension_anomalies_platform_new_dimension_no_timestamp" + tags: ["dimension_anomalies"] + dimensions: + - platform + where_expression: "platform = 'windows'" + - elementary.dimension_anomalies: + tags: ["dimension_anomalies", "should_fail"] + alias: "dimension_anomalies_platform_version" + timestamp_column: updated_at + dimensions: + - platform + - version + - elementary.dimension_anomalies: + anomaly_direction: "spike" + tags: ["directional_anomalies", "spike"] + timestamp_column: updated_at + dimensions: + - platform + - elementary.dimension_anomalies: + anomaly_direction: "drop" + tags: ["directional_anomalies", "drop"] + timestamp_column: updated_at + dimensions: + - platform + - elementary.dimension_anomalies: + dimensions: + - platform + tags: ["dimension_anomalies"] + alias: "dimension_anomalies_no_timestamp" + + - name: 
error_model + description: We use this model to create error runs and tests + meta: + owner: ["elon@elementary-data.com", "@elon", "egk"] + config: + tags: ["error_model"] + columns: + - name: "missing_column" + tests: + - uniques: + tags: ["error_test", "regular_tests"] + + - name: backfill_days_column_anomalies + config: + elementary: + timestamp_column: updated_at + columns: + - name: "min_length" + tests: + - elementary.column_anomalies: + column_anomalies: + - min_length + - max_length + tags: ["backfill_days"] + - elementary.column_anomalies: + backfill_days: 7 + column_anomalies: + - min_length + - max_length + tags: ["backfill_days"] + + - name: string_column_anomalies + meta: + owner: "@or" + tags: ["marketing"] + config: + elementary: + timestamp_column: updated_at + tests: + - elementary.freshness_anomalies: + tags: ["table_anomalies"] + - elementary.event_freshness_anomalies: + tags: ["event_freshness_anomalies"] + event_timestamp_column: occurred_at + update_timestamp_column: updated_at + - elementary.all_columns_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + - elementary.schema_changes: + where: 1=1 + tags: ["schema_changes"] + columns: + - name: "min_length" + tests: + - relationships: + tags: ["regular_tests"] + to: source('training', 'string_column_anomalies_training') + field: max_length + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + column_anomalies: + - min_length + - max_length + - missing_count + - name: max_length + tests: + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + - name: average_length + tests: + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + column_anomalies: + - average_length + - null_count + - name: missing_count + tests: + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + - name: missing_percent + tests: + - elementary.column_anomalies: + tags: 
["string_column_anomalies", "column_anomalies"] + - name: updated_at + tests: + - elementary.column_anomalies: + tags: ["string_column_anomalies", "column_anomalies"] + + - name: numeric_column_anomalies + config: + elementary: + timestamp_column: updated_at + tests: + - elementary.volume_anomalies: + tags: ["table_anomalies"] + - elementary.volume_anomalies: + anomaly_direction: "drop" + tags: ["directional_anomalies", "drop"] + - elementary.volume_anomalies: + anomaly_direction: "spike" + tags: ["directional_anomalies", "spike"] + - elementary.freshness_anomalies: + tags: ["table_anomalies"] + - elementary.event_freshness_anomalies: + tags: ["event_freshness_anomalies"] + event_timestamp_column: occurred_at + update_timestamp_column: updated_at + - elementary.schema_changes: + tags: ["schema_changes"] + - elementary.all_columns_anomalies: + tags: ["all_numeric_columns_anomalies"] + column_anomalies: + - average_length + - null_count + columns: + - name: min + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - min + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - max + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - average + - name: max + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - min + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - max + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - average + - elementary.column_anomalies: + column_anomalies: + - average + anomaly_direction: "spike" + tags: ["directional_anomalies", "spike"] + - elementary.column_anomalies: + column_anomalies: + - average + anomaly_direction: "drop" + tags: ["directional_anomalies", "drop"] + - 
name: average + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - min + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - max + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + column_anomalies: + - average + - name: zero_count + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: zero_percent + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: updated_at + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: variance + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: standard_deviation + tests: + - elementary.column_anomalies: + tags: ["numeric_column_anomalies", "column_anomalies"] + - name: sum + tests: + - elementary.column_anomalies: + column_anomalies: + - sum + tags: ["numeric_column_anomalies", "column_anomalies"] + + - name: copy_numeric_column_anomalies + config: + elementary: + timestamp_column: updated_at + tests: + - elementary.all_columns_anomalies: + column_anomalies: + - zero_count + tags: ["numeric_column_anomalies", "column_anomalies"] + + - name: groups + columns: + - name: group_a + data_type: "{{ 'strIng' if (target.type == 'bigquery' or target.type == 'databricks' or target.type == 'athena') else 'CHArACTER varying' if target.type == 'redshift' else 'teXt' }}" + - name: group_b + data_type: double + - name: group_c + tests: + - elementary.schema_changes: + tags: ["schema_changes"] + - elementary.schema_changes_from_baseline: + fail_on_added: true + where: 1=1 + tags: ["schema_changes_from_baseline"] + - elementary.schema_changes_from_baseline: + tags: ["schema_changes_from_baseline", "error_test"] + enforce_types: true + + - name: stats_players + 
columns: + - name: player + data_type: "{{ 'STRING' if (target.type == 'bigquery' or target.type == 'databricks' or target.type == 'athena') else 'character varying' if target.type == 'redshift' else 'TEXT' }}" + - name: goals + data_type: BOOLEAN + - name: coffee_cups_consumed + data_type: INTEGER + tests: + - elementary.schema_changes: + tags: ["schema_changes"] + - elementary.schema_changes_from_baseline: + tags: ["schema_changes_from_baseline"] + - elementary.schema_changes_from_baseline: + tags: ["schema_changes_from_baseline"] + enforce_types: true + + - name: stats_team + tests: + - elementary.schema_changes: + tags: ["schema_changes"] + + - name: users_per_day_weekly_seasonal + config: + elementary: + backfill_days: 14 + tests: + - elementary.volume_anomalies: + alias: day_of_week_volume_anomalies_no_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + - elementary.volume_anomalies: + alias: day_of_week_volume_anomalies_with_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + seasonality: day_of_week + - elementary.volume_anomalies: + alias: hour_of_week_volume_anomalies_no_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + time_bucket: + period: hour + count: 1 + - elementary.volume_anomalies: + alias: hour_of_week_volume_anomalies_with_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + time_bucket: + period: hour + count: 1 + seasonality: hour_of_week + + - name: users_per_hour_daily_seasonal + tests: + - elementary.volume_anomalies: + alias: hour_of_day_volume_anomalies_no_seasonality + timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + time_bucket: + period: hour + count: 1 + - elementary.volume_anomalies: + alias: hour_of_day_volume_anomalies_with_seasonality 
+ timestamp_column: "updated_at" + tags: ["seasonality_volume", "table_anomalies"] + sensitivity: 2 + time_bucket: + period: hour + count: 1 + seasonality: hour_of_day + + - name: ephemeral_model + config: + elementary: + timestamp_column: updated_at + tests: + - elementary.volume_anomalies: + tags: ["ephemeral_model", "error_test"] + - elementary.all_columns_anomalies: + tags: ["ephemeral_model", "error_test"] + - elementary.freshness_anomalies: + where: 1=1 + tags: ["ephemeral_model", "error_test"] + - elementary.schema_changes: + tags: ["ephemeral_model", "error_test"] + + - name: config_levels_test_and_model + tags: ["config_levels"] + config: + elementary: + min_training_set_size: 22 + days_back: 100 + backfill_days: 10 + anomaly_direction: "drop" + anomaly_sensitivity: 4 + where_expression: "true" + timestamp_column: "updated_at" + time_bucket: + period: hour + count: 4 + tests: + - config_levels: + tags: ["config_levels"] + alias: "test_level_config" + min_training_set_size: 18 + days_back: 5 + backfill_days: 5 + seasonality: "day_of_week" + anomaly_direction: "spike" + anomaly_sensitivity: 5 + where_expression: "1=1" + timestamp_column: "occurred_at" + time_bucket: + period: day + count: 1 + expected_config: ## Test level expected config + seasonality: "day_of_week" + min_training_set_size: 18 + days_back: 35 ## *7 because of seasonality + backfill_days: 5 + anomaly_direction: "spike" + anomaly_sensitivity: 5 + where_expression: "1=1" + timestamp_column: "occurred_at" + time_bucket: + period: day + count: 1 + - config_levels: + tags: ["config_levels"] + alias: "model_level_config" + expected_config: ## Model level expected config + min_training_set_size: 22 + seasonality: null + days_back: 100 + backfill_days: 10 + anomaly_direction: "drop" + anomaly_sensitivity: 4 + where_expression: "true" + timestamp_column: "updated_at" + time_bucket: + period: hour + count: 4 + - name: config_levels_project + tags: ["config_levels"] + tests: + - config_levels: + tags: 
["config_levels"] + alias: "project_level_config" + expected_config: ## Project level expected config + min_training_set_size: 14 + seasonality: null + days_back: 30 + backfill_days: 2 + anomaly_direction: both + anomaly_sensitivity: 3 + where_expression: null + timestamp_column: null + time_bucket: + period: day + count: 1 + +sources: + - name: training + schema: test_seeds + tables: + - name: users_per_hour_daily_seasonal_training + columns: + - name: "user_id" + tests: + - relationships: + tags: ["regular_tests"] + to: source('training', 'users_per_day_weekly_seasonal_training') + field: user_id + - name: any_type_column_anomalies_training + meta: + owner: ["@edr", "egk"] + freshness: + error_after: + count: 1 + period: minute + loaded_at_field: updated_at + tests: + - elementary.volume_anomalies: + tags: ["table_anomalies"] + - elementary.freshness_anomalies: + tags: ["table_anomalies", "error_test"] + - elementary.event_freshness_anomalies: + tags: ["event_freshness_anomalies"] + event_timestamp_column: occurred_at + - name: string_column_anomalies_training + meta: + owner: "@edr" + elementary: + timestamp_column: updated_at + freshness: + error_after: + count: 1 + period: minute + loaded_at_field: no_such_column + tests: + - elementary.volume_anomalies: + tags: ["table_anomalies"] + - elementary.freshness_anomalies: + tags: ["table_anomalies"] + - elementary.event_freshness_anomalies: + tags: ["event_freshness_anomalies"] + event_timestamp_column: occurred_at + update_timestamp_column: updated_at + - name: numeric_column_anomalies_training + meta: + elementary: + min_training_set_size: 22 + days_back: 100 + backfill_days: 10 + anomaly_direction: "drop" + anomaly_sensitivity: 4 + where_expression: "true" + timestamp_column: "updated_at" + time_bucket: + period: hour + count: 4 + tests: + - config_levels: + tags: ["config_levels"] + alias: "test_level_config" + min_training_set_size: 18 + days_back: 5 + backfill_days: 5 + seasonality: "day_of_week" + 
anomaly_direction: "spike" + anomaly_sensitivity: 5 + where_expression: "1=1" + timestamp_column: "occurred_at" + time_bucket: + period: day + count: 1 + expected_config: ## Test level expected config + seasonality: "day_of_week" + min_training_set_size: 18 + days_back: 35 ## *7 because of seasonality + backfill_days: 5 + anomaly_direction: "spike" + anomaly_sensitivity: 5 + where_expression: "1=1" + timestamp_column: "occurred_at" + time_bucket: + period: day + count: 1 + - config_levels: + tags: ["config_levels"] + alias: "model_level_config" + expected_config: ## Model level expected config + min_training_set_size: 22 + seasonality: null + days_back: 100 + backfill_days: 10 + anomaly_direction: "drop" + anomaly_sensitivity: 4 + where_expression: "true" + timestamp_column: "updated_at" + time_bucket: + period: hour + count: 4 + + - name: users_per_day_weekly_seasonal_training + - name: validation + schema: test_seeds + tables: + - name: users_per_hour_daily_seasonal_validation + - name: any_type_column_anomalies_validation + meta: + owner: "hello, world" + freshness: + warn_after: + count: 1 + period: minute + loaded_at_field: updated_at + tests: + - elementary.all_columns_anomalies: + tags: ["elementary_source"] + columns: + - name: null_count_int + tests: + - generic_test_on_column: + tags: ["regular_tests"] + - name: users_per_day_weekly_seasonal_validation + +exposures: + - name: elementary_exposure + type: application + maturity: medium + url: https://elementary.not.really + description: > + Keep calm, Elementary tests exposures. + depends_on: + - ref('error_model') + - source('training', 'any_type_column_anomalies_training') + owner: + name: Complete Nonsense + email: fake@fakerson.com + tags: + - marketing + + - name: weekly_jaffle_metrics + type: dashboard + maturity: high + url: https://bi.tool/dashboards/1 + description: > + Did someone say "exponential growth"? 
+ depends_on: + - ref('string_column_anomalies') + - ref('numeric_column_anomalies') + owner: + name: Claire from Data + email: data@jaffleshop.com + tags: + - hack + - the + - planet + meta: + platform: Tableau + workbook: By the Week + path: ByTheWeek/Jaffles + + - name: monthly_jaffle_metrics + type: dashboard + maturity: high + url: https://bi.tool/dashboards/2 + description: > + Did someone say "exponential growth"? + depends_on: + - ref('string_column_anomalies') + - ref('numeric_column_anomalies') + owner: + name: Claire from Data + email: data@jaffleshop.com + tags: + - hack + - the + - planet + meta: + platform: Looker + workbook: By the Month + path: ByTheMonth/Jaffles + + - name: daily_jaffle_metrics + type: dashboard + maturity: high + url: https://bi.tool/dashboards/3 + description: > + Did someone say "exponential growth"? + depends_on: + - ref('string_column_anomalies') + - ref('numeric_column_anomalies') + owner: + name: Claire from Data + email: data@jaffleshop.com + tags: + - hack + - the + - planet + meta: + platform: bi.tool + workbook: By the Day + path: ByTheDay/Jaffles diff --git a/tests/e2e_dbt_project/models/stats_players.sql b/tests/e2e_dbt_project/models/stats_players.sql new file mode 100644 index 000000000..0f5511aa3 --- /dev/null +++ b/tests/e2e_dbt_project/models/stats_players.sql @@ -0,0 +1,5 @@ +{% if var("stage") == "training" %} + select * from {{ ref('stats_players_training') }} +{% elif var("stage") == "validation" %} + select * from {{ ref('stats_players_validation') }} +{% endif %} diff --git a/tests/e2e_dbt_project/models/stats_team.sql b/tests/e2e_dbt_project/models/stats_team.sql new file mode 100644 index 000000000..af998e794 --- /dev/null +++ b/tests/e2e_dbt_project/models/stats_team.sql @@ -0,0 +1,5 @@ +{% if var("stage") == "training" %} + select * from {{ ref('stats_team_training') }} +{% elif var("stage") == "validation" %} + select * from {{ ref('stats_team_validation') }} +{% endif %} diff --git 
a/tests/e2e_dbt_project/models/string_column_anomalies.sql b/tests/e2e_dbt_project/models/string_column_anomalies.sql new file mode 100644 index 000000000..2960950fd --- /dev/null +++ b/tests/e2e_dbt_project/models/string_column_anomalies.sql @@ -0,0 +1,33 @@ +with training as ( + select * from {{ ref('string_column_anomalies_training') }} +), + +{% if var("stage") == "validation" %} +validation as ( + select * from {{ ref('string_column_anomalies_validation') }} +), + +source as ( + select * from training + union all + select * from validation +), +{% else %} +source as ( + select * from training +), +{% endif %} + +final as ( + select + updated_at, + occurred_at, + min_length, + max_length, + average_length, + missing_count, + missing_percent + from source +) + +select * from final diff --git a/tests/e2e_dbt_project/models/test_alerts_union.sql b/tests/e2e_dbt_project/models/test_alerts_union.sql new file mode 100644 index 000000000..0567ab927 --- /dev/null +++ b/tests/e2e_dbt_project/models/test_alerts_union.sql @@ -0,0 +1,18 @@ +with dbt as ( + select * from {{ ref('alerts_dbt_tests') }} +), +{%- if target.type != 'databricks' %} +schema_changes as ( + select * from {{ ref('alerts_schema_changes') }} +), +{%- endif %} +anomalies as ( + select * from {{ ref('alerts_anomaly_detection') }} +) +select * from dbt +union all +select * from anomalies +{%- if target.type != 'databricks' %} +union all +select * from schema_changes +{%- endif %} diff --git a/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql b/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql new file mode 100644 index 000000000..c8678dbe0 --- /dev/null +++ b/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql @@ -0,0 +1,28 @@ +with training as ( + select * from {{ source('training', 'users_per_day_weekly_seasonal_training') }} +), + +{% if var("stage") == "validation" %} + validation as ( + select * from {{ source('validation', 
'users_per_day_weekly_seasonal_validation') }} + ), + + source as ( + select * from training + union all + select * from validation + ), +{% else %} + source as ( + select * from training + ), +{% endif %} + + final as ( + select + updated_at, + user_id + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql b/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql new file mode 100644 index 000000000..6d65fe120 --- /dev/null +++ b/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql @@ -0,0 +1,28 @@ +with training as ( + select * from {{ source('training', 'users_per_hour_daily_seasonal_training') }} +), + +{% if var("stage") == "validation" %} + validation as ( + select * from {{ source('validation', 'users_per_hour_daily_seasonal_validation') }} + ), + + source as ( + select * from training + union all + select * from validation + ), +{% else %} + source as ( + select * from training + ), +{% endif %} + + final as ( + select + updated_at, + user_id + from source + ) + +select * from final diff --git a/tests/e2e_dbt_project/packages.yml b/tests/e2e_dbt_project/packages.yml new file mode 100644 index 000000000..b74c0ef5a --- /dev/null +++ b/tests/e2e_dbt_project/packages.yml @@ -0,0 +1,10 @@ +packages: + - local: ../../ + - package: dbt-labs/dbt_utils + version: | + {%- set minor_to_utils_range_map = { + "0": [">=0.8.0", "<0.9.0"], + "1": [">=0.8.0", "<0.9.0"], + "2": [">=0.8.0", "<1.0.0"], + } -%} + {{- minor_to_utils_range_map.get(dbt_version.split('.')[1], [">=0.8.0", "<2.0.0"]) -}} diff --git a/tests/e2e_dbt_project/snapshots/failed_snapshot.sql b/tests/e2e_dbt_project/snapshots/failed_snapshot.sql new file mode 100644 index 000000000..0c92e6afa --- /dev/null +++ b/tests/e2e_dbt_project/snapshots/failed_snapshot.sql @@ -0,0 +1,12 @@ +{% snapshot failed_snapshot() %} + +{{ + config( + target_schema='snapshots', + unique_key='unique_id', + strategy='timestamp', + 
updated_at='generated_at', + ) +}} + SELECT FAILED_SNAPSHOT +{% endsnapshot %} diff --git a/tests/e2e_dbt_project/tests/singular_test_with_no_ref.sql b/tests/e2e_dbt_project/tests/singular_test_with_no_ref.sql new file mode 100644 index 000000000..f7dfdc589 --- /dev/null +++ b/tests/e2e_dbt_project/tests/singular_test_with_no_ref.sql @@ -0,0 +1,2 @@ +{% set relation = api.Relation.create(database=elementary.target_database(), schema=target.schema, identifier='numeric_column_anomalies') %} +select min from {{ relation }} where min < 100 diff --git a/tests/e2e_dbt_project/tests/singular_test_with_one_ref.sql b/tests/e2e_dbt_project/tests/singular_test_with_one_ref.sql new file mode 100644 index 000000000..22931a7fb --- /dev/null +++ b/tests/e2e_dbt_project/tests/singular_test_with_one_ref.sql @@ -0,0 +1 @@ +select min from {{ ref('numeric_column_anomalies') }} where min < 100 \ No newline at end of file diff --git a/tests/e2e_dbt_project/tests/singular_test_with_source_ref.sql b/tests/e2e_dbt_project/tests/singular_test_with_source_ref.sql new file mode 100644 index 000000000..2d61ce9a1 --- /dev/null +++ b/tests/e2e_dbt_project/tests/singular_test_with_source_ref.sql @@ -0,0 +1 @@ +select min from {{ source('training', 'numeric_column_anomalies_training') }} where min < 105 \ No newline at end of file diff --git a/tests/e2e_dbt_project/tests/singular_test_with_two_refs.sql b/tests/e2e_dbt_project/tests/singular_test_with_two_refs.sql new file mode 100644 index 000000000..2524955af --- /dev/null +++ b/tests/e2e_dbt_project/tests/singular_test_with_two_refs.sql @@ -0,0 +1,15 @@ +with min_len_issues as ( + select null_count_int as min_issue from {{ ref('any_type_column_anomalies') }} where null_count_int < 100 +), + +min_issues as ( + select min as min_issue from {{ ref('numeric_column_anomalies') }} where min < 100 +), + +all_issues as ( + select * from min_len_issues + union all + select * from min_issues +) + +select * from all_issues From 
ec7e9cf1e876c78e2dfbdfeca39f405d4c0780a6 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 14:07:11 +0300 Subject: [PATCH 02/11] e2e project models: remove stage ifs --- .../models/any_type_column_anomalies.sql | 22 +++++------- .../models/backfill_days_column_anomalies.sql | 36 ++++++++----------- .../models/dimension_anomalies.sql | 22 +++++------- tests/e2e_dbt_project/models/groups.sql | 6 +--- .../models/no_timestamp_anomalies.sql | 22 +++++------- .../models/numeric_column_anomalies.sql | 22 +++++------- .../e2e_dbt_project/models/stats_players.sql | 6 +--- tests/e2e_dbt_project/models/stats_team.sql | 6 +--- .../models/string_column_anomalies.sql | 6 ---- .../models/users_per_day_weekly_seasonal.sql | 22 +++++------- .../models/users_per_hour_daily_seasonal.sql | 22 +++++------- 11 files changed, 66 insertions(+), 126 deletions(-) diff --git a/tests/e2e_dbt_project/models/any_type_column_anomalies.sql b/tests/e2e_dbt_project/models/any_type_column_anomalies.sql index aa0952be7..2714303fa 100644 --- a/tests/e2e_dbt_project/models/any_type_column_anomalies.sql +++ b/tests/e2e_dbt_project/models/any_type_column_anomalies.sql @@ -2,21 +2,15 @@ with training as ( select * from {{ ref('any_type_column_anomalies_training') }} ), -{% if var("stage") == "validation" %} - validation as ( - select * from {{ ref('any_type_column_anomalies_validation') }} - ), +validation as ( + select * from {{ ref('any_type_column_anomalies_validation') }} +), - source as ( - select * from training - union all - select * from validation - ), -{% else %} - source as ( - select * from training - ), -{% endif %} +source as ( + select * from training + union all + select * from validation +), final as ( select diff --git a/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql b/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql index bcaddcd7e..56cfb2357 100644 --- a/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql +++ 
b/tests/e2e_dbt_project/models/backfill_days_column_anomalies.sql @@ -2,28 +2,22 @@ with training as ( select * from {{ ref('backfill_days_column_anomalies_training') }} ), - {% if var("stage") == "validation" %} - validation as ( - select * from {{ ref('backfill_days_column_anomalies_validation') }} - ), +validation as ( + select * from {{ ref('backfill_days_column_anomalies_validation') }} +), - source as ( - select * from training - union all - select * from validation - ), - {% else %} - source as ( - select * from training - ), - {% endif %} +source as ( + select * from training + union all + select * from validation +), - final as ( - select - updated_at, - occurred_at, - min_length - from source - ) +final as ( + select + updated_at, + occurred_at, + min_length + from source +) select * from final diff --git a/tests/e2e_dbt_project/models/dimension_anomalies.sql b/tests/e2e_dbt_project/models/dimension_anomalies.sql index 50a446957..cee7364db 100644 --- a/tests/e2e_dbt_project/models/dimension_anomalies.sql +++ b/tests/e2e_dbt_project/models/dimension_anomalies.sql @@ -2,21 +2,15 @@ with training as ( select * from {{ ref('dimension_anomalies_training') }} ), -{% if var("stage") == "validation" %} - validation as ( - select * from {{ ref('dimension_anomalies_validation') }} - ), +validation as ( + select * from {{ ref('dimension_anomalies_validation') }} +), - source as ( - select * from training - union all - select * from validation - ), -{% else %} - source as ( - select * from training - ), -{% endif %} +source as ( + select * from training + union all + select * from validation +), final as ( select diff --git a/tests/e2e_dbt_project/models/groups.sql b/tests/e2e_dbt_project/models/groups.sql index aa7a61a17..bfda126dd 100644 --- a/tests/e2e_dbt_project/models/groups.sql +++ b/tests/e2e_dbt_project/models/groups.sql @@ -1,5 +1 @@ -{% if var("stage") == "training" %} - select * from {{ ref('groups_training') }} -{% elif var("stage") == "validation" %} - 
select * from {{ ref('groups_validation') }} -{% endif %} +select * from {{ ref('groups_validation') }} diff --git a/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql b/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql index aa0952be7..2714303fa 100644 --- a/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql +++ b/tests/e2e_dbt_project/models/no_timestamp_anomalies.sql @@ -2,21 +2,15 @@ with training as ( select * from {{ ref('any_type_column_anomalies_training') }} ), -{% if var("stage") == "validation" %} - validation as ( - select * from {{ ref('any_type_column_anomalies_validation') }} - ), +validation as ( + select * from {{ ref('any_type_column_anomalies_validation') }} +), - source as ( - select * from training - union all - select * from validation - ), -{% else %} - source as ( - select * from training - ), -{% endif %} +source as ( + select * from training + union all + select * from validation +), final as ( select diff --git a/tests/e2e_dbt_project/models/numeric_column_anomalies.sql b/tests/e2e_dbt_project/models/numeric_column_anomalies.sql index 27326fa56..8eebe37d8 100644 --- a/tests/e2e_dbt_project/models/numeric_column_anomalies.sql +++ b/tests/e2e_dbt_project/models/numeric_column_anomalies.sql @@ -2,21 +2,15 @@ with training as ( select * from {{ ref('numeric_column_anomalies_training') }} ), -{% if var("stage") == "validation" %} - validation as ( - select * from {{ ref('numeric_column_anomalies_validation') }} - ), +validation as ( + select * from {{ ref('numeric_column_anomalies_validation') }} +), - source as ( - select * from training - union all - select * from validation - ), -{% else %} - source as ( - select * from training - ), -{% endif %} +source as ( + select * from training + union all + select * from validation +), final as ( select diff --git a/tests/e2e_dbt_project/models/stats_players.sql b/tests/e2e_dbt_project/models/stats_players.sql index 0f5511aa3..72ed8c603 100644 --- 
a/tests/e2e_dbt_project/models/stats_players.sql +++ b/tests/e2e_dbt_project/models/stats_players.sql @@ -1,5 +1 @@ -{% if var("stage") == "training" %} - select * from {{ ref('stats_players_training') }} -{% elif var("stage") == "validation" %} - select * from {{ ref('stats_players_validation') }} -{% endif %} +select * from {{ ref('stats_players_validation') }} diff --git a/tests/e2e_dbt_project/models/stats_team.sql b/tests/e2e_dbt_project/models/stats_team.sql index af998e794..10dc3cb20 100644 --- a/tests/e2e_dbt_project/models/stats_team.sql +++ b/tests/e2e_dbt_project/models/stats_team.sql @@ -1,5 +1 @@ -{% if var("stage") == "training" %} - select * from {{ ref('stats_team_training') }} -{% elif var("stage") == "validation" %} - select * from {{ ref('stats_team_validation') }} -{% endif %} +select * from {{ ref('stats_team_validation') }} diff --git a/tests/e2e_dbt_project/models/string_column_anomalies.sql b/tests/e2e_dbt_project/models/string_column_anomalies.sql index 2960950fd..29e8e43fe 100644 --- a/tests/e2e_dbt_project/models/string_column_anomalies.sql +++ b/tests/e2e_dbt_project/models/string_column_anomalies.sql @@ -2,7 +2,6 @@ with training as ( select * from {{ ref('string_column_anomalies_training') }} ), -{% if var("stage") == "validation" %} validation as ( select * from {{ ref('string_column_anomalies_validation') }} ), @@ -12,11 +11,6 @@ source as ( union all select * from validation ), -{% else %} -source as ( - select * from training -), -{% endif %} final as ( select diff --git a/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql b/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql index c8678dbe0..f988e4e53 100644 --- a/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql +++ b/tests/e2e_dbt_project/models/users_per_day_weekly_seasonal.sql @@ -2,21 +2,15 @@ with training as ( select * from {{ source('training', 'users_per_day_weekly_seasonal_training') }} ), -{% if var("stage") == "validation" %} - 
validation as ( - select * from {{ source('validation', 'users_per_day_weekly_seasonal_validation') }} - ), +validation as ( + select * from {{ source('validation', 'users_per_day_weekly_seasonal_validation') }} +), - source as ( - select * from training - union all - select * from validation - ), -{% else %} - source as ( - select * from training - ), -{% endif %} +source as ( + select * from training + union all + select * from validation +), final as ( select diff --git a/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql b/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql index 6d65fe120..82f550e13 100644 --- a/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql +++ b/tests/e2e_dbt_project/models/users_per_hour_daily_seasonal.sql @@ -2,21 +2,15 @@ with training as ( select * from {{ source('training', 'users_per_hour_daily_seasonal_training') }} ), -{% if var("stage") == "validation" %} - validation as ( - select * from {{ source('validation', 'users_per_hour_daily_seasonal_validation') }} - ), +validation as ( + select * from {{ source('validation', 'users_per_hour_daily_seasonal_validation') }} +), - source as ( - select * from training - union all - select * from validation - ), -{% else %} - source as ( - select * from training - ), -{% endif %} +source as ( + select * from training + union all + select * from validation +), final as ( select From 6b8aff891583b0a49a3781eff9deef72ad6345dd Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 14:13:03 +0300 Subject: [PATCH 03/11] remove irrelevant macros --- tests/e2e_dbt_project/macros/asserts.sql | 19 -- .../macros/e2e_tests/clear_tests.sql | 39 ----- .../macros/e2e_tests/test_config_levels.sql | 53 ------ .../macros/e2e_tests/tests_validation.sql | 163 ------------------ .../e2e_tests/utils/list_assertions.sql | 65 ------- .../e2e_tests/utils/table_assertions.sql | 19 -- .../e2e_tests/validate_backfill_days.sql | 10 -- 
.../e2e_tests/validate_column_anomalies.sql | 79 --------- .../e2e_tests/validate_config_levels.sql | 18 -- .../validate_dimensions_anomalies.sql | 41 ----- .../validate_directional_anomalies.sql | 40 ----- .../validate_freshness_anomalies.sql | 14 -- .../e2e_tests/validate_schema_changes.sql | 61 ------- .../validate_seasonal_volume_anomalies.sql | 16 -- .../e2e_tests/validate_table_anomalies.sql | 25 --- .../macros/system/read_table.sql | 18 -- .../macros/system/return_config_var.sql | 6 - ...fic_macros_have_default_implementation.sql | 14 -- 18 files changed, 700 deletions(-) delete mode 100644 tests/e2e_dbt_project/macros/asserts.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/clear_tests.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/test_config_levels.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/tests_validation.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/utils/list_assertions.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/utils/table_assertions.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_backfill_days.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_column_anomalies.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_config_levels.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_dimensions_anomalies.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_directional_anomalies.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_freshness_anomalies.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_schema_changes.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_seasonal_volume_anomalies.sql delete mode 100644 tests/e2e_dbt_project/macros/e2e_tests/validate_table_anomalies.sql delete mode 100644 tests/e2e_dbt_project/macros/system/read_table.sql delete mode 100644 tests/e2e_dbt_project/macros/system/return_config_var.sql 
delete mode 100644 tests/e2e_dbt_project/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql diff --git a/tests/e2e_dbt_project/macros/asserts.sql b/tests/e2e_dbt_project/macros/asserts.sql deleted file mode 100644 index e87d1d688..000000000 --- a/tests/e2e_dbt_project/macros/asserts.sql +++ /dev/null @@ -1,19 +0,0 @@ -{% macro assert_value(value, expected_value) %} - {% if value != expected_value %} - {% do elementary.edr_log("FAILED: value " ~ value ~ " does not equal to " ~ expected_value) %} - {{ return(1) }} - {% else %} - {% do elementary.edr_log("SUCCESS") %} - {{ return(0) }} - {% endif %} -{% endmacro %} - -{% macro assert_str_in_value(str, value) %} - {% if str not in value %} - {% do elementary.edr_log("FAILED: the string " ~ str ~ " was not found in " ~ value) %} - {{ return(1) }} - {% else %} - {% do elementary.edr_log("SUCCESS") %} - {{ return(0) }} - {% endif %} -{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/clear_tests.sql b/tests/e2e_dbt_project/macros/e2e_tests/clear_tests.sql deleted file mode 100644 index 5dc7ebcaf..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/clear_tests.sql +++ /dev/null @@ -1,39 +0,0 @@ -{% macro clear_tests() %} - {% if execute %} - {% do elementary_integration_tests.edr_drop_schema(elementary.target_database(), target.schema) %} - - {% set database_name, schema_name = elementary.get_package_database_and_schema('elementary') %} - {% do elementary_integration_tests.edr_drop_schema(database_name, schema_name) %} - - {% set tests_schema_name = elementary.get_elementary_tests_schema(database_name, schema_name) %} - {% if tests_schema_name != schema_name %} - {% do elementary_integration_tests.edr_drop_schema(database_name, tests_schema_name) %} - {% else %} - {{ elementary.edr_log("Tests schema is the same as the main elementary schema, nothing to drop.") }} - {% endif %} - {% endif %} - {{ return('') }} -{% endmacro %} - -{% macro 
edr_drop_schema(database_name, schema_name) %} - {% do return(adapter.dispatch('edr_drop_schema','elementary_integration_tests')(database_name, schema_name)) %} -{% endmacro %} - -{% macro default__edr_drop_schema(database_name, schema_name) %} - {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name) %} - {% do dbt.drop_schema(schema_relation) %} - {% do adapter.commit() %} - {% do elementary.edr_log("dropped schema {}".format(schema_relation | string)) %} -{% endmacro %} - -{% macro clickhouse__edr_drop_schema(database_name, schema_name) %} - {% set results = run_query("SELECT name FROM system.tables WHERE database = '" ~ database_name ~ "'") %} - {% if execute %} - {% for row in results %} - {% set table = row[0] %} - {% do run_query("DROP TABLE IF EXISTS " ~ database_name ~ "." ~ table) %} - {% endfor %} - {% endif %} - {% do adapter.commit() %} - {% do elementary.edr_log("dropped schema {}".format(schema_name)) %} -{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/test_config_levels.sql b/tests/e2e_dbt_project/macros/e2e_tests/test_config_levels.sql deleted file mode 100644 index b63bda60b..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/test_config_levels.sql +++ /dev/null @@ -1,53 +0,0 @@ -{% test config_levels(model, expected_config, timestamp_column, time_bucket, where_expression, anomaly_sensitivity, anomaly_direction, days_back, backfill_days, seasonality, min_training_set_size) %} - {%- if execute and elementary.is_test_command() %} - {%- set unexpected_config = [] %} - {%- set model_relation = dbt.load_relation(model) %} - - {% set configuration_dict, metric_properties_dict = - elementary.get_anomalies_test_configuration(model_relation, - mandatory_params, - timestamp_column, - where_expression, - anomaly_sensitivity, - anomaly_direction, - min_training_set_size, - time_bucket, - days_back, - backfill_days, - seasonality) %} - - {%- set configs_to_test = [('timestamp_column', 
metric_properties_dict.timestamp_column), - ('where_expression', metric_properties_dict.where_expression), - ('time_bucket', configuration_dict.time_bucket), - ('anomaly_sensitivity', configuration_dict.anomaly_sensitivity), - ('anomaly_direction', configuration_dict.anomaly_direction), - ('min_training_set_size', configuration_dict.min_training_set_size), - ('days_back', configuration_dict.days_back), - ('backfill_days', configuration_dict.backfill_days), - ('seasonality', configuration_dict.seasonality) - ] %} - - {%- for config in configs_to_test %} - {%- set config_name, config_value = config %} - {%- set config_check = compare_configs(config_name, config_value, expected_config) %} - {%- if config_check %} - {%- do unexpected_config.append(config_check) -%} - {%- endif %} - {%- endfor %} - - {%- if unexpected_config | length > 0 %} - {%- do exceptions.raise_compiler_error('Failure config_levels: ' ~ unexpected_config) -%} - {%- else %} - {#- test must run an sql query -#} - {{ elementary.no_results_query() }} - {%- endif %} - {%- endif %} -{%- endtest %} - -{% macro compare_configs(config_name, config, expected_config) %} - {%- if config != expected_config.get(config_name) %} - {%- set unexpected_message = ('For {0} - got config: {1}, expected config: {2}').format(config_name, config, expected_config.get(config_name) ) %} - {{ return(unexpected_message) }} - {%- endif %} - {{ return(none) }} -{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/tests_validation.sql b/tests/e2e_dbt_project/macros/e2e_tests/tests_validation.sql deleted file mode 100644 index b6b52c85d..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/tests_validation.sql +++ /dev/null @@ -1,163 +0,0 @@ -{% macro validate_no_timestamp_anomalies() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - - {# Validating row count for no timestamp table anomaly #} - {% set no_timestamp_row_count_validation_query %} - select distinct table_name 
- from {{ alerts_relation }} - where status in ('fail', 'warn') and sub_type = 'row_count' - and upper(table_name) = 'NO_TIMESTAMP_ANOMALIES' - {% endset %} - {% set results = elementary.result_column_to_list(no_timestamp_row_count_validation_query) %} - {{ assert_lists_contain_same_items(results, ['no_timestamp_anomalies']) }} - - {# Validating any column anomaly with no timestamp #} - {% set no_timestamp_column_validation_alerts %} - select column_name, sub_type - from {{ alerts_relation }} - where status in ('fail', 'warn') and upper(table_name) = 'NO_TIMESTAMP_ANOMALIES' - and column_name is not NULL - group by 1,2 - {% endset %} - {% set alert_rows = run_query(no_timestamp_column_validation_alerts) %} - {% set indexed_columns = {} %} - {% for row in alert_rows %} - {% set column_name = row[0] %} - {% set alert = row[1] %} - {% if column_name in indexed_columns %} - {% do indexed_columns[column_name].append(alert) %} - {% else %} - {% do indexed_columns.update({column_name: [alert]}) %} - {% endif %} - {% endfor %} - {% set results = [] %} - {% for column, column_alerts in indexed_columns.items() %} - {% for alert in column_alerts %} - {% if alert | lower in column | lower %} - {% do results.append(column) %} - {% endif %} - {% endfor %} - {% endfor %} - {{ assert_lists_contain_same_items(results, ['null_count_str']) }} -{% endmacro %} - -{% macro validate_error_test() %} - {% set alerts_relation = ref('test_alerts_union') %} - - {# Validating alert for error test was created #} - {% set error_test_validation_query %} - with error_tests as ( - select - distinct test_name, - {{ elementary.contains('tags', 'error_test') }} as error_tag - from {{ alerts_relation }} - where status = 'error' - ) - select - case when error_tag = true then 'error' - else 'error: ' || test_name - end as error_tests - from error_tests - {% endset %} - {% set results = elementary.result_column_to_list(error_test_validation_query) | unique | list %} - {{ 
assert_lists_contain_same_items(results, ['error']) }} -{% endmacro %} - -{% macro validate_error_model() %} - {% set alerts_relation = ref('alerts_dbt_models') %} - - {% set error_model_validation_query %} - select distinct status - from {{ alerts_relation }} - where status = 'error' and materialization != 'snapshot' - {% endset %} - {% set results = elementary.result_column_to_list(error_model_validation_query) %} - {{ assert_lists_contain_same_items(results, ['error']) }} -{% endmacro %} - -{% macro validate_error_snapshot() %} - {% set alerts_relation = ref('alerts_dbt_models') %} - - {% set error_snapshot_validation_query %} - select distinct status - from {{ alerts_relation }} - where status = 'error' and materialization = 'snapshot' - {% endset %} - {% set results = elementary.result_column_to_list(error_snapshot_validation_query) %} - {{ assert_lists_contain_same_items(results, ['error']) }} -{% endmacro %} - -{% macro validate_regular_tests() %} - {% set alerts_relation = ref('alerts_dbt_tests') %} - {% set dbt_test_alerts %} - select table_name, column_name, test_name - from {{ alerts_relation }} - where status in ('fail', 'warn') - group by 1, 2, 3 - {% endset %} - {% set alert_rows = run_query(dbt_test_alerts) %} - {% set found_tables = [] %} - {% set found_columns = [] %} - {% set found_tests = [] %} - {% for row in alert_rows %} - {%- if row[0] -%} - {% do found_tables.append(row[0]) %} - {%- endif -%} - {%- if row[1] -%} - {% do found_columns.append(row[1]) %} - {%- endif -%} - {%- if row[2] -%} - {% do found_tests.append(row[2]) %} - {%- endif -%} - {% endfor %} - {{ assert_list1_in_list2(['string_column_anomalies', 'numeric_column_anomalies', 'any_type_column_anomalies', 'any_type_column_anomalies_validation', 'numeric_column_anomalies_training'], found_tables) }} - {{ assert_list1_in_list2(['min_length', 'null_count_int'], found_columns) }} - {{ assert_list1_in_list2(['relationships', 'singular_test_with_no_ref', 'singular_test_with_one_ref', 
'singular_test_with_two_refs', 'singular_test_with_source_ref', 'generic_test_on_model', 'generic_test_on_column'], found_tests) }} - -{% endmacro %} - -{% macro validate_dbt_artifacts() %} - {% set dbt_models_relation = ref('dbt_models') %} - {% set dbt_models_query %} - select distinct name from {{ dbt_models_relation }} - {% endset %} - {% set models = elementary.result_column_to_list(dbt_models_query) %} - {{ assert_value_in_list('any_type_column_anomalies', models, context='dbt_models') }} - {{ assert_value_in_list('numeric_column_anomalies', models, context='dbt_models') }} - {{ assert_value_in_list('string_column_anomalies', models, context='dbt_models') }} - - {% set dbt_sources_relation = ref('dbt_sources') %} - {% set dbt_sources_query %} - select distinct name from {{ dbt_sources_relation }} - {% endset %} - {% set sources = elementary.result_column_to_list(dbt_sources_query) %} - {{ assert_value_in_list('any_type_column_anomalies_training', sources, context='dbt_sources') }} - {{ assert_value_in_list('string_column_anomalies_training', sources, context='dbt_sources') }} - {{ assert_value_in_list('any_type_column_anomalies_validation', sources, context='dbt_sources') }} - - {% set dbt_tests_relation = ref('dbt_tests') %} - {% set dbt_tests_query %} - select distinct name from {{ dbt_tests_relation }} - {% endset %} - {% set tests = elementary.result_column_to_list(dbt_tests_query) %} - - {% set dbt_run_results = ref('dbt_run_results') %} - {% set dbt_run_results_query %} - select distinct name from {{ dbt_run_results }} where resource_type in ('model', 'test') - {% endset %} - {% set run_results = elementary.result_column_to_list(dbt_run_results_query) %} - {% set all_executable_nodes = [] %} - {% do all_executable_nodes.extend(models) %} - {% do all_executable_nodes.extend(tests) %} - {{ assert_list1_in_list2(run_results, all_executable_nodes, context='dbt_run_results') }} - - - {% set query %} - select distinct invocations.invocation_id, 
results.invocation_id from {{ ref("dbt_invocations") }} invocations - full outer join {{ ref("dbt_run_results") }} results - on invocations.invocation_id = results.invocation_id - where invocations.invocation_id is null or results.invocation_id is null - {% endset %} - {% set result = elementary.run_query(query) %} - {% do assert_empty_table(result, "dbt_invocations") %} -{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/utils/list_assertions.sql b/tests/e2e_dbt_project/macros/e2e_tests/utils/list_assertions.sql deleted file mode 100644 index ce98c60b4..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/utils/list_assertions.sql +++ /dev/null @@ -1,65 +0,0 @@ -{% macro assert_value_in_list(value, list, context='') %} - {% set upper_value = value | upper %} - {% set lower_value = value | lower %} - {% if upper_value in list or lower_value in list %} - {% do elementary.edr_log(context ~ " SUCCESS: " ~ upper_value ~ " in list " ~ list) %} - {{ return(0) }} - {% else %} - {% do elementary.edr_log(context ~ " FAILED: " ~ upper_value ~ " not in list " ~ list) %} - {{ return(1) }} - {% endif %} -{% endmacro %} - -{% macro assert_value_not_in_list(value, list) %} - {% set upper_value = value | upper %} - {% if upper_value not in list %} - {% do elementary.edr_log("SUCCESS: " ~ upper_value ~ " not in list " ~ list) %} - {{ return(0) }} - {% else %} - {% do elementary.edr_log("FAILED: " ~ upper_value ~ " in list " ~ list) %} - {{ return(1) }} - {% endif %} -{% endmacro %} - -{% macro assert_lists_contain_same_items(list1, list2, context='') %} - {% if list1 | length != list2 | length %} - {% do elementary.edr_log(context ~ " FAILED: " ~ list1 ~ " has different length than " ~ list2) %} - {{ return(1) }} - {% endif %} - {% for item1 in list1 %} - {% if item1 is string %} - {% set item1 = item1 | lower %} - {% endif %} - {% if item1 not in list2 %} - {% do elementary.edr_log(context ~ " FAILED: " ~ item1 ~ " not in list " ~ 
list2) %} - {{ return(1) }} - {% endif %} - {% endfor %} - {% do elementary.edr_log(context ~ " SUCCESS: " ~ list1 ~ " in list " ~ list2) %} - {{ return(0) }} -{% endmacro %} - -{% macro assert_list1_in_list2(list1, list2, context = '') %} - {% set lower_list2 = list2 | lower %} - {% if not list1 or not list2 %} - {% do elementary.edr_log(context ~ " FAILED: list1 is empty or list2 is empty") %} - {{ return(1) }} - {% endif %} - {% for item1 in list1 %} - {% if item1 | lower not in lower_list2 %} - {% do elementary.edr_log(context ~ " FAILED: " ~ item1 ~ " not in list " ~ list2) %} - {{ return(1) }} - {% endif %} - {% endfor %} - {% do elementary.edr_log(context ~ " SUCCESS: " ~ list1 ~ " in list " ~ list2) %} - {{ return(0) }} -{% endmacro %} - -{% macro assert_list_has_expected_length(list, expected_length) %} - {% if list | length != expected_length %} - {% do elementary.edr_log("FAILED: " ~ list ~ " has different length than expected " ~ expected_length) %} - {{ return(1) }} - {% endif %} - {% do elementary.edr_log("SUCCESS: " ~ list ~ " has length " ~ expected_length) %} - {{ return(0) }} -{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/utils/table_assertions.sql b/tests/e2e_dbt_project/macros/e2e_tests/utils/table_assertions.sql deleted file mode 100644 index b9ef999cb..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/utils/table_assertions.sql +++ /dev/null @@ -1,19 +0,0 @@ - -{% macro assert_empty_table(table, context='') %} - {% if table | length > 0 %} - {% do elementary.edr_log(context ~ " FAILED: Table not empty.") %} - {% do table.print_table() %} - {{ return(1) }} - {% endif %} - {% do elementary.edr_log(context ~ " SUCCESS: Table is empty.") %} - {{ return(0) }} -{% endmacro %} - -{% macro assert_table_doesnt_exist(model_name) %} - {% if load_relation(ref(model_name)) is none %} - {% do elementary.edr_log(model_name ~ " SUCCESS: Table doesn't exist.") %} - {{ return(0) }} - {% endif %} - {% do elementary.edr_log(context ~ " 
FAILED: Table exists.") %} - {{ return(1) }} -{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_backfill_days.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_backfill_days.sql deleted file mode 100644 index c67ca248d..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_backfill_days.sql +++ /dev/null @@ -1,10 +0,0 @@ -{% macro validate_backfill_days() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {% set string_column_alerts %} - select column_name - from {{ alerts_relation }} - where status in ('fail', 'warn') and lower(sub_type) = lower(column_name) and upper(table_name) = 'BACKFILL_DAYS_COLUMN_ANOMALIES' - {% endset %} - {% set results = elementary.result_column_to_list(string_column_alerts) %} - {{ assert_lists_contain_same_items(results, ['min_length']) }} -{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_column_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_column_anomalies.sql deleted file mode 100644 index 17e35bf7a..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_column_anomalies.sql +++ /dev/null @@ -1,79 +0,0 @@ -{% macro validate_column_anomalies() %} - {%- do validate_string_column_anomalies() -%} - {%- do validate_numeric_column_anomalies() -%} - {%- do validate_custom_column_monitors() -%} - {%- do validate_any_type_column_anomalies() -%} -{% endmacro %} - -{% macro validate_string_column_anomalies() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {% set string_column_alerts %} - select distinct column_name - from {{ alerts_relation }} - where status in ('fail', 'warn') and lower(sub_type) = lower(column_name) and upper(table_name) = 'STRING_COLUMN_ANOMALIES' - {% endset %} - {% set results = elementary.result_column_to_list(string_column_alerts) %} - {{ assert_lists_contain_same_items(results, ['min_length', 'max_length', 'average_length', 
'missing_count', - 'missing_percent']) }} -{% endmacro %} - -{% macro validate_numeric_column_anomalies() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {% set numeric_column_alerts %} - select distinct column_name - from {{ alerts_relation }} - where status in ('fail', 'warn') and lower(sub_type) = lower(column_name) - and upper(table_name) = 'NUMERIC_COLUMN_ANOMALIES' - {% endset %} - {% set results = elementary.result_column_to_list(numeric_column_alerts) %} - {{ assert_lists_contain_same_items(results, ['min', 'max', 'zero_count', 'zero_percent', 'average', - 'standard_deviation', 'variance', 'sum']) }} -{% endmacro %} - -{% macro validate_custom_column_monitors() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {% set query %} - select distinct sub_type from {{ alerts_relation }} - where status in ('fail', 'warn') and upper(table_name) = 'COPY_NUMERIC_COLUMN_ANOMALIES' - {% endset %} - {% set results = elementary.result_column_to_list(query) %} - {{ assert_lists_contain_same_items(results, ["zero_count"]) }} -{% endmacro %} - -{% macro validate_any_type_column_anomalies() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {% set any_type_column_alerts %} - select column_name, sub_type - from {{ alerts_relation }} - where status in ('fail', 'warn') and upper(table_name) = 'ANY_TYPE_COLUMN_ANOMALIES' - and column_name is not NULL - group by 1,2 - {% endset %} - {% set alert_rows = run_query(any_type_column_alerts) %} - {% set indexed_columns = {} %} - {% for row in alert_rows %} - {% set column_name = row[0] %} - {% set alert = row[1] %} - {% if column_name in indexed_columns %} - {% do indexed_columns[column_name].append(alert) %} - {% else %} - {% do indexed_columns.update({column_name: [alert]}) %} - {% endif %} - {% endfor %} - {% set results = [] %} - {% for column, column_alerts in indexed_columns.items() %} - {% for alert in column_alerts %} - {% if alert | lower in column | lower %} - {% do 
results.append(column) %} - {% endif %} - {% endfor %} - {% endfor %} - {{ assert_lists_contain_same_items(results, ['null_count_str', - 'null_percent_str', - 'null_count_float', - 'null_percent_float', - 'null_count_int', - 'null_percent_int', - 'null_count_bool', - 'null_percent_bool']) }} -{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_config_levels.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_config_levels.sql deleted file mode 100644 index da0b9fa7c..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_config_levels.sql +++ /dev/null @@ -1,18 +0,0 @@ -{% macro validate_config_levels() %} - {% set alerts_relation = ref('test_alerts_union') %} - - {% set config_levels_validation_query %} - with error_tests as ( - select - table_name, alert_description, - {{ elementary.contains('tags', 'config_levels') }} as is_config_levels_tag - from {{ alerts_relation }} - where status = 'error' - ) - select table_name, alert_description - from error_tests - where is_config_levels_tag = true - {% endset %} - {% set results = elementary.run_query(config_levels_validation_query) %} - {{ assert_empty_table(results) }} -{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_dimensions_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_dimensions_anomalies.sql deleted file mode 100644 index 0b53fae5c..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_dimensions_anomalies.sql +++ /dev/null @@ -1,41 +0,0 @@ -{% macro validate_dimension_anomalies() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - - {% set dimension_validation_query %} - select *, - {{ elementary.contains('tags', 'should_fail') }} as should_fail - from {{ alerts_relation }} - where status in ('fail', 'warn', 'error') and tags like '%dimension_anomalies%' - {% endset %} - {% set results = elementary.agate_to_dicts(run_query(dimension_validation_query)) %} - {% set 
dimensions_with_problems = [] %} - - {%- set should_fail_descriptions = [] %} - {%- set should_fail_names = [] %} - - {% for result in results %} - {%- set should_fail_tag = result.get('should_fail') %} - {%- set test_name = result.get('test_name') %} - {%- set alert_description = result.get('alert_description') %} - {%- if should_fail_tag == True %} - {%- do should_fail_descriptions.append(alert_description) -%} - {%- do should_fail_names.append(test_name) -%} - {%- endif %} - {% endfor %} - - {{ assert_lists_contain_same_items(should_fail_names, ['elementary_dimension_anomalies_dimension_anomalies_platform__updated_at', 'elementary_dimension_anomalies_dimension_anomalies_platform__version__updated_at']) }} -{% endmacro %} - -{% macro create_new_dimension() %} - {% set dimension_validation_data = ref('dimension_anomalies_validation') %} - {%- set insert_dimension_query %} - INSERT INTO {{ dimension_validation_data }} values ('1969-12-28 00:00:00.000', 'windows', 1, 318); - {% endset %} -{% endmacro %} - -{% macro delete_new_dimension() %} - {% set dimension_validation_data = ref('dimension_anomalies_validation') %} - {%- set delete_dimension_query %} - DELETE FROM{{ dimension_validation_data }} WHERE platform = 'windows'; - {% endset %} -{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_directional_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_directional_anomalies.sql deleted file mode 100644 index 6482951dc..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_directional_anomalies.sql +++ /dev/null @@ -1,40 +0,0 @@ -{% macro validate_directional_anomalies() %} - {%- do validate_spike_directional_anomalies() -%} - {%- do validate_drop_directional_anomalies() -%} -{% endmacro %} - -{% macro validate_spike_directional_anomalies() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {# Validating alert for correct direction anomalies #} - - {% set 
row_count_validation_query %} - select distinct table_name - from {{ alerts_relation }} - where status in ('fail', 'warn') and tags like '%directional_anomalies%' and tags like '%spike%'; - {% endset %} - {% set results = elementary.result_column_to_list(row_count_validation_query) %} - -- The result list's purpose is a more readable error messages - {% set results_list = [] %} - {% for result in results %} - {% do results_list.append(result) %} - {% endfor %} - {{ assert_lists_contain_same_items(results_list, ['any_type_column_anomalies', 'numeric_column_anomalies']) }} -{% endmacro %} - -{% macro validate_drop_directional_anomalies() %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {# Validating alert for correct direction anomalies #} - - {% set row_count_validation_query %} - select distinct table_name - from {{ alerts_relation }} - where status in ('fail', 'warn') and tags like '%directional_anomalies%' and tags like '%drop%'; - {% endset %} - {% set results = elementary.result_column_to_list(row_count_validation_query) %} - -- The result list's purpose is a more readable error messages - {% set results_list = [] %} - {% for result in results %} - {% do results_list.append(result) %} - {% endfor %} - {{ assert_lists_contain_same_items(results_list, ['any_type_column_anomalies', 'dimension_anomalies']) }} -{% endmacro %} \ No newline at end of file diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_freshness_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_freshness_anomalies.sql deleted file mode 100644 index 9fa2df5ae..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_freshness_anomalies.sql +++ /dev/null @@ -1,14 +0,0 @@ -{% macro validate_event_freshness_anomalies() %} - {%- set max_bucket_end = elementary.edr_quote(elementary.get_run_started_at().strftime("%Y-%m-%d 00:00:00")) %} - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {% set freshness_validation_query %} - select distinct 
table_name - from {{ alerts_relation }} - where sub_type = 'event_freshness' and detected_at >= {{elementary.edr_cast_as_timestamp(max_bucket_end) }} - {% endset %} - - {% set results = elementary.result_column_to_list(freshness_validation_query) %} - {{ assert_lists_contain_same_items(results, ['string_column_anomalies', - 'numeric_column_anomalies', - 'string_column_anomalies_training']) }} -{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_schema_changes.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_schema_changes.sql deleted file mode 100644 index 90b0c6933..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_schema_changes.sql +++ /dev/null @@ -1,61 +0,0 @@ -{% macro validate_schema_changes() %} - {% set expected_changes = {('schema_changes', 'red_cards'): 'column_added', - ('schema_changes', 'group_a'): 'column_removed', - ('schema_changes', 'goals'): 'type_changed', - ('schema_changes', 'key_crosses'): 'column_added', - ('schema_changes', 'offsides'): 'column_removed', - ('schema_changes_from_baseline', 'group_b'): 'type_changed', - ('schema_changes_from_baseline', 'group_d'): 'column_added', - ('schema_changes_from_baseline', 'goals'): 'type_changed', - ('schema_changes_from_baseline', 'coffee_cups_consumed'): 'column_removed' - } %} - {% set alerts_relation = ref('alerts_schema_changes') %} - {% set failed_schema_changes_alerts %} - select test_short_name, column_name, sub_type - from {{ alerts_relation }} - where status in ('fail', 'warn') - group by 1,2,3 - {% endset %} - {% set error_schema_changes_alerts %} - select test_short_name, column_name, sub_type - from {{ alerts_relation }} - where status = 'error' - group by 1,2,3 - {% endset %} - {% set error_alert_rows = run_query(error_schema_changes_alerts) %} - {# We should have one error test from schema_changes_from_baseline with enforce_types true #} - {% if error_alert_rows | length != 1 %} - {% do elementary.edr_log("FAILED: for 
schema_changes_from_baseline with enforce_types true - no error eccured") %} - {{ return(1) }} - {% endif %} - {% set failure_alert_rows = run_query(failed_schema_changes_alerts) %} - {% set found_schema_changes = {} %} - {% for row in failure_alert_rows %} - {% set test_short_name = row[0] | lower %} - {% set column_name = row[1] | lower %} - {% set alert = row[2] | lower %} - {% if (test_short_name, column_name) not in expected_changes %} - {% do elementary.edr_log("FAILED: " ~ test_short_name ~ " - could not find expected alert for " ~ column_name ~ ", " ~ alert) %} - {% endif %} - {% if expected_changes[(test_short_name, column_name)] != alert %} - {% do elementary.edr_log("FAILED: " ~ test_short_name ~ " - for column " ~ column_name ~ " expected alert type " ~ expected_changes[(test_short_name, column_name)] ~ " but got " ~ alert) %} - {{ return(1) }} - {% endif %} - {% do found_schema_changes.update({(test_short_name, column_name): alert}) %} - {% endfor %} - {% if found_schema_changes %} - {%- set missing_changes = [] %} - {%- for expected_change in expected_changes %} - {%- if expected_change not in found_schema_changes %} - {% do elementary.edr_log("FAILED: for column " ~ expected_change ~ " expected alert " ~ expected_changes[expected_change] ~ " but alert is missing") %} - {%- do missing_changes.append(expected_change) -%} - {%- endif %} - {%- endfor %} - {%- if missing_changes | length == 0 %} - {% do elementary.edr_log("SUCCESS: all expected schema changes were found - " ~ found_schema_changes) %} - {{ return(0) }} - {%- endif %} - {% endif %} - {{ return(0) }} -{% endmacro %} - diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_seasonal_volume_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_seasonal_volume_anomalies.sql deleted file mode 100644 index 3e48f7a3b..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_seasonal_volume_anomalies.sql +++ /dev/null @@ -1,16 +0,0 @@ -{% macro 
validate_seasonal_volume_anomalies() %} - {% set query %} - select test_alias, status - from {{ ref('elementary_test_results') }} - where table_name in ('users_per_day_weekly_seasonal', 'users_per_hour_daily_seasonal') - {% endset %} - {% set results = elementary.run_query(query) %} - {{ assert_lists_contain_same_items(results, [ - ('day_of_week_volume_anomalies_no_seasonality', 'fail'), - ('day_of_week_volume_anomalies_with_seasonality', 'pass'), - ('hour_of_day_volume_anomalies_with_seasonality', 'pass'), - ('hour_of_day_volume_anomalies_no_seasonality', 'fail'), - ('hour_of_week_volume_anomalies_no_seasonality', 'fail'), - ('hour_of_week_volume_anomalies_with_seasonality', 'pass') - ]) }} -{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/e2e_tests/validate_table_anomalies.sql b/tests/e2e_dbt_project/macros/e2e_tests/validate_table_anomalies.sql deleted file mode 100644 index d6efb13c3..000000000 --- a/tests/e2e_dbt_project/macros/e2e_tests/validate_table_anomalies.sql +++ /dev/null @@ -1,25 +0,0 @@ -{% macro validate_table_anomalies() %} - -- no validation data which means table freshness and volume should alert - {% set alerts_relation = ref('alerts_anomaly_detection') %} - {% set freshness_validation_query %} - select distinct table_name - from {{ alerts_relation }} - where status in ('fail', 'warn') and sub_type = 'freshness' - {% endset %} - {% set results = elementary.result_column_to_list(freshness_validation_query) %} - {{ assert_lists_contain_same_items(results, ['string_column_anomalies', - 'numeric_column_anomalies', - 'string_column_anomalies_training']) }} - {% set row_count_validation_query %} - select distinct table_name - from {{ alerts_relation }} - where status in ('fail', 'warn') and sub_type = 'row_count' - {% endset %} - {% set results = elementary.result_column_to_list(row_count_validation_query) %} - {{ assert_lists_contain_same_items(results, ['users_per_hour_daily_seasonal', - 'users_per_day_weekly_seasonal', - 
'any_type_column_anomalies', - 'numeric_column_anomalies', - 'string_column_anomalies_training']) }} - -{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/system/read_table.sql b/tests/e2e_dbt_project/macros/system/read_table.sql deleted file mode 100644 index 7f9f88f54..000000000 --- a/tests/e2e_dbt_project/macros/system/read_table.sql +++ /dev/null @@ -1,18 +0,0 @@ -{% macro read_table(table, where=none, column_names=none) %} - {% set query %} - select - {% if column_names %} - {{ elementary.escape_select(column_names) }} - {% else %} - * - {% endif %} - from {{ ref(table) }} - {% if where %} - where {{ where }} - {% endif %} - {% endset %} - - {% set results = elementary.run_query(query) %} - {% set results_json = elementary.agate_to_json(results) %} - {% do elementary.edr_log(results_json) %} -{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/system/return_config_var.sql b/tests/e2e_dbt_project/macros/system/return_config_var.sql deleted file mode 100644 index 446001ddd..000000000 --- a/tests/e2e_dbt_project/macros/system/return_config_var.sql +++ /dev/null @@ -1,6 +0,0 @@ -{# Logging the wanted config var as an elementary log (using elementary.edr_log) #} -{# The dbtRunner catch this log when executed with run_operation #} -{# This is used for accessing the integration tests vars #} -{% macro return_config_var(var_name) %} - {{ elementary.edr_log(elementary.get_config_var(var_name)) }} -{% endmacro %} diff --git a/tests/e2e_dbt_project/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql b/tests/e2e_dbt_project/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql deleted file mode 100644 index b68b50d5b..000000000 --- a/tests/e2e_dbt_project/macros/unit_tests/test_adapter_specific_macros_have_default_implementation.sql +++ /dev/null @@ -1,14 +0,0 @@ -{% macro test_adapter_specific_macros_have_default_implementation() %} - {% set no_default_macros = [] %} - {% set elementary_macros = 
elementary.keys() %} - {% for macro in elementary_macros %} - {% set parts = macro.split("__") %} - {% if parts | length == 2 %} - {% set adapter, macro_name = parts %} - {% if macro_name not in no_default_macros and "default__{}".format(macro_name) not in elementary_macros %} - {% do no_default_macros.append(macro_name) %} - {% endif %} - {% endif %} - {% endfor %} - {{ assert_lists_contain_same_items(no_default_macros, [], "no_default_macros") }} -{% endmacro %} From b4f6ea24ac345a19777adfaebf4432316394a5a2 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 14:33:58 +0300 Subject: [PATCH 04/11] test-warehouse: change CI to use a static dbt project rather than the deprecated package tests --- .github/workflows/test-warehouse.yml | 56 +++++++++++++++------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 74e5d0724..4d6a85920 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -26,16 +26,6 @@ on: type: string required: false description: dbt's version to test with - should-run-tests: - type: boolean - required: false - default: true - description: Whether to run E2E tests - clear-tests: - type: boolean - required: false - default: true - description: Whether to clean test environment generate-data: type: boolean required: false @@ -47,10 +37,6 @@ on: warehouse-type: type: string required: true - should-run-tests: - type: boolean - required: false - default: true elementary-ref: type: string required: false @@ -60,10 +46,6 @@ on: dbt-version: type: string required: false - clear-tests: - type: boolean - required: false - default: true generate-data: type: boolean required: false @@ -71,7 +53,6 @@ on: env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - DBT_PKG_INTEG_TESTS_DIR: ${{ github.workspace }}/dbt-data-reliability/integration_tests/deprecated_tests ELMENTARY_INTERNAL_DBT_PKG_DIR: ${{ 
github.workspace }}/elementary/elementary/monitor/dbt_project jobs: @@ -170,12 +151,38 @@ jobs: rm -rf "$DBT_PKGS_PATH/elementary" ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary" - - name: Run dbt package integration tests - if: github.event_name != 'workflow_dispatch' || inputs.should-run-tests - working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }} + - name: Seed e2e dbt project + working-directory: ${{ github.workspace }}/elementary-cli/tests/e2e_dbt_project + if: inputs.warehouse-type == 'postgres' || inputs.generate-data run: | - dbt deps - python run_e2e_tests.py -t "${{ inputs.warehouse-type }}" -g "${{ inputs.warehouse-type == 'postgres' || inputs.generate-data }}" --clear-tests "${{ inputs.clear-tests }}" + dbt seed --target "${{ inputs.warehouse-type }}" + + - name: Run e2e dbt project + working-directory: ${{ github.workspace }}/elementary-cli/tests/e2e_dbt_project + run: | + dbt run --target "${{ inputs.warehouse-type }}" || true + + # Validate run_results.json: only error_model should be non-success + jq -e ' + [.results[] | select(.status != "success") | .unique_id] + | length == 1 and .[0] == "model.elementary_integration_tests.error_model" + ' target/run_results.json > /dev/null + jq_exit=$? + + if [ $jq_exit -eq 0 ]; then + echo "✅ Validation passed: only error_model failed." + else + echo "❌ Validation failed. 
Unexpected failures:" + jq '[.results[] | select(.status != "success") | .unique_id] | join(", ")' target/run_results.json + fi + + exit $jq_exit + + - name: Test e2e dbt project + working-directory: ${{ github.workspace }}/elementary-cli/tests/e2e_dbt_project + continue-on-error: true + run: | + dbt test --target "${{ inputs.warehouse-type }}" - name: Run help run: edr --help @@ -259,5 +266,4 @@ jobs: path: elementary/edr_target/edr.log - name: Run Python package e2e tests - if: github.event_name != 'workflow_dispatch' || inputs.should-run-tests run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }} From 4bbf70bf453de13dc9f733a2340e4e875052267f Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 14:42:33 +0300 Subject: [PATCH 05/11] fixes --- .github/workflows/test-warehouse.yml | 17 ++++++------ tests/e2e_dbt_project/docker-compose.yml | 33 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 tests/e2e_dbt_project/docker-compose.yml diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 4d6a85920..ff231fd7b 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -54,6 +54,7 @@ on: env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} ELMENTARY_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project + E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary-cli/tests/e2e_dbt_project jobs: # PRs from forks require approval, specifically with the "pull_request_target" event as it contains repo secrets. 
@@ -98,12 +99,12 @@ jobs: - name: Start Postgres if: inputs.warehouse-type == 'postgres' - working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }} + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} run: docker compose up -d postgres # - name: Start Clickhouse # if: inputs.warehouse-type == 'clickhouse' - # working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }} + # working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} # run: docker compose up -d clickhouse - name: Setup Python @@ -152,13 +153,13 @@ jobs: ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary" - name: Seed e2e dbt project - working-directory: ${{ github.workspace }}/elementary-cli/tests/e2e_dbt_project + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} if: inputs.warehouse-type == 'postgres' || inputs.generate-data run: | dbt seed --target "${{ inputs.warehouse-type }}" - name: Run e2e dbt project - working-directory: ${{ github.workspace }}/elementary-cli/tests/e2e_dbt_project + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} run: | dbt run --target "${{ inputs.warehouse-type }}" || true @@ -179,7 +180,7 @@ jobs: exit $jq_exit - name: Test e2e dbt project - working-directory: ${{ github.workspace }}/elementary-cli/tests/e2e_dbt_project + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} continue-on-error: true run: | dbt test --target "${{ inputs.warehouse-type }}" @@ -194,7 +195,7 @@ jobs: edr monitor -t "${{ inputs.warehouse-type }}" --group-by table - --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}" + --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" --project-profile-target "${{ inputs.warehouse-type }}" --slack-webhook "$SLACK_WEBHOOK" @@ -208,7 +209,7 @@ jobs: run: > edr monitor report -t "${{ inputs.warehouse-type }}" - --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}" + --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" --project-profile-target "${{ inputs.warehouse-type }}" - name: Set report artifact name @@ -237,7 +238,7 @@ jobs: run: > edr monitor send-report -t "${{ 
inputs.warehouse-type }}" - --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}" + --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" --project-profile-target "${{ inputs.warehouse-type }}" --slack-file-name "report_${{ inputs.warehouse-type }}_${{ env.BRANCH_NAME }}.html" --slack-token "$SLACK_TOKEN" diff --git a/tests/e2e_dbt_project/docker-compose.yml b/tests/e2e_dbt_project/docker-compose.yml new file mode 100644 index 000000000..6bdc21ad5 --- /dev/null +++ b/tests/e2e_dbt_project/docker-compose.yml @@ -0,0 +1,33 @@ +version: "3.8" + +services: + postgres: + image: postgres + ports: + - "127.0.0.1:5432:5432" + command: postgres -c max_connections=500 + environment: + POSTGRES_USER: admin + POSTGRES_PASSWORD: admin + volumes: + - postgres:/var/lib/postgresql/data + + clickhouse: + image: clickhouse/clickhouse-server:latest + container_name: clickhouse + ports: + - "8123:8123" + - "9000:9000" + volumes: + - ./clickhouse-data:/var/lib/clickhouse + environment: + CLICKHOUSE_DB: default + CLICKHOUSE_USER: default + CLICKHOUSE_PASSWORD: "default" + ulimits: + nofile: + soft: 262144 + hard: 262144 + +volumes: + postgres: From 9aa8b5e92f09e9904522f27247e84aee30d3e6e4 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 14:45:04 +0300 Subject: [PATCH 06/11] bugfix --- .github/workflows/test-warehouse.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index ff231fd7b..7d7d5bb46 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -54,7 +54,7 @@ on: env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} ELMENTARY_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project - E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary-cli/tests/e2e_dbt_project + E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary/tests/e2e_dbt_project jobs: # PRs from forks require approval, specifically with 
the "pull_request_target" event as it contains repo secrets. From 6c4c4eb6610eddbe813bf998e3efc3bd6ffdf2be Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 14:50:28 +0300 Subject: [PATCH 07/11] add deps --- .github/workflows/test-warehouse.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 7d7d5bb46..9f34b09ff 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -152,6 +152,11 @@ jobs: rm -rf "$DBT_PKGS_PATH/elementary" ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary" + - name: Run deps for E2E dbt project + working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} + run: | + dbt deps + - name: Seed e2e dbt project working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} if: inputs.warehouse-type == 'postgres' || inputs.generate-data From 2b26b2f2a347a2bbb1f5d7e8710f8e55a8a54125 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 14:56:35 +0300 Subject: [PATCH 08/11] test-warehouse: fix packages for e2e project --- .github/workflows/test-warehouse.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 9f34b09ff..ea6611718 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -53,7 +53,8 @@ on: env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - ELMENTARY_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project + ELEMENTARY_DBT_PACKAGE_PATH: ${{ github.workspace }}/dbt-data-reliability + CLI_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary/tests/e2e_dbt_project jobs: @@ -154,6 +155,8 @@ jobs: - name: Run deps for E2E dbt project working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} + env: + 
ELEMENTARY_DBT_PACKAGE_PATH: ${{ env.ELEMENTARY_DBT_PACKAGE_PATH }} run: | dbt deps @@ -205,7 +208,7 @@ jobs: --slack-webhook "$SLACK_WEBHOOK" - name: Validate alerts statuses were updated - working-directory: ${{ env.ELMENTARY_INTERNAL_DBT_PKG_DIR }} + working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }} run: | dbt deps dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}" From 7ac3b18c28e708494e481c81796cea34b569f641 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 14:57:47 +0300 Subject: [PATCH 09/11] forgot to update packages --- tests/e2e_dbt_project/packages.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/e2e_dbt_project/packages.yml b/tests/e2e_dbt_project/packages.yml index b74c0ef5a..ea515e82a 100644 --- a/tests/e2e_dbt_project/packages.yml +++ b/tests/e2e_dbt_project/packages.yml @@ -1,10 +1,4 @@ packages: - - local: ../../ + - local: "{{ env_var('ELEMENTARY_DBT_PACKAGE_PATH') }}" - package: dbt-labs/dbt_utils - version: | - {%- set minor_to_utils_range_map = { - "0": [">=0.8.0", "<0.9.0"], - "1": [">=0.8.0", "<0.9.0"], - "2": [">=0.8.0", "<1.0.0"], - } -%} - {{- minor_to_utils_range_map.get(dbt_version.split('.')[1], [">=0.8.0", "<2.0.0"]) -}} + version: [">=0.8.0", "<2.0.0"] From 02b3002647d0a473939f731f2175410e5bb40623 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 17:02:40 +0300 Subject: [PATCH 10/11] generate data before seeding --- .github/workflows/test-warehouse.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index ea6611718..46195bcd1 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -164,6 +164,7 @@ jobs: working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} if: inputs.warehouse-type == 'postgres' || inputs.generate-data run: | + python generate_data.py dbt seed --target "${{ inputs.warehouse-type }}" - 
name: Run e2e dbt project From 8ce421eee3849fb36c19742bd1df9661286f4c75 Mon Sep 17 00:00:00 2001 From: Itamar Hartstein Date: Wed, 8 Oct 2025 17:12:48 +0300 Subject: [PATCH 11/11] full refresh seeds --- .github/workflows/test-warehouse.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 46195bcd1..0f00f7452 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -165,7 +165,7 @@ jobs: if: inputs.warehouse-type == 'postgres' || inputs.generate-data run: | python generate_data.py - dbt seed --target "${{ inputs.warehouse-type }}" + dbt seed -f --target "${{ inputs.warehouse-type }}" - name: Run e2e dbt project working-directory: ${{ env.E2E_DBT_PROJECT_DIR }}