diff --git a/.github/workflows/py-cli-e2e-tests.yml b/.github/workflows/py-cli-e2e-tests.yml index e8b9fbf4497d..09736418cb6a 100644 --- a/.github/workflows/py-cli-e2e-tests.yml +++ b/.github/workflows/py-cli-e2e-tests.yml @@ -18,7 +18,7 @@ on: e2e-tests: description: "E2E Tests to run" required: True - default: '["bigquery", "dbt_redshift", "metabase", "mssql", "mysql", "redash", "snowflake", "tableau", "python-unittests", "python-integration", "redshift", "quicksight", "datalake_s3", "postgres", "oracle", "athena", "bigquery_multiple_project"]' + default: '["bigquery", "dbt_redshift", "metabase", "mssql", "mysql", "redash", "snowflake", "tableau", "python-unittests", "python-integration", "redshift", "quicksight", "datalake_s3", "postgres", "oracle", "athena", "bigquery_multiple_project", "exasol"]' debug: description: "If Debugging the Pipeline, Slack and Sonar events won't be triggered [default, true or false]. Default will trigger only on main branch." required: False @@ -45,7 +45,7 @@ jobs: strategy: fail-fast: false matrix: - e2e-test: ${{ fromJSON(inputs.e2e-tests || '["bigquery", "dbt_redshift", "metabase", "mssql", "mysql", "redash", "snowflake", "tableau", "python-unittests", "python-integration", "redshift", "quicksight", "datalake_s3", "postgres", "oracle", "athena", "bigquery_multiple_project"]') }} + e2e-test: ${{ fromJSON(inputs.e2e-tests || '["bigquery", "dbt_redshift", "metabase", "mssql", "mysql", "redash", "snowflake", "tableau", "python-unittests", "python-integration", "redshift", "quicksight", "datalake_s3", "postgres", "oracle", "athena", "bigquery_multiple_project", "exasol"]') }} environment: test steps: diff --git a/ingestion/setup.py b/ingestion/setup.py index 1103b01a2bdd..3be8ced71f45 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -279,7 +279,7 @@ "opensearch": {VERSIONS["opensearch"]}, "exasol": { "sqlalchemy_exasol>=6,<7", - "exasol-integration-test-docker-environment>=3.1.0,<4", + 
"exasol-integration-test-docker-environment>=6.0.0,<7", }, "glue": {VERSIONS["boto3"]}, "great-expectations": {VERSIONS["great-expectations"]}, diff --git a/ingestion/src/metadata/ingestion/source/database/exasol/metadata.py b/ingestion/src/metadata/ingestion/source/database/exasol/metadata.py index 01ca400c0e53..7f1479c41751 100644 --- a/ingestion/src/metadata/ingestion/source/database/exasol/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/exasol/metadata.py @@ -1,6 +1,7 @@ from typing import Optional, cast from sqlalchemy.engine.reflection import Inspector +from sqlalchemy_exasol.base import EXADialect from metadata.generated.schema.entity.services.connections.database.exasolConnection import ( ExasolConnection, @@ -11,10 +12,17 @@ from metadata.ingestion.api.steps import InvalidSourceException from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.database.common_db_source import CommonDbSourceService -from metadata.utils.sqlalchemy_utils import get_all_table_ddls, get_table_ddl +from metadata.ingestion.source.database.exasol.sqla_utils import get_table_comment +from metadata.utils.sqlalchemy_utils import ( + get_all_table_comments, + get_all_table_ddls, + get_table_ddl, +) Inspector.get_all_table_ddls = get_all_table_ddls Inspector.get_table_ddl = get_table_ddl +EXADialect.get_table_comment = get_table_comment +EXADialect.get_all_table_comments = get_all_table_comments class ExasolSource(CommonDbSourceService): diff --git a/ingestion/src/metadata/ingestion/source/database/exasol/queries.py b/ingestion/src/metadata/ingestion/source/database/exasol/queries.py index 9981af36fb04..f61fefa3596c 100644 --- a/ingestion/src/metadata/ingestion/source/database/exasol/queries.py +++ b/ingestion/src/metadata/ingestion/source/database/exasol/queries.py @@ -20,7 +20,8 @@ """ # noqa: W291 ) -EXASOL_TEST_GET_QUERIES = """ +EXASOL_TEST_GET_QUERIES = textwrap.dedent( + """ SELECT s.sql_text, s.command_name, @@ -33,3 +34,15 
@@ ON s.SESSION_ID = se.SESSION_ID LIMIT 1 """ +) + +EXASOL_GET_TABLE_COMMENTS = textwrap.dedent( + """ + SELECT + root_name AS "schema", + object_name AS "table_name", + object_comment AS "table_comment" + FROM EXA_ALL_OBJECTS + WHERE object_type IN ('TABLE', 'VIEW') +""" +) diff --git a/ingestion/src/metadata/ingestion/source/database/exasol/sqla_utils.py b/ingestion/src/metadata/ingestion/source/database/exasol/sqla_utils.py new file mode 100644 index 000000000000..7daacb802316 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/exasol/sqla_utils.py @@ -0,0 +1,15 @@ +from sqlalchemy.engine import reflection + +from metadata.ingestion.source.database.exasol.queries import EXASOL_GET_TABLE_COMMENTS +from metadata.utils.sqlalchemy_utils import get_table_comment_wrapper + + +@reflection.cache +def get_table_comment(self, connection, table_name, schema=None, **kw): + return get_table_comment_wrapper( + self, + connection, + query=EXASOL_GET_TABLE_COMMENTS, + table_name=table_name.upper(), + schema=schema.upper() if schema else None, + ) diff --git a/ingestion/tests/cli_e2e/database/exasol/exasol.yaml b/ingestion/tests/cli_e2e/database/exasol/exasol.yaml index daf24f386008..4b4b34cfb1eb 100644 --- a/ingestion/tests/cli_e2e/database/exasol/exasol.yaml +++ b/ingestion/tests/cli_e2e/database/exasol/exasol.yaml @@ -7,7 +7,7 @@ source: username: sys password: exasol hostPort: localhost:8563 - tls: disable-tls + tls: ignore-certificate connectionOptions: {} connectionArguments: {} sourceConfig: diff --git a/ingestion/tests/cli_e2e/test_cli_exasol.py b/ingestion/tests/cli_e2e/test_cli_exasol.py index c48c0e922822..c217622f4d46 100644 --- a/ingestion/tests/cli_e2e/test_cli_exasol.py +++ b/ingestion/tests/cli_e2e/test_cli_exasol.py @@ -28,10 +28,19 @@ TABLE_NAME = "datatypes" VIEW_NAME = f"view_{TABLE_NAME}" DB_PORT = 8563 -DB_VERSION = "7.1.26" +# The compressed size of this image is 3.23 GB, so it takes on the order of minutes +# to pull it. 
+DB_VERSION = "2025.1.8" CONTAINER_SUFFIX = "exasoaddl" CONTAINER_NAME = f"db_container_{CONTAINER_SUFFIX}" +VANILLA_INGESTION_SKIP_REASON = """ +There are currently issues with this test, likely related to how OpenMetadata relies +upon certain SQLAlchemy functions, which seem not to be defined yet for Exasol. +This leads in the UI to warnings with a basic ingestion setup, but here, in this test, +this leads to larger problems. This will be investigated and resolved. +""" + class ExasolCliTest(CliCommonDB.TestSuite, SQACommonMethods): """ @@ -41,7 +50,7 @@ class ExasolCliTest(CliCommonDB.TestSuite, SQACommonMethods): create_table_query: str = f""" CREATE TABLE IF NOT EXISTS {SCHEMA_NAME}.{TABLE_NAME} ( col_boolean BOOLEAN, - col_decimal DECIMAL(18,0), + col_decimal DOUBLE PRECISION, col_date DATE, col_timestamp TIMESTAMP, col_timestamp_local TIMESTAMP WITH LOCAL TIME ZONE, @@ -52,8 +61,15 @@ class ExasolCliTest(CliCommonDB.TestSuite, SQACommonMethods): create_view_query: str = f""" CREATE VIEW {SCHEMA_NAME}.{VIEW_NAME} AS - SELECT * - FROM {SCHEMA_NAME}.{TABLE_NAME} + SELECT + col_boolean, + col_decimal, + col_date, + col_timestamp, + col_timestamp_local, + col_char, + col_varchar + FROM {SCHEMA_NAME}.{TABLE_NAME} """ insert_data_queries: List[str] = [ # noqa: RUF012, UP006 @@ -77,7 +93,28 @@ class ExasolCliTest(CliCommonDB.TestSuite, SQACommonMethods): @classmethod def setUpClass(cls): - subprocess.run( # noqa: PLW1510 + """ + To run the Exasol tests, we use the Integration Test Docker Environment (ITDE) + package. By default, this pulls an Exasol Database Docker image of the + requested version. However, to reduce confusion and make it clearer what is + leading to an issue, we have added to the setup that the Docker image is pulled + first and in a separate command. + + The ITDE includes configuration files for each Exasol Database Docker image. 
+ Thus, there is unfortunately a tight coupling between the ITDE version + you are using and the Docker image you can use. Over time, Exasol may drop + support of certain Docker images, like the one used in this test. If the + tests break and assistance is needed because of that, please reach out to us at + opensource@exasol.com or open an issue in the ITDE at + https://github.com/exasol/integration-test-docker-environment. + For example, a mismatch in ITDE and Docker image would lead to an error like + this when "itde spawn-test-environment" is run: + FileNotFoundError: [Errno 2] No such file or directory: + '$HOME/OpenMetadata/venv/lib/python3.11/site-packages/exasol_integration_test_docker_environment/docker_db_config/2025.2.1/init_db.sh' + """ + + subprocess.run(["docker", "pull", f"exasol/docker-db:{DB_VERSION}"], check=True) + subprocess.run( [ "itde", "spawn-test-environment", @@ -91,12 +128,13 @@ def setUpClass(cls): DB_VERSION, "--db-mem-size", "4GB", - ] + ], + check=True, ) super().setUpClass() with cls.engine.connect() as connection: connection.execute(text(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}")) - connection.execute(text(f"CREATE SCHEMA IF NOT EXISTS IGNORE_SCHEMA")) # noqa: F541 + connection.execute(text("CREATE SCHEMA IF NOT EXISTS IGNORE_SCHEMA")) connection.execute(text(cls.create_table_query)) connection.execute( text(f"CREATE OR REPLACE TABLE {SCHEMA_NAME}.IGNORE_TABLE AS SELECT * FROM {SCHEMA_NAME}.{TABLE_NAME}") @@ -155,7 +193,7 @@ def expected_sample_size(self) -> int: return len(self.insert_data_queries) def view_column_lineage_count(self) -> int: - return 22 + return 7 def expected_lineage_node(self) -> str: return f"{SERVICE_NAME}.default.{SCHEMA_NAME}.{VIEW_NAME}"