diff --git a/ingestion/src/metadata/ingestion/source/database/hive/connection.py b/ingestion/src/metadata/ingestion/source/database/hive/connection.py index 911513e22476..507811b10337 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/connection.py @@ -12,6 +12,7 @@ """ Source connection handler """ + from copy import deepcopy from enum import Enum from functools import singledispatch @@ -28,9 +29,15 @@ HiveConnection, HiveScheme, ) +from metadata.generated.schema.entity.services.connections.database.mssqlConnection import ( + MssqlConnection, +) from metadata.generated.schema.entity.services.connections.database.mysqlConnection import ( MysqlConnection, ) +from metadata.generated.schema.entity.services.connections.database.oracleConnection import ( + OracleConnection, +) from metadata.generated.schema.entity.services.connections.database.postgresConnection import ( PostgresConnection, ) @@ -56,6 +63,8 @@ HIVE_POSTGRES_SCHEME = "hive+postgres" HIVE_MYSQL_SCHEME = "hive+mysql" +HIVE_MSSQL_SCHEME = "hive+mssql" +HIVE_ORACLE_SCHEME = "hive+oracle" # Monkey-patch the pyhive.hive module to use our custom connection import pyhive.hive @@ -203,6 +212,61 @@ class CustomMysqlConnection(MysqlConnection): ) +@get_metastore_connection.register +def _(connection: MssqlConnection): + # import required to load sqlalchemy plugin + # pylint: disable=import-outside-toplevel,unused-import + from metadata.ingestion.source.database.hive.metastore_dialects.mssql import ( # nopycln: import + HiveMssqlMetaStoreDialect, + ) + + class CustomMssqlScheme(Enum): + HIVE_MSSQL = HIVE_MSSQL_SCHEME + + class CustomMssqlConnection(MssqlConnection): + scheme: Optional[CustomMssqlScheme] + + connection_copy = deepcopy(connection.__dict__) + connection_copy["scheme"] = CustomMssqlScheme.HIVE_MSSQL + + custom_connection = CustomMssqlConnection(**connection_copy) + + return create_generic_db_connection( + connection=custom_connection, + get_connection_url_fn=get_connection_url_common, + get_connection_args_fn=get_connection_args_common, + ) + + +@get_metastore_connection.register +def _(connection: OracleConnection): + # import required to load sqlalchemy plugin + # pylint: disable=import-outside-toplevel,unused-import + from metadata.ingestion.source.database.hive.metastore_dialects.oracle import ( # nopycln: import + HiveOracleMetaStoreDialect, + ) + from metadata.ingestion.source.database.oracle.connection import ( + OracleConnection as OracleConnectionHandler, + ) + + class CustomOracleScheme(Enum): + HIVE_ORACLE = HIVE_ORACLE_SCHEME + + class CustomOracleConnection(OracleConnection): + scheme: Optional[CustomOracleScheme] + + connection_copy = deepcopy(connection.__dict__) + connection_copy["scheme"] = CustomOracleScheme.HIVE_ORACLE + + custom_connection = CustomOracleConnection(**connection_copy) + + return create_generic_db_connection( + connection=custom_connection, + get_connection_url_fn=OracleConnectionHandler.get_connection_url, + get_connection_args_fn=get_connection_args_common, + ) + + def test_connection( metadata: OpenMetadata, engine: Engine, @@ -218,20 +282,27 @@ def test_connection( metastore_conn = service_connection.metastoreConnection if metastore_conn: - if isinstance(metastore_conn, (PostgresConnection, MysqlConnection)): + if isinstance( + metastore_conn, + (PostgresConnection, MysqlConnection, MssqlConnection, OracleConnection), + ): engine = get_metastore_connection(metastore_conn) elif isinstance(metastore_conn, dict) and len(metastore_conn) > 0: - try: - service_connection.metastoreConnection = ( - PostgresConnection.model_validate(metastore_conn) - ) - except ValidationError: + for conn_cls in ( + PostgresConnection, + MysqlConnection, + MssqlConnection, + OracleConnection, + ): try: - service_connection.metastoreConnection = ( - MysqlConnection.model_validate(metastore_conn) + service_connection.metastoreConnection = conn_cls.model_validate( + metastore_conn ) + break except ValidationError: - raise ValueError("Invalid metastore connection") + continue + else: + raise ValueError("Invalid metastore connection") engine = get_metastore_connection(service_connection.metastoreConnection) return test_connection_db_schema_sources( diff --git a/ingestion/src/metadata/ingestion/source/database/hive/lineage.py b/ingestion/src/metadata/ingestion/source/database/hive/lineage.py index 3ee9594652c7..fc2128a9461f 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/lineage.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/lineage.py @@ -11,6 +11,7 @@ """ Hive lineage module """ + from typing import Optional from metadata.generated.schema.entity.services.connections.database.hiveConnection import ( diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metadata.py b/ingestion/src/metadata/ingestion/source/database/hive/metadata.py index 7ea1fbb4bec5..9e30efb256ca 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/metadata.py @@ -24,9 +24,15 @@ from metadata.generated.schema.entity.services.connections.database.hiveConnection import ( HiveConnection, ) +from metadata.generated.schema.entity.services.connections.database.mssqlConnection import ( + MssqlConnection, +) from metadata.generated.schema.entity.services.connections.database.mysqlConnection import ( MysqlConnection, ) +from metadata.generated.schema.entity.services.connections.database.oracleConnection import ( + OracleConnection, +) from metadata.generated.schema.entity.services.connections.database.postgresConnection import ( PostgresConnection, ) @@ -84,7 +90,9 @@ def _parse_version(self, version: str) -> Tuple: def _get_validated_metastore_connection( self, - ) -> Optional[Union[PostgresConnection, MysqlConnection]]: + ) -> Optional[ + Union[PostgresConnection, MysqlConnection, MssqlConnection, OracleConnection] + ]: """ Validate and return the metastore connection if it exists. Handles cases where the connection may be a raw dict that needs validation. @@ -94,18 +102,25 @@ def _get_validated_metastore_connection( if not metastore_conn: return None - if isinstance(metastore_conn, (PostgresConnection, MysqlConnection)): + # Supported metastore connection types + METASTORE_CONNECTION_TYPES = ( + PostgresConnection, + MysqlConnection, + MssqlConnection, + OracleConnection, + ) + + if isinstance(metastore_conn, METASTORE_CONNECTION_TYPES): return metastore_conn if isinstance(metastore_conn, dict) and len(metastore_conn) > 0: - try: - return PostgresConnection.model_validate(metastore_conn) - except ValidationError: + for conn_cls in METASTORE_CONNECTION_TYPES: try: - return MysqlConnection.model_validate(metastore_conn) + return conn_cls.model_validate(metastore_conn) except ValidationError: - logger.warning("Invalid metastore connection configuration") - return None + continue + logger.warning("Invalid metastore connection configuration") + return None return None diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mixin.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mixin.py index ef4e0b72f0c6..2502d7354de7 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mixin.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mixin.py @@ -11,6 +11,7 @@ """ Hive Metastore Dialect Mixin """ + from sqlalchemy.engine import reflection from metadata.ingestion.source.database.hive.utils import get_columns diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mssql/__init__.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mssql/__init__.py new file mode 100644 index 000000000000..c82c6316e979 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mssql/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Hive Metastore MSSQL Dialect +""" + +from sqlalchemy.dialects import registry + +from .dialect import HiveMssqlMetaStoreDialect + +__version__ = "0.1.0" +__all__ = ["HiveMssqlMetaStoreDialect"] +registry.register( + "hive.mssql", + "metadata.ingestion.source.database.hive.metastore_dialects.mssql.dialect", + "HiveMssqlMetaStoreDialect", +) diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mssql/dialect.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mssql/dialect.py new file mode 100644 index 000000000000..7febf80dc97a --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mssql/dialect.py @@ -0,0 +1,156 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Hive Metastore MSSQL Dialect Mixin +""" + +from sqlalchemy import text +from sqlalchemy.dialects.mssql.base import MSDialect +from sqlalchemy.engine import reflection + +from metadata.ingestion.source.database.hive.metastore_dialects.mixin import ( + HiveMetaStoreDialectMixin, +) +from metadata.utils.logger import ingestion_logger +from metadata.utils.sqlalchemy_utils import ( + get_table_comment_wrapper, + get_view_definition_wrapper, +) + +logger = ingestion_logger() + + +# pylint: disable=abstract-method +class HiveMssqlMetaStoreDialect(HiveMetaStoreDialectMixin, MSDialect): + """ + MSSQL metastore dialect class for Hive metastore backed by SQL Server. + Uses unquoted identifiers and supports CTEs. + """ + + name = "hive" + driver = "mssql" + supports_statement_cache = False + + def get_schema_names(self, connection, **kw): + # Equivalent to SHOW DATABASES + schema_names = [ + row[0] for row in connection.execute(text("SELECT NAME FROM DBS")) + ] + logger.debug(f"Fetched schema names: {schema_names}") + return schema_names + + # pylint: disable=arguments-differ + def get_view_names(self, connection, schema=None, **kw): + query, params = self._get_table_names_base_query(schema=schema) + query += " WHERE TBL_TYPE = 'VIRTUAL_VIEW'" + view_names = [row[0] for row in connection.execute(text(query), params)] + logger.debug(f"Fetched view names for schema '{schema}': {view_names}") + return view_names + + def _get_table_columns(self, connection, table_name, schema): + params = {"table_name": table_name} + schema_join = ( + """ + JOIN DBS db ON tbsl.DB_ID = db.DB_ID + AND db.NAME = :schema + """ + if schema + else "" + ) + if schema: + params["schema"] = schema + + query = f""" + WITH regular_columns AS ( + SELECT + col.COLUMN_NAME, + col.TYPE_NAME, + col.COMMENT + FROM COLUMNS_V2 col + JOIN CDS cds ON col.CD_ID = cds.CD_ID + JOIN SDS sds ON sds.CD_ID = cds.CD_ID + JOIN TBLS tbsl ON sds.SD_ID = tbsl.SD_ID + AND tbsl.TBL_NAME = :table_name + {schema_join} + ), + partition_columns AS ( + SELECT + pk.PKEY_NAME AS COLUMN_NAME, + pk.PKEY_TYPE AS TYPE_NAME, + pk.PKEY_COMMENT AS COMMENT + FROM PARTITION_KEYS pk + JOIN TBLS tbsl ON pk.TBL_ID = tbsl.TBL_ID + AND tbsl.TBL_NAME = :table_name + {schema_join} + ) + SELECT * FROM regular_columns + UNION ALL + SELECT * FROM partition_columns + """ + return connection.execute(text(query), params).fetchall() + + def _get_table_names_base_query(self, schema=None): + query = "SELECT TBL_NAME FROM TBLS tbl" + params = {} + if schema: + query += " JOIN DBS db ON tbl.DB_ID = db.DB_ID AND db.NAME = :schema" + params["schema"] = schema + return query, params + + def get_table_names(self, connection, schema=None, **kw): + query, params = self._get_table_names_base_query(schema=schema) + query += " WHERE (TBL_TYPE != 'VIRTUAL_VIEW' OR TBL_TYPE IS NULL)" + table_names = [row[0] for row in connection.execute(text(query), params)] + logger.debug(f"Fetched table names for schema '{schema}': {table_names}") + return table_names + + @reflection.cache + def get_view_definition(self, connection, view_name, schema=None, **kw): + query = """ + SELECT + dbs.NAME AS [schema], + tbls.TBL_NAME AS view_name, + tbls.VIEW_ORIGINAL_TEXT AS view_def + FROM TBLS tbls + JOIN DBS dbs ON tbls.DB_ID = dbs.DB_ID + WHERE tbls.VIEW_ORIGINAL_TEXT IS NOT NULL + """ + return get_view_definition_wrapper( + self, + connection, + table_name=view_name, + schema=schema, + query=query, + ) + + @reflection.cache + def get_table_comment(self, connection, table_name, schema=None, **kw): + query = """ + SELECT + DBS.NAME AS [schema], + TBLS.TBL_NAME AS table_name, + TABLE_PARAMS.PARAM_VALUE AS table_comment + FROM DBS + JOIN TBLS ON DBS.DB_ID = TBLS.DB_ID + LEFT JOIN TABLE_PARAMS ON TBLS.TBL_ID = TABLE_PARAMS.TBL_ID + AND TABLE_PARAMS.PARAM_KEY = 'comment' + """ + return get_table_comment_wrapper( + self, + connection, + table_name=table_name, + schema=schema, + query=query, + ) + + # pylint: disable=arguments-renamed + def get_dialect_cls(self): + return HiveMssqlMetaStoreDialect diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mysql/__init__.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mysql/__init__.py index 16bb9e015916..30e23a3c2096 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mysql/__init__.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mysql/__init__.py @@ -11,6 +11,7 @@ """ Hive Metastore Mysql Dialect """ + from sqlalchemy.dialects import registry from .dialect import HiveMysqlMetaStoreDialect diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mysql/dialect.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mysql/dialect.py index fe754cd37ae7..57223208e7ec 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mysql/dialect.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/mysql/dialect.py @@ -11,6 +11,7 @@ """ Hive Metastore Mysql Dialect """ + from sqlalchemy import text from sqlalchemy.dialects.mysql.pymysql import MySQLDialect_pymysql from sqlalchemy.engine import reflection @@ -48,21 +49,24 @@ def get_schema_names(self, connection, **kw): def get_view_names(self, connection, schema=None, **kw): # Hive does not provide functionality to query tableType # This allows reflection to not crash at the cost of being inaccurate - query = self._get_table_names_base_query(schema=schema) + query, params = self._get_table_names_base_query(schema=schema) query += """ WHERE TBL_TYPE = 'VIRTUAL_VIEW'""" - view_names = [row[0] for row in connection.execute(text(query))] + view_names = [row[0] for row in connection.execute(text(query), params)] logger.debug(f"Fetched view names for schema '{schema}': {view_names}") return view_names def _get_table_columns(self, connection, table_name, schema): + params = {"table_name": table_name} schema_join = ( - f""" + """ JOIN DBS db on tbsl.DB_ID = db.DB_ID - AND db.NAME = '{schema}' + AND db.NAME = :schema """ if schema else "" ) + if schema: + params["schema"] = schema # Rewritten to avoid CTE syntax for MySQL < 8.0 compatibility # Using direct UNION ALL of subqueries instead of WITH clause @@ -75,7 +79,7 @@ def _get_table_columns(self, connection, table_name, schema): JOIN CDS cds ON col.CD_ID = cds.CD_ID JOIN SDS sds ON sds.CD_ID = cds.CD_ID JOIN TBLS tbsl ON sds.SD_ID = tbsl.SD_ID - AND tbsl.TBL_NAME = '{table_name}' + AND tbsl.TBL_NAME = :table_name {schema_join} UNION ALL SELECT @@ -84,23 +88,25 @@ def _get_table_columns(self, connection, table_name, schema): pk.PKEY_COMMENT as COMMENT FROM PARTITION_KEYS pk JOIN TBLS tbsl ON pk.TBL_ID = tbsl.TBL_ID - AND tbsl.TBL_NAME = '{table_name}' + AND tbsl.TBL_NAME = :table_name {schema_join} """ - return connection.execute(text(query)).fetchall() + return connection.execute(text(query), params).fetchall() def _get_table_names_base_query(self, schema=None): query = "SELECT TBL_NAME from TBLS tbl" + params = {} if schema: - query += f""" JOIN DBS db on tbl.DB_ID = db.DB_ID - and db.NAME = '{schema}'""" - return query + query += """ JOIN DBS db on tbl.DB_ID = db.DB_ID + and db.NAME = :schema""" + params["schema"] = schema + return query, params def get_table_names(self, connection, schema=None, **kw): - query = self._get_table_names_base_query(schema=schema) + query, params = self._get_table_names_base_query(schema=schema) query += """ WHERE (TBL_TYPE != 'VIRTUAL_VIEW' OR TBL_TYPE IS NULL)""" - table_names = [row[0] for row in connection.execute(text(query))] + table_names = [row[0] for row in connection.execute(text(query), params)] logger.debug(f"Fetched table names for schema '{schema}': {table_names}") return table_names diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/oracle/__init__.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/oracle/__init__.py new file mode 100644 index 000000000000..7b9a3ebf640a --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/oracle/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Hive Metastore Oracle Dialect +""" + +from sqlalchemy.dialects import registry + +from .dialect import HiveOracleMetaStoreDialect + +__version__ = "0.1.0" +__all__ = ["HiveOracleMetaStoreDialect"] +registry.register( + "hive.oracle", + "metadata.ingestion.source.database.hive.metastore_dialects.oracle.dialect", + "HiveOracleMetaStoreDialect", +) diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/oracle/dialect.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/oracle/dialect.py new file mode 100644 index 000000000000..a2cc0bb8ac72 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/oracle/dialect.py @@ -0,0 +1,157 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Hive Metastore Oracle Dialect Mixin +""" + +from sqlalchemy import text +from sqlalchemy.dialects.oracle.cx_oracle import OracleDialect_cx_oracle +from sqlalchemy.engine import reflection + +from metadata.ingestion.source.database.hive.metastore_dialects.mixin import ( + HiveMetaStoreDialectMixin, +) +from metadata.utils.logger import ingestion_logger +from metadata.utils.sqlalchemy_utils import ( + get_table_comment_wrapper, + get_view_definition_wrapper, +) + +logger = ingestion_logger() + + +# pylint: disable=abstract-method +class HiveOracleMetaStoreDialect(HiveMetaStoreDialectMixin, OracleDialect_cx_oracle): + """ + Oracle metastore dialect class for Hive metastore backed by Oracle Database. + Uses double-quote identifiers compatible with Oracle and supports CTEs. + """ + + name = "hive" + driver = "oracle" + supports_statement_cache = False + + def get_schema_names(self, connection, **kw): + # Equivalent to SHOW DATABASES + schema_names = [ + row[0] for row in connection.execute(text('SELECT "NAME" FROM "DBS"')) + ] + logger.debug(f"Fetched schema names: {schema_names}") + return schema_names + + # pylint: disable=arguments-differ + def get_view_names(self, connection, schema=None, **kw): + query, params = self._get_table_names_base_query(schema=schema) + query += """ WHERE "TBL_TYPE" = 'VIRTUAL_VIEW'""" + view_names = [row[0] for row in connection.execute(text(query), params)] + logger.debug(f"Fetched view names for schema '{schema}': {view_names}") + return view_names + + def _get_table_columns(self, connection, table_name, schema): + params = {"table_name": table_name} + schema_join = ( + """ + JOIN "DBS" db ON tbsl."DB_ID" = db."DB_ID" + AND db."NAME" = :schema + """ + if schema + else "" + ) + if schema: + params["schema"] = schema + + query = f""" + WITH regular_columns AS ( + SELECT + col."COLUMN_NAME", + col."TYPE_NAME", + col."COMMENT" + FROM "COLUMNS_V2" col + JOIN "CDS" cds ON col."CD_ID" = cds."CD_ID" + JOIN "SDS" sds ON sds."CD_ID" = cds."CD_ID" + JOIN "TBLS" tbsl ON sds."SD_ID" = tbsl."SD_ID" + AND tbsl."TBL_NAME" = :table_name + {schema_join} + ), + partition_columns AS ( + SELECT + pk."PKEY_NAME" AS "COLUMN_NAME", + pk."PKEY_TYPE" AS "TYPE_NAME", + pk."PKEY_COMMENT" AS "COMMENT" + FROM "PARTITION_KEYS" pk + JOIN "TBLS" tbsl ON pk."TBL_ID" = tbsl."TBL_ID" + AND tbsl."TBL_NAME" = :table_name + {schema_join} + ) + SELECT * FROM regular_columns + UNION ALL + SELECT * FROM partition_columns + """ + return connection.execute(text(query), params).fetchall() + + def _get_table_names_base_query(self, schema=None): + query = 'SELECT "TBL_NAME" FROM "TBLS" tbl' + params = {} + if schema: + query += """ JOIN "DBS" db ON tbl."DB_ID" = db."DB_ID" + AND db."NAME" = :schema""" + params["schema"] = schema + return query, params + + def get_table_names(self, connection, schema=None, **kw): + query, params = self._get_table_names_base_query(schema=schema) + query += """ WHERE ("TBL_TYPE" != 'VIRTUAL_VIEW' OR "TBL_TYPE" IS NULL)""" + table_names = [row[0] for row in connection.execute(text(query), params)] + logger.debug(f"Fetched table names for schema '{schema}': {table_names}") + return table_names + + @reflection.cache + def get_view_definition(self, connection, view_name, schema=None, **kw): + query = """ + SELECT + dbs."NAME" AS "schema", + tbls."TBL_NAME" AS view_name, + tbls."VIEW_ORIGINAL_TEXT" AS view_def + FROM "TBLS" tbls + JOIN "DBS" dbs ON tbls."DB_ID" = dbs."DB_ID" + WHERE tbls."VIEW_ORIGINAL_TEXT" IS NOT NULL + """ + return get_view_definition_wrapper( + self, + connection, + table_name=view_name, + schema=schema, + query=query, + ) + + @reflection.cache + def get_table_comment(self, connection, table_name, schema=None, **kw): + query = """ + SELECT + "DBS"."NAME" AS "schema", + "TBLS"."TBL_NAME" AS table_name, + "TABLE_PARAMS"."PARAM_VALUE" AS table_comment + FROM "DBS" + JOIN "TBLS" ON "DBS"."DB_ID" = "TBLS"."DB_ID" + LEFT JOIN "TABLE_PARAMS" ON "TBLS"."TBL_ID" = "TABLE_PARAMS"."TBL_ID" + AND "TABLE_PARAMS"."PARAM_KEY" = 'comment' + """ + return get_table_comment_wrapper( + self, + connection, + table_name=table_name, + schema=schema, + query=query, + ) + + # pylint: disable=arguments-renamed + def get_dialect_cls(self): + return HiveOracleMetaStoreDialect diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/postgres/__init__.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/postgres/__init__.py index a98a56223640..21d067c3af4a 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/postgres/__init__.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/postgres/__init__.py @@ -11,6 +11,7 @@ """ Hive Metastore Postgres Dialect """ + from sqlalchemy.dialects import registry from .dialect import HivePostgresMetaStoreDialect diff --git a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/postgres/dialect.py b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/postgres/dialect.py index d7960081f155..fca19fcdf09e 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/postgres/dialect.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/metastore_dialects/postgres/dialect.py @@ -11,6 +11,7 @@ """ Hive Metastore Postgres Dialect Mixin """ + from sqlalchemy import text from sqlalchemy.dialects.postgresql.psycopg2 import PGDialect_psycopg2 from sqlalchemy.engine import reflection @@ -49,22 +50,25 @@ def get_schema_names(self, connection, **kw): def get_view_names(self, connection, schema=None, **kw): # Hive does not provide functionality to query tableType # This allows reflection to not crash at the cost of being inaccurate - query = self._get_table_names_base_query(schema=schema) + query, params = self._get_table_names_base_query(schema=schema) query += """ WHERE "TBL_TYPE" = 'VIRTUAL_VIEW'""" - view_names = [row[0] for row in connection.execute(text(query))] + view_names = [row[0] for row in connection.execute(text(query), params)] logger.debug(f"Fetched view names for schema '{schema}': {view_names}") return view_names def _get_table_columns(self, connection, table_name, schema): # Build schema join clause if schema is provided + params = {"table_name": table_name} schema_join = ( - f""" + """ JOIN "DBS" db on tbsl."DB_ID" = db."DB_ID" - AND db."NAME" = '{schema}' + AND db."NAME" = :schema """ if schema else "" ) + if schema: + params["schema"] = schema query = f""" WITH regular_columns AS ( @@ -77,7 +81,7 @@ def _get_table_columns(self, connection, table_name, schema): JOIN "CDS" cds ON col."CD_ID" = cds."CD_ID" JOIN "SDS" sds ON sds."CD_ID" = cds."CD_ID" JOIN "TBLS" tbsl ON sds."SD_ID" = tbsl."SD_ID" - AND tbsl."TBL_NAME" = '{table_name}' + AND tbsl."TBL_NAME" = :table_name {schema_join} ), partition_columns AS ( @@ -88,7 +92,7 @@ def _get_table_columns(self, connection, table_name, schema): pk."PKEY_COMMENT" as "COMMENT" FROM "PARTITION_KEYS" pk JOIN "TBLS" tbsl ON pk."TBL_ID" = tbsl."TBL_ID" - AND tbsl."TBL_NAME" = '{table_name}' + AND tbsl."TBL_NAME" = :table_name {schema_join} ) -- Combine regular and partition columns @@ -96,19 +100,21 @@ def _get_table_columns(self, connection, table_name, schema): UNION ALL SELECT * FROM partition_columns """ - return connection.execute(text(query)).fetchall() + return connection.execute(text(query), params).fetchall() def _get_table_names_base_query(self, schema=None): query = 'SELECT "TBL_NAME" from "TBLS" tbl' + params = {} if schema: - query += f""" JOIN "DBS" db on tbl."DB_ID" = db."DB_ID" - and db."NAME" = '{schema}'""" - return query + query += """ JOIN "DBS" db on tbl."DB_ID" = db."DB_ID" + and db."NAME" = :schema""" + params["schema"] = schema + return query, params def get_table_names(self, connection, schema=None, **kw): - query = self._get_table_names_base_query(schema=schema) + query, params = self._get_table_names_base_query(schema=schema) query += """ WHERE ("TBL_TYPE" != 'VIRTUAL_VIEW' OR "TBL_TYPE" IS NULL)""" - table_names = [row[0] for row in connection.execute(text(query))] + table_names = [row[0] for row in connection.execute(text(query), params)] logger.debug(f"Fetched table names for schema '{schema}': {table_names}") return table_names diff --git a/ingestion/src/metadata/ingestion/source/database/hive/utils.py b/ingestion/src/metadata/ingestion/source/database/hive/utils.py index 01310ac7d5af..4e156a848428 100644 --- a/ingestion/src/metadata/ingestion/source/database/hive/utils.py +++ b/ingestion/src/metadata/ingestion/source/database/hive/utils.py @@ -11,6 +11,7 @@ """ Hive source methods. """ + import re from pyhive.sqlalchemy_hive import _type_map diff --git a/ingestion/src/metadata/sampler/processor.py b/ingestion/src/metadata/sampler/processor.py index 312d8eeb2611..0b8e544bcd02 100644 --- a/ingestion/src/metadata/sampler/processor.py +++ b/ingestion/src/metadata/sampler/processor.py @@ -116,7 +116,12 @@ def __init__( ) self._sample_data_config = None - settings = self.metadata.get_profiler_config_settings() + try: + settings = self.metadata.get_profiler_config_settings() + except Exception as exc: + logger.debug(f"Could not fetch global profiler config: {exc}") + settings = None + if settings: profiler_cfg = cast(ProfilerConfiguration, settings.config_value) self._sample_data_config = profiler_cfg.sampleDataConfig diff --git a/ingestion/tests/unit/topology/database/test_hive.py b/ingestion/tests/unit/topology/database/test_hive.py index 5379adf7d964..8a69c5735322 100644 --- a/ingestion/tests/unit/topology/database/test_hive.py +++ b/ingestion/tests/unit/topology/database/test_hive.py @@ -37,9 +37,15 @@ HiveConnection, HiveScheme, ) +from metadata.generated.schema.entity.services.connections.database.mssqlConnection import ( + MssqlConnection, +) from metadata.generated.schema.entity.services.connections.database.mysqlConnection import ( MysqlConnection, ) +from metadata.generated.schema.entity.services.connections.database.oracleConnection import ( + OracleConnection, +) from metadata.generated.schema.entity.services.connections.database.postgresConnection import ( PostgresConnection, ) @@ -1263,3 +1269,61 @@ def test_get_validated_metastore_connection_with_invalid_dict(self): self.hive.service_connection.metastoreConnection = invalid_dict result = self.hive._get_validated_metastore_connection() self.assertIsNone(result) + + def test_get_validated_metastore_connection_with_mssql_object(self): + """ + Test _get_validated_metastore_connection returns MssqlConnection when already validated + """ + mssql_conn = MssqlConnection( + username="mssql_user", + hostPort="localhost:1433", + database="hive_metastore", + ) + self.hive.service_connection.metastoreConnection = mssql_conn + result = self.hive._get_validated_metastore_connection() + self.assertIsInstance(result, MssqlConnection) + self.assertEqual(result, mssql_conn) + + def test_get_validated_metastore_connection_with_oracle_object(self): + """ + Test _get_validated_metastore_connection returns OracleConnection when already validated + """ + oracle_conn = OracleConnection( + username="oracle_user", + hostPort="localhost:1521", + oracleConnectionType={"oracleServiceName": "XE"}, + ) + self.hive.service_connection.metastoreConnection = oracle_conn + result = self.hive._get_validated_metastore_connection() + self.assertIsInstance(result, OracleConnection) + self.assertEqual(result, oracle_conn) + + def test_get_validated_metastore_connection_with_mssql_dict(self): + """ + Test _get_validated_metastore_connection parses dict as MssqlConnection + """ + mssql_dict = { + "type": "Mssql", + "username": "mssql_user", + "hostPort": "localhost:1433", + "database": "hive_metastore", + } + self.hive.service_connection.metastoreConnection = mssql_dict + result = self.hive._get_validated_metastore_connection() + self.assertIsInstance(result, MssqlConnection) + self.assertEqual(result.username, "mssql_user") + + def test_get_validated_metastore_connection_with_oracle_dict(self): + """ + Test _get_validated_metastore_connection parses dict as OracleConnection + """ + oracle_dict = { + "type": "Oracle", + "username": "oracle_user", + "hostPort": "localhost:1521", + "oracleConnectionType": {"oracleServiceName": "XE"}, + } + self.hive.service_connection.metastoreConnection = oracle_dict + result = self.hive._get_validated_metastore_connection() + self.assertIsInstance(result, OracleConnection) + self.assertEqual(result.username, "oracle_user") diff --git a/ingestion/tests/unit/topology/database/test_hive_metastore_mssql_dialect.py b/ingestion/tests/unit/topology/database/test_hive_metastore_mssql_dialect.py new file mode 100644 index 000000000000..78f7e8c7978a --- /dev/null +++ b/ingestion/tests/unit/topology/database/test_hive_metastore_mssql_dialect.py @@ -0,0 +1,205 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test Hive MSSQL Metastore Dialect +""" + +from unittest.mock import MagicMock, Mock + +from metadata.ingestion.source.database.hive.metastore_dialects.mssql.dialect import ( + HiveMssqlMetaStoreDialect, +) + + +class TestHiveMssqlMetastoreDialectGetTableColumns: + """ + MSSQL supports CTEs (unlike MySQL 5.7), so the dialect uses WITH clauses + and unquoted identifiers (MSSQL does not require quoting for standard names). + """ + + def setup_method(self): + self.dialect = HiveMssqlMetaStoreDialect() + + def test_get_table_columns_uses_cte(self): + """MSSQL dialect uses WITH … AS (CTE) — unlike MySQL which uses UNION ALL only.""" + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [ + ("col1", "nvarchar", "First column"), + ("col2", "int", "Second column"), + ("part_col", "nvarchar", "Partition column"), + ] + mock_connection.execute.return_value = mock_result + + result = self.dialect._get_table_columns( + mock_connection, "test_table", "test_schema" + ) + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "WITH" in executed_query.upper() + assert "UNION ALL" in executed_query.upper() + assert len(result) == 3 + + def test_get_table_columns_without_schema(self): + """Without schema, the DBS join should be absent.""" + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [("col1", "nvarchar", None)] + mock_connection.execute.return_value = mock_result + + self.dialect._get_table_columns(mock_connection, "test_table", None) + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert ":table_name" in executed_query + assert "DBS" not in executed_query + assert mock_connection.execute.call_args[0][1] == {"table_name": "test_table"} + + def test_get_table_columns_with_schema_joins_dbs(self): + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [] + mock_connection.execute.return_value = mock_result + + self.dialect._get_table_columns(mock_connection, "test_table", "my_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert ":schema" in executed_query + assert "DBS" in executed_query + assert mock_connection.execute.call_args[0][1] == { + "schema": "my_schema", + "table_name": "test_table", + } + + def test_get_table_columns_query_structure(self): + """Query must reference both regular and partition column tables.""" + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [] + mock_connection.execute.return_value = mock_result + + self.dialect._get_table_columns(mock_connection, "test_table", "test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "COLUMNS_V2" in executed_query + assert "PARTITION_KEYS" in executed_query + assert "PKEY_NAME" in executed_query + assert "PKEY_TYPE" in executed_query + assert "PKEY_COMMENT" in executed_query + assert executed_query.upper().count("SELECT") == 4 + + def test_get_table_columns_uses_unquoted_identifiers(self): + """MSSQL dialect uses unquoted identifiers, not double-quoted like Postgres.""" + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [] + mock_connection.execute.return_value = mock_result + + self.dialect._get_table_columns(mock_connection, "test_table", "test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + # Should NOT use Postgres-style double-quoted identifiers + assert '"COLUMN_NAME"' not in executed_query + assert '"TBLS"' not in executed_query + # Should use plain unquoted identifiers + assert "COLUMN_NAME" in executed_query + assert "TBLS" in executed_query + + +class TestHiveMssqlMetastoreDialectGetTableNames: + """ + Null-safe TBL_TYPE filtering: NULL != 'VIRTUAL_VIEW' evaluates to NULL in SQL, + so rows with a NULL TBL_TYPE would be excluded without the IS NULL guard. + """ + + def setup_method(self): + self.dialect = HiveMssqlMetaStoreDialect() + + def test_get_table_names_query_excludes_virtual_views(self): + mock_connection = Mock() + mock_connection.execute.return_value = [("table1",), ("table2",)] + + result = self.dialect.get_table_names(mock_connection, schema="test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "VIRTUAL_VIEW" in executed_query + assert "!=" in executed_query + assert result == ["table1", "table2"] + assert mock_connection.execute.call_args[0][1] == {"schema": "test_schema"} + + def test_get_table_names_query_includes_null_tbl_type(self): + """IS NULL guard ensures tables without a TBL_TYPE are included.""" + mock_connection = Mock() + mock_connection.execute.return_value = [] + + self.dialect.get_table_names(mock_connection, schema="test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "IS NULL" in executed_query.upper() + assert "TBL_TYPE" in executed_query + + def test_get_table_names_query_uses_or_condition(self): + mock_connection = Mock() + mock_connection.execute.return_value = [] + + self.dialect.get_table_names(mock_connection, schema="test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "OR" in executed_query.upper() + assert "TBL_TYPE" in executed_query + assert "VIRTUAL_VIEW" in executed_query + assert "IS NULL" in executed_query.upper() + + def test_get_table_names_with_schema_joins_dbs(self): + mock_connection = Mock() + mock_connection.execute.return_value = [("my_table",)] + + result = self.dialect.get_table_names(mock_connection, schema="my_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "DBS" in executed_query + assert ":schema" in executed_query + assert result == ["my_table"] + assert mock_connection.execute.call_args[0][1] == {"schema": "my_schema"} + + def test_get_table_names_without_schema(self): + mock_connection = Mock() + mock_connection.execute.return_value = [("table_a",)] + + result = self.dialect.get_table_names(mock_connection, schema=None) + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "TBL_TYPE" in executed_query + assert "IS NULL" in executed_query.upper() + assert "DBS" not in executed_query + assert result == ["table_a"] + assert mock_connection.execute.call_args[0][1] == {} + + def test_get_table_names_returns_empty_when_no_tables(self): + mock_connection = Mock() + mock_connection.execute.return_value = [] + + result = self.dialect.get_table_names(mock_connection, schema="empty_schema") + + assert result == [] + + def test_get_schema_names_uses_unquoted_identifiers(self): + """MSSQL schema names query uses unquoted NAME column.""" + mock_connection = Mock() + mock_connection.execute.return_value = [("db1",), ("db2",)] + + result = self.dialect.get_schema_names(mock_connection) + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "NAME" in executed_query + assert "DBS" in executed_query + # MSSQL uses plain unquoted names, not Postgres-style "NAME" + assert '"NAME"' not in executed_query + assert result == ["db1", "db2"] diff --git a/ingestion/tests/unit/topology/database/test_hive_metastore_mysql_dialect.py b/ingestion/tests/unit/topology/database/test_hive_metastore_mysql_dialect.py index ece137f0304b..2427e032ef08 100644 --- a/ingestion/tests/unit/topology/database/test_hive_metastore_mysql_dialect.py +++ b/ingestion/tests/unit/topology/database/test_hive_metastore_mysql_dialect.py @@ -70,11 +70,15 @@ def test_get_table_columns_query_no_cte(self): self.assertIn("PARTITION_KEYS", executed_query) self.assertIn("TBLS", executed_query) - # Verify the query includes the table name - self.assertIn("test_table", executed_query) + # Verify the query uses bound parameters (no string interpolation) + self.assertIn(":table_name", executed_query) + self.assertIn(":schema", executed_query) + self.assertEqual( + mock_connection.execute.call_args[0][1], + {"schema": "test_schema", "table_name": "test_table"}, + ) # Verify the query includes the schema join - self.assertIn("test_schema", executed_query) self.assertIn("DBS", executed_query) # Verify the result @@ -111,6 +115,11 @@ def test_get_table_columns_without_schema(self): # Verify UNION ALL is present self.assertIn("UNION ALL", executed_query.upper()) + self.assertIn(":table_name", executed_query) + self.assertEqual( + mock_connection.execute.call_args[0][1], + {"table_name": "test_table"}, + ) # Verify the result self.assertEqual(len(result), 1) @@ -205,6 +214,7 @@ def test_get_table_names_query_excludes_virtual_views(self): assert "VIRTUAL_VIEW" in executed_query assert "!=" in executed_query assert result == ["table1", "table2"] + assert mock_connection.execute.call_args[0][1] == {"schema": "test_schema"} def test_get_table_names_query_includes_null_tbl_type(self): mock_connection = Mock() @@ -236,8 +246,9 @@ def test_get_table_names_with_schema_joins_dbs(self): executed_query = str(mock_connection.execute.call_args[0][0]) assert "DBS" in executed_query - assert "my_schema" in executed_query + assert ":schema" in executed_query assert result == ["my_table"] + assert mock_connection.execute.call_args[0][1] == {"schema": "my_schema"} def test_get_table_names_without_schema(self): mock_connection = Mock() @@ -250,6 +261,7 @@ def test_get_table_names_without_schema(self): assert "IS NULL" in executed_query.upper() assert "DBS" not in executed_query assert result == ["table_a"] + assert mock_connection.execute.call_args[0][1] == {} def test_get_table_names_returns_empty_when_no_tables(self): mock_connection = Mock() diff --git a/ingestion/tests/unit/topology/database/test_hive_metastore_oracle_dialect.py b/ingestion/tests/unit/topology/database/test_hive_metastore_oracle_dialect.py new file mode 100644 index 000000000000..f3852c96d637 --- /dev/null +++ b/ingestion/tests/unit/topology/database/test_hive_metastore_oracle_dialect.py @@ -0,0 +1,196 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test Hive Oracle Metastore Dialect +""" + +from unittest.mock import MagicMock, Mock + +from metadata.ingestion.source.database.hive.metastore_dialects.oracle.dialect import ( + HiveOracleMetaStoreDialect, +) + + +class TestHiveOracleMetastoreDialectGetTableNames: + """ + Oracle dialect uses double-quoted identifiers for case-insensitive column names, + matching the Postgres dialect behaviour. NULL-safe TBL_TYPE filtering applies. + """ + + def setup_method(self): + self.dialect = HiveOracleMetaStoreDialect() + + def test_get_table_names_query_excludes_virtual_views(self): + mock_connection = Mock() + mock_connection.execute.return_value = [("table1",), ("table2",)] + + result = self.dialect.get_table_names(mock_connection, schema="test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "VIRTUAL_VIEW" in executed_query + assert "!=" in executed_query + assert result == ["table1", "table2"] + assert mock_connection.execute.call_args[0][1] == {"schema": "test_schema"} + + def test_get_table_names_query_includes_null_tbl_type(self): + mock_connection = Mock() + mock_connection.execute.return_value = [] + + self.dialect.get_table_names(mock_connection, schema="test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "IS NULL" in executed_query.upper() + assert "TBL_TYPE" in executed_query + + def test_get_table_names_query_uses_or_condition(self): + mock_connection = Mock() + mock_connection.execute.return_value = [] + + self.dialect.get_table_names(mock_connection, schema="test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "OR" in executed_query.upper() + assert "TBL_TYPE" in executed_query + assert "VIRTUAL_VIEW" in executed_query + assert "IS NULL" in executed_query.upper() + + def test_get_table_names_with_schema_joins_dbs(self): + mock_connection = Mock() + mock_connection.execute.return_value = [("my_table",)] + + result = self.dialect.get_table_names(mock_connection, schema="my_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "DBS" in executed_query + assert ":schema" in executed_query + assert result == ["my_table"] + assert mock_connection.execute.call_args[0][1] == {"schema": "my_schema"} + + def test_get_table_names_without_schema(self): + mock_connection = Mock() + mock_connection.execute.return_value = [("table_a",)] + + result = self.dialect.get_table_names(mock_connection, schema=None) + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "TBL_TYPE" in executed_query + assert "IS NULL" in executed_query.upper() + assert "DBS" not in executed_query + assert result == ["table_a"] + assert mock_connection.execute.call_args[0][1] == {} + + def test_get_table_names_returns_empty_when_no_tables(self): + mock_connection = Mock() + mock_connection.execute.return_value = [] + + result = self.dialect.get_table_names(mock_connection, schema="empty_schema") + + assert result == [] + + def test_get_schema_names_uses_quoted_identifiers(self): + """Oracle dialect uses double-quoted NAME to preserve case.""" + mock_connection = Mock() + mock_connection.execute.return_value = [("db1",), ("db2",)] + + result = self.dialect.get_schema_names(mock_connection) + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert '"NAME"' in executed_query + assert '"DBS"' in executed_query + assert result == ["db1", "db2"] + + +class TestHiveOracleMetastoreDialectGetTableColumns: + """ + Oracle dialect uses CTE (WITH clause) and double-quoted identifiers, + matching the Postgres dialect pattern. + """ + + def setup_method(self): + self.dialect = HiveOracleMetaStoreDialect() + + def test_get_table_columns_uses_cte(self): + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [ + ("col1", "VARCHAR2", "First column"), + ("col2", "NUMBER", "Second column"), + ] + mock_connection.execute.return_value = mock_result + + result = self.dialect._get_table_columns( + mock_connection, "test_table", "test_schema" + ) + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert "WITH" in executed_query.upper() + assert "UNION ALL" in executed_query.upper() + assert len(result) == 2 + + def test_get_table_columns_with_schema(self): + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [("col1", "VARCHAR2", None)] + mock_connection.execute.return_value = mock_result + + self.dialect._get_table_columns(mock_connection, "test_table", "test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert ":schema" in executed_query + assert '"DBS"' in executed_query + assert mock_connection.execute.call_args[0][1] == { + "schema": "test_schema", + "table_name": "test_table", + } + + def test_get_table_columns_without_schema(self): + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [("col1", "VARCHAR2", None)] + mock_connection.execute.return_value = mock_result + + self.dialect._get_table_columns(mock_connection, "test_table", None) + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert ":table_name" in executed_query + assert '"COLUMNS_V2"' in executed_query + assert '"PARTITION_KEYS"' in executed_query + assert mock_connection.execute.call_args[0][1] == {"table_name": "test_table"} + + def test_get_table_columns_uses_quoted_identifiers(self): + """Oracle dialect uses double-quoted identifiers for case-sensitivity.""" + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [] + mock_connection.execute.return_value = mock_result + + self.dialect._get_table_columns(mock_connection, "test_table", "test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert '"COLUMN_NAME"' in executed_query + assert '"TYPE_NAME"' in executed_query + assert '"TBLS"' in executed_query + + def test_get_table_columns_contains_both_selects(self): + """Query must select from both COLUMNS_V2 and PARTITION_KEYS.""" + mock_connection = Mock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [] + mock_connection.execute.return_value = mock_result + + self.dialect._get_table_columns(mock_connection, "test_table", "test_schema") + + executed_query = str(mock_connection.execute.call_args[0][0]) + assert '"COLUMNS_V2"' in executed_query + assert '"PARTITION_KEYS"' in executed_query + assert '"PKEY_NAME"' in executed_query + assert '"PKEY_TYPE"' in executed_query + assert '"PKEY_COMMENT"' in executed_query diff --git a/ingestion/tests/unit/topology/database/test_hive_metastore_postgres_dialect.py b/ingestion/tests/unit/topology/database/test_hive_metastore_postgres_dialect.py index 6da357e06b05..48f75752ac0f 100644 --- a/ingestion/tests/unit/topology/database/test_hive_metastore_postgres_dialect.py +++ b/ingestion/tests/unit/topology/database/test_hive_metastore_postgres_dialect.py @@ -40,6 +40,7 @@ def test_get_table_names_query_excludes_virtual_views(self): assert "VIRTUAL_VIEW" in executed_query assert "!=" in executed_query assert result == ["table1", "table2"] + assert mock_connection.execute.call_args[0][1] == {"schema": "test_schema"} def test_get_table_names_query_includes_null_tbl_type(self): mock_connection = Mock() @@ -71,8 +72,9 @@ def test_get_table_names_with_schema_joins_dbs(self): executed_query = str(mock_connection.execute.call_args[0][0]) assert "DBS" in executed_query - assert "my_schema" in executed_query + assert ":schema" in executed_query assert result == ["my_table"] + assert mock_connection.execute.call_args[0][1] == {"schema": "my_schema"} def test_get_table_names_without_schema(self): mock_connection = Mock() @@ -85,6 +87,7 @@ def test_get_table_names_without_schema(self): assert "IS NULL" in executed_query.upper() assert "DBS" not in executed_query assert result == ["table_a"] + assert mock_connection.execute.call_args[0][1] == {} def test_get_table_names_returns_empty_when_no_tables(self): mock_connection = Mock() @@ -131,8 +134,12 @@ def test_get_table_columns_with_schema(self): self.dialect._get_table_columns(mock_connection, "test_table", "test_schema") executed_query = str(mock_connection.execute.call_args[0][0]) - assert "test_schema" in executed_query + assert ":schema" in executed_query assert '"DBS"' in executed_query + assert mock_connection.execute.call_args[0][1] == { + "schema": "test_schema", + "table_name": "test_table", + } def test_get_table_columns_without_schema(self): mock_connection = Mock() @@ -143,9 +150,10 @@ def test_get_table_columns_without_schema(self): self.dialect._get_table_columns(mock_connection, "test_table", None) executed_query = str(mock_connection.execute.call_args[0][0]) - assert "test_table" in executed_query + assert ":table_name" in executed_query assert '"COLUMNS_V2"' in executed_query assert '"PARTITION_KEYS"' in executed_query + assert mock_connection.execute.call_args[0][1] == {"table_name": "test_table"} def test_get_table_columns_uses_quoted_identifiers(self): """Postgres dialect uses double-quoted identifiers for case-sensitivity.""" diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/hiveConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/hiveConnection.json index 205905f0cdd1..fd37b53f1225 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/hiveConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/hiveConnection.json @@ -96,6 +96,12 @@ { "$ref": "./mysqlConnection.json" }, + { + "$ref": "./mssqlConnection.json" + }, + { + "$ref": "./oracleConnection.json" + }, { "title": "None", "type": "object", diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/automations/createWorkflow.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/automations/createWorkflow.ts index 2d97b70617eb..c2936b061ada 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/automations/createWorkflow.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/automations/createWorkflow.ts @@ -1265,7 +1265,7 @@ export interface ConfigObject { /** * Connect with oracle by either passing service name or database schema name. */ - oracleConnectionType?: OracleConnectionType; + oracleConnectionType?: ConfigOracleConnectionType; /** * Controls how Oracle identifier names (tables, columns, schemas) are stored in * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. @@ -4128,9 +4128,9 @@ export interface PurpleGCPCredentials { } /** - * Underlying database connection - * * Mssql Database Connection Config + * + * Underlying database connection */ export interface DatabaseConnectionClass { connectionArguments?: { [key: string]: any }; @@ -4335,6 +4335,12 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Underlying database connection + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -4362,11 +4368,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -4387,6 +4399,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: ConsumerConfigSSLClass; sslMode?: SSLMode; @@ -4416,6 +4431,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -4433,13 +4454,89 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: HiveMetastoreConnectionDetailsOracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; +} + +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface HiveMetastoreConnectionDetailsOracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; } /** * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -4450,7 +4547,9 @@ export enum HiveMetastoreConnectionDetailsScheme { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } @@ -4520,7 +4619,7 @@ export interface OpenAPISchemaConnection { /** * Connect with oracle by either passing service name or database schema name. */ -export interface OracleConnectionType { +export interface ConfigOracleConnectionType { /** * databaseSchema of the data source. This is optional parameter, if you would like to * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/services/createDatabaseService.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/services/createDatabaseService.ts index ec992bfdd87d..e5cdff9e22f0 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/services/createDatabaseService.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/services/createDatabaseService.ts @@ -1846,6 +1846,10 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -1873,11 +1877,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -1898,6 +1908,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: Config; sslMode?: SSLMode; @@ -1927,6 +1940,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -1944,6 +1963,55 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: OracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; } /** @@ -1976,6 +2044,29 @@ export interface AuthTypeClass { gcpConfig?: GCPCredentials; } +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface OracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; +} + /** * Storage config to store sample data */ @@ -2064,7 +2155,11 @@ export interface AwsCredentials { * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -2122,33 +2217,12 @@ export enum SSLMode { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } -/** - * Connect with oracle by either passing service name or database schema name. - */ -export interface OracleConnectionType { - /** - * databaseSchema of the data source. This is optional parameter, if you would like to - * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata - * Ingestion attempts to scan all the databaseSchema. - */ - databaseSchema?: string; - /** - * The Oracle Service name is the TNS alias that you give when you remotely connect to your - * database. - */ - oracleServiceName?: string; - /** - * Pass the full constructed TNS string, e.g., - * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). - */ - oracleTNSConnection?: string; - [property: string]: any; -} - /** * SQLAlchemy driver scheme options. * diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts index 31a124dfe0c8..6940ae7ffef5 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts @@ -4336,7 +4336,7 @@ export interface ConfigObject { /** * Connect with oracle by either passing service name or database schema name. */ - oracleConnectionType?: OracleConnectionType; + oracleConnectionType?: ConfigOracleConnectionType; /** * Controls how Oracle identifier names (tables, columns, schemas) are stored in * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. @@ -6609,9 +6609,9 @@ export interface PurpleGCPCredentials { } /** - * Underlying database connection - * * Mssql Database Connection Config + * + * Underlying database connection */ export interface DatabaseConnectionClass { connectionArguments?: { [key: string]: any }; @@ -6827,6 +6827,12 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Underlying database connection + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -6854,11 +6860,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -6879,6 +6891,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: DbtSSLConfigClass; sslMode?: SSLMode; @@ -6908,6 +6923,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -6925,13 +6946,89 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: HiveMetastoreConnectionDetailsOracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; +} + +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface HiveMetastoreConnectionDetailsOracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; } /** * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -6942,7 +7039,9 @@ export enum HiveMetastoreConnectionDetailsScheme { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } @@ -7012,7 +7111,7 @@ export interface OpenAPISchemaConnection { /** * Connect with oracle by either passing service name or database schema name. */ -export interface OracleConnectionType { +export interface ConfigOracleConnectionType { /** * databaseSchema of the data source. This is optional parameter, if you would like to * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/configuration/aiPlatformConfiguration.ts b/openmetadata-ui/src/main/resources/ui/src/generated/configuration/aiPlatformConfiguration.ts index ef75b021c9f5..f1f5cb02eca5 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/configuration/aiPlatformConfiguration.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/configuration/aiPlatformConfiguration.ts @@ -89,11 +89,7 @@ export interface GrpcConfiguration { */ port: number; /** - * Deadline (minutes) Collate enforces on an AI Platform streaming response. Carried on the - * gRPC call, so the AI Platform reads it from context and wraps up gracefully. The chat - * lock sweeper uses streamDeadlineMinutes + 2 as its default stale-lock ceiling (override - * via COLLATE_CHAT_LOCK_MAX_DURATION_MINUTES). Capped at 60 minutes; for longer tasks - * prefer async job + polling over a single long-lived stream. + * Deadline (minutes) enforced on a streaming response from the gRPC server. */ streamDeadlineMinutes?: number; [property: string]: any; diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/automations/testServiceConnection.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/automations/testServiceConnection.ts index ad4f68112d44..ef700e5be841 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/automations/testServiceConnection.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/automations/testServiceConnection.ts @@ -1147,7 +1147,7 @@ export interface ConfigObject { /** * Connect with oracle by either passing service name or database schema name. */ - oracleConnectionType?: OracleConnectionType; + oracleConnectionType?: ConfigOracleConnectionType; /** * Controls how Oracle identifier names (tables, columns, schemas) are stored in * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. @@ -4010,9 +4010,9 @@ export interface PurpleGCPCredentials { } /** - * Underlying database connection - * * Mssql Database Connection Config + * + * Underlying database connection */ export interface DatabaseConnectionClass { connectionArguments?: { [key: string]: any }; @@ -4217,6 +4217,12 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Underlying database connection + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -4244,11 +4250,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -4269,6 +4281,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: ConsumerConfigSSLClass; sslMode?: SSLMode; @@ -4298,6 +4313,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -4315,13 +4336,89 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: HiveMetastoreConnectionDetailsOracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; +} + +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface HiveMetastoreConnectionDetailsOracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; } /** * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -4332,7 +4429,9 @@ export enum HiveMetastoreConnectionDetailsScheme { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } @@ -4402,7 +4501,7 @@ export interface OpenAPISchemaConnection { /** * Connect with oracle by either passing service name or database schema name. */ -export interface OracleConnectionType { +export interface ConfigOracleConnectionType { /** * databaseSchema of the data source. This is optional parameter, if you would like to * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/automations/workflow.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/automations/workflow.ts index 6577953517fb..d89f6482307b 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/automations/workflow.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/automations/workflow.ts @@ -1809,7 +1809,7 @@ export interface ConfigObject { /** * Connect with oracle by either passing service name or database schema name. */ - oracleConnectionType?: OracleConnectionType; + oracleConnectionType?: ConfigOracleConnectionType; /** * Controls how Oracle identifier names (tables, columns, schemas) are stored in * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. @@ -4509,9 +4509,9 @@ export interface PurpleGCPCredentials { } /** - * Underlying database connection - * * Mssql Database Connection Config + * + * Underlying database connection */ export interface DatabaseConnectionClass { connectionArguments?: { [key: string]: any }; @@ -4716,6 +4716,12 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Underlying database connection + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -4743,11 +4749,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -4768,6 +4780,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: ConsumerConfigSSLClass; sslMode?: SSLMode; @@ -4797,6 +4812,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -4814,13 +4835,89 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: HiveMetastoreConnectionDetailsOracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; +} + +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface HiveMetastoreConnectionDetailsOracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; } /** * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -4831,7 +4928,9 @@ export enum HiveMetastoreConnectionDetailsScheme { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } @@ -4901,7 +5000,7 @@ export interface OpenAPISchemaConnection { /** * Connect with oracle by either passing service name or database schema name. */ -export interface OracleConnectionType { +export interface ConfigOracleConnectionType { /** * databaseSchema of the data source. This is optional parameter, if you would like to * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/database/hiveConnection.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/database/hiveConnection.ts index 00e8ab7abf6c..49212ed52e70 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/database/hiveConnection.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/database/hiveConnection.ts @@ -135,6 +135,10 @@ export interface FilterPattern { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -162,11 +166,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -187,6 +197,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: Scheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: Config; sslMode?: SSLMode; @@ -216,6 +229,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -233,6 +252,55 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: OracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; } /** @@ -466,6 +534,29 @@ export interface GCPImpersonateServiceAccountValues { [property: string]: any; } +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface OracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; +} + /** * Storage config to store sample data */ @@ -554,7 +645,11 @@ export interface AwsCredentials { * SQLAlchemy driver scheme options. */ export enum Scheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -564,6 +659,9 @@ export enum Scheme { * * SSL Configuration details. * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. + * * OpenMetadata Client configured to validate SSL certificates. */ export interface Config { @@ -599,7 +697,9 @@ export enum SSLMode { * Service type. */ export enum Type { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/serviceConnection.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/serviceConnection.ts index e25a9f87ac34..c3441e74711b 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/serviceConnection.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/serviceConnection.ts @@ -1412,7 +1412,7 @@ export interface ConfigObject { /** * Connect with oracle by either passing service name or database schema name. */ - oracleConnectionType?: OracleConnectionType; + oracleConnectionType?: ConfigOracleConnectionType; /** * Controls how Oracle identifier names (tables, columns, schemas) are stored in * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. @@ -4039,9 +4039,9 @@ export interface PurpleGCPCredentials { } /** - * Underlying database connection - * * Mssql Database Connection Config + * + * Underlying database connection */ export interface DatabaseConnectionClass { connectionArguments?: { [key: string]: any }; @@ -4257,6 +4257,12 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Underlying database connection + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -4284,11 +4290,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -4309,6 +4321,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: ConsumerConfigSSLClass; sslMode?: SSLMode; @@ -4338,6 +4353,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -4355,13 +4376,89 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: HiveMetastoreConnectionDetailsOracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; +} + +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface HiveMetastoreConnectionDetailsOracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; } /** * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -4372,7 +4469,9 @@ export enum HiveMetastoreConnectionDetailsScheme { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } @@ -4442,7 +4541,7 @@ export interface OpenAPISchemaConnection { /** * Connect with oracle by either passing service name or database schema name. */ -export interface OracleConnectionType { +export interface ConfigOracleConnectionType { /** * databaseSchema of the data source. This is optional parameter, if you would like to * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/databaseService.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/databaseService.ts index 8d4000f9bc6a..4d8c3be3bad4 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/databaseService.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/databaseService.ts @@ -1977,6 +1977,10 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -2004,11 +2008,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -2029,6 +2039,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: Config; sslMode?: SSLMode; @@ -2058,6 +2071,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -2075,6 +2094,55 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: OracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; } /** @@ -2107,6 +2175,29 @@ export interface AuthTypeClass { gcpConfig?: GCPCredentials; } +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface OracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; +} + /** * Storage config to store sample data */ @@ -2195,7 +2286,11 @@ export interface AwsCredentials { * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -2253,33 +2348,12 @@ export enum SSLMode { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } -/** - * Connect with oracle by either passing service name or database schema name. - */ -export interface OracleConnectionType { - /** - * databaseSchema of the data source. This is optional parameter, if you would like to - * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata - * Ingestion attempts to scan all the databaseSchema. - */ - databaseSchema?: string; - /** - * The Oracle Service name is the TNS alias that you give when you remotely connect to your - * database. - */ - oracleServiceName?: string; - /** - * Pass the full constructed TNS string, e.g., - * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). - */ - oracleTNSConnection?: string; - [property: string]: any; -} - /** * SQLAlchemy driver scheme options. * diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts index b90f8938133f..9e02ef0d621b 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts @@ -4868,7 +4868,7 @@ export interface ConfigObject { /** * Connect with oracle by either passing service name or database schema name. */ - oracleConnectionType?: OracleConnectionType; + oracleConnectionType?: ConfigOracleConnectionType; /** * Controls how Oracle identifier names (tables, columns, schemas) are stored in * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. @@ -7122,9 +7122,9 @@ export interface PurpleGCPCredentials { } /** - * Underlying database connection - * * Mssql Database Connection Config + * + * Underlying database connection */ export interface DatabaseConnectionClass { connectionArguments?: { [key: string]: any }; @@ -7340,6 +7340,12 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Underlying database connection + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -7367,11 +7373,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -7392,6 +7404,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: DbtSSLConfigClass; sslMode?: SSLMode; @@ -7421,6 +7436,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -7438,13 +7459,89 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: HiveMetastoreConnectionDetailsOracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; +} + +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface HiveMetastoreConnectionDetailsOracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; } /** * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -7455,7 +7552,9 @@ export enum HiveMetastoreConnectionDetailsScheme { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } @@ -7525,7 +7624,7 @@ export interface OpenAPISchemaConnection { /** * Connect with oracle by either passing service name or database schema name. */ -export interface OracleConnectionType { +export interface ConfigOracleConnectionType { /** * databaseSchema of the data source. This is optional parameter, if you would like to * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/testSuitePipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/testSuitePipeline.ts index eb9bd3269b1b..9a7fb0490eb1 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/testSuitePipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/testSuitePipeline.ts @@ -1456,7 +1456,7 @@ export interface ConfigObject { /** * Connect with oracle by either passing service name or database schema name. */ - oracleConnectionType?: OracleConnectionType; + oracleConnectionType?: ConfigOracleConnectionType; /** * Controls how Oracle identifier names (tables, columns, schemas) are stored in * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. @@ -4083,9 +4083,9 @@ export interface PurpleGCPCredentials { } /** - * Underlying database connection - * * Mssql Database Connection Config + * + * Underlying database connection */ export interface DatabaseConnectionClass { connectionArguments?: { [key: string]: any }; @@ -4301,6 +4301,12 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Underlying database connection + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -4328,11 +4334,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -4353,6 +4365,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: ConsumerConfigSSLClass; sslMode?: SSLMode; @@ -4382,6 +4397,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -4399,13 +4420,89 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: HiveMetastoreConnectionDetailsOracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; +} + +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface HiveMetastoreConnectionDetailsOracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; } /** * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -4416,7 +4513,9 @@ export enum HiveMetastoreConnectionDetailsScheme { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } @@ -4486,7 +4585,7 @@ export interface OpenAPISchemaConnection { /** * Connect with oracle by either passing service name or database schema name. */ -export interface OracleConnectionType { +export interface ConfigOracleConnectionType { /** * databaseSchema of the data source. This is optional parameter, if you would like to * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts index 2192bb25beb2..4eabe3175a75 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts @@ -1501,7 +1501,7 @@ export interface ConfigObject { /** * Connect with oracle by either passing service name or database schema name. */ - oracleConnectionType?: OracleConnectionType; + oracleConnectionType?: ConfigOracleConnectionType; /** * Controls how Oracle identifier names (tables, columns, schemas) are stored in * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. @@ -4156,9 +4156,9 @@ export interface PurpleGCPCredentials { } /** - * Underlying database connection - * * Mssql Database Connection Config + * + * Underlying database connection */ export interface DatabaseConnectionClass { connectionArguments?: { [key: string]: any }; @@ -4374,6 +4374,12 @@ export enum Logmech { * Postgres Database Connection Config * * Mysql Database Connection Config + * + * Mssql Database Connection Config + * + * Underlying database connection + * + * Oracle Database Connection Config */ export interface HiveMetastoreConnectionDetails { /** @@ -4401,11 +4407,17 @@ export interface HiveMetastoreConnectionDetails { * * Host and port of the MySQL service. For GCP CloudSQL, use the instance connection name in * the format 'project_id:region:instance_name'. + * + * Host and port of the MSSQL service. + * + * Host and port of the Oracle service. */ hostPort?: string; /** * Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of * this. + * + * Ingest data from all databases in Mssql. You can use databaseFilterPattern on top of this. */ ingestAllDatabases?: boolean; /** @@ -4426,6 +4438,9 @@ export interface HiveMetastoreConnectionDetails { scheme?: HiveMetastoreConnectionDetailsScheme; /** * SSL Configuration details. + * + * SSL/TLS certificate configuration for client authentication. Provide CA certificate, + * client certificate, and private key for mutual TLS authentication. */ sslConfig?: ConsumerConfigSSLClass; sslMode?: SSLMode; @@ -4455,6 +4470,12 @@ export interface HiveMetastoreConnectionDetails { * * Username to connect to MySQL. This user should have privileges to read all the metadata * in Mysql. + * + * Username to connect to MSSQL. This user should have privileges to read all the metadata + * in MsSQL. + * + * Username to connect to Oracle. This user should have privileges to read all the metadata + * in Oracle. */ username?: string; /** @@ -4472,13 +4493,89 @@ export interface HiveMetastoreConnectionDetails { * Use slow logs to extract lineage. */ useSlowLogs?: boolean; + /** + * ODBC driver version in case of pyodbc connection. + */ + driver?: string; + /** + * Enable SSL/TLS encryption for the MSSQL connection. When enabled, all data transmitted + * between the client and server will be encrypted. + */ + encrypt?: boolean; + /** + * Password to connect to MSSQL. + * + * Password to connect to Oracle. + */ + password?: string; + /** + * Trust the server certificate without validation. Set to false in production to validate + * server certificates against the certificate authority. + */ + trustServerCertificate?: boolean; + /** + * This directory will be used to set the LD_LIBRARY_PATH env variable. It is required if + * you need to enable thick connection mode. By default, we bring instant client 19 and + * point to /instantclient. + */ + instantClientDirectory?: string; + /** + * Connect with oracle by either passing service name or database schema name. + */ + oracleConnectionType?: HiveMetastoreConnectionDetailsOracleConnectionType; + /** + * Controls how Oracle identifier names (tables, columns, schemas) are stored in + * OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. + * EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but + * different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the + * same name. When enabled, names are stored exactly as Oracle persists them, which solves + * same-name collisions between quoted and unquoted identifiers. WARNING: enabling this + * after data has already been ingested with the default setting will change the stored + * names of all existing tables, columns, schemas, and constraints — breaking attached tags, + * descriptions, lineage, data quality tests, and any other metadata associated with those + * entities. If you must switch, soft-delete all previously ingested entities before + * re-ingesting. + */ + preserveIdentifierCase?: boolean; + /** + * Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. Requires DBA + * privileges. + */ + useDBATable?: boolean; +} + +/** + * Connect with oracle by either passing service name or database schema name. + */ +export interface HiveMetastoreConnectionDetailsOracleConnectionType { + /** + * databaseSchema of the data source. This is optional parameter, if you would like to + * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata + * Ingestion attempts to scan all the databaseSchema. + */ + databaseSchema?: string; + /** + * The Oracle Service name is the TNS alias that you give when you remotely connect to your + * database. + */ + oracleServiceName?: string; + /** + * Pass the full constructed TNS string, e.g., + * (DESCRIPTION=(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=myhost)(PORT=1530)))(CONNECT_DATA=(SID=MYSERVICENAME))). + */ + oracleTNSConnection?: string; + [property: string]: any; } /** * SQLAlchemy driver scheme options. */ export enum HiveMetastoreConnectionDetailsScheme { + MssqlPymssql = "mssql+pymssql", + MssqlPyodbc = "mssql+pyodbc", + MssqlPytds = "mssql+pytds", MysqlPymysql = "mysql+pymysql", + OracleCxOracle = "oracle+cx_oracle", PgspiderPsycopg2 = "pgspider+psycopg2", PostgresqlPsycopg2 = "postgresql+psycopg2", } @@ -4489,7 +4586,9 @@ export enum HiveMetastoreConnectionDetailsScheme { * Service type. */ export enum HiveMetastoreConnectionDetailsType { + Mssql = "Mssql", Mysql = "Mysql", + Oracle = "Oracle", Postgres = "Postgres", } @@ -4559,7 +4658,7 @@ export interface OpenAPISchemaConnection { /** * Connect with oracle by either passing service name or database schema name. */ -export interface OracleConnectionType { +export interface ConfigOracleConnectionType { /** * databaseSchema of the data source. This is optional parameter, if you would like to * restrict the metadata reading to a single databaseSchema. When left blank, OpenMetadata