Skip to content

Commit 219c568

Browse files
authored
ISSUE #3032 (#27912)
* feat: move flat sampling to sampling config + dynamic sampling option * feat: move flat sampling on the backend to sample profile conifg object * feat: fix circular import * feat: align UI with new profiler config * feat: fix json schema * feat: align python imports with new schema path * feat: update migration to look at extension * feat: remove enable * feat: remove enable * feat: added titles to sample config * feat: generated ts classes * feat: addressed comments * feat: change sample config instantiation to match new structure * feat: removed backward compatible fields * feat: ran java linting * feat: updated imports to point to generated files * feat: added dynamic sampler resolution logic * feat: ran python linting * feat: remove duplicate migration * chore: merge upstream and clean conflicts * feat: update logic to support dynamic and static sampling * feat: adjust sample config call * feat: test for statis, dynamic, row count and tier methods * feat: more sample config unit tests * feat: added tests for metric and sampling * feat: added tests to validate fallback is not called i nmetric computers * feat: strengthen profiler validation tests * feat: fix sampling config * feat: fix sampling config * feat: fix sampling config * feat: generated typescript models * feat: fixed missing dq pipeline migration * feat: fixed static check * feat: fixed ci failures * feat: fixed ci failures * feat: fixed unit tests faioure and linting * feat: fixed integration tests failures * chore: fixe burstiq refactor * chore: fix trino ci failures * chore: revert baseline.json file * chore: fix sampler availabl burst iq changes * feat: added smart sampling radio button * feat: ignore static checks errors * feat: ran ts linting * feat burstiq infinite recursion issue with dynamic as default * feat: translate i8n keys * feat: fix failing tests
1 parent b42c9ad commit 219c568

94 files changed

Lines changed: 3513 additions & 544 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

bootstrap/sql/migrations/native/1.13.0/mysql/schemaChanges.sql

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,42 @@ WHERE pipelineType = 'profiler'
216216
OR JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.profileSampleType')
217217
OR JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.samplingMethodType'));
218218

219+
-- ingestion_pipeline_entity (testSuite pipelines): build profileSampleConfig (skip if already migrated)
UPDATE ingestion_pipeline_entity
SET json = JSON_SET(
        json,
        '$.sourceConfig.config.profileSampleConfig',
        JSON_OBJECT(
            'sampleConfigType', 'STATIC',
            'config', JSON_OBJECT(
                -- carry the legacy flat value over unchanged
                'profileSample', JSON_EXTRACT(json, '$.sourceConfig.config.profileSample'),
                -- default the sample type to PERCENTAGE when the legacy field is absent
                'profileSampleType', COALESCE(
                    JSON_EXTRACT(json, '$.sourceConfig.config.profileSampleType'),
                    CAST('"PERCENTAGE"' AS JSON)
                ),
                'samplingMethodType', JSON_EXTRACT(json, '$.sourceConfig.config.samplingMethodType')
            )
        )
    )
WHERE pipelineType = 'testSuite'
  -- only rows that still carry a non-null legacy value and were not migrated yet
  AND JSON_EXTRACT(json, '$.sourceConfig.config.profileSample') IS NOT NULL
  AND JSON_TYPE(JSON_EXTRACT(json, '$.sourceConfig.config.profileSample')) != 'NULL'
  AND NOT JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.profileSampleConfig');

-- ingestion_pipeline_entity (testSuite pipelines): remove old flat fields
UPDATE ingestion_pipeline_entity
SET json = JSON_REMOVE(
        JSON_REMOVE(
            JSON_REMOVE(json, '$.sourceConfig.config.samplingMethodType'),
            '$.sourceConfig.config.profileSampleType'
        ),
        '$.sourceConfig.config.profileSample'
    )
WHERE pipelineType = 'testSuite'
  AND (JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.profileSample')
       OR JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.profileSampleType')
       OR JSON_CONTAINS_PATH(json, 'one', '$.sourceConfig.config.samplingMethodType'));
254+
219255
-- RDF distributed indexing state tables
220256
CREATE TABLE IF NOT EXISTS rdf_index_job (
221257
id VARCHAR(36) NOT NULL,

bootstrap/sql/migrations/native/1.13.0/postgres/schemaChanges.sql

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,37 @@ WHERE json #>> '{pipelineType}' = 'profiler'
235235
OR json::jsonb #>> '{sourceConfig,config,profileSampleType}' IS NOT NULL
236236
OR json::jsonb #>> '{sourceConfig,config,samplingMethodType}' IS NOT NULL);
237237

238+
-- ingestion_pipeline_entity (testSuite pipelines): build profileSampleConfig (skip if already migrated)
UPDATE ingestion_pipeline_entity
SET json = jsonb_set(
        json::jsonb,
        '{sourceConfig,config,profileSampleConfig}',
        jsonb_build_object(
            'sampleConfigType', 'STATIC',
            'config', jsonb_build_object(
                -- carry the legacy flat value over unchanged
                'profileSample', json::jsonb #> '{sourceConfig,config,profileSample}',
                -- default the sample type to PERCENTAGE when the legacy field is absent
                'profileSampleType', COALESCE(
                    json::jsonb #> '{sourceConfig,config,profileSampleType}',
                    '"PERCENTAGE"'::jsonb
                ),
                'samplingMethodType', json::jsonb #> '{sourceConfig,config,samplingMethodType}'
            )
        )
    )::json
WHERE json #>> '{pipelineType}' = 'testSuite'
  -- #>> yields SQL NULL for both a missing key and a JSON null, so one guard covers both
  AND json::jsonb #>> '{sourceConfig,config,profileSample}' IS NOT NULL
  AND json::jsonb #> '{sourceConfig,config,profileSampleConfig}' IS NULL;

-- ingestion_pipeline_entity (testSuite pipelines): remove old flat fields
UPDATE ingestion_pipeline_entity
SET json = (json::jsonb #- '{sourceConfig,config,profileSample}'
                        #- '{sourceConfig,config,profileSampleType}'
                        #- '{sourceConfig,config,samplingMethodType}')::json
WHERE json #>> '{pipelineType}' = 'testSuite'
  AND (json::jsonb #>> '{sourceConfig,config,profileSample}' IS NOT NULL
       OR json::jsonb #>> '{sourceConfig,config,profileSampleType}' IS NOT NULL
       OR json::jsonb #>> '{sourceConfig,config,samplingMethodType}' IS NOT NULL);
268+
238269
-- RDF distributed indexing state tables
239270
CREATE TABLE IF NOT EXISTS rdf_index_job (
240271
id VARCHAR(36) NOT NULL,

ingestion/setup.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,8 @@
422422
# TODO: Remove once no unit test requires testcontainers
423423
"testcontainers",
424424
VERSIONS["factory-boy"],
425+
*plugins["exasol"],
426+
*plugins["teradata"],
425427
}
426428

427429
test = {
@@ -490,6 +492,8 @@
490492
VERSIONS["kafka-connect"],
491493
VERSIONS["factory-boy"],
492494
"locust~=2.32.0",
495+
*plugins["exasol"],
496+
*plugins["teradata"],
493497
}
494498

495499
docs = {

ingestion/src/metadata/data_quality/runner/base_test_suite_source.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,7 @@
3434
from metadata.generated.schema.type.entityReference import EntityReference
3535
from metadata.ingestion.ometa.ometa_api import OpenMetadata
3636
from metadata.sampler.models import (
37-
ProfileSampleConfig,
38-
ProfileSampleConfigType,
3937
SampleConfig,
40-
StaticSamplingConfig,
4138
)
4239
from metadata.sampler.sampler_interface import SamplerInterface # noqa: TC001
4340
from metadata.utils.bigquery_utils import copy_service_config
@@ -126,15 +123,8 @@ def create_data_quality_interface(self) -> TestSuiteInterface:
126123
schema_entity=schema_entity,
127124
database_entity=database_entity,
128125
default_sample_config=SampleConfig(
129-
profileSampleConfig=ProfileSampleConfig(
130-
sampleConfigType=ProfileSampleConfigType.STATIC,
131-
config=StaticSamplingConfig(
132-
profileSample=self.source_config.profileSample,
133-
profileSampleType=self.source_config.profileSampleType,
134-
samplingMethodType=self.source_config.samplingMethodType,
135-
),
136-
)
137-
if self.source_config.profileSample
126+
profileSampleConfig=self.source_config.profileSampleConfig
127+
if self.source_config.profileSampleConfig
138128
else None,
139129
),
140130
)

ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def _run_results(self, metric: Metrics, column: Column) -> Optional[int]: # noq
7575
"""
7676
count = Metrics.valuesCount.value(column).fn()
7777
grouped_cte = (
78-
select(count.label(column.name)).select_from(self.runner.dataset).group_by(column).cte("grouped_cte")
78+
select(count.label(column.name)).select_from(self.runner.dataset).group_by(column).cte("grouped_cte") # type: ignore
7979
)
8080
unique_count = Metrics.uniqueCount.value(column).query(
8181
sample=self.runner.dataset,

ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableDiff.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
from metadata.profiler.orm.functions.md5 import MD5
5757
from metadata.profiler.orm.functions.substr import Substr
5858
from metadata.profiler.orm.registry import Dialects, PythonDialects
59+
from metadata.sampler.config import resolve_static_sampling_config
5960
from metadata.utils.collections import CaseInsensitiveList
6061
from metadata.utils.credentials import normalize_pem_string
6162
from metadata.utils.logger import test_suite_logger
@@ -443,9 +444,12 @@ def sample_where_clause(self) -> Tuple[Optional[str], Optional[str]]: # noqa: U
443444
return None, None
444445
profile_sample_config = config.profileSampleConfig if config else None
445446
sample_config = profile_sample_config.root if profile_sample_config else None
446-
static = sample_config.config if sample_config else None
447-
profile_sample = getattr(static, "profileSample", None) if static else None
448-
profile_sample_type = getattr(static, "profileSampleType", None) if static else None
447+
static = resolve_static_sampling_config(
448+
sample_config=sample_config,
449+
row_count=self.get_total_row_count(),
450+
)
451+
profile_sample = static.profileSample if static else None
452+
profile_sample_type = static.profileSampleType if static else None
449453
if profile_sample is None or (profile_sample_type == ProfileSampleType.PERCENTAGE and profile_sample == 100):
450454
return None, None
451455
if DatabaseServiceType.Mssql in [
@@ -490,16 +494,19 @@ def calculate_nounce(self, max_nounce=2**32 - 1) -> int:
490494
config = self.runtime_params.table_profile_config
491495
profile_sample_config = config.profileSampleConfig if config else None
492496
sample_config = profile_sample_config.root if profile_sample_config else None
493-
static = sample_config.config if sample_config else None
494-
profile_sample = getattr(static, "profileSample", 100)
495-
profile_sample_type = getattr(static, "profileSampleType", None)
497+
row_count = self.get_total_row_count()
498+
static = resolve_static_sampling_config(
499+
sample_config=sample_config,
500+
row_count=row_count,
501+
)
502+
profile_sample = static.profileSample if static else None
503+
profile_sample_type = static.profileSampleType if static else None
496504
if profile_sample_type == ProfileSampleType.PERCENTAGE:
497-
return int(max_nounce * profile_sample / 100)
505+
return int(max_nounce * ((profile_sample or 100) / 100))
498506
if profile_sample_type == ProfileSampleType.ROWS:
499-
row_count = self.get_total_row_count()
500507
if row_count is None:
501508
raise ValueError("Row count is required for ROWS profile sample type")
502-
return int(max_nounce * (profile_sample / row_count))
509+
return int(max_nounce * ((profile_sample or row_count) / row_count))
503510
raise ValueError("Invalid profile sample type")
504511

505512
def get_row_diff_test_case_result(

ingestion/src/metadata/great_expectations/action.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from typing import Dict, List, Optional, Union, cast # noqa: UP035
2323

2424
from great_expectations.checkpoint.actions import ValidationAction
25-
from great_expectations.core import ExpectationConfiguration
25+
from great_expectations.core import ExpectationConfiguration # type: ignore
2626
from great_expectations.core.batch import Batch
2727
from great_expectations.core.batch_spec import (
2828
RuntimeDataBatchSpec,
@@ -32,8 +32,8 @@
3232
from great_expectations.core.expectation_validation_result import (
3333
ExpectationSuiteValidationResult,
3434
)
35-
from great_expectations.data_asset.data_asset import DataAsset
36-
from great_expectations.data_context.data_context import DataContext
35+
from great_expectations.data_asset.data_asset import DataAsset # type: ignore
36+
from great_expectations.data_context.data_context import DataContext # type: ignore
3737

3838
from metadata.generated.schema.type.basic import Timestamp
3939

@@ -115,7 +115,7 @@ def __init__(
115115
table_name: Optional[str] = None, # noqa: UP045
116116
expectation_suite_table_config_map: Optional[Dict[str, Dict[str, str]]] = None, # noqa: UP006, UP045
117117
):
118-
super().__init__(data_context, name=name)
118+
super().__init__(data_context, name=name) # type: ignore
119119
self.database_service_name = database_service_name
120120
self.database_name = database_name
121121
self.table_name = table_name
@@ -153,7 +153,7 @@ def _run( # pylint: disable=unused-argument
153153
expectation_suite_name = None
154154

155155
if expectation_suite_identifier:
156-
expectation_suite_name = expectation_suite_identifier.expectation_suite_name
156+
expectation_suite_name = expectation_suite_identifier.expectation_suite_name # type: ignore
157157
self.expectation_suite = self.data_context.get_expectation_suite(expectation_suite_name)
158158

159159
check_point_spec = self._get_checkpoint_batch_spec(data_asset)
@@ -182,7 +182,7 @@ def _run( # pylint: disable=unused-argument
182182

183183
if table_entity:
184184
for result in validation_result_suite.results:
185-
self._handle_test_case(result, table_entity)
185+
self._handle_test_case(result, table_entity) # type: ignore
186186

187187
@staticmethod
188188
def _get_checkpoint_batch_spec(

ingestion/src/metadata/great_expectations/action1xx.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@
2222
from typing import Dict, List, Literal, Optional, Union, cast # noqa: UP035
2323

2424
from great_expectations.checkpoint import (
25-
ActionContext,
26-
CheckpointResult,
27-
ValidationAction,
25+
ActionContext, # type: ignore
26+
CheckpointResult, # type: ignore
27+
ValidationAction, # type: ignore
2828
)
2929
from great_expectations.core.batch import Batch
3030
from great_expectations.core.expectation_validation_result import (
3131
ExpectationSuiteValidationResultMeta,
3232
)
33-
from great_expectations.datasource.fluent import DataAsset
33+
from great_expectations.datasource.fluent import DataAsset # type: ignore
3434
from great_expectations.validator.validator import Validator
3535
from sqlalchemy.engine.base import Connection, Engine
3636
from sqlalchemy.engine.url import URL
@@ -83,7 +83,7 @@ class OpenMetadataValidationAction1xx(ValidationAction):
8383
Format: {"suite_name": {"database_name": "db", "schema_name": "schema", "table_name": "table"}}
8484
"""
8585

86-
type: Literal["open_metadata_validation_action"] = "open_metadata_validation_action"
86+
type: Literal["open_metadata_validation_action"] = "open_metadata_validation_action" # type: ignore
8787
name: str = "OpenMetadataValidationAction"
8888
config_file_path: Optional[str] = None # noqa: UP045
8989
database_service_name: Optional[str] = None # noqa: UP045
@@ -153,7 +153,7 @@ def run(
153153

154154
if table_entity:
155155
for result in v.results:
156-
self._handle_test_case(result, table_entity)
156+
self._handle_test_case(result, table_entity) # type: ignore
157157

158158
@staticmethod
159159
def _get_checkpoint_batch_spec(meta: Union[ExpectationSuiteValidationResultMeta, dict]): # noqa: UP007

ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def __read_manifest(
314314
# For remote repositories, clone the dependency as before
315315
url_parsed = giturlparse.parse(remote_git_url)
316316
_clone_repo(
317-
f"{url_parsed.owner}/{url_parsed.repo}", # pylint: disable=E1101
317+
f"{url_parsed.owner}/{url_parsed.repo}", # type: ignore
318318
f"{repo.path}/{IMPORTED_PROJECTS_DIR}/{remote_name}",
319319
credentials,
320320
)
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from metadata.ingestion.source.database.presto.metadata import PrestoSource
22
from metadata.utils.service_spec.default import DefaultDatabaseSpec
33

4-
ServiceSpec = DefaultDatabaseSpec(metadata_source_class=PrestoSource)
4+
ServiceSpec = DefaultDatabaseSpec(
5+
metadata_source_class=PrestoSource, # type: ignore
6+
)

0 commit comments

Comments
 (0)