From 9e35d352a153787a08f9639786bc99b486446826 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Sun, 15 Mar 2026 20:04:50 +0100
Subject: [PATCH 01/24] feat: add variable substitution support for check
 definitions

---
 src/databricks/labs/dqx/engine.py             |  38 ++-
 src/databricks/labs/dqx/utils.py              | 129 ++++++++++
 tests/integration/test_apply_checks.py        | 159 ++++++++++++
 .../test_apply_checks_and_save_in_table.py    |  57 +++++
 tests/unit/test_utils.py                      | 237 ++++++++++++++++++
 5 files changed, 614 insertions(+), 6 deletions(-)

diff --git a/src/databricks/labs/dqx/engine.py b/src/databricks/labs/dqx/engine.py
index 792fa5328..88c3d17ea 100644
--- a/src/databricks/labs/dqx/engine.py
+++ b/src/databricks/labs/dqx/engine.py
@@ -47,7 +47,7 @@
 from databricks.labs.dqx.telemetry import telemetry_logger, log_telemetry, log_dataframe_telemetry
 from databricks.sdk import WorkspaceClient
 from databricks.labs.dqx.errors import InvalidCheckError, InvalidConfigError, InvalidParameterError
-from databricks.labs.dqx.utils import list_tables, safe_strip_file_from_path
+from databricks.labs.dqx.utils import list_tables, safe_strip_file_from_path, apply_variables
 from databricks.labs.dqx.io import is_one_time_trigger
 
 logger = logging.getLogger(__name__)
@@ -220,6 +220,7 @@ def apply_checks_by_metadata(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
+        variables: dict[str, Any] | None = None,
     ) -> DataFrame | tuple[DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame.
 
@@ -232,11 +233,14 @@ def apply_checks_by_metadata(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A DataFrame with errors and warnings result columns and an optional Observation which tracks data quality
             summary metrics. Summary metrics are returned by any `DQEngine` with an `observer` specified.
         """
+        checks = apply_variables(checks, variables)
         dq_rule_checks = deserialize_checks(checks, custom_check_functions)
 
         return self.apply_checks(df, dq_rule_checks, ref_dfs)
@@ -247,6 +251,7 @@ def apply_checks_by_metadata_and_split(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
+        variables: dict[str, Any] | None = None,
     ) -> tuple[DataFrame, DataFrame] | tuple[DataFrame, DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame and split the results into
         two DataFrames ("good" and "bad").
@@ -260,6 +265,8 @@ def apply_checks_by_metadata_and_split(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A tuple of two DataFrames: "good" (may include rows with warnings but no result columns) and "bad" (rows
@@ -269,6 +276,7 @@ def apply_checks_by_metadata_and_split(
         Raises:
             InvalidCheckError: If any of the checks are invalid.
         """
+        checks = apply_variables(checks, variables)
         dq_rule_checks = deserialize_checks(checks, custom_check_functions)
 
         good_df, bad_df, *observations = self.apply_checks_and_split(df, dq_rule_checks, ref_dfs)
@@ -283,6 +291,7 @@ def validate_checks(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         validate_custom_check_functions: bool = True,
+        variables: dict[str, Any] | None = None,
     ) -> ChecksValidationStatus:
         """
         Validate checks defined as metadata to ensure they conform to the expected structure and types.
@@ -294,10 +303,13 @@ def validate_checks(
             checks: List of checks to apply to the DataFrame. Each check should be a dictionary.
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             validate_custom_check_functions: If True, validate custom check functions.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation.
 
         Returns:
             ChecksValidationStatus indicating the validation result.
         """
+        checks = apply_variables(checks, variables)
         return ChecksValidator.validate_checks(checks, custom_check_functions, validate_custom_check_functions)
 
     def get_invalid(self, df: DataFrame) -> DataFrame:
@@ -614,6 +626,7 @@ def apply_checks_by_metadata(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
+        variables: dict[str, Any] | None = None,
     ) -> DataFrame | tuple[DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame.
 
@@ -626,13 +639,15 @@ def apply_checks_by_metadata(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A DataFrame with errors and warnings result columns and an optional Observation which tracks data quality
             summary metrics. Summary metrics are returned by any `DQEngine` with an `observer` specified.
         """
         log_dataframe_telemetry(self.ws, self.spark, df)
-        return self._engine.apply_checks_by_metadata(df, checks, custom_check_functions, ref_dfs)
+        return self._engine.apply_checks_by_metadata(df, checks, custom_check_functions, ref_dfs, variables)
 
     @telemetry_logger("engine", "apply_checks_by_metadata_and_split")
     def apply_checks_by_metadata_and_split(
@@ -641,6 +656,7 @@ def apply_checks_by_metadata_and_split(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
+        variables: dict[str, Any] | None = None,
     ) -> tuple[DataFrame, DataFrame] | tuple[DataFrame, DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame and split the results into
         two DataFrames ("good" and "bad").
@@ -654,6 +670,8 @@ def apply_checks_by_metadata_and_split(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A tuple of two DataFrames: "good" (may include rows with warnings but no result columns) and "bad" (rows
@@ -661,7 +679,7 @@ def apply_checks_by_metadata_and_split(
             quality summary metrics. Summary metrics are returned by any `DQEngine` with an `observer` specified.
         """
         log_dataframe_telemetry(self.ws, self.spark, df)
-        return self._engine.apply_checks_by_metadata_and_split(df, checks, custom_check_functions, ref_dfs)
+        return self._engine.apply_checks_by_metadata_and_split(df, checks, custom_check_functions, ref_dfs, variables)
 
     @telemetry_logger("engine", "apply_checks_and_save_in_table")
     def apply_checks_and_save_in_table(
@@ -758,6 +776,7 @@ def apply_checks_by_metadata_and_save_in_table(
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
         checks_location: str | None = None,
+        variables: dict[str, Any] | None = None,
     ) -> None:
         """
         Apply metadata-defined data quality checks to input data and save results.
@@ -782,6 +801,8 @@ def apply_checks_by_metadata_and_save_in_table(
                 to callables/modules (e.g., globals()).
             ref_dfs: Optional reference DataFrames used by checks.
             checks_location: Optional location of the checks. Used for reporting in the summary metrics table only.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation and deserialization.
         """
         logger.info(f"Applying checks to {input_config.location}")
 
@@ -792,7 +813,9 @@ def apply_checks_by_metadata_and_save_in_table(
         quarantine_streaming_query = None
 
         if quarantine_config:
-            check_result = self.apply_checks_by_metadata_and_split(df, checks, custom_check_functions, ref_dfs)
+            check_result = self.apply_checks_by_metadata_and_split(
+                df, checks, custom_check_functions, ref_dfs, variables
+            )
             if self._engine.observer:
                 good_df, bad_df, batch_observation = check_result
             else:
@@ -801,7 +824,7 @@ def apply_checks_by_metadata_and_save_in_table(
             quarantine_streaming_query = save_dataframe_as_table(bad_df, quarantine_config)
             target_streaming_query = quarantine_streaming_query
         else:
-            check_result = self.apply_checks_by_metadata(df, checks, custom_check_functions, ref_dfs)
+            check_result = self.apply_checks_by_metadata(df, checks, custom_check_functions, ref_dfs, variables)
             if self._engine.observer:
                 checked_df, batch_observation = check_result
             else:
@@ -958,6 +981,7 @@ def validate_checks(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         validate_custom_check_functions: bool = True,
+        variables: dict[str, Any] | None = None,
     ) -> ChecksValidationStatus:
         """
         Validate checks defined as metadata to ensure they conform to the expected structure and types.
@@ -969,11 +993,13 @@ def validate_checks(
             checks: List of checks to apply to the DataFrame. Each check should be a dictionary.
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             validate_custom_check_functions: If True, validate custom check functions.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation.
 
         Returns:
             ChecksValidationStatus indicating the validation result.
         """
-        return DQEngineCore.validate_checks(checks, custom_check_functions, validate_custom_check_functions)
+        return DQEngineCore.validate_checks(checks, custom_check_functions, validate_custom_check_functions, variables)
 
     def get_invalid(self, df: DataFrame) -> DataFrame:
         """
diff --git a/src/databricks/labs/dqx/utils.py b/src/databricks/labs/dqx/utils.py
index 5ff8fa168..f8e13bb2e 100644
--- a/src/databricks/labs/dqx/utils.py
+++ b/src/databricks/labs/dqx/utils.py
@@ -5,6 +5,7 @@
 import re
 from decimal import Decimal
 from importlib.util import find_spec
+from collections.abc import Callable, Generator
 from typing import Any
 from fnmatch import fnmatch
 from pathlib import Path
@@ -30,6 +31,8 @@
 COLUMN_NORMALIZE_EXPRESSION = re.compile("[^a-zA-Z0-9]+")
 COLUMN_PATTERN = re.compile(r"Column<'(.*?)(?: AS (\w+))?'>$", re.DOTALL)
 INVALID_COLUMN_NAME_PATTERN = re.compile(r"[\s,;{}\(\)\n\t=]+")
+_UNRESOLVED_PLACEHOLDER_PATTERN = re.compile(r"\{\{.*?\}\}")
+_SCALAR_VARIABLE_TYPES = (str, int, float, bool, Decimal)
 
 
 def get_column_name_or_alias(
@@ -527,6 +530,132 @@ def missing_required_packages(packages: list[str]) -> bool:
     return not all(find_spec(spec) for spec in packages)
 
 
+def _literal_replacer(val: str) -> Callable[[re.Match], str]:
+    """Return a ``re.sub`` replacer that always returns *val* literally."""
+
+    def replacer(_: re.Match) -> str:
+        return val
+
+    return replacer
+
+
+def _replace_template(text: str, variables: dict[str, str]) -> str:
+    """Replace ``{{ key }}`` placeholders in *text* with values from *variables*.
+
+    Tolerates whitespace inside braces (e.g. ``{{ key }}``, ``{{key}}``).
+    Uses a callable replacement so backslashes in values are inserted literally.
+
+    Args:
+        text: Input string potentially containing ``{{ key }}`` placeholders.
+        variables: Pre-stringified mapping of placeholder names to values.
+
+    Returns:
+        String with all matching placeholders replaced.
+    """
+    for key, val in variables.items():
+        pattern = r"\{\{\s*" + re.escape(key) + r"\s*\}\}"
+        text = re.sub(pattern, _literal_replacer(val), text)
+    return text
+
+
+def _substitute_variables(obj: Any, variables: dict[str, str]) -> Any:
+    """Recursively replace ``{{ key }}`` placeholders in all string values within *obj*.
+
+    Traverses dicts, lists, and strings. Values of any other type (including
+    tuples and sets) are returned unchanged. Dict keys are not substituted.
+
+    Args:
+        obj: A string, dict, list, or other value to process.
+        variables: Pre-stringified mapping of placeholder names to values.
+
+    Returns:
+        A new object with all string values having placeholders replaced.
+    """
+    if isinstance(obj, str):
+        return _replace_template(obj, variables)
+    if isinstance(obj, dict):
+        return {k: _substitute_variables(v, variables) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_substitute_variables(item, variables) for item in obj]
+    return obj
+
+
+def _validate_variable_types(variables: dict[str, Any]) -> None:
+    """Raise :class:`InvalidParameterError` if any variable value is not a supported scalar type."""
+    for key, val in variables.items():
+        if not isinstance(val, _SCALAR_VARIABLE_TYPES):
+            raise InvalidParameterError(
+                f"Variable '{key}' has unsupported type '{type(val).__name__}'. "
+                f"Only scalar types are supported: str, int, float, bool, Decimal."
+            )
+
+
+def apply_variables(checks: list[dict], variables: dict[str, Any] | None) -> list[dict]:
+    """Apply variable substitution to check definitions.
+
+    Replaces ``{{ key }}`` placeholders in all string values of *checks* with the
+    corresponding values from *variables*. The original *checks* list is never mutated.
+
+    Variable values must be scalar types (``str``, ``int``, ``float``, ``bool``,
+    ``Decimal``). Non-string scalars are converted via ``str()`` — for example,
+    ``{"threshold": 10}`` becomes ``"10"`` in the substituted string. Collection
+    types (``list``, ``dict``, ``set``, etc.) are rejected with
+    :class:`~databricks.labs.dqx.errors.InvalidParameterError` because their
+    ``str()`` representation is rarely meaningful in SQL or column expressions.
+
+    Logs a warning for any ``{{ ... }}`` placeholders that remain unresolved after
+    substitution (e.g. misspelled variable names).
+
+    Args:
+        checks: List of check definition dictionaries (metadata format).
+        variables: Mapping of placeholder names to scalar replacement values.
+            If ``None`` or empty the checks are returned unchanged.
+
+    Returns:
+        A new list of check dicts with placeholders resolved, or the original list
+        when no substitution is needed.
+
+    Raises:
+        InvalidParameterError: If any variable value is not a supported scalar type.
+    """
+    if not variables:
+        return checks
+
+    _validate_variable_types(variables)
+    str_variables = {k: str(v) for k, v in variables.items()}
+    resolved: list[dict] = _substitute_variables(checks, str_variables)
+
+    # Warn about any remaining unresolved placeholders
+    for check_def in resolved:
+        for value in _iter_strings(check_def):
+            if _UNRESOLVED_PLACEHOLDER_PATTERN.search(value):
+                logger.warning(f"Unresolved placeholder found after variable substitution: '{value}'")
+
+    return resolved
+
+
+def _iter_strings(obj: Any) -> Generator[str, None, None]:
+    """Yield all string values found recursively in *obj*.
+
+    Traverses dicts (values only) and lists. Non-string leaf values are skipped.
+    Used to scan resolved check definitions for unresolved ``{{ ... }}`` placeholders.
+
+    Args:
+        obj: A string, dict, list, or other value to traverse.
+
+    Yields:
+        Every string value found in the nested structure.
+    """
+    if isinstance(obj, str):
+        yield obj
+    elif isinstance(obj, dict):
+        for value in obj.values():
+            yield from _iter_strings(value)
+    elif isinstance(obj, list):
+        for item in obj:
+            yield from _iter_strings(item)
+
+
 def get_file_extension(file_path: str | os.PathLike) -> str:
     """
     Extract file extension from a file path.
diff --git a/tests/integration/test_apply_checks.py b/tests/integration/test_apply_checks.py
index 3c6065e3c..022395f5e 100755
--- a/tests/integration/test_apply_checks.py
+++ b/tests/integration/test_apply_checks.py
@@ -9555,3 +9555,162 @@ def test_apply_checks_by_metadata_skip_checks_with_missing_columns(ws, spark):
         SCHEMA + complex_cols_schema + REPORTING_COLUMNS,
     )
     assert_df_equality(checked, expected, ignore_nullable=True)
+
+
+def test_apply_checks_by_metadata_with_variables(ws, spark):
+    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
+    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+
+    checks = [
+        {
+            "criticality": "error",
+            "check": {
+                "function": "is_not_null_and_not_empty",
+                "arguments": {"column": "{{ col }}"},
+            },
+        },
+    ]
+    variables = {"col": "b"}
+
+    checked = dq_engine.apply_checks_by_metadata(test_df, checks, variables=variables)
+
+    expected = spark.createDataFrame(
+        [
+            [1, 3, 3, None, None],
+            [
+                2,
+                None,
+                4,
+                [
+                    {
+                        "name": "b_is_null_or_empty",
+                        "message": "Column 'b' value is null or empty",
+                        "columns": ["b"],
+                        "filter": None,
+                        "function": "is_not_null_and_not_empty",
+                        "run_time": RUN_TIME,
+                        "run_id": RUN_ID,
+                        "user_metadata": {},
+                    }
+                ],
+                None,
+            ],
+            [None, 4, None, None, None],
+        ],
+        EXPECTED_SCHEMA,
+    )
+    assert_df_equality(checked, expected, ignore_nullable=True)
+
+
+def test_apply_checks_by_metadata_and_split_with_variables(ws, spark):
+    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
+    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+
+    checks = [
+        {
+            "criticality": "error",
+            "name": "{{ col }}_null_check",
+            "check": {
+                "function": "is_not_null_and_not_empty",
+                "arguments": {"column": "{{ col }}"},
+            },
+        },
+        {
+            "criticality": "warn",
+            "check": {
+                "function": "sql_expression",
+                "arguments": {"expression": "{{ expr_col }} > {{ threshold }}"},
+            },
+        },
+    ]
+    variables = {"col": "b", "expr_col": "a", "threshold": 1}
+
+    good, bad = dq_engine.apply_checks_by_metadata_and_split(test_df, checks, variables=variables)
+
+    # Row [1, 3, 3]: b is not null, but a = 1 so "a > 1" is false (warn) -> both good and bad
+    # Row [2, None, 4]: b is null (error), a > 1 passes -> bad only
+    # Row [None, 4, None]: b is not null, a is null so "a > 1" is NULL and not flagged -> good only
+    assert good.count() == 2
+    assert bad.count() == 2
+
+
+def test_apply_checks_by_metadata_with_variables_name_and_filter(ws, spark):
+    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
+    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+
+    checks = [
+        {
+            "criticality": "error",
+            "name": "{{ col }}_greater_than_{{ threshold }}",
+            "check": {
+                "function": "sql_expression",
+                "arguments": {"expression": "{{ col }} > {{ threshold }}"},
+            },
+            "filter": "{{ filter_col }} IS NOT NULL",
+        },
+    ]
+    variables = {"col": "a", "threshold": 1, "filter_col": "a"}
+
+    checked = dq_engine.apply_checks_by_metadata(test_df, checks, variables=variables)
+
+    # Row with a=1 should have an error since a > 1 is false
+    result_rows = checked.collect()
+    row_a1 = [r for r in result_rows if r["a"] == 1][0]
+    assert row_a1["_errors"] is not None
+    assert len(row_a1["_errors"]) == 1
+    assert row_a1["_errors"][0]["name"] == "a_greater_than_1"
+
+    # Row with a=2 should have no errors
+    row_a2 = [r for r in result_rows if r["a"] == 2][0]
+    assert row_a2["_errors"] is None
+
+    # Row with a=None should have no errors (filtered out)
+    row_null = [r for r in result_rows if r["a"] is None][0]
+    assert row_null["_errors"] is None
+
+
+def test_validate_checks_with_variables(ws):
+    checks = [
+        {
+            "criticality": "{{ crit }}",
+            "check": {
+                "function": "is_not_null",
+                "arguments": {"column": "{{ col }}"},
+            },
+        },
+    ]
+    variables = {"crit": "error", "col": "b"}
+
+    status = DQEngine.validate_checks(checks, variables=variables)
+    assert not status.has_errors
+
+
+def test_validate_checks_with_variables_invalid_after_substitution(ws):
+    checks = [
+        {
+            "criticality": "{{ crit }}",
+            "check": {
+                "function": "is_not_null",
+                "arguments": {"column": "b"},
+            },
+        },
+    ]
+    variables = {"crit": "not_a_valid_criticality"}
+
+    status = DQEngine.validate_checks(checks, variables=variables)
+    assert status.has_errors
+
+
+def test_validate_checks_without_variables_fails_on_placeholders(ws):
+    checks = [
+        {
+            "criticality": "{{ crit }}",
+            "check": {
+                "function": "is_not_null",
+                "arguments": {"column": "b"},
+            },
+        },
+    ]
+
+    status = DQEngine.validate_checks(checks)
+    assert status.has_errors
diff --git a/tests/integration/test_apply_checks_and_save_in_table.py b/tests/integration/test_apply_checks_and_save_in_table.py
index c6b8b1cca..ee9b7ec85 100644
--- a/tests/integration/test_apply_checks_and_save_in_table.py
+++ b/tests/integration/test_apply_checks_and_save_in_table.py
@@ -2084,3 +2084,60 @@ def test_apply_checks_and_save_in_tables_with_patterns_and_ref_df(ws, spark, mak
         schema=expected_schema,
     )
     assert_df_equality(actual_df, expected_df, ignore_nullable=True)
+
+
+def test_apply_checks_by_metadata_and_save_in_table_with_variables(ws, spark, make_schema, make_random):
+    catalog_name = TEST_CATALOG
+    schema = make_schema(catalog_name=catalog_name)
+    input_table = f"{catalog_name}.{schema.name}.{make_random(8).lower()}"
+    output_table = f"{catalog_name}.{schema.name}.{make_random(8).lower()}"
+
+    test_schema = "a: int, b: int, c: string"
+    test_df = spark.createDataFrame([[1, 2, "valid"], [None, 3, "error"], [4, None, "warn"]], test_schema)
+    test_df.write.format("delta").mode("overwrite").saveAsTable(input_table)
+
+    checks = [
+        {
+            "name": "{{ col }}_is_null",
+            "criticality": "{{ crit }}",
+            "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}},
+        },
+    ]
+    variables = {"col": "a", "crit": "error"}
+
+    engine = DQEngine(ws, spark=spark, extra_params=EXTRA_PARAMS)
+    engine.apply_checks_by_metadata_and_save_in_table(
+        checks=checks,
+        input_config=InputConfig(location=input_table),
+        output_config=OutputConfig(location=output_table, mode="overwrite"),
+        variables=variables,
+    )
+
+    actual_df = spark.table(output_table)
+    expected_schema = test_schema + REPORTING_COLUMNS
+    expected_df = spark.createDataFrame(
+        [
+            [1, 2, "valid", None, None],
+            [
+                None,
+                3,
+                "error",
+                [
+                    {
+                        "name": "a_is_null",
+                        "message": "Column 'a' value is null",
+                        "columns": ["a"],
+                        "filter": None,
+                        "function": "is_not_null",
+                        "run_time": RUN_TIME,
+                        "run_id": RUN_ID,
+                        "user_metadata": {},
+                    }
+                ],
+                None,
+            ],
+            [4, None, "warn", None, None],
+        ],
+        schema=expected_schema,
+    )
+    assert_df_equality(actual_df, expected_df, ignore_nullable=True)
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 2537a8181..1b50a6be7 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -1,4 +1,6 @@
+import logging
 from datetime import date, datetime
+from decimal import Decimal
 from typing import Any
 from pathlib import Path
 from unittest.mock import Mock
@@ -17,6 +19,7 @@
     safe_strip_file_from_path,
     missing_required_packages,
     get_file_extension,
+    apply_variables,
 )
 from databricks.labs.dqx.rule import normalize_bound_args
 from databricks.labs.dqx.errors import InvalidParameterError, InvalidConfigError
@@ -414,3 +417,237 @@ def test_get_file_extension_with_path_object():
     """Test get_file_extension function with Path object."""
     file_path = Path("/path/to/file.json")
     assert get_file_extension(file_path) == ".json"
+
+
+def test_apply_variables_replaces_all_string_fields():
+    checks = [
+        {
+            "criticality": "error",
+            "name": "{{ col }}_not_null",
+            "check": {
+                "function": "is_not_null",
+                "arguments": {"column": "{{ col }}"},
+            },
+            "filter": "{{ filter_col }} = 'active'",
+        }
+    ]
+    variables = {"col": "email", "filter_col": "status"}
+    result = apply_variables(checks, variables)
+
+    assert result[0]["name"] == "email_not_null"
+    assert result[0]["check"]["arguments"]["column"] == "email"
+    assert result[0]["filter"] == "status = 'active'"
+
+
+def test_apply_variables_none_variables():
+    checks = [{"name": "{{ x }}"}]
+    result = apply_variables(checks, None)
+    assert result is checks  # same object, no copy
+    assert result[0]["name"] == "{{ x }}"
+
+
+def test_apply_variables_empty_variables():
+    checks = [{"name": "{{ x }}"}]
+    result = apply_variables(checks, {})
+    assert result is checks  # same object, no copy
+    assert result[0]["name"] == "{{ x }}"
+
+
+def test_apply_variables_non_string_values_converted():
+    checks = [
+        {
+            "check": {
+                "function": "sql_expression",
+                "arguments": {"expression": "{{ col }} > {{ threshold }}"},
+            },
+        }
+    ]
+    variables = {"col": "age", "threshold": 18}
+    result = apply_variables(checks, variables)
+    assert result[0]["check"]["arguments"]["expression"] == "age > 18"
+
+
+def test_apply_variables_does_not_mutate_original():
+    checks = [
+        {
+            "name": "{{ col }}_check",
+            "check": {
+                "function": "is_not_null",
+                "arguments": {"column": "{{ col }}"},
+            },
+        }
+    ]
+    variables = {"col": "name"}
+    apply_variables(checks, variables)
+
+    # Original must be unchanged
+    assert checks[0]["name"] == "{{ col }}_check"
+    assert checks[0]["check"]["arguments"]["column"] == "{{ col }}"
+
+
+def test_apply_variables_nested_dicts():
+    checks = [
+        {
+            "check": {
+                "function": "sql_expression",
+                "arguments": {
+                    "expression": "{{ col }} IS NOT NULL",
+                },
+            },
+            "user_metadata": {"owner": "{{ team }}"},
+        }
+    ]
+    variables = {"col": "id", "team": "data-eng"}
+    result = apply_variables(checks, variables)
+
+    assert result[0]["check"]["arguments"]["expression"] == "id IS NOT NULL"
+    assert result[0]["user_metadata"]["owner"] == "data-eng"
+
+
+def test_apply_variables_partial_replacement():
+    checks = [{"name": "{{ p1 }}_greater_than_{{ threshold }}"}]
+    variables = {"p1": "column1", "threshold": 10}
+    result = apply_variables(checks, variables)
+    assert result[0]["name"] == "column1_greater_than_10"
+
+
+def test_apply_variables_unresolved_placeholder_warning(caplog):
+    checks = [{"name": "{{ resolved }}_{{ unresolved }}"}]
+    variables = {"resolved": "ok"}
+    with caplog.at_level(logging.WARNING, logger="databricks.labs.dqx.utils"):
+        result = apply_variables(checks, variables)
+
+    assert result[0]["name"] == "ok_{{ unresolved }}"
+    assert any("Unresolved placeholder" in msg for msg in caplog.messages)
+
+
+def test_apply_variables_whitespace_tolerance():
+    checks = [
+        {"a": "{{x}}", "b": "{{ x }}", "c": "{{  x  }}"},
+    ]
+    variables = {"x": "val"}
+    result = apply_variables(checks, variables)
+    assert result[0]["a"] == "val"
+    assert result[0]["b"] == "val"
+    assert result[0]["c"] == "val"
+
+
+def test_apply_variables_non_string_dict_values_untouched():
+    checks = [
+        {
+            "criticality": "error",
+            "check": {
+                "function": "is_in_list",
+                "arguments": {"column": "{{ col }}", "allowed": [1, 2, 3]},
+            },
+        }
+    ]
+    variables = {"col": "status"}
+    result = apply_variables(checks, variables)
+    assert result[0]["check"]["arguments"]["column"] == "status"
+    assert result[0]["check"]["arguments"]["allowed"] == [1, 2, 3]
+    assert result[0]["criticality"] == "error"
+
+
+def test_apply_variables_for_each_column():
+    """Placeholders that are elements of a list are substituted one by one, order preserved."""
+    column_templates = ["{{ col1 }}", "{{ col2 }}"]
+    templated = [
+        {
+            "criticality": "error",
+            "check": {"function": "is_not_null", "for_each_column": column_templates},
+        }
+    ]
+    names = {"col1": "first_name", "col2": "last_name"}
+    rendered = apply_variables(templated, names)
+    columns = rendered[0]["check"]["for_each_column"]
+    assert columns == ["first_name", "last_name"]
+
+
+def test_apply_variables_multiple_checks():
+    """Each check in the list is rendered; ``col`` and ``col2`` (one a prefix of the other) stay distinct."""
+    not_null_template = {
+        "name": "{{ col }}_not_null",
+        "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}},
+    }
+    not_empty_template = {
+        "name": "{{ col2 }}_not_empty",
+        "check": {"function": "is_not_empty", "arguments": {"column": "{{ col2 }}"}},
+    }
+    rendered = apply_variables([not_null_template, not_empty_template], {"col": "a", "col2": "b"})
+    not_null = rendered[0]
+    not_empty = rendered[1]
+    assert not_null["name"] == "a_not_null"
+    assert not_null["check"]["arguments"]["column"] == "a"
+    assert not_empty["name"] == "b_not_empty"
+    assert not_empty["check"]["arguments"]["column"] == "b"
+
+
+def test_apply_variables_empty_checks_list():
+    """With no checks to render, the result is an empty list even when variables are supplied."""
+    assert apply_variables([], {"col": "x"}) == []
+
+
+def test_apply_variables_empty_string_value():
+    """An empty-string value removes the placeholder, leaving the surrounding text joined."""
+    rendered = apply_variables([{"name": "prefix_{{ col }}_suffix"}], {"col": ""})
+    assert rendered[0]["name"] == "prefix__suffix"
+
+
+def test_apply_variables_value_contains_braces():
+    """A value that itself looks like ``{{ other }}`` must stay literal even though ``other`` is defined."""
+    checks = [{"expr": "{{ col }}"}]
+    result = apply_variables(checks, {"col": "{{ other }}", "other": "expanded"})
+    assert result[0]["expr"] == "{{ other }}"
+
+
+def test_apply_variables_key_with_regex_special_chars():
+    """Keys containing ``.`` or ``+`` are matched as literal text, not as regex operators."""
+    templated = {"name": "{{ col.name }}_check", "filter": "{{ col+1 }} > 0"}
+    odd_keys = {"col.name": "revenue", "col+1": "amount"}
+    rendered = apply_variables([templated], odd_keys)[0]
+    assert rendered["name"] == "revenue_check"
+    assert rendered["filter"] == "amount > 0"
+
+
+def test_apply_variables_same_placeholder_repeated_in_string():
+    """Every occurrence of a placeholder in a string is replaced, not only the first one."""
+    rendered = apply_variables([{"expr": "{{ x }} + {{ x }}"}], {"x": "col"})
+    assert rendered[0]["expr"] == "col + col"
+
+
+def test_apply_variables_deeply_nested():
+    """A placeholder four dictionary levels below the check root is still substituted."""
+    rendered = apply_variables([{"a": {"b": {"c": {"d": "{{ v }}"}}}}], {"v": "deep"})
+    assert rendered[0]["a"]["b"]["c"]["d"] == "deep"
+
+
+def test_apply_variables_value_with_backslash():
+    """A value containing backslashes is inserted verbatim rather than read as regex escapes."""
+    windows_path = r"C:\Users\test"
+    rendered = apply_variables([{"path": "{{ p }}"}], {"p": windows_path})
+    assert rendered[0]["path"] == windows_path
+
+
+def test_apply_variables_rejects_list_value():
+    """A list is not an accepted variable value: InvalidParameterError names the offending type."""
+    with pytest.raises(InvalidParameterError, match="unsupported type 'list'"):
+        apply_variables([{"check": {"arguments": {"column": "{{ col }}"}}}], {"col": ["a", "b"]})
+
+
+def test_apply_variables_rejects_dict_value():
+    """A dict is not an accepted variable value: InvalidParameterError names the offending type."""
+    with pytest.raises(InvalidParameterError, match="unsupported type 'dict'"):
+        apply_variables([{"check": {"arguments": {"column": "{{ col }}"}}}], {"col": {"nested": "value"}})
+
+
+def test_apply_variables_accepts_decimal_value():
+    """A Decimal value is accepted and appears in the string as its plain decimal text."""
+    rendered = apply_variables([{"expr": "col > {{ threshold }}"}], {"threshold": Decimal("3.14")})
+    assert rendered[0]["expr"] == "col > 3.14"
+
+
+def test_apply_variables_accepts_bool_value():
+    """A bool value is accepted and rendered with Python capitalisation ("True")."""
+    rendered = apply_variables([{"expr": "{{ flag }}"}], {"flag": True})
+    assert rendered[0]["expr"] == "True"

From ecc09c2067c1185a6e6171a98a95b6ee0b7db76c Mon Sep 17 00:00:00 2001
From: Federico Fiorio <45632804+fedeflowers@users.noreply.github.com>
Date: Wed, 18 Mar 2026 21:59:39 +0100
Subject: [PATCH 02/24] Apply suggestions from code review

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 src/databricks/labs/dqx/engine.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/databricks/labs/dqx/engine.py b/src/databricks/labs/dqx/engine.py
index 88c3d17ea..b02128d2c 100644
--- a/src/databricks/labs/dqx/engine.py
+++ b/src/databricks/labs/dqx/engine.py
@@ -647,7 +647,9 @@ def apply_checks_by_metadata(
             summary metrics. Summary metrics are returned by any `DQEngine` with an `observer` specified.
         """
         log_dataframe_telemetry(self.ws, self.spark, df)
-        return self._engine.apply_checks_by_metadata(df, checks, custom_check_functions, ref_dfs, variables)
+        return self._engine.apply_checks_by_metadata(
+            df, checks, custom_check_functions, ref_dfs, variables=variables
+        )
 
     @telemetry_logger("engine", "apply_checks_by_metadata_and_split")
     def apply_checks_by_metadata_and_split(
@@ -679,7 +681,9 @@ def apply_checks_by_metadata_and_split(
             quality summary metrics. Summary metrics are returned by any `DQEngine` with an `observer` specified.
         """
         log_dataframe_telemetry(self.ws, self.spark, df)
-        return self._engine.apply_checks_by_metadata_and_split(df, checks, custom_check_functions, ref_dfs, variables)
+        return self._engine.apply_checks_by_metadata_and_split(
+            df, checks, custom_check_functions, ref_dfs, variables=variables
+        )
 
     @telemetry_logger("engine", "apply_checks_and_save_in_table")
     def apply_checks_and_save_in_table(
@@ -814,7 +818,11 @@ def apply_checks_by_metadata_and_save_in_table(
 
         if quarantine_config:
             check_result = self.apply_checks_by_metadata_and_split(
-                df, checks, custom_check_functions, ref_dfs, variables
+                df,
+                checks=checks,
+                custom_check_functions=custom_check_functions,
+                ref_dfs=ref_dfs,
+                variables=variables,
             )
             if self._engine.observer:
                 good_df, bad_df, batch_observation = check_result
@@ -824,7 +832,13 @@ def apply_checks_by_metadata_and_save_in_table(
             quarantine_streaming_query = save_dataframe_as_table(bad_df, quarantine_config)
             target_streaming_query = quarantine_streaming_query
         else:
-            check_result = self.apply_checks_by_metadata(df, checks, custom_check_functions, ref_dfs, variables)
+            check_result = self.apply_checks_by_metadata(
+                df,
+                checks=checks,
+                custom_check_functions=custom_check_functions,
+                ref_dfs=ref_dfs,
+                variables=variables,
+            )
             if self._engine.observer:
                 checked_df, batch_observation = check_result
             else:
@@ -999,7 +1013,12 @@ def validate_checks(
         Returns:
             ChecksValidationStatus indicating the validation result.
         """
-        return DQEngineCore.validate_checks(checks, custom_check_functions, validate_custom_check_functions, variables)
+        return DQEngineCore.validate_checks(
+            checks,
+            custom_check_functions,
+            validate_custom_check_functions,
+            variables=variables,
+        )
 
     def get_invalid(self, df: DataFrame) -> DataFrame:
         """

From 870afe6d971cec3d280ca4e88d31b321db458be3 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Wed, 18 Mar 2026 22:09:56 +0100
Subject: [PATCH 03/24] add variables param in contracts
 apply_checks_by_metadata, apply_checks_by_metadata_and_split, validate_checks
 to be consistent with the downstream implementation

---
 src/databricks/labs/dqx/base.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/databricks/labs/dqx/base.py b/src/databricks/labs/dqx/base.py
index 7a4fdf8f7..24a85bfca 100644
--- a/src/databricks/labs/dqx/base.py
+++ b/src/databricks/labs/dqx/base.py
@@ -1,7 +1,7 @@
 import abc
 from collections.abc import Callable
 from functools import cached_property
-from typing import final
+from typing import Any, final
 from pyspark.sql import DataFrame, Observation
 from databricks.labs.dqx.checks_validator import ChecksValidationStatus
 from databricks.labs.dqx.rule import DQRule
@@ -81,6 +81,7 @@ def apply_checks_by_metadata(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
+        variables: dict[str, Any] | None = None,
     ) -> DataFrame | tuple[DataFrame, Observation]:
         """
         Apply data quality checks defined as metadata to the given DataFrame.
@@ -94,6 +95,8 @@ def apply_checks_by_metadata(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A DataFrame with errors and warnings result columns and an optional Observation which tracks data quality
@@ -107,6 +110,7 @@ def apply_checks_by_metadata_and_split(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
+        variables: dict[str, Any] | None = None,
     ) -> tuple[DataFrame, DataFrame] | tuple[DataFrame, DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame and split the results into
         two DataFrames ("good" and "bad").
@@ -120,6 +124,8 @@ def apply_checks_by_metadata_and_split(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A tuple of two DataFrames: "good" (may include rows with warnings but no result columns) and "bad" (rows
@@ -133,6 +139,7 @@ def validate_checks(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         validate_custom_check_functions: bool = True,
+        variables: dict[str, Any] | None = None,
     ) -> ChecksValidationStatus:
         """
         Validate checks defined as metadata to ensure they conform to the expected structure and types.
@@ -144,6 +151,8 @@ def validate_checks(
             checks: List of checks to apply to the DataFrame. Each check should be a dictionary.
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             validate_custom_check_functions: If True, validate custom check functions.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             ChecksValidationStatus indicating the validation result.

From 877e74850e5fa70d6176e6c99cf2ae4ad34b4b25 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Thu, 19 Mar 2026 14:12:35 +0100
Subject: [PATCH 04/24] add change to parametrize variables from load_checks
 instead of apply_checks

---
 src/databricks/labs/dqx/base.py               |  13 +--
 src/databricks/labs/dqx/engine.py             |  61 +++-------
 tests/integration/test_apply_checks.py        |  21 ++--
 .../test_apply_checks_and_save_in_table.py    |   4 +-
 tests/unit/test_load_checks.py                | 108 +++++++++++++++++-
 5 files changed, 136 insertions(+), 71 deletions(-)

diff --git a/src/databricks/labs/dqx/base.py b/src/databricks/labs/dqx/base.py
index 24a85bfca..8710f75f2 100644
--- a/src/databricks/labs/dqx/base.py
+++ b/src/databricks/labs/dqx/base.py
@@ -81,7 +81,6 @@ def apply_checks_by_metadata(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
-        variables: dict[str, Any] | None = None,
     ) -> DataFrame | tuple[DataFrame, Observation]:
         """
         Apply data quality checks defined as metadata to the given DataFrame.
@@ -95,8 +94,6 @@ def apply_checks_by_metadata(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A DataFrame with errors and warnings result columns and an optional Observation which tracks data quality
@@ -110,7 +107,6 @@ def apply_checks_by_metadata_and_split(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
-        variables: dict[str, Any] | None = None,
     ) -> tuple[DataFrame, DataFrame] | tuple[DataFrame, DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame and split the results into
         two DataFrames ("good" and "bad").
@@ -124,8 +120,6 @@ def apply_checks_by_metadata_and_split(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A tuple of two DataFrames: "good" (may include rows with warnings but no result columns) and "bad" (rows
@@ -139,7 +133,6 @@ def validate_checks(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         validate_custom_check_functions: bool = True,
-        variables: dict[str, Any] | None = None,
     ) -> ChecksValidationStatus:
         """
         Validate checks defined as metadata to ensure they conform to the expected structure and types.
@@ -151,8 +144,6 @@ def validate_checks(
             checks: List of checks to apply to the DataFrame. Each check should be a dictionary.
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             validate_custom_check_functions: If True, validate custom check functions.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             ChecksValidationStatus indicating the validation result.
@@ -184,7 +175,7 @@ def get_valid(self, df: DataFrame) -> DataFrame:
 
     @staticmethod
     @abc.abstractmethod
-    def load_checks_from_local_file(filepath: str) -> list[dict]:
+    def load_checks_from_local_file(filepath: str, variables: dict[str, Any] | None = None) -> list[dict]:
         """
         Load DQ rules (checks) from a local JSON or YAML file.
 
@@ -192,6 +183,8 @@ def load_checks_from_local_file(filepath: str) -> list[dict]:
 
         Args:
             filepath: Path to a file containing checks definitions.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before returning.
 
         Returns:
             List of DQ rules (checks).
diff --git a/src/databricks/labs/dqx/engine.py b/src/databricks/labs/dqx/engine.py
index b02128d2c..f35cc56da 100644
--- a/src/databricks/labs/dqx/engine.py
+++ b/src/databricks/labs/dqx/engine.py
@@ -220,7 +220,6 @@ def apply_checks_by_metadata(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
-        variables: dict[str, Any] | None = None,
     ) -> DataFrame | tuple[DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame.
 
@@ -233,14 +232,11 @@ def apply_checks_by_metadata(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A DataFrame with errors and warnings result columns and an optional Observation which tracks data quality
             summary metrics. Summary metrics are returned by any `DQEngine` with an `observer` specified.
         """
-        checks = apply_variables(checks, variables)
         dq_rule_checks = deserialize_checks(checks, custom_check_functions)
 
         return self.apply_checks(df, dq_rule_checks, ref_dfs)
@@ -251,7 +247,6 @@ def apply_checks_by_metadata_and_split(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
-        variables: dict[str, Any] | None = None,
     ) -> tuple[DataFrame, DataFrame] | tuple[DataFrame, DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame and split the results into
         two DataFrames ("good" and "bad").
@@ -265,8 +260,6 @@ def apply_checks_by_metadata_and_split(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A tuple of two DataFrames: "good" (may include rows with warnings but no result columns) and "bad" (rows
@@ -276,7 +269,6 @@ def apply_checks_by_metadata_and_split(
         Raises:
             InvalidCheckError: If any of the checks are invalid.
         """
-        checks = apply_variables(checks, variables)
         dq_rule_checks = deserialize_checks(checks, custom_check_functions)
 
         good_df, bad_df, *observations = self.apply_checks_and_split(df, dq_rule_checks, ref_dfs)
@@ -291,7 +283,6 @@ def validate_checks(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         validate_custom_check_functions: bool = True,
-        variables: dict[str, Any] | None = None,
     ) -> ChecksValidationStatus:
         """
         Validate checks defined as metadata to ensure they conform to the expected structure and types.
@@ -303,13 +294,10 @@ def validate_checks(
             checks: List of checks to apply to the DataFrame. Each check should be a dictionary.
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             validate_custom_check_functions: If True, validate custom check functions.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation.
 
         Returns:
             ChecksValidationStatus indicating the validation result.
         """
-        checks = apply_variables(checks, variables)
         return ChecksValidator.validate_checks(checks, custom_check_functions, validate_custom_check_functions)
 
     def get_invalid(self, df: DataFrame) -> DataFrame:
@@ -342,7 +330,7 @@ def get_valid(self, df: DataFrame) -> DataFrame:
         )
 
     @staticmethod
-    def load_checks_from_local_file(filepath: str) -> list[dict]:
+    def load_checks_from_local_file(filepath: str, variables: dict[str, Any] | None = None) -> list[dict]:
         """
         Load DQ rules (checks) from a local JSON or YAML file.
 
@@ -350,11 +338,14 @@ def load_checks_from_local_file(filepath: str) -> list[dict]:
 
         Args:
             filepath: Path to a file containing checks definitions.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before returning.
 
         Returns:
             List of DQ rules.
         """
-        return FileChecksStorageHandler().load(FileChecksStorageConfig(location=filepath))
+        checks = FileChecksStorageHandler().load(FileChecksStorageConfig(location=filepath))
+        return apply_variables(checks=checks, variables=variables)
 
     @staticmethod
     def save_checks_in_local_file(checks: list[dict], filepath: str):
@@ -626,7 +617,6 @@ def apply_checks_by_metadata(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
-        variables: dict[str, Any] | None = None,
     ) -> DataFrame | tuple[DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame.
 
@@ -639,8 +629,6 @@ def apply_checks_by_metadata(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A DataFrame with errors and warnings result columns and an optional Observation which tracks data quality
@@ -648,7 +636,7 @@ def apply_checks_by_metadata(
         """
         log_dataframe_telemetry(self.ws, self.spark, df)
         return self._engine.apply_checks_by_metadata(
-            df, checks, custom_check_functions, ref_dfs, variables=variables
+            df=df, checks=checks, custom_check_functions=custom_check_functions, ref_dfs=ref_dfs
         )
 
     @telemetry_logger("engine", "apply_checks_by_metadata_and_split")
@@ -658,7 +646,6 @@ def apply_checks_by_metadata_and_split(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
-        variables: dict[str, Any] | None = None,
     ) -> tuple[DataFrame, DataFrame] | tuple[DataFrame, DataFrame, Observation]:
         """Apply data quality checks defined as metadata to the given DataFrame and split the results into
         two DataFrames ("good" and "bad").
@@ -672,8 +659,6 @@ def apply_checks_by_metadata_and_split(
                   (rows appear in both DataFrames).
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             ref_dfs: Optional reference DataFrames to use in the checks.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation and deserialization.
 
         Returns:
             A tuple of two DataFrames: "good" (may include rows with warnings but no result columns) and "bad" (rows
@@ -682,7 +667,7 @@ def apply_checks_by_metadata_and_split(
         """
         log_dataframe_telemetry(self.ws, self.spark, df)
         return self._engine.apply_checks_by_metadata_and_split(
-            df, checks, custom_check_functions, ref_dfs, variables=variables
+            df=df, checks=checks, custom_check_functions=custom_check_functions, ref_dfs=ref_dfs
         )
 
     @telemetry_logger("engine", "apply_checks_and_save_in_table")
@@ -780,7 +765,6 @@ def apply_checks_by_metadata_and_save_in_table(
         custom_check_functions: dict[str, Callable] | None = None,
         ref_dfs: dict[str, DataFrame] | None = None,
         checks_location: str | None = None,
-        variables: dict[str, Any] | None = None,
     ) -> None:
         """
         Apply metadata-defined data quality checks to input data and save results.
@@ -805,8 +789,6 @@ def apply_checks_by_metadata_and_save_in_table(
                 to callables/modules (e.g., globals()).
             ref_dfs: Optional reference DataFrames used by checks.
             checks_location: Optional location of the checks. Used for reporting in the summary metrics table only.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation and deserialization.
         """
         logger.info(f"Applying checks to {input_config.location}")
 
@@ -818,11 +800,7 @@ def apply_checks_by_metadata_and_save_in_table(
 
         if quarantine_config:
             check_result = self.apply_checks_by_metadata_and_split(
-                df,
-                checks=checks,
-                custom_check_functions=custom_check_functions,
-                ref_dfs=ref_dfs,
-                variables=variables,
+                df=df, checks=checks, custom_check_functions=custom_check_functions, ref_dfs=ref_dfs
             )
             if self._engine.observer:
                 good_df, bad_df, batch_observation = check_result
@@ -833,11 +811,7 @@ def apply_checks_by_metadata_and_save_in_table(
             target_streaming_query = quarantine_streaming_query
         else:
             check_result = self.apply_checks_by_metadata(
-                df,
-                checks=checks,
-                custom_check_functions=custom_check_functions,
-                ref_dfs=ref_dfs,
-                variables=variables,
+                df=df, checks=checks, custom_check_functions=custom_check_functions, ref_dfs=ref_dfs
             )
             if self._engine.observer:
                 checked_df, batch_observation = check_result
@@ -995,7 +969,6 @@ def validate_checks(
         checks: list[dict],
         custom_check_functions: dict[str, Callable] | None = None,
         validate_custom_check_functions: bool = True,
-        variables: dict[str, Any] | None = None,
     ) -> ChecksValidationStatus:
         """
         Validate checks defined as metadata to ensure they conform to the expected structure and types.
@@ -1007,17 +980,14 @@ def validate_checks(
             checks: List of checks to apply to the DataFrame. Each check should be a dictionary.
             custom_check_functions: Optional dictionary with custom check functions (e.g., *globals()* of the calling module).
             validate_custom_check_functions: If True, validate custom check functions.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
-                placeholders in all string values of the check definitions before validation.
 
         Returns:
             ChecksValidationStatus indicating the validation result.
         """
         return DQEngineCore.validate_checks(
-            checks,
-            custom_check_functions,
-            validate_custom_check_functions,
-            variables=variables,
+            checks=checks,
+            custom_check_functions=custom_check_functions,
+            validate_custom_check_functions=validate_custom_check_functions,
         )
 
     def get_invalid(self, df: DataFrame) -> DataFrame:
@@ -1147,7 +1117,7 @@ def save_results_in_table(
             )
 
     @telemetry_logger("engine", "load_checks")
-    def load_checks(self, config: BaseChecksStorageConfig) -> list[dict]:
+    def load_checks(self, config: BaseChecksStorageConfig, variables: dict[str, Any] | None = None) -> list[dict]:
         """Load DQ rules (checks) from the storage backend described by *config*.
 
         This method delegates to a storage handler selected by the factory
@@ -1164,6 +1134,8 @@ def load_checks(self, config: BaseChecksStorageConfig) -> list[dict]:
 
         Args:
             config: Configuration object describing the storage backend.
+            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+                placeholders in all string values of the check definitions before returning.
 
         Returns:
             List of DQ rules (checks) represented as dictionaries.
@@ -1172,7 +1144,8 @@ def load_checks(self, config: BaseChecksStorageConfig) -> list[dict]:
             InvalidConfigError: If the configuration type is unsupported.
         """
         handler = self._checks_handler_factory.create(config)
-        return handler.load(config)
+        checks = handler.load(config)
+        return apply_variables(checks=checks, variables=variables)
 
     @telemetry_logger("engine", "save_checks")
     def save_checks(self, checks: list[dict], config: BaseChecksStorageConfig) -> None:
diff --git a/tests/integration/test_apply_checks.py b/tests/integration/test_apply_checks.py
index 022395f5e..9511ba63e 100755
--- a/tests/integration/test_apply_checks.py
+++ b/tests/integration/test_apply_checks.py
@@ -15,6 +15,7 @@
 from databricks.labs.dqx.check_funcs import sql_query
 from databricks.labs.dqx.config import OutputConfig, FileChecksStorageConfig, ExtraParams, RunConfig
 from databricks.labs.dqx.engine import DQEngine
+from databricks.labs.dqx.utils import apply_variables
 from databricks.labs.dqx.rule import (
     DQForEachColRule,
     register_rule,
@@ -9570,9 +9571,9 @@ def test_apply_checks_by_metadata_with_variables(ws, spark):
             },
         },
     ]
-    variables = {"col": "b"}
+    checks = apply_variables(checks, {"col": "b"})
 
-    checked = dq_engine.apply_checks_by_metadata(test_df, checks, variables=variables)
+    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
 
     expected = spark.createDataFrame(
         [
@@ -9623,9 +9624,9 @@ def test_apply_checks_by_metadata_and_split_with_variables(ws, spark):
             },
         },
     ]
-    variables = {"col": "b", "expr_col": "a", "threshold": 1}
+    checks = apply_variables(checks, {"col": "b", "expr_col": "a", "threshold": 1})
 
-    good, bad = dq_engine.apply_checks_by_metadata_and_split(test_df, checks, variables=variables)
+    good, bad = dq_engine.apply_checks_by_metadata_and_split(test_df, checks)
 
     # Row [1, 3, 3]: b is not null, a > 1 passes -> good only
     # Row [2, None, 4]: b is null (error), a > 1 passes -> bad only
@@ -9649,9 +9650,9 @@ def test_apply_checks_by_metadata_with_variables_name_and_filter(ws, spark):
             "filter": "{{ filter_col }} IS NOT NULL",
         },
     ]
-    variables = {"col": "a", "threshold": 1, "filter_col": "a"}
+    checks = apply_variables(checks, {"col": "a", "threshold": 1, "filter_col": "a"})
 
-    checked = dq_engine.apply_checks_by_metadata(test_df, checks, variables=variables)
+    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
 
     # Row with a=1 should have an error since a > 1 is false
     result_rows = checked.collect()
@@ -9679,9 +9680,9 @@ def test_validate_checks_with_variables(ws):
             },
         },
     ]
-    variables = {"crit": "error", "col": "b"}
+    checks = apply_variables(checks, {"crit": "error", "col": "b"})
 
-    status = DQEngine.validate_checks(checks, variables=variables)
+    status = DQEngine.validate_checks(checks)
     assert not status.has_errors
 
 
@@ -9695,9 +9696,9 @@ def test_validate_checks_with_variables_invalid_after_substitution(ws):
             },
         },
     ]
-    variables = {"crit": "not_a_valid_criticality"}
+    checks = apply_variables(checks, {"crit": "not_a_valid_criticality"})
 
-    status = DQEngine.validate_checks(checks, variables=variables)
+    status = DQEngine.validate_checks(checks)
     assert status.has_errors
 
 
diff --git a/tests/integration/test_apply_checks_and_save_in_table.py b/tests/integration/test_apply_checks_and_save_in_table.py
index ee9b7ec85..242c88538 100644
--- a/tests/integration/test_apply_checks_and_save_in_table.py
+++ b/tests/integration/test_apply_checks_and_save_in_table.py
@@ -14,6 +14,7 @@
 )
 from databricks.labs.dqx.engine import DQEngine
 from databricks.labs.dqx.errors import InvalidConfigError
+from databricks.labs.dqx.utils import apply_variables
 from databricks.labs.dqx.rule import DQRowRule, DQDatasetRule
 from tests.integration.conftest import EXTRA_PARAMS, RUN_TIME, RUN_ID, REPORTING_COLUMNS
 
@@ -2103,14 +2104,13 @@ def test_apply_checks_by_metadata_and_save_in_table_with_variables(ws, spark, ma
             "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}},
         },
     ]
-    variables = {"col": "a", "crit": "error"}
+    checks = apply_variables(checks, {"col": "a", "crit": "error"})
 
     engine = DQEngine(ws, spark=spark, extra_params=EXTRA_PARAMS)
     engine.apply_checks_by_metadata_and_save_in_table(
         checks=checks,
         input_config=InputConfig(location=input_table),
         output_config=OutputConfig(location=output_table, mode="overwrite"),
-        variables=variables,
     )
 
     actual_df = spark.table(output_table)
diff --git a/tests/unit/test_load_checks.py b/tests/unit/test_load_checks.py
index c0bdb2bd3..ab4fa90a8 100644
--- a/tests/unit/test_load_checks.py
+++ b/tests/unit/test_load_checks.py
@@ -1,15 +1,20 @@
 from unittest.mock import create_autospec
 
 import pytest
+from pyspark.sql import SparkSession
+
+from databricks.labs.dqx.checks_storage import (
+    BaseChecksStorageHandlerFactory,
+    ChecksStorageHandler,
+    VolumeFileChecksStorageHandler,
+)
+from databricks.labs.dqx.config import FileChecksStorageConfig, VolumeFileChecksStorageConfig
+from databricks.labs.dqx.engine import DQEngine, DQEngineCore
+from databricks.labs.dqx.errors import InvalidCheckError, CheckDownloadError, InvalidConfigError
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors import NotFound
 from databricks.sdk.service.files import DownloadResponse
 
-from databricks.labs.dqx.checks_storage import VolumeFileChecksStorageHandler
-from databricks.labs.dqx.config import VolumeFileChecksStorageConfig
-from databricks.labs.dqx.engine import DQEngineCore
-from databricks.labs.dqx.errors import InvalidCheckError, CheckDownloadError, InvalidConfigError
-
 
 def test_load_checks_from_local_file_json(make_local_check_file_as_json, expected_checks):
     file = make_local_check_file_as_json
@@ -84,3 +89,96 @@ def test_file_download_contents_read_none():
 
     with pytest.raises(NotFound, match="No contents at Unity Catalog volume path"):
         handler.load(VolumeFileChecksStorageConfig(location="/Volumes/catalog/schema/volume/test_path.yml"))
+
+
+def test_load_checks_from_local_file_with_variables(tmp_path):
+    content = """- criticality: "{{ crit }}"
+  check:
+    function: is_not_null
+    arguments:
+      column: "{{ col }}"
+"""
+    file_path = tmp_path / "checks.yml"
+    file_path.write_text(content, encoding="utf-8")
+
+    checks = DQEngineCore.load_checks_from_local_file(str(file_path), variables={"crit": "error", "col": "id"})
+
+    assert checks == [
+        {"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "id"}}},
+    ]
+
+
+def test_load_checks_from_local_file_variables_none(tmp_path):
+    content = """- criticality: error
+  check:
+    function: is_not_null
+    arguments:
+      column: id
+"""
+    file_path = tmp_path / "checks.yml"
+    file_path.write_text(content, encoding="utf-8")
+
+    checks = DQEngineCore.load_checks_from_local_file(str(file_path), variables=None)
+
+    assert checks == [
+        {"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "id"}}},
+    ]
+
+
+def test_load_checks_from_local_file_variables_empty(tmp_path):
+    content = """- criticality: error
+  check:
+    function: is_not_null
+    arguments:
+      column: id
+"""
+    file_path = tmp_path / "checks.yml"
+    file_path.write_text(content, encoding="utf-8")
+
+    checks = DQEngineCore.load_checks_from_local_file(str(file_path), variables={})
+
+    assert checks == [
+        {"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "id"}}},
+    ]
+
+
+def test_load_checks_with_variables():
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    raw_checks = [
+        {"criticality": "{{ crit }}", "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}}}
+    ]
+
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+    mock_handler.load.return_value = raw_checks
+
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory)
+    config = FileChecksStorageConfig(location="checks.yml")
+
+    checks = engine.load_checks(config, variables={"crit": "error", "col": "id"})
+
+    assert checks == [
+        {"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "id"}}},
+    ]
+
+
+def test_load_checks_variables_none():
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    raw_checks = [{"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "id"}}}]
+
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+    mock_handler.load.return_value = raw_checks
+
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory)
+    config = FileChecksStorageConfig(location="checks.yml")
+
+    checks = engine.load_checks(config, variables=None)
+
+    assert checks == raw_checks

From a3a78de4c92d458f205ba20030f46ba8f912904e Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Fri, 20 Mar 2026 07:47:56 +0100
Subject: [PATCH 05/24] Restore
 test_apply_checks_by_metadata_and_save_in_table_loads_checks_from_table

The test was accidentally replaced during a merge. Restore the original
test that covers loading checks from a Delta table via checks_location,
and keep the new variables test as a separate addition.

Co-authored-by: Isaac
---
 .../test_apply_checks_and_save_in_table.py    | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/tests/integration/test_apply_checks_and_save_in_table.py b/tests/integration/test_apply_checks_and_save_in_table.py
index 952799b74..d07d738a0 100644
--- a/tests/integration/test_apply_checks_and_save_in_table.py
+++ b/tests/integration/test_apply_checks_and_save_in_table.py
@@ -2175,6 +2175,89 @@ def test_apply_checks_and_save_in_table_loads_checks_from_table(ws, spark, make_
     assert_df_equality(actual_df, expected_df, ignore_nullable=True)
 
 
+def test_apply_checks_by_metadata_and_save_in_table_loads_checks_from_table(ws, spark, make_schema, make_random):
+    catalog_name = TEST_CATALOG
+    schema = make_schema(catalog_name=catalog_name)
+    input_table = f"{catalog_name}.{schema.name}.{make_random(10).lower()}"
+    output_table = f"{catalog_name}.{schema.name}.{make_random(10).lower()}"
+    checks_table = f"{catalog_name}.{schema.name}.{make_random(10).lower()}"
+
+    # Create test data and save to source table
+    test_schema = "a: int, b: int, c: string"
+    test_df = spark.createDataFrame([[1, 2, "valid"], [None, 3, "error"], [4, None, "warn"]], test_schema)
+    test_df.write.format("delta").mode("overwrite").saveAsTable(input_table)
+
+    # Save checks to a delta table
+    checks_metadata = [
+        {
+            "name": "a_is_null",
+            "criticality": "error",
+            "check": {"function": "is_not_null", "arguments": {"column": "a"}},
+        },
+        {
+            "name": "b_is_null",
+            "criticality": "warn",
+            "check": {"function": "is_not_null", "arguments": {"column": "b"}},
+        },
+    ]
+    engine = DQEngine(ws, spark=spark, extra_params=EXTRA_PARAMS)
+    engine.save_checks(checks_metadata, config=TableChecksStorageConfig(location=checks_table))
+
+    # Apply checks by metadata loading from table via checks_location (no checks param)
+    engine.apply_checks_by_metadata_and_save_in_table(
+        input_config=InputConfig(location=input_table),
+        output_config=OutputConfig(location=output_table, mode="overwrite"),
+        checks_location=checks_table,
+    )
+
+    # Verify the table was created and contains the expected data
+    actual_df = spark.table(output_table)
+    expected_schema = test_schema + REPORTING_COLUMNS
+    expected_df = spark.createDataFrame(
+        [
+            [1, 2, "valid", None, None],
+            [
+                None,
+                3,
+                "error",
+                [
+                    {
+                        "name": "a_is_null",
+                        "message": "Column 'a' value is null",
+                        "columns": ["a"],
+                        "filter": None,
+                        "function": "is_not_null",
+                        "run_time": RUN_TIME,
+                        "run_id": RUN_ID,
+                        "user_metadata": {},
+                    }
+                ],
+                None,
+            ],
+            [
+                4,
+                None,
+                "warn",
+                None,
+                [
+                    {
+                        "name": "b_is_null",
+                        "message": "Column 'b' value is null",
+                        "columns": ["b"],
+                        "filter": None,
+                        "function": "is_not_null",
+                        "run_time": RUN_TIME,
+                        "run_id": RUN_ID,
+                        "user_metadata": {},
+                    }
+                ],
+            ],
+        ],
+        schema=expected_schema,
+    )
+    assert_df_equality(actual_df, expected_df, ignore_nullable=True)
+
+
 def test_apply_checks_by_metadata_and_save_in_table_with_variables(ws, spark, make_schema, make_random):
     catalog_name = TEST_CATALOG
     schema = make_schema(catalog_name=catalog_name)

From b896032bed9ea1b76f961b9fdbbe2cf747286f36 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Mon, 23 Mar 2026 17:02:50 +0100
Subject: [PATCH 06/24] add EXTRA_PARAMS compatibility, added unit and
 integration tests for checking new parametrization of variables
 implementation, improved resolve_variables algorithm, fixed variable hints and
 support for datetime variables resolution

---
 src/databricks/labs/dqx/base.py               |  14 +-
 src/databricks/labs/dqx/config.py             |   2 +
 src/databricks/labs/dqx/engine.py             |  43 ++-
 src/databricks/labs/dqx/utils.py              | 114 +++----
 tests/integration/test_apply_checks.py        | 160 ---------
 .../test_apply_checks_and_save_in_table.py    |  24 +-
 .../test_apply_checks_variables.py            | 303 ++++++++++++++++++
 tests/unit/test_load_checks.py                |  68 +++-
 tests/unit/test_utils.py                      | 186 ++++++++---
 9 files changed, 616 insertions(+), 298 deletions(-)
 create mode 100644 tests/integration/test_apply_checks_variables.py

diff --git a/src/databricks/labs/dqx/base.py b/src/databricks/labs/dqx/base.py
index 8710f75f2..74d77f9e6 100644
--- a/src/databricks/labs/dqx/base.py
+++ b/src/databricks/labs/dqx/base.py
@@ -1,12 +1,15 @@
 import abc
 from collections.abc import Callable
 from functools import cached_property
-from typing import Any, final
+from typing import final
+
 from pyspark.sql import DataFrame, Observation
+
+from databricks.labs.dqx.__about__ import __version__
 from databricks.labs.dqx.checks_validator import ChecksValidationStatus
 from databricks.labs.dqx.rule import DQRule
+from databricks.labs.dqx.utils import VariableValue
 from databricks.sdk import WorkspaceClient
-from databricks.labs.dqx.__about__ import __version__
 
 
 class DQEngineBase(abc.ABC):
@@ -175,15 +178,18 @@ def get_valid(self, df: DataFrame) -> DataFrame:
 
     @staticmethod
     @abc.abstractmethod
-    def load_checks_from_local_file(filepath: str, variables: dict[str, Any] | None = None) -> list[dict]:
+    def load_checks_from_local_file(filepath: str, variables: dict[str, VariableValue] | None = None) -> list[dict]:
         """
         Load DQ rules (checks) from a local JSON or YAML file.
 
         The returned checks can be used as input to *apply_checks_by_metadata*.
 
+        **Security note:** variable values substituted into **sql_expression** checks are
+        not sanitized. Callers must ensure that variable values come from trusted sources.
+
         Args:
             filepath: Path to a file containing checks definitions.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+            variables: Optional mapping of placeholder names to replacement values. Replaces **{{ key }}**
                 placeholders in all string values of the check definitions before returning.
 
         Returns:
diff --git a/src/databricks/labs/dqx/config.py b/src/databricks/labs/dqx/config.py
index d4041c29f..2b139b116 100644
--- a/src/databricks/labs/dqx/config.py
+++ b/src/databricks/labs/dqx/config.py
@@ -4,6 +4,7 @@
 
 from databricks.labs.dqx.checks_serializer import SerializerFactory
 from databricks.labs.dqx.errors import InvalidConfigError, InvalidParameterError
+from databricks.labs.dqx.utils import VariableValue
 
 __all__ = [
     "WorkspaceConfig",
@@ -215,6 +216,7 @@ class ExtraParams:
     user_metadata: dict[str, str] = field(default_factory=dict)
     run_time_overwrite: str | None = None
     run_id_overwrite: str | None = None
+    variables: dict[str, VariableValue] = field(default_factory=dict)
 
 
 @dataclass
diff --git a/src/databricks/labs/dqx/engine.py b/src/databricks/labs/dqx/engine.py
index ff0d4b63c..ea267da4f 100644
--- a/src/databricks/labs/dqx/engine.py
+++ b/src/databricks/labs/dqx/engine.py
@@ -51,7 +51,7 @@
 from databricks.labs.dqx.telemetry import telemetry_logger, log_telemetry, log_dataframe_telemetry
 from databricks.sdk import WorkspaceClient
 from databricks.labs.dqx.errors import InvalidCheckError, InvalidConfigError, InvalidParameterError
-from databricks.labs.dqx.utils import list_tables, safe_strip_file_from_path, apply_variables
+from databricks.labs.dqx.utils import list_tables, safe_strip_file_from_path, resolve_variables, VariableValue
 from databricks.labs.dqx.io import is_one_time_trigger
 
 logger = logging.getLogger(__name__)
@@ -337,22 +337,25 @@ def get_valid(self, df: DataFrame) -> DataFrame:
         )
 
     @staticmethod
-    def load_checks_from_local_file(filepath: str, variables: dict[str, Any] | None = None) -> list[dict]:
+    def load_checks_from_local_file(filepath: str, variables: dict[str, VariableValue] | None = None) -> list[dict]:
         """
         Load DQ rules (checks) from a local JSON or YAML file.
 
         The returned checks can be used as input to *apply_checks_by_metadata*.
 
+        **Security note:** variable values substituted into **sql_expression** checks are
+        not sanitized. Callers must ensure that variable values come from trusted sources.
+
         Args:
             filepath: Path to a file containing checks definitions.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+            variables: Optional mapping of placeholder names to replacement values. Replaces **{{ key }}**
                 placeholders in all string values of the check definitions before returning.
 
         Returns:
             List of DQ rules.
         """
         checks = FileChecksStorageHandler().load(FileChecksStorageConfig(location=filepath))
-        return apply_variables(checks=checks, variables=variables)
+        return resolve_variables(checks=checks, variables=variables)
 
     @staticmethod
     def save_checks_in_local_file(checks: list[dict], filepath: str):
@@ -573,8 +576,9 @@ def __init__(
     ):
         super().__init__(workspace_client)
 
+        self._extra_params = extra_params or ExtraParams()
         self.spark = SparkSession.builder.getOrCreate() if spark is None else spark
-        self._engine = engine or DQEngineCore(workspace_client, spark, extra_params, observer)
+        self._engine = engine or DQEngineCore(workspace_client, spark, self._extra_params, observer)
         self._config_serializer = config_serializer or ConfigSerializer(workspace_client)
         self._checks_handler_factory: BaseChecksStorageHandlerFactory = (
             checks_handler_factory or ChecksStorageHandlerFactory(self.ws, self.spark)
@@ -1174,7 +1178,9 @@ def save_results_in_table(
             )
 
     @telemetry_logger("engine", "load_checks")
-    def load_checks(self, config: BaseChecksStorageConfig, variables: dict[str, Any] | None = None) -> list[dict]:
+    def load_checks(
+        self, config: BaseChecksStorageConfig, variables: dict[str, VariableValue] | None = None
+    ) -> list[dict]:
         """Load DQ rules (checks) from the storage backend described by *config*.
 
         This method delegates to a storage handler selected by the factory
@@ -1189,9 +1195,15 @@ def load_checks(self, config: BaseChecksStorageConfig, variables: dict[str, Any]
         - *InstallationChecksStorageConfig* (installation directory);
         - *VolumeFileChecksStorageConfig* (Unity Catalog volume file);
 
+        Per-call *variables* are merged with engine-level defaults from
+        *ExtraParams.variables* (per-call values take precedence on conflict).
+
+        **Security note:** variable values substituted into **sql_expression** checks are
+        not sanitized. Callers must ensure that variable values come from trusted sources.
+
         Args:
             config: Configuration object describing the storage backend.
-            variables: Optional mapping of placeholder names to replacement values. Replaces ``{{ key }}``
+            variables: Optional mapping of placeholder names to replacement values. Replaces **{{ key }}**
                 placeholders in all string values of the check definitions before returning.
 
         Returns:
@@ -1202,7 +1214,22 @@ def load_checks(self, config: BaseChecksStorageConfig, variables: dict[str, Any]
         """
         handler = self._checks_handler_factory.create(config)
         checks = handler.load(config)
-        return apply_variables(checks=checks, variables=variables)
+        merged = self._merge_variables(variables)
+        return resolve_variables(checks=checks, variables=merged)
+
+    def _merge_variables(self, per_call: dict[str, VariableValue] | None) -> dict[str, VariableValue] | None:
+        """Merge engine-level default variables with per-call overrides.
+
+        Per-call values take precedence over engine-level defaults.
+        """
+        defaults = self._extra_params.variables
+        if not defaults and not per_call:
+            return None
+        if not defaults:
+            return per_call
+        if not per_call:
+            return defaults
+        return {**defaults, **per_call}
 
     @telemetry_logger("engine", "save_checks")
     def save_checks(self, checks: list[dict], config: BaseChecksStorageConfig) -> None:
diff --git a/src/databricks/labs/dqx/utils.py b/src/databricks/labs/dqx/utils.py
index a42a2f6a7..9a0dc4241 100644
--- a/src/databricks/labs/dqx/utils.py
+++ b/src/databricks/labs/dqx/utils.py
@@ -6,7 +6,6 @@
 from decimal import Decimal
 from enum import Enum
 from importlib.util import find_spec
-from collections.abc import Callable, Generator
 from typing import Any
 from fnmatch import fnmatch
 from pathlib import Path
@@ -33,8 +32,11 @@
 COLUMN_NORMALIZE_EXPRESSION = re.compile("[^a-zA-Z0-9]+")
 COLUMN_PATTERN = re.compile(r"Column<'(.*?)(?: AS (\w+))?'>$", re.DOTALL)
 INVALID_COLUMN_NAME_PATTERN = re.compile(r"[\s,;{}\(\)\n\t=]+")
-_UNRESOLVED_PLACEHOLDER_PATTERN = re.compile(r"\{\{.*?\}\}")
-_SCALAR_VARIABLE_TYPES = (str, int, float, bool, Decimal)
+_UNRESOLVED_PLACEHOLDER_PATTERN = re.compile(r"\{\{[^}]*\}\}")
+_SCALAR_VARIABLE_TYPES = (str, int, float, bool, Decimal, datetime.date, datetime.datetime, datetime.time)
+
+VariableValue = str | int | float | bool | Decimal | datetime.date | datetime.datetime | datetime.time
+"""Supported scalar types for variable substitution values."""
 
 
 def get_column_name_or_alias(
@@ -544,36 +546,41 @@ def missing_required_packages(packages: list[str]) -> bool:
     return not all(find_spec(spec) for spec in packages)
 
 
-def _literal_replacer(val: str) -> Callable[[re.Match], str]:
-    """Return a ``re.sub`` replacer that always returns *val* literally."""
-
-    def replacer(_: re.Match) -> str:
-        return val
-
-    return replacer
-
-
 def _replace_template(text: str, variables: dict[str, str]) -> str:
-    """Replace ``{{ key }}`` placeholders in *text* with values from *variables*.
+    """Replace **{{ key }}** placeholders in *text* with values from *variables*.
 
-    Tolerates whitespace inside braces (e.g. ``{{ key }}``, ``{{key}}``).
-    Uses a lambda replacement to avoid backslash interpretation in values.
+    Uses a single-pass regex substitution.
+    Tolerates whitespace inside braces (e.g. **{{ key }}**, **{{key}}**).
+    Logs a warning if any unresolved **{{ ... }}** placeholders remain after substitution.
 
     Args:
-        text: Input string potentially containing ``{{ key }}`` placeholders.
+        text: Input string potentially containing **{{ key }}** placeholders.
         variables: Pre-stringified mapping of placeholder names to values.
 
     Returns:
         String with all matching placeholders replaced.
     """
-    for key, val in variables.items():
-        pattern = r"\{\{\s*" + re.escape(key) + r"\s*\}\}"
-        text = re.sub(pattern, _literal_replacer(val), text)
-    return text
+    if not variables:
+        if _UNRESOLVED_PLACEHOLDER_PATTERN.search(text):
+            logger.warning("Unresolved placeholder found: '%s'", text)  # pylint: disable=logging-too-many-args
+        return text
+
+    def _resolve(match_obj: re.Match[str]) -> str:
+        key = match_obj.group(0).strip("{} \t")
+        if key in variables:
+            return variables[key]
+        unresolved.append(key)
+        return match_obj.group(0)
 
+    unresolved: list[str] = []
+    output = _UNRESOLVED_PLACEHOLDER_PATTERN.sub(_resolve, text)
+    if unresolved:
+        logger.warning("Unresolved placeholders found: %s", unresolved)  # pylint: disable=logging-too-many-args
+    return output
 
-def _substitute_variables(obj: Any, variables: dict[str, str]) -> Any:
-    """Recursively replace ``{{ key }}`` placeholders in all string values within *obj*.
+
+def _substitute_variables(obj: object, variables: dict[str, str]) -> object:
+    """Recursively replace **{{ key }}** placeholders in all string values within *obj*.
 
     Traverses dicts, lists, and strings. Non-string/non-collection values are
     returned unchanged. Dict keys are not substituted.
@@ -594,36 +601,41 @@ def _substitute_variables(obj: Any, variables: dict[str, str]) -> Any:
     return obj
 
 
-def _validate_variable_types(variables: dict[str, Any]) -> None:
+def _validate_variable_types(variables: dict[str, VariableValue]) -> None:
     """Raise :class:`InvalidParameterError` if any variable value is not a supported scalar type."""
     for key, val in variables.items():
         if not isinstance(val, _SCALAR_VARIABLE_TYPES):
             raise InvalidParameterError(
                 f"Variable '{key}' has unsupported type '{type(val).__name__}'. "
-                f"Only scalar types are supported: str, int, float, bool, Decimal."
+                f"Only scalar types are supported: str, int, float, bool, Decimal, "
+                f"datetime.date, datetime.datetime, datetime.time."
             )
 
 
-def apply_variables(checks: list[dict], variables: dict[str, Any] | None) -> list[dict]:
-    """Apply variable substitution to check definitions.
+def resolve_variables(checks: list[dict], variables: dict[str, VariableValue] | None) -> list[dict]:
+    """Resolve variable substitution in check definitions.
 
-    Replaces ``{{ key }}`` placeholders in all string values of *checks* with the
+    Replaces **{{ key }}** placeholders in all string values of *checks* with the
     corresponding values from *variables*. The original *checks* list is never mutated.
 
-    Variable values must be scalar types (``str``, ``int``, ``float``, ``bool``,
-    ``Decimal``). Non-string scalars are converted via ``str()`` — for example,
-    ``{"threshold": 10}`` becomes ``"10"`` in the substituted string. Collection
-    types (``list``, ``dict``, ``set``, etc.) are rejected with
-    :class:`~databricks.labs.dqx.errors.InvalidParameterError` because their
-    ``str()`` representation is rarely meaningful in SQL or column expressions.
+    Variable values must be scalar types (**str**, **int**, **float**, **bool**,
+    **Decimal**, **datetime.date**, **datetime.datetime**, **datetime.time**).
+    Non-string scalars are converted via **str()** — for example, **{"threshold": 10}** becomes **"10"** in
+    the substituted string. Collection types (**list**, **dict**, **set**, etc.) are
+    rejected with :class:`~databricks.labs.dqx.errors.InvalidParameterError` because
+    their **str()** representation is rarely meaningful in SQL or column expressions.
 
-    Logs a warning for any ``{{ ... }}`` placeholders that remain unresolved after
+    Logs a warning for any **{{ ... }}** placeholders that remain unresolved after
     substitution (e.g. misspelled variable names).
 
+    **Security note:** variable values substituted into **sql_expression** checks are
+    not sanitized and are passed directly to **F.expr()**. Callers must ensure that
+    variable values come from trusted sources to prevent SQL injection.
+
     Args:
         checks: List of check definition dictionaries (metadata format).
         variables: Mapping of placeholder names to scalar replacement values.
-            If ``None`` or empty the checks are returned unchanged.
+            If **None** or empty the checks are returned unchanged.
 
     Returns:
         A new list of check dicts with placeholders resolved, or the original list
@@ -637,37 +649,7 @@ def apply_variables(checks: list[dict], variables: dict[str, Any] | None) -> lis
 
     _validate_variable_types(variables)
     str_variables = {k: str(v) for k, v in variables.items()}
-    resolved: list[dict] = _substitute_variables(checks, str_variables)
-
-    # Warn about any remaining unresolved placeholders
-    for check_def in resolved:
-        for value in _iter_strings(check_def):
-            if _UNRESOLVED_PLACEHOLDER_PATTERN.search(value):
-                logger.warning(f"Unresolved placeholder found after variable substitution: '{value}'")
-
-    return resolved
-
-
-def _iter_strings(obj: Any) -> Generator[str, None, None]:
-    """Yield all string values found recursively in *obj*.
-
-    Traverses dicts (values only) and lists. Non-string leaf values are skipped.
-    Used to scan resolved check definitions for unresolved ``{{ ... }}`` placeholders.
-
-    Args:
-        obj: A string, dict, list, or other value to traverse.
-
-    Yields:
-        Every string value found in the nested structure.
-    """
-    if isinstance(obj, str):
-        yield obj
-    elif isinstance(obj, dict):
-        for value in obj.values():
-            yield from _iter_strings(value)
-    elif isinstance(obj, list):
-        for item in obj:
-            yield from _iter_strings(item)
+    return _substitute_variables(checks, str_variables)  # type: ignore[return-value]
 
 
 def get_file_extension(file_path: str | os.PathLike) -> str:
diff --git a/tests/integration/test_apply_checks.py b/tests/integration/test_apply_checks.py
index 2faadbb36..98715e5dd 100755
--- a/tests/integration/test_apply_checks.py
+++ b/tests/integration/test_apply_checks.py
@@ -13,7 +13,6 @@
 from databricks.labs.dqx.check_funcs import sql_query
 from databricks.labs.dqx.config import OutputConfig, FileChecksStorageConfig, ExtraParams, RunConfig
 from databricks.labs.dqx.engine import DQEngine
-from databricks.labs.dqx.utils import apply_variables
 from databricks.labs.dqx.rule import (
     DQForEachColRule,
     register_rule,
@@ -9991,162 +9990,3 @@ def test_apply_checks_by_metadata_skip_checks_with_missing_columns(ws, spark):
         SCHEMA + complex_cols_schema + REPORTING_COLUMNS,
     )
     assert_df_equality(checked, expected, ignore_nullable=True)
-
-
-def test_apply_checks_by_metadata_with_variables(ws, spark):
-    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
-    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
-
-    checks = [
-        {
-            "criticality": "error",
-            "check": {
-                "function": "is_not_null_and_not_empty",
-                "arguments": {"column": "{{ col }}"},
-            },
-        },
-    ]
-    checks = apply_variables(checks, {"col": "b"})
-
-    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
-
-    expected = spark.createDataFrame(
-        [
-            [1, 3, 3, None, None],
-            [
-                2,
-                None,
-                4,
-                [
-                    {
-                        "name": "b_is_null_or_empty",
-                        "message": "Column 'b' value is null or empty",
-                        "columns": ["b"],
-                        "filter": None,
-                        "function": "is_not_null_and_not_empty",
-                        "run_time": RUN_TIME,
-                        "run_id": RUN_ID,
-                        "user_metadata": {},
-                    }
-                ],
-                None,
-            ],
-            [None, 4, None, None, None],
-        ],
-        EXPECTED_SCHEMA,
-    )
-    assert_df_equality(checked, expected, ignore_nullable=True)
-
-
-def test_apply_checks_by_metadata_and_split_with_variables(ws, spark):
-    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
-    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
-
-    checks = [
-        {
-            "criticality": "error",
-            "name": "{{ col }}_null_check",
-            "check": {
-                "function": "is_not_null_and_not_empty",
-                "arguments": {"column": "{{ col }}"},
-            },
-        },
-        {
-            "criticality": "warn",
-            "check": {
-                "function": "sql_expression",
-                "arguments": {"expression": "{{ expr_col }} > {{ threshold }}"},
-            },
-        },
-    ]
-    checks = apply_variables(checks, {"col": "b", "expr_col": "a", "threshold": 1})
-
-    good, bad = dq_engine.apply_checks_by_metadata_and_split(test_df, checks)
-
-    # Row [1, 3, 3]: b is not null, a > 1 passes -> good only
-    # Row [2, None, 4]: b is null (error), a > 1 passes -> bad only
-    # Row [None, 4, None]: b is not null, a is null so "a > 1" fails (warn) -> both good and bad
-    assert good.count() == 2
-    assert bad.count() == 2
-
-
-def test_apply_checks_by_metadata_with_variables_name_and_filter(ws, spark):
-    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
-    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
-
-    checks = [
-        {
-            "criticality": "error",
-            "name": "{{ col }}_greater_than_{{ threshold }}",
-            "check": {
-                "function": "sql_expression",
-                "arguments": {"expression": "{{ col }} > {{ threshold }}"},
-            },
-            "filter": "{{ filter_col }} IS NOT NULL",
-        },
-    ]
-    checks = apply_variables(checks, {"col": "a", "threshold": 1, "filter_col": "a"})
-
-    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
-
-    # Row with a=1 should have an error since a > 1 is false
-    result_rows = checked.collect()
-    row_a1 = [r for r in result_rows if r["a"] == 1][0]
-    assert row_a1["_errors"] is not None
-    assert len(row_a1["_errors"]) == 1
-    assert row_a1["_errors"][0]["name"] == "a_greater_than_1"
-
-    # Row with a=2 should have no errors
-    row_a2 = [r for r in result_rows if r["a"] == 2][0]
-    assert row_a2["_errors"] is None
-
-    # Row with a=None should have no errors (filtered out)
-    row_null = [r for r in result_rows if r["a"] is None][0]
-    assert row_null["_errors"] is None
-
-
-def test_validate_checks_with_variables(ws):
-    checks = [
-        {
-            "criticality": "{{ crit }}",
-            "check": {
-                "function": "is_not_null",
-                "arguments": {"column": "{{ col }}"},
-            },
-        },
-    ]
-    checks = apply_variables(checks, {"crit": "error", "col": "b"})
-
-    status = DQEngine.validate_checks(checks)
-    assert not status.has_errors
-
-
-def test_validate_checks_with_variables_invalid_after_substitution(ws):
-    checks = [
-        {
-            "criticality": "{{ crit }}",
-            "check": {
-                "function": "is_not_null",
-                "arguments": {"column": "b"},
-            },
-        },
-    ]
-    checks = apply_variables(checks, {"crit": "not_a_valid_criticality"})
-
-    status = DQEngine.validate_checks(checks)
-    assert status.has_errors
-
-
-def test_validate_checks_without_variables_fails_on_placeholders(ws):
-    checks = [
-        {
-            "criticality": "{{ crit }}",
-            "check": {
-                "function": "is_not_null",
-                "arguments": {"column": "b"},
-            },
-        },
-    ]
-
-    status = DQEngine.validate_checks(checks)
-    assert status.has_errors
diff --git a/tests/integration/test_apply_checks_and_save_in_table.py b/tests/integration/test_apply_checks_and_save_in_table.py
index d07d738a0..937f6e22d 100644
--- a/tests/integration/test_apply_checks_and_save_in_table.py
+++ b/tests/integration/test_apply_checks_and_save_in_table.py
@@ -11,9 +11,8 @@
     WorkspaceFileChecksStorageConfig,
     TableChecksStorageConfig,
 )
-from databricks.labs.dqx.engine import DQEngine
+from databricks.labs.dqx.engine import DQEngine, DQEngineCore
 from databricks.labs.dqx.errors import InvalidConfigError
-from databricks.labs.dqx.utils import apply_variables
 from databricks.labs.dqx.rule import DQRowRule, DQDatasetRule
 from tests.integration.conftest import (
     EXTRA_PARAMS,
@@ -2258,7 +2257,7 @@ def test_apply_checks_by_metadata_and_save_in_table_loads_checks_from_table(ws,
     assert_df_equality(actual_df, expected_df, ignore_nullable=True)
 
 
-def test_apply_checks_by_metadata_and_save_in_table_with_variables(ws, spark, make_schema, make_random):
+def test_apply_checks_by_metadata_and_save_in_table_with_variables(ws, spark, make_schema, make_random, tmp_path):
     catalog_name = TEST_CATALOG
     schema = make_schema(catalog_name=catalog_name)
     input_table = f"{catalog_name}.{schema.name}.{make_random(8).lower()}"
@@ -2268,14 +2267,17 @@ def test_apply_checks_by_metadata_and_save_in_table_with_variables(ws, spark, ma
     test_df = spark.createDataFrame([[1, 2, "valid"], [None, 3, "error"], [4, None, "warn"]], test_schema)
     test_df.write.format("delta").mode("overwrite").saveAsTable(input_table)
 
-    checks = [
-        {
-            "name": "{{ col }}_is_null",
-            "criticality": "{{ crit }}",
-            "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}},
-        },
-    ]
-    checks = apply_variables(checks, {"col": "a", "crit": "error"})
+    checks_yaml = """
+        - name: "{{ col }}_is_null"
+          criticality: "{{ crit }}"
+          check:
+            function: is_not_null
+            arguments:
+              column: "{{ col }}"
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"col": "a", "crit": "error"})
 
     engine = DQEngine(ws, spark=spark, extra_params=EXTRA_PARAMS)
     engine.apply_checks_by_metadata_and_save_in_table(
diff --git a/tests/integration/test_apply_checks_variables.py b/tests/integration/test_apply_checks_variables.py
new file mode 100644
index 000000000..5598288fb
--- /dev/null
+++ b/tests/integration/test_apply_checks_variables.py
@@ -0,0 +1,303 @@
+import dataclasses
+from databricks.labs.dqx.engine import DQEngine, DQEngineCore
+from databricks.labs.dqx.config import FileChecksStorageConfig
+from tests.integration.conftest import (
+    REPORTING_COLUMNS,
+    RUN_TIME,
+    EXTRA_PARAMS,
+    RUN_ID,
+    assert_df_equality_ignore_fingerprints as assert_df_equality,
+)
+
+SCHEMA = "a: int, b: int, c: int"
+EXPECTED_SCHEMA = SCHEMA + REPORTING_COLUMNS
+
+
+def test_apply_checks_by_metadata_with_variables(ws, spark, tmp_path):
+    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
+    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+
+    checks_yaml = """
+        - criticality: error
+          check:
+            function: is_not_null_and_not_empty
+            arguments:
+              column: "{{ col }}"
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"col": "b"})
+
+    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
+
+    expected = spark.createDataFrame(
+        [
+            [1, 3, 3, None, None],
+            [
+                2,
+                None,
+                4,
+                [
+                    {
+                        "name": "b_is_null_or_empty",
+                        "message": "Column 'b' value is null or empty",
+                        "columns": ["b"],
+                        "filter": None,
+                        "function": "is_not_null_and_not_empty",
+                        "run_time": RUN_TIME,
+                        "run_id": RUN_ID,
+                        "user_metadata": {},
+                    }
+                ],
+                None,
+            ],
+            [None, 4, None, None, None],
+        ],
+        EXPECTED_SCHEMA,
+    )
+    assert_df_equality(checked, expected, ignore_nullable=True)
+
+
+def test_apply_checks_by_metadata_and_split_with_variables(ws, spark, tmp_path):
+    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
+    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+
+    checks_yaml = """
+        - criticality: error
+          name: "{{ col }}_null_check"
+          check:
+            function: is_not_null_and_not_empty
+            arguments:
+              column: "{{ col }}"
+        - criticality: warn
+          check:
+            function: sql_expression
+            arguments:
+              expression: "{{ expr_col }} > {{ threshold }}"
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(
+        str(checks_file), variables={"col": "b", "expr_col": "a", "threshold": 1}
+    )
+
+    good, bad = dq_engine.apply_checks_by_metadata_and_split(test_df, checks)
+
+    # Row [1, 3, 3]: b is not null, a = 1 so "a > 1" fails (warn) -> both good and bad
+    # Row [2, None, 4]: b is null (error), a > 1 passes -> bad only
+    # Row [None, 4, None]: b is not null, a is null so "a > 1" is NULL (not flagged) -> good only
+    assert good.count() == 2
+    assert bad.count() == 2
+
+
+def test_apply_checks_by_metadata_with_variables_name_and_filter(ws, spark, tmp_path):
+    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
+    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+
+    checks_yaml = """
+        - criticality: error
+          name: "{{ col }}_greater_than_{{ threshold }}"
+          check:
+            function: sql_expression
+            arguments:
+              expression: "{{ col }} > {{ threshold }}"
+          filter: "{{ filter_col }} IS NOT NULL"
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(
+        str(checks_file), variables={"col": "a", "threshold": 1, "filter_col": "a"}
+    )
+
+    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
+
+    # Row with a=1 should have an error since a > 1 is false
+    result_rows = checked.collect()
+    row_a1 = [r for r in result_rows if r["a"] == 1][0]
+    assert row_a1["_errors"] is not None
+    assert len(row_a1["_errors"]) == 1
+    assert row_a1["_errors"][0]["name"] == "a_greater_than_1"
+
+    # Row with a=2 should have no errors
+    row_a2 = [r for r in result_rows if r["a"] == 2][0]
+    assert row_a2["_errors"] is None
+
+    # Row with a=None should have no errors (filtered out)
+    row_null = [r for r in result_rows if r["a"] is None][0]
+    assert row_null["_errors"] is None
+
+
+def test_validate_checks_with_variables(ws, tmp_path):
+    checks_yaml = """
+        - criticality: "{{ crit }}"
+          check:
+            function: is_not_null
+            arguments:
+              column: "{{ col }}"
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"crit": "error", "col": "b"})
+
+    status = DQEngine.validate_checks(checks)
+    assert not status.has_errors
+
+
+def test_validate_checks_with_variables_invalid_after_substitution(ws, tmp_path):
+    checks_yaml = """
+        - criticality: "{{ crit }}"
+          check:
+            function: is_not_null
+            arguments:
+              column: b
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"crit": "not_a_valid_criticality"})
+
+    status = DQEngine.validate_checks(checks)
+    assert status.has_errors
+
+
+def test_validate_checks_without_variables_fails_on_placeholders(ws):
+    checks = [
+        {
+            "criticality": "{{ crit }}",
+            "check": {
+                "function": "is_not_null",
+                "arguments": {"column": "b"},
+            },
+        },
+    ]
+
+    status = DQEngine.validate_checks(checks)
+    assert status.has_errors
+
+
+def test_extra_params_variables_substitution_and_overrides(ws, spark, tmp_path):
+    # Setup data specific to this test
+    schema = "id int, name string"
+    expected_schema = schema + REPORTING_COLUMNS
+    df = spark.createDataFrame([(1, "John"), (None, "Doe")], schema)
+
+    # Define Checks with placeholders in nested structure (user_metadata)
+    # and deep inside check arguments
+    checks_yaml = """
+        - criticality: error
+          name: "id_check"
+          check:
+            function: is_not_null
+            arguments:
+              column: "{{ target_col }}"
+          user_metadata:
+            env: "{{ environment }}"
+            rule_id: "{{ nested_var }}"
+        """
+    checks_file = tmp_path / "checks_extra.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+
+    # Setup DQEngine with ExtraParams variables (Default values)
+    # Default variables: target_col=id, environment=dev, nested_var=old
+    extra_params = dataclasses.replace(
+        EXTRA_PARAMS,
+        variables={
+            "target_col": "id",
+            "environment": "dev",
+            "nested_var": "old",
+        },
+    )
+    dq_engine = DQEngine(ws, spark, extra_params=extra_params)
+
+    # Load Checks with overrides
+    # target_col: id (from ExtraParams default)
+    # environment: prod (per-call override wins)
+    # nested_var: new (per-call override wins)
+    config = FileChecksStorageConfig(location=str(checks_file))
+    checks = dq_engine.load_checks(config, variables={"environment": "prod", "nested_var": "new"})
+
+    # Verify substitution (Structural check)
+    assert checks[0]["check"]["arguments"]["column"] == "id"
+    assert checks[0]["user_metadata"]["env"] == "prod"
+    assert checks[0]["user_metadata"]["rule_id"] == "new"
+
+    # Apply checks to DataFrame (Functional check)
+    checked_df = dq_engine.apply_checks_by_metadata(df, checks)
+
+    expected = spark.createDataFrame(
+        [
+            [1, "John", None, None],
+            [
+                None,
+                "Doe",
+                [
+                    {
+                        "name": "id_check",
+                        "message": "Column 'id' value is null",
+                        "columns": ["id"],
+                        "filter": None,
+                        "function": "is_not_null",
+                        "run_time": RUN_TIME,
+                        "run_id": RUN_ID,
+                        "user_metadata": {"env": "prod", "rule_id": "new"},
+                    }
+                ],
+                None,
+            ],
+        ],
+        expected_schema,
+    )
+
+    assert_df_equality(checked_df, expected, ignore_nullable=True)
+
+
+def test_extra_params_variables_conflict_resolution(ws, spark, tmp_path):
+    # Verify that a conflict where a variable is defined in both ExtraParams and per-call
+    # results in the per-call variable taking precedence.
+
+    # 1. Setup DQEngine with ExtraParams variables
+    extra_params = dataclasses.replace(EXTRA_PARAMS, variables={"my_var": "default"})
+    dq_engine = DQEngine(ws, spark, extra_params=extra_params)
+
+    # 2. File with placeholder
+    checks_yaml = """
+        - name: "check_{{ my_var }}"
+          check:
+            function: is_not_null
+            arguments:
+              column: id
+        """
+    checks_file = tmp_path / "checks_conflict.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    config = FileChecksStorageConfig(location=str(checks_file))
+
+    # 3. Load with override
+    checks = dq_engine.load_checks(config, variables={"my_var": "override"})
+
+    # 4. Verify that "override" won
+    assert checks[0]["name"] == "check_override"
+
+
+def test_extra_params_variables_fallback_to_defaults(ws, spark, tmp_path):
+    # Verify that if a variable is NOT provided in the call, it falls back to ExtraParams.
+
+    # 1. Setup DQEngine with ExtraParams variables
+    extra_params = dataclasses.replace(EXTRA_PARAMS, variables={"my_var": "default"})
+    dq_engine = DQEngine(ws, spark, extra_params=extra_params)
+
+    # 2. File with placeholder
+    checks_yaml = """
+        - name: "check_{{ my_var }}"
+          check:
+            function: is_not_null
+            arguments:
+              column: id
+        """
+    checks_file = tmp_path / "checks_fallback.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    config = FileChecksStorageConfig(location=str(checks_file))
+
+    # 3. Load WITHOUT specific variables in the call - should use engine defaults
+    checks = dq_engine.load_checks(config)
+
+    # 4. Verify that "default" was used
+    assert checks[0]["name"] == "check_default"
diff --git a/tests/unit/test_load_checks.py b/tests/unit/test_load_checks.py
index ab4fa90a8..8b79cd14d 100644
--- a/tests/unit/test_load_checks.py
+++ b/tests/unit/test_load_checks.py
@@ -1,3 +1,4 @@
+import logging
 from unittest.mock import create_autospec
 
 import pytest
@@ -8,7 +9,7 @@
     ChecksStorageHandler,
     VolumeFileChecksStorageHandler,
 )
-from databricks.labs.dqx.config import FileChecksStorageConfig, VolumeFileChecksStorageConfig
+from databricks.labs.dqx.config import FileChecksStorageConfig, VolumeFileChecksStorageConfig, ExtraParams
 from databricks.labs.dqx.engine import DQEngine, DQEngineCore
 from databricks.labs.dqx.errors import InvalidCheckError, CheckDownloadError, InvalidConfigError
 from databricks.sdk import WorkspaceClient
@@ -182,3 +183,68 @@ def test_load_checks_variables_none():
     checks = engine.load_checks(config, variables=None)
 
     assert checks == raw_checks
+
+
+def test_load_checks_from_local_file_unresolved_placeholder(tmp_path, caplog):
+    content = """- criticality: error
+  check:
+    function: is_not_null
+    arguments:
+      column: "{{ col }}"
+"""
+    file_path = tmp_path / "checks.yml"
+    file_path.write_text(content, encoding="utf-8")
+
+    with caplog.at_level(logging.WARNING):
+        checks = DQEngineCore.load_checks_from_local_file(str(file_path), variables={"other": "value"})
+
+    assert checks[0]["check"]["arguments"]["column"] == "{{ col }}"
+    assert any("Unresolved placeholder" in msg for msg in caplog.messages)
+
+
+def test_load_checks_with_engine_default_variables():
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    raw_checks = [
+        {"criticality": "{{ crit }}", "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}}}
+    ]
+
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+    mock_handler.load.return_value = raw_checks
+
+    extra_params = ExtraParams(variables={"crit": "error", "col": "default_col"})
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory, extra_params=extra_params)
+    config = FileChecksStorageConfig(location="checks.yml")
+
+    checks = engine.load_checks(config)
+
+    assert checks == [
+        {"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "default_col"}}},
+    ]
+
+
+def test_load_checks_per_call_overrides_engine_defaults():
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    raw_checks = [
+        {"criticality": "{{ crit }}", "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}}}
+    ]
+
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+    mock_handler.load.return_value = raw_checks
+
+    extra_params = ExtraParams(variables={"crit": "warn", "col": "default_col"})
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory, extra_params=extra_params)
+    config = FileChecksStorageConfig(location="checks.yml")
+
+    checks = engine.load_checks(config, variables={"crit": "error"})
+
+    assert checks == [
+        {"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "default_col"}}},
+    ]
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 36122a6ce..b60bc1e41 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -1,5 +1,5 @@
 import logging
-from datetime import date, datetime
+from datetime import date, datetime, time
 from decimal import Decimal
 from enum import Enum
 from typing import Any
@@ -21,7 +21,7 @@
     safe_strip_file_from_path,
     missing_required_packages,
     get_file_extension,
-    apply_variables,
+    resolve_variables,
 )
 from databricks.labs.dqx.rule import normalize_bound_args
 from databricks.labs.dqx.errors import InvalidParameterError, InvalidConfigError
@@ -524,7 +524,7 @@ def test_get_file_extension_with_path_object():
     assert get_file_extension(file_path) == ".json"
 
 
-def test_apply_variables_replaces_all_string_fields():
+def test_resolve_variables_replaces_all_string_fields():
     checks = [
         {
             "criticality": "error",
@@ -537,28 +537,28 @@ def test_apply_variables_replaces_all_string_fields():
         }
     ]
     variables = {"col": "email", "filter_col": "status"}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
 
     assert result[0]["name"] == "email_not_null"
     assert result[0]["check"]["arguments"]["column"] == "email"
     assert result[0]["filter"] == "status = 'active'"
 
 
-def test_apply_variables_none_variables():
+def test_resolve_variables_none_variables():
     checks = [{"name": "{{ x }}"}]
-    result = apply_variables(checks, None)
+    result = resolve_variables(checks, None)
     assert result is checks  # same object, no copy
     assert result[0]["name"] == "{{ x }}"
 
 
-def test_apply_variables_empty_variables():
+def test_resolve_variables_empty_variables():
     checks = [{"name": "{{ x }}"}]
-    result = apply_variables(checks, {})
+    result = resolve_variables(checks, {})
     assert result is checks  # same object, no copy
     assert result[0]["name"] == "{{ x }}"
 
 
-def test_apply_variables_non_string_values_converted():
+def test_resolve_variables_non_string_values_converted():
     checks = [
         {
             "check": {
@@ -568,11 +568,11 @@ def test_apply_variables_non_string_values_converted():
         }
     ]
     variables = {"col": "age", "threshold": 18}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
     assert result[0]["check"]["arguments"]["expression"] == "age > 18"
 
 
-def test_apply_variables_does_not_mutate_original():
+def test_resolve_variables_does_not_mutate_original():
     checks = [
         {
             "name": "{{ col }}_check",
@@ -583,14 +583,14 @@ def test_apply_variables_does_not_mutate_original():
         }
     ]
     variables = {"col": "name"}
-    apply_variables(checks, variables)
+    resolve_variables(checks, variables)
 
     # Original must be unchanged
     assert checks[0]["name"] == "{{ col }}_check"
     assert checks[0]["check"]["arguments"]["column"] == "{{ col }}"
 
 
-def test_apply_variables_nested_dicts():
+def test_resolve_variables_nested_dicts():
     checks = [
         {
             "check": {
@@ -603,41 +603,41 @@ def test_apply_variables_nested_dicts():
         }
     ]
     variables = {"col": "id", "team": "data-eng"}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
 
     assert result[0]["check"]["arguments"]["expression"] == "id IS NOT NULL"
     assert result[0]["user_metadata"]["owner"] == "data-eng"
 
 
-def test_apply_variables_partial_replacement():
+def test_resolve_variables_partial_replacement():
     checks = [{"name": "{{ p1 }}_greater_than_{{ threshold }}"}]
     variables = {"p1": "column1", "threshold": 10}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
     assert result[0]["name"] == "column1_greater_than_10"
 
 
-def test_apply_variables_unresolved_placeholder_warning(caplog):
+def test_resolve_variables_unresolved_placeholder_warning(caplog):
     checks = [{"name": "{{ resolved }}_{{ unresolved }}"}]
     variables = {"resolved": "ok"}
     with caplog.at_level(logging.WARNING, logger="databricks.labs.dqx.utils"):
-        result = apply_variables(checks, variables)
+        result = resolve_variables(checks, variables)
 
     assert result[0]["name"] == "ok_{{ unresolved }}"
     assert any("Unresolved placeholder" in msg for msg in caplog.messages)
 
 
-def test_apply_variables_whitespace_tolerance():
+def test_resolve_variables_whitespace_tolerance():
     checks = [
         {"a": "{{x}}", "b": "{{ x }}", "c": "{{  x  }}"},
     ]
     variables = {"x": "val"}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
     assert result[0]["a"] == "val"
     assert result[0]["b"] == "val"
     assert result[0]["c"] == "val"
 
 
-def test_apply_variables_non_string_dict_values_untouched():
+def test_resolve_variables_non_string_dict_values_untouched():
     checks = [
         {
             "criticality": "error",
@@ -648,13 +648,13 @@ def test_apply_variables_non_string_dict_values_untouched():
         }
     ]
     variables = {"col": "status"}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
     assert result[0]["check"]["arguments"]["column"] == "status"
     assert result[0]["check"]["arguments"]["allowed"] == [1, 2, 3]
     assert result[0]["criticality"] == "error"
 
 
-def test_apply_variables_for_each_column():
+def test_resolve_variables_for_each_column():
     checks = [
         {
             "criticality": "error",
@@ -665,11 +665,11 @@ def test_apply_variables_for_each_column():
         }
     ]
     variables = {"col1": "first_name", "col2": "last_name"}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
     assert result[0]["check"]["for_each_column"] == ["first_name", "last_name"]
 
 
-def test_apply_variables_multiple_checks():
+def test_resolve_variables_multiple_checks():
     checks = [
         {
             "name": "{{ col }}_not_null",
@@ -681,78 +681,168 @@ def test_apply_variables_multiple_checks():
         },
     ]
     variables = {"col": "a", "col2": "b"}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
     assert result[0]["name"] == "a_not_null"
     assert result[0]["check"]["arguments"]["column"] == "a"
     assert result[1]["name"] == "b_not_empty"
     assert result[1]["check"]["arguments"]["column"] == "b"
 
 
-def test_apply_variables_empty_checks_list():
-    result = apply_variables([], {"col": "x"})
+def test_resolve_variables_empty_checks_list():
+    result = resolve_variables([], {"col": "x"})
     assert result == []
 
 
-def test_apply_variables_empty_string_value():
+def test_resolve_variables_empty_string_value():
     checks = [{"name": "prefix_{{ col }}_suffix"}]
-    result = apply_variables(checks, {"col": ""})
+    result = resolve_variables(checks, {"col": ""})
     assert result[0]["name"] == "prefix__suffix"
 
 
-def test_apply_variables_value_contains_braces():
+def test_resolve_variables_value_contains_braces():
     """Variable value itself contains {{ }} — should NOT be re-expanded."""
     checks = [{"expr": "{{ col }}"}]
-    result = apply_variables(checks, {"col": "{{ other }}"})
+    result = resolve_variables(checks, {"col": "{{ other }}"})
     assert result[0]["expr"] == "{{ other }}"
 
 
-def test_apply_variables_key_with_regex_special_chars():
+def test_resolve_variables_key_with_regex_special_chars():
     """Variable keys with regex metacharacters must be escaped properly."""
     checks = [{"name": "{{ col.name }}_check", "filter": "{{ col+1 }} > 0"}]
     variables = {"col.name": "revenue", "col+1": "amount"}
-    result = apply_variables(checks, variables)
+    result = resolve_variables(checks, variables)
     assert result[0]["name"] == "revenue_check"
     assert result[0]["filter"] == "amount > 0"
 
 
-def test_apply_variables_same_placeholder_repeated_in_string():
+def test_resolve_variables_same_placeholder_repeated_in_string():
     checks = [{"expr": "{{ x }} + {{ x }}"}]
-    result = apply_variables(checks, {"x": "col"})
+    result = resolve_variables(checks, {"x": "col"})
     assert result[0]["expr"] == "col + col"
 
 
-def test_apply_variables_deeply_nested():
+def test_resolve_variables_deeply_nested():
     checks = [{"a": {"b": {"c": {"d": "{{ v }}"}}}}]
-    result = apply_variables(checks, {"v": "deep"})
+    result = resolve_variables(checks, {"v": "deep"})
     assert result[0]["a"]["b"]["c"]["d"] == "deep"
 
 
-def test_apply_variables_value_with_backslash():
+def test_resolve_variables_value_with_backslash():
     """Backslashes in values should be treated literally (no regex group refs)."""
     checks = [{"path": "{{ p }}"}]
-    result = apply_variables(checks, {"p": r"C:\Users\test"})
+    result = resolve_variables(checks, {"p": r"C:\Users\test"})
     assert result[0]["path"] == r"C:\Users\test"
 
 
-def test_apply_variables_rejects_list_value():
+def test_resolve_variables_rejects_list_value():
     checks = [{"check": {"arguments": {"column": "{{ col }}"}}}]
     with pytest.raises(InvalidParameterError, match="unsupported type 'list'"):
-        apply_variables(checks, {"col": ["a", "b"]})
+        resolve_variables(checks, {"col": ["a", "b"]})
 
 
-def test_apply_variables_rejects_dict_value():
+def test_resolve_variables_rejects_dict_value():
     checks = [{"check": {"arguments": {"column": "{{ col }}"}}}]
     with pytest.raises(InvalidParameterError, match="unsupported type 'dict'"):
-        apply_variables(checks, {"col": {"nested": "value"}})
+        resolve_variables(checks, {"col": {"nested": "value"}})
 
 
-def test_apply_variables_accepts_decimal_value():
+def test_resolve_variables_accepts_decimal_value():
     checks = [{"expr": "col > {{ threshold }}"}]
-    result = apply_variables(checks, {"threshold": Decimal("3.14")})
+    result = resolve_variables(checks, {"threshold": Decimal("3.14")})
     assert result[0]["expr"] == "col > 3.14"
 
 
-def test_apply_variables_accepts_bool_value():
+def test_resolve_variables_accepts_bool_value():
     checks = [{"expr": "{{ flag }}"}]
-    result = apply_variables(checks, {"flag": True})
+    result = resolve_variables(checks, {"flag": True})
     assert result[0]["expr"] == "True"
+
+
+def test_resolve_variables_false_bool():
+    checks = [{"expr": "{{ flag }}"}]
+    result = resolve_variables(checks, {"flag": False})
+    assert result[0]["expr"] == "False"
+
+
+def test_resolve_variables_rejects_none_value():
+    checks = [{"col": "{{ col }}"}]
+    with pytest.raises(InvalidParameterError, match="unsupported type 'NoneType'"):
+        resolve_variables(checks, {"col": None})
+
+
+def test_resolve_variables_rejects_set_value():
+    checks = [{"col": "{{ col }}"}]
+    with pytest.raises(InvalidParameterError, match="unsupported type 'set'"):
+        resolve_variables(checks, {"col": {1, 2}})
+
+
+def test_resolve_variables_rejects_tuple_value():
+    checks = [{"col": "{{ col }}"}]
+    with pytest.raises(InvalidParameterError, match="unsupported type 'tuple'"):
+        resolve_variables(checks, {"col": (1, 2)})
+
+
+def test_resolve_variables_dict_keys_not_substituted():
+    checks = [{"{{ col }}": "value", "other": "{{ col }}"}]
+    result = resolve_variables(checks, {"col": "replaced"})
+    assert "{{ col }}" in result[0]
+    assert result[0]["{{ col }}"] == "value"
+    assert result[0]["other"] == "replaced"
+
+
+def test_resolve_variables_nan():
+    checks = [{"expr": "{{ val }}"}]
+    result = resolve_variables(checks, {"val": float("nan")})
+    assert result[0]["expr"] == "nan"
+
+
+def test_resolve_variables_inf():
+    checks = [{"expr": "{{ val }}"}]
+    result = resolve_variables(checks, {"val": float("inf")})
+    assert result[0]["expr"] == "inf"
+
+
+def test_resolve_variables_multiple_unresolved_warns(caplog):
+    checks = [{"expr": "{{ a }} and {{ b }}"}]
+    with caplog.at_level(logging.WARNING):
+        result = resolve_variables(checks, {"a": "x"})
+    assert result[0]["expr"] == "x and {{ b }}"
+    assert any("Unresolved placeholder" in msg for msg in caplog.messages)
+
+
+def test_resolve_variables_none_vars_no_warning(caplog):
+    checks = [{"col": "{{ x }}"}]
+    with caplog.at_level(logging.WARNING):
+        result = resolve_variables(checks, None)
+    assert result[0]["col"] == "{{ x }}"
+    assert not any("Unresolved placeholder" in msg for msg in caplog.messages)
+
+
+def test_resolve_variables_whitespace_in_key():
+    checks = [{"col": "{{col_a}}"}]
+    result = resolve_variables(checks, {"col_a": "replaced"})
+    assert result[0]["col"] == "replaced"
+
+
+def test_resolve_variables_unicode_values():
+    checks = [{"col": "{{ col }}"}]
+    result = resolve_variables(checks, {"col": "prénom"})
+    assert result[0]["col"] == "prénom"
+
+
+def test_resolve_variables_accepts_date():
+    checks = [{"expr": "date > '{{ d }}'"}]
+    result = resolve_variables(checks, {"d": date(2024, 1, 15)})
+    assert result[0]["expr"] == "date > '2024-01-15'"
+
+
+def test_resolve_variables_accepts_datetime():
+    checks = [{"expr": "ts > '{{ ts }}'"}]
+    result = resolve_variables(checks, {"ts": datetime(2024, 1, 15, 10, 30)})
+    assert "2024-01-15" in result[0]["expr"]
+
+
+def test_resolve_variables_accepts_time():
+    checks = [{"expr": "t > '{{ t }}'"}]
+    result = resolve_variables(checks, {"t": time(10, 30)})
+    assert result[0]["expr"] == "t > '10:30:00'"

From c1ff1461312f447713a6b82c01f7d88c6d4310f1 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Mon, 23 Mar 2026 20:14:54 +0100
Subject: [PATCH 07/24] add test parametrization variables, checked col is
 missing and another variable is given

---
 .../test_apply_checks_variables.py            | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tests/integration/test_apply_checks_variables.py b/tests/integration/test_apply_checks_variables.py
index 5598288fb..2ae9d2f33 100644
--- a/tests/integration/test_apply_checks_variables.py
+++ b/tests/integration/test_apply_checks_variables.py
@@ -301,3 +301,34 @@ def test_extra_params_variables_fallback_to_defaults(ws, spark, tmp_path):
 
     # 4. Verify that "default" was used
     assert checks[0]["name"] == "check_default"
+
+
+def test_apply_checks_with_missing_variable(ws, spark, tmp_path):
+    dq_engine = DQEngine(workspace_client=ws, spark=spark, extra_params=EXTRA_PARAMS)
+    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+
+    checks_yaml = """
+        - criticality: error
+          check:
+            function: is_not_null
+            arguments:
+              column: "{{ missing_col }}"
+        """
+    checks_file = tmp_path / "checks_missing.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+
+    # Load file, which will warn and leave the placeholder
+    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"different_var": "val"})
+
+    # Assert that the placeholder was left in the metadata (unresolved variable)
+    assert checks[0]["check"]["arguments"]["column"] == "{{ missing_col }}"
+
+    # Check function apply should not raise an exception, but instead skip the check and report it in the results
+    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
+
+    errors = checked.select("_errors").collect()
+    for row in errors:
+        assert row["_errors"] is not None
+        assert len(row["_errors"]) == 1
+        assert "Check evaluation skipped due to invalid check columns" in row["_errors"][0]["message"]
+        assert "{{ missing_col }}" in row["_errors"][0]["message"]

From 100c9cfadb06a75f82dca067470aa5ca572c8f68 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Tue, 31 Mar 2026 16:54:30 +0200
Subject: [PATCH 08/24] fix tests for variable parametrization of core on load
 checks, reverted accidental change on config file, removed pylint ignore

---
 src/databricks/labs/dqx/config.py             |   1 +
 src/databricks/labs/dqx/utils.py              |   4 +-
 .../test_apply_checks_and_save_in_table.py    |  58 +-----
 .../test_apply_checks_variables.py            | 171 ++++++------------
 4 files changed, 60 insertions(+), 174 deletions(-)

diff --git a/src/databricks/labs/dqx/config.py b/src/databricks/labs/dqx/config.py
index 2b139b116..7185bb5e1 100644
--- a/src/databricks/labs/dqx/config.py
+++ b/src/databricks/labs/dqx/config.py
@@ -216,6 +216,7 @@ class ExtraParams:
     user_metadata: dict[str, str] = field(default_factory=dict)
     run_time_overwrite: str | None = None
     run_id_overwrite: str | None = None
+    suppress_skipped: bool = False
     variables: dict[str, VariableValue] = field(default_factory=dict)
 
 
diff --git a/src/databricks/labs/dqx/utils.py b/src/databricks/labs/dqx/utils.py
index 9a0dc4241..141af686f 100644
--- a/src/databricks/labs/dqx/utils.py
+++ b/src/databricks/labs/dqx/utils.py
@@ -562,7 +562,7 @@ def _replace_template(text: str, variables: dict[str, str]) -> str:
     """
     if not variables:
         if _UNRESOLVED_PLACEHOLDER_PATTERN.search(text):
-            logger.warning("Unresolved placeholder found: '%s'", text)  # pylint: disable=logging-too-many-args
+            logger.warning(f"Unresolved placeholder found: '{text}'")
         return text
 
     def _resolve(match_obj: re.Match[str]) -> str:
@@ -575,7 +575,7 @@ def _resolve(match_obj: re.Match[str]) -> str:
     unresolved: list[str] = []
     output = _UNRESOLVED_PLACEHOLDER_PATTERN.sub(_resolve, text)
     if unresolved:
-        logger.warning("Unresolved placeholders found: %s", unresolved)  # pylint: disable=logging-too-many-args
+        logger.warning(f"Unresolved placeholders found: {unresolved}")
     return output
 
 
diff --git a/tests/integration/test_apply_checks_and_save_in_table.py b/tests/integration/test_apply_checks_and_save_in_table.py
index 937f6e22d..5c71a2659 100644
--- a/tests/integration/test_apply_checks_and_save_in_table.py
+++ b/tests/integration/test_apply_checks_and_save_in_table.py
@@ -11,7 +11,7 @@
     WorkspaceFileChecksStorageConfig,
     TableChecksStorageConfig,
 )
-from databricks.labs.dqx.engine import DQEngine, DQEngineCore
+from databricks.labs.dqx.engine import DQEngine
 from databricks.labs.dqx.errors import InvalidConfigError
 from databricks.labs.dqx.rule import DQRowRule, DQDatasetRule
 from tests.integration.conftest import (
@@ -2257,60 +2257,4 @@ def test_apply_checks_by_metadata_and_save_in_table_loads_checks_from_table(ws,
     assert_df_equality(actual_df, expected_df, ignore_nullable=True)
 
 
-def test_apply_checks_by_metadata_and_save_in_table_with_variables(ws, spark, make_schema, make_random, tmp_path):
-    catalog_name = TEST_CATALOG
-    schema = make_schema(catalog_name=catalog_name)
-    input_table = f"{catalog_name}.{schema.name}.{make_random(8).lower()}"
-    output_table = f"{catalog_name}.{schema.name}.{make_random(8).lower()}"
-
-    test_schema = "a: int, b: int, c: string"
-    test_df = spark.createDataFrame([[1, 2, "valid"], [None, 3, "error"], [4, None, "warn"]], test_schema)
-    test_df.write.format("delta").mode("overwrite").saveAsTable(input_table)
-
-    checks_yaml = """
-        - name: "{{ col }}_is_null"
-          criticality: "{{ crit }}"
-          check:
-            function: is_not_null
-            arguments:
-              column: "{{ col }}"
-        """
-    checks_file = tmp_path / "checks.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"col": "a", "crit": "error"})
-
-    engine = DQEngine(ws, spark=spark, extra_params=EXTRA_PARAMS)
-    engine.apply_checks_by_metadata_and_save_in_table(
-        checks=checks,
-        input_config=InputConfig(location=input_table),
-        output_config=OutputConfig(location=output_table, mode="overwrite"),
-    )
 
-    actual_df = spark.table(output_table)
-    expected_schema = test_schema + REPORTING_COLUMNS
-    expected_df = spark.createDataFrame(
-        [
-            [1, 2, "valid", None, None],
-            [
-                None,
-                3,
-                "error",
-                [
-                    {
-                        "name": "a_is_null",
-                        "message": "Column 'a' value is null",
-                        "columns": ["a"],
-                        "filter": None,
-                        "function": "is_not_null",
-                        "run_time": RUN_TIME,
-                        "run_id": RUN_ID,
-                        "user_metadata": {},
-                    }
-                ],
-                None,
-            ],
-            [4, None, "warn", None, None],
-        ],
-        schema=expected_schema,
-    )
-    assert_df_equality(actual_df, expected_df, ignore_nullable=True)
diff --git a/tests/integration/test_apply_checks_variables.py b/tests/integration/test_apply_checks_variables.py
index 2ae9d2f33..96837e372 100644
--- a/tests/integration/test_apply_checks_variables.py
+++ b/tests/integration/test_apply_checks_variables.py
@@ -3,19 +3,14 @@
 from databricks.labs.dqx.config import FileChecksStorageConfig
 from tests.integration.conftest import (
     REPORTING_COLUMNS,
-    RUN_TIME,
     EXTRA_PARAMS,
-    RUN_ID,
-    assert_df_equality_ignore_fingerprints as assert_df_equality,
 )
 
 SCHEMA = "a: int, b: int, c: int"
 EXPECTED_SCHEMA = SCHEMA + REPORTING_COLUMNS
 
 
-def test_apply_checks_by_metadata_with_variables(ws, spark, tmp_path):
-    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
-    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+def test_load_checks_by_metadata_with_variables(tmp_path):
 
     checks_yaml = """
         - criticality: error
@@ -28,39 +23,18 @@ def test_apply_checks_by_metadata_with_variables(ws, spark, tmp_path):
     checks_file.write_text(checks_yaml, encoding="utf-8")
     checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"col": "b"})
 
-    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
-
-    expected = spark.createDataFrame(
-        [
-            [1, 3, 3, None, None],
-            [
-                2,
-                None,
-                4,
-                [
-                    {
-                        "name": "b_is_null_or_empty",
-                        "message": "Column 'b' value is null or empty",
-                        "columns": ["b"],
-                        "filter": None,
-                        "function": "is_not_null_and_not_empty",
-                        "run_time": RUN_TIME,
-                        "run_id": RUN_ID,
-                        "user_metadata": {},
-                    }
-                ],
-                None,
-            ],
-            [None, 4, None, None, None],
-        ],
-        EXPECTED_SCHEMA,
-    )
-    assert_df_equality(checked, expected, ignore_nullable=True)
+    assert checks == [
+        {
+            "criticality": "error",
+            "check": {
+                "function": "is_not_null_and_not_empty",
+                "arguments": {"column": "b"},
+            },
+        }
+    ]
 
 
-def test_apply_checks_by_metadata_and_split_with_variables(ws, spark, tmp_path):
-    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
-    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+def test_load_checks_by_metadata_and_split_with_variables(tmp_path):
 
     checks_yaml = """
         - criticality: error
@@ -81,18 +55,26 @@ def test_apply_checks_by_metadata_and_split_with_variables(ws, spark, tmp_path):
         str(checks_file), variables={"col": "b", "expr_col": "a", "threshold": 1}
     )
 
-    good, bad = dq_engine.apply_checks_by_metadata_and_split(test_df, checks)
-
-    # Row [1, 3, 3]: b is not null, a > 1 passes -> good only
-    # Row [2, None, 4]: b is null (error), a > 1 passes -> bad only
-    # Row [None, 4, None]: b is not null, a is null so "a > 1" fails (warn) -> both good and bad
-    assert good.count() == 2
-    assert bad.count() == 2
+    assert checks == [
+        {
+            "criticality": "error",
+            "name": "b_null_check",
+            "check": {
+                "function": "is_not_null_and_not_empty",
+                "arguments": {"column": "b"},
+            },
+        },
+        {
+            "criticality": "warn",
+            "check": {
+                "function": "sql_expression",
+                "arguments": {"expression": "a > 1"},
+            },
+        },
+    ]
 
 
-def test_apply_checks_by_metadata_with_variables_name_and_filter(ws, spark, tmp_path):
-    dq_engine = DQEngine(workspace_client=ws, extra_params=EXTRA_PARAMS)
-    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+def test_load_checks_by_metadata_with_variables_name_and_filter(tmp_path):
 
     checks_yaml = """
         - criticality: error
@@ -109,25 +91,20 @@ def test_apply_checks_by_metadata_with_variables_name_and_filter(ws, spark, tmp_
         str(checks_file), variables={"col": "a", "threshold": 1, "filter_col": "a"}
     )
 
-    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
-
-    # Row with a=1 should have an error since a > 1 is false
-    result_rows = checked.collect()
-    row_a1 = [r for r in result_rows if r["a"] == 1][0]
-    assert row_a1["_errors"] is not None
-    assert len(row_a1["_errors"]) == 1
-    assert row_a1["_errors"][0]["name"] == "a_greater_than_1"
-
-    # Row with a=2 should have no errors
-    row_a2 = [r for r in result_rows if r["a"] == 2][0]
-    assert row_a2["_errors"] is None
-
-    # Row with a=None should have no errors (filtered out)
-    row_null = [r for r in result_rows if r["a"] is None][0]
-    assert row_null["_errors"] is None
+    assert checks == [
+        {
+            "criticality": "error",
+            "name": "a_greater_than_1",
+            "check": {
+                "function": "sql_expression",
+                "arguments": {"expression": "a > 1"},
+            },
+            "filter": "a IS NOT NULL",
+        }
+    ]
 
 
-def test_validate_checks_with_variables(ws, tmp_path):
+def test_validate_checks_with_variables(tmp_path):
     checks_yaml = """
         - criticality: "{{ crit }}"
           check:
@@ -143,7 +120,7 @@ def test_validate_checks_with_variables(ws, tmp_path):
     assert not status.has_errors
 
 
-def test_validate_checks_with_variables_invalid_after_substitution(ws, tmp_path):
+def test_validate_checks_with_variables_invalid_after_substitution(tmp_path):
     checks_yaml = """
         - criticality: "{{ crit }}"
           check:
@@ -156,10 +133,15 @@ def test_validate_checks_with_variables_invalid_after_substitution(ws, tmp_path)
     checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"crit": "not_a_valid_criticality"})
 
     status = DQEngine.validate_checks(checks)
-    assert status.has_errors
+    expected_error = (
+        "Invalid 'criticality' value: 'not_a_valid_criticality'. Expected 'warn' or 'error'. "
+        "Check details: {'criticality': 'not_a_valid_criticality', "
+        "'check': {'function': 'is_not_null', 'arguments': {'column': 'b'}}}"
+    )
+    assert status.errors[0] == expected_error
 
 
-def test_validate_checks_without_variables_fails_on_placeholders(ws):
+def test_validate_checks_without_variables_fails_on_placeholders():
     checks = [
         {
             "criticality": "{{ crit }}",
@@ -171,15 +153,15 @@ def test_validate_checks_without_variables_fails_on_placeholders(ws):
     ]
 
     status = DQEngine.validate_checks(checks)
-    assert status.has_errors
+    expected_error = (
+        "Invalid 'criticality' value: '{{ crit }}'. Expected 'warn' or 'error'. "
+        "Check details: {'criticality': '{{ crit }}', "
+        "'check': {'function': 'is_not_null', 'arguments': {'column': 'b'}}}"
+    )
+    assert status.errors[0] == expected_error
 
 
 def test_extra_params_variables_substitution_and_overrides(ws, spark, tmp_path):
-    # Setup data specific to this test
-    schema = "id int, name string"
-    expected_schema = schema + REPORTING_COLUMNS
-    df = spark.createDataFrame([(1, "John"), (None, "Doe")], schema)
-
     # Define Checks with placeholders in nested structure (user_metadata)
     # and deep inside check arguments
     checks_yaml = """
@@ -220,35 +202,6 @@ def test_extra_params_variables_substitution_and_overrides(ws, spark, tmp_path):
     assert checks[0]["user_metadata"]["env"] == "prod"
     assert checks[0]["user_metadata"]["rule_id"] == "new"
 
-    # Apply checks to DataFrame (Functional check)
-    checked_df = dq_engine.apply_checks_by_metadata(df, checks)
-
-    expected = spark.createDataFrame(
-        [
-            [1, "John", None, None],
-            [
-                None,
-                "Doe",
-                [
-                    {
-                        "name": "id_check",
-                        "message": "Column 'id' value is null",
-                        "columns": ["id"],
-                        "filter": None,
-                        "function": "is_not_null",
-                        "run_time": RUN_TIME,
-                        "run_id": RUN_ID,
-                        "user_metadata": {"env": "prod", "rule_id": "new"},
-                    }
-                ],
-                None,
-            ],
-        ],
-        expected_schema,
-    )
-
-    assert_df_equality(checked_df, expected, ignore_nullable=True)
-
 
 def test_extra_params_variables_conflict_resolution(ws, spark, tmp_path):
     # Verify that a conflict where a variable is defined in both ExtraParams and per-call
@@ -303,9 +256,7 @@ def test_extra_params_variables_fallback_to_defaults(ws, spark, tmp_path):
     assert checks[0]["name"] == "check_default"
 
 
-def test_apply_checks_with_missing_variable(ws, spark, tmp_path):
-    dq_engine = DQEngine(workspace_client=ws, spark=spark, extra_params=EXTRA_PARAMS)
-    test_df = spark.createDataFrame([[1, 3, 3], [2, None, 4], [None, 4, None]], SCHEMA)
+def test_load_checks_with_missing_variable(tmp_path):
 
     checks_yaml = """
         - criticality: error
@@ -322,13 +273,3 @@ def test_apply_checks_with_missing_variable(ws, spark, tmp_path):
 
     # Assert that the placeholder was left in the metadata (unresolved variable)
     assert checks[0]["check"]["arguments"]["column"] == "{{ missing_col }}"
-
-    # Check function apply should not raise an exception, but instead skip the check and report it in the results
-    checked = dq_engine.apply_checks_by_metadata(test_df, checks)
-
-    errors = checked.select("_errors").collect()
-    for row in errors:
-        assert row["_errors"] is not None
-        assert len(row["_errors"]) == 1
-        assert "Check evaluation skipped due to invalid check columns" in row["_errors"][0]["message"]
-        assert "{{ missing_col }}" in row["_errors"][0]["message"]

From e2e1f1e3ebe4edc24f6921ff72b7f0833eb9858f Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Tue, 31 Mar 2026 16:57:56 +0200
Subject: [PATCH 09/24] fix reverted extra space on apply checks in table file

---
 tests/integration/test_apply_checks_and_save_in_table.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/integration/test_apply_checks_and_save_in_table.py b/tests/integration/test_apply_checks_and_save_in_table.py
index 5c71a2659..73fc12e47 100644
--- a/tests/integration/test_apply_checks_and_save_in_table.py
+++ b/tests/integration/test_apply_checks_and_save_in_table.py
@@ -2254,7 +2254,4 @@ def test_apply_checks_by_metadata_and_save_in_table_loads_checks_from_table(ws,
         ],
         schema=expected_schema,
     )
-    assert_df_equality(actual_df, expected_df, ignore_nullable=True)
-
-
-
+    assert_df_equality(actual_df, expected_df, ignore_nullable=True)
\ No newline at end of file

From 4e3a81deae5652a272cc7d59da16ecb286ea74d7 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Fri, 3 Apr 2026 17:31:21 +0200
Subject: [PATCH 10/24] add docs, fix overloading, deduplication of tests,
 removed integration testing not using databricks sdk

---
 demos/dqx_demo_library.py                     |  60 +++-
 demos/dqx_quick_start_demo_library.py         |  18 ++
 .../docs/guide/quality_checks_definition.mdx  |  54 ++++
 .../dqx/docs/guide/quality_checks_storage.mdx |   7 +
 docs/dqx/docs/reference/engine.mdx            |   2 +-
 docs/dqx/docs/reference/quality_checks.mdx    |   4 +
 src/databricks/labs/dqx/utils.py              |  26 +-
 .../test_apply_checks_and_save_in_table.py    |   3 +-
 .../test_apply_checks_variables.py            | 275 ------------------
 tests/unit/test_checks_validation.py          |  59 +++-
 tests/unit/test_load_checks.py                |  89 ++++++
 11 files changed, 315 insertions(+), 282 deletions(-)
 delete mode 100644 tests/integration/test_apply_checks_variables.py

diff --git a/demos/dqx_demo_library.py b/demos/dqx_demo_library.py
index 34b250b3b..ae924e64c 100644
--- a/demos/dqx_demo_library.py
+++ b/demos/dqx_demo_library.py
@@ -1481,4 +1481,62 @@ def safe_parse_json(col):
 
 # explode warnings
 warnings_df = valid_and_quarantine_df.select(F.explode(F.col("dq_warnings")).alias("dq")).select(F.expr("dq.*"))
-display(warnings_df)
\ No newline at end of file
+display(warnings_df)
+
+# COMMAND ----------
+
+# MAGIC %md
+# MAGIC ## Advanced: Variable Substitution
+# MAGIC
+# MAGIC DQX supports variable substitution in declarative check definitions (YAML, JSON, or Delta tables).
+# MAGIC This allows you to parameterize your rules and inject values at **load time** via the `variables` parameter in `load_checks`.
+# MAGIC
+# MAGIC ### Example Usage
+# MAGIC
+# MAGIC 1. Define a rule with `{{ placeholder }}` syntax.
+# MAGIC 2. Pass a dictionary of variables when loading the rules.
+
+# COMMAND ----------
+
+from databricks.labs.dqx.config import WorkspaceFileChecksStorageConfig
+
+# Checks may contain {{ placeholder }} tokens that are filled in when the checks are loaded
+
+# Define parameterized checks
+parameterized_checks_yaml = """
+- criticality: error
+  name: "threshold_check_{{ threshold_name }}"
+  check:
+    function: is_not_greater_than
+    arguments:
+      column: "{{ target_column }}"
+      limit: "{{ max_value }}"
+"""
+
+# Save to a temporary file
+# demo_file_directory is defined at the beginning of this notebook
+temp_checks_path = os.path.join(demo_file_directory, "parameterized_checks.yml")
+with open(temp_checks_path, "w") as f:
+    f.write(parameterized_checks_yaml)
+
+dq_engine = DQEngine(WorkspaceClient())
+
+# Load checks with variable resolution
+# Resolution happens during the load process
+resolved_checks = dq_engine.load_checks(
+    config=WorkspaceFileChecksStorageConfig(location=temp_checks_path),
+    variables={
+        "threshold_name": "critical",
+        "target_column": "col1",
+        "max_value": 100
+    }
+)
+
+# The resolved checks now have the values injected
+# Note: DQEngine internally converts string numbers to their appropriate types if needed during validation or apply
+print(yaml.dump(resolved_checks))
+
+# Apply the resolved checks to a DataFrame
+data = spark.createDataFrame([[50], [150]], "col1: int")
+result_df = dq_engine.apply_checks_by_metadata(data, resolved_checks)
+display(result_df)
\ No newline at end of file
diff --git a/demos/dqx_quick_start_demo_library.py b/demos/dqx_quick_start_demo_library.py
index 6b586428e..c6901b2af 100644
--- a/demos/dqx_quick_start_demo_library.py
+++ b/demos/dqx_quick_start_demo_library.py
@@ -125,6 +125,24 @@
 print(f"Checks from YAML: {status}")
 
 # COMMAND ----------
+ 
+# MAGIC %md
+# MAGIC ### Variable Substitution
+# MAGIC
+# MAGIC You can parameterize your YAML checks using `{{ variable }}` syntax and resolve them at load time.
+# MAGIC
+# MAGIC ```python
+# MAGIC # Example: Load checks with a dynamic age limit
+# MAGIC #
+# MAGIC # from databricks.labs.dqx.config import FileChecksStorageConfig
+# MAGIC #
+# MAGIC # resolved_checks = dq_engine.load_checks(
+# MAGIC #     config=FileChecksStorageConfig(location="checks.yml"),
+# MAGIC #     variables={"max_age": 120}
+# MAGIC # )
+# MAGIC ```
+# MAGIC
+# COMMAND ----------
 
 # MAGIC %md
 # MAGIC ### Setup `DQEngine`
diff --git a/docs/dqx/docs/guide/quality_checks_definition.mdx b/docs/dqx/docs/guide/quality_checks_definition.mdx
index d5d5fa817..c2c97b133 100644
--- a/docs/dqx/docs/guide/quality_checks_definition.mdx
+++ b/docs/dqx/docs/guide/quality_checks_definition.mdx
@@ -720,6 +720,60 @@ Example checks saved in a Delta or Lakebase table (compact format — `for_each_
 
 If `run_config_name` is not provided, "default" is used. Typically, the input table or job name is used for run config name to establish a one-to-one mapping between tables or jobs and checks.
 
+## Variable Substitution
+
+DQX supports variable substitution in declarative check definitions (YAML, JSON, or Delta tables). This allows you to parameterize your quality rules and inject values at **load time** via the `variables` parameter in `load_checks`.
+
+### Syntax and Scope
+
+Placeholders are defined using the `{{ variable_name }}` syntax. Variable substitution is supported in **all string values** within the check definitions, including:
+- `name`
+- `filter`
+- `check` function arguments (`arguments`) and column names (`for_each_column`)
+- any other top-level or nested string field
+
+### Resolution
+
+Variables are resolved at **load time** when the checks are loaded from the storage backend. To resolve variables, pass a dictionary to the `variables` parameter of the `load_checks` method.
+
+<Admonition type="info" title="Note">
+Variable substitution is only available when defining checks declaratively (as dictionaries or in files/tables). It is not supported when using DQX classes (e.g., `DQRowRule`) directly.
+</Admonition>
+
+<Tabs>
+  <TabItem value="YAML" label="YAML" default>
+    ```yaml
+    - criticality: error
+      check:
+        function: is_in_range
+        arguments:
+          column: temperature
+          min_limit: "{{ min_temp }}"
+          max_limit: "{{ max_temp }}"
+      filter: "region = '{{ region }}'"
+    ```
+  </TabItem>
+  <TabItem value="Python" label="Python">
+    ```python
+    from databricks.labs.dqx.engine import DQEngine
+    from databricks.labs.dqx.config import FileChecksStorageConfig
+    from databricks.sdk import WorkspaceClient
+
+    dq_engine = DQEngine(WorkspaceClient())
+
+    # Load checks with variable resolution
+    resolved_checks = dq_engine.load_checks(
+        config=FileChecksStorageConfig(location="checks.yml"),
+        variables={
+            "min_temp": 0,
+            "max_temp": 100,
+            "region": "EMEA"
+        }
+    )
+    ```
+  </TabItem>
+</Tabs>
+
 ## Validating syntax of quality checks
 
 You can validate the syntax of checks loaded from a storage system or checks defined programmatically before applying them.
diff --git a/docs/dqx/docs/guide/quality_checks_storage.mdx b/docs/dqx/docs/guide/quality_checks_storage.mdx
index aff515c5a..ef8fe083f 100644
--- a/docs/dqx/docs/guide/quality_checks_storage.mdx
+++ b/docs/dqx/docs/guide/quality_checks_storage.mdx
@@ -180,6 +180,13 @@ If you create checks as a list of DQRule objects, you can convert them using the
     # also works for absolute and relative workspace paths if invoked from Databricks notebook or job
     checks: list[dict] = dq_engine.load_checks(config=FileChecksStorageConfig(location="checks.yml"))
 
+    # load checks from a local file with variable substitution
+    # see the "Variable Substitution" section of the quality checks definition guide for details
+    checks: list[dict] = dq_engine.load_checks(
+        FileChecksStorageConfig(location="checks.yml"),
+        variables={"threshold": 100, "column_name": "total_amount"}
+    )
+
     # load checks from arbitrary workspace location using absolute path
     checks: list[dict] = dq_engine.load_checks(config=WorkspaceFileChecksStorageConfig(location="/Shared/App1/checks.yml"))
 
diff --git a/docs/dqx/docs/reference/engine.mdx b/docs/dqx/docs/reference/engine.mdx
index 76bab212b..8866c2ce1 100644
--- a/docs/dqx/docs/reference/engine.mdx
+++ b/docs/dqx/docs/reference/engine.mdx
@@ -62,7 +62,7 @@ The following table outlines the available methods of the `DQEngine` and their f
 | `validate_checks`                              | Validates the provided quality checks to ensure they conform to the expected structure and types.                                                                                                                                               | `checks`: List of checks to validate; `custom_check_functions`: (optional) Dictionary of custom check functions that can be used; `validate_custom_check_functions`: (optional) If True, validates custom check functions (defaults to True).                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | Yes                                       |
 | `get_invalid`                                  | Retrieves records from the DataFrame that violate data quality checks (records with warnings and errors).                                                                                                                                       | `df`: Input DataFrame.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Yes                                       |
 | `get_valid`                                    | Retrieves records from the DataFrame that pass all data quality checks.                                                                                                                                                                         | `df`: Input DataFrame.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Yes                                       |
-| `load_checks`                                  | Loads quality rules (checks) from storage backend. Multiple storage backends are supported including tables, files, workspace files, or installation-managed sources inferred from run config.                                                  | `config`: Configuration for loading checks from a storage backend, e.g., `FileChecksStorageConfig` (local YAML/JSON file or workspace file), `WorkspaceFileChecksStorageConfig` (workspace file with absolute path), `VolumeFileChecksStorageConfig` (Unity Catalog Volume YAML/JSON), `TableChecksStorageConfig` (table), `InstallationChecksStorageConfig` (installation-managed backend using `checks_location` in run config).                                                                                                                                                                                                                                                                                                                                                                                                                                                  | Yes (only with `FileChecksStorageConfig`) |
+| `load_checks`                                  | Loads quality rules (checks) from storage backend. Multiple storage backends are supported including tables, files, workspace files, or installation-managed sources inferred from run config.                                                  | `config`: Configuration for loading checks from a storage backend, e.g., `FileChecksStorageConfig` (local YAML/JSON file or workspace file), `WorkspaceFileChecksStorageConfig` (workspace file with absolute path), `VolumeFileChecksStorageConfig` (Unity Catalog Volume YAML/JSON), `TableChecksStorageConfig` (table), `InstallationChecksStorageConfig` (installation-managed backend using `checks_location` in run config); `variables`: (optional) dictionary of variables for [variable substitution](/docs/guide/quality_checks_definition/#variable-substitution). | Yes (only with `FileChecksStorageConfig`) |
 | `save_checks`                                  | Saves quality rules (checks) to a storage backend. Multiple storage backends are supported including tables, files, workspace files, or installation-managed targets inferred from run config.                                                  | `checks`: List of checks defined as dictionary; `config`: Configuration for saving checks in a storage backend, e.g., `FileChecksStorageConfig` (local YAML/JSON file or workspace file), `WorkspaceFileChecksStorageConfig` (workspace file with absolute path), `VolumeFileChecksStorageConfig` (Unity Catalog Volume YAML/JSON), `TableChecksStorageConfig` (table), `InstallationChecksStorageConfig` (installation-managed backend using `checks_location` in run config).                                                                                                                                                                                                                                                                                                                                                                                                     | Yes (only with `FileChecksStorageConfig`) |
 | `save_results_in_table`                        | Saves DataFrames as tables using Unity Catalog table references or storage paths. Supports both batch and streaming writes. For streaming DataFrames, returns a StreamingQuery that can be used to monitor or wait for completion. For batch DataFrames, data is written synchronously and None is returned. | `output_df`: (optional) DataFrame containing the output data (batch or streaming); `quarantine_df`: (optional) DataFrame containing invalid data (batch or streaming); `observation`: (optional) Spark Observation tracking summary metrics; `output_config`: `OutputConfig` with location (table name or storage path), mode, format, options, and optional trigger (supports `partition_by` or `cluster_by`, only one applies;); `quarantine_config`: (optional) `OutputConfig` with location (table name or storage path), mode, format, options, and optional trigger (supports `partition_by` or `cluster_by`, only one applies;); `metrics_config`: (optional) `OutputConfig` with location for summary metrics; `rule_set_fingerprint`: (optional) SHA-256 fingerprint of the rule set used for this run, included in summary metrics when metrics_config is provided; `run_config_name`: Name of the run config to use; `install_folder`: (optional) Installation folder where DQX is installed (only required for custom folder); `assume_user`: (optional) If True, assume user installation, otherwise global.                                                                                                                                  | No                                        |
 | `save_summary_metrics`                         | Saves quality checking summary metrics to a Delta table.                                                                                                                                                                                        | `observed_metrics`: `dict[str, Any]` Collected summary metrics from Spark Observation; `metrics_config`: `OutputConfig` object with the table name, output mode, and options for the summary metrics data; `input_config`: (optional) `InputConfig` object with the table name for reading the input data; `output_config`: (optional) `OutputConfig` object with the table name for the output data (supports `partition_by` or `cluster_by`, only one applies); `quarantine_config`: (optional) `OutputConfig` object with the table name for the quarantine data (supports `partition_by` or `cluster_by`, only one applies); `checks_location`: (optional) Location where checks are stored; `rule_set_fingerprint`: (optional) SHA-256 fingerprint of the rule set used for this run.                                                                                                                                                                                                                                                                                                                                                                            | No                                        |
diff --git a/docs/dqx/docs/reference/quality_checks.mdx b/docs/dqx/docs/reference/quality_checks.mdx
index e96040994..adfc71bc1 100644
--- a/docs/dqx/docs/reference/quality_checks.mdx
+++ b/docs/dqx/docs/reference/quality_checks.mdx
@@ -18,6 +18,10 @@ All rule types, including row-level and dataset-level rules, can be defined and
 
 You can explore the implementation details of the check functions [here](https://github.com/databrickslabs/dqx/blob/v0.13.0/src/databricks/labs/dqx/check_funcs.py).
 
+<Admonition type="tip" title="Variable Substitution">
+All declarative check definitions (YAML, JSON, or Delta tables) support **variable substitution** for string-based fields using the `{{ variable_name }}` syntax. This allows for dynamic parameterization of column names, thresholds, and filters at load time. See the [User Guide](/docs/guide/quality_checks_definition/#variable-substitution) for more details.
+</Admonition>
+
 ## Row-level checks reference
 
 Row-level checks are applied to each row in a PySpark DataFrame. The quality check results are reported for individual rows in the result columns.
diff --git a/src/databricks/labs/dqx/utils.py b/src/databricks/labs/dqx/utils.py
index 141af686f..6f302d5a3 100644
--- a/src/databricks/labs/dqx/utils.py
+++ b/src/databricks/labs/dqx/utils.py
@@ -6,10 +6,11 @@
 from decimal import Decimal
 from enum import Enum
 from importlib.util import find_spec
-from typing import Any
+from typing import Any, TypeVar, overload
 from fnmatch import fnmatch
 from pathlib import Path
 
+
 from pyspark.sql import Column
 from pyspark.sql.types import StructType
 
@@ -29,6 +30,9 @@
 logger = logging.getLogger(__name__)
 
 
+T = TypeVar("T")
+
+
 COLUMN_NORMALIZE_EXPRESSION = re.compile("[^a-zA-Z0-9]+")
 COLUMN_PATTERN = re.compile(r"Column<'(.*?)(?: AS (\w+))?'>$", re.DOTALL)
 INVALID_COLUMN_NAME_PATTERN = re.compile(r"[\s,;{}\(\)\n\t=]+")
@@ -579,7 +583,23 @@ def _resolve(match_obj: re.Match[str]) -> str:
     return output
 
 
-def _substitute_variables(obj: object, variables: dict[str, str]) -> object:
+@overload
+def _substitute_variables(obj: str, variables: dict[str, str]) -> str: ...
+
+
+@overload
+def _substitute_variables(obj: list[T], variables: dict[str, str]) -> list[T]: ...
+
+
+@overload
+def _substitute_variables(obj: dict[str, T], variables: dict[str, str]) -> dict[str, T]: ...
+
+
+@overload
+def _substitute_variables(obj: T, variables: dict[str, str]) -> T: ...
+
+
+def _substitute_variables(obj: Any, variables: dict[str, str]) -> Any:
     """Recursively replace **{{ key }}** placeholders in all string values within *obj*.
 
     Traverses dicts, lists, and strings. Non-string/non-collection values are
@@ -649,7 +669,7 @@ def resolve_variables(checks: list[dict], variables: dict[str, VariableValue] |
 
     _validate_variable_types(variables)
     str_variables = {k: str(v) for k, v in variables.items()}
-    return _substitute_variables(checks, str_variables)  # type: ignore[return-value]
+    return _substitute_variables(checks, str_variables)
 
 
 def get_file_extension(file_path: str | os.PathLike) -> str:
diff --git a/tests/integration/test_apply_checks_and_save_in_table.py b/tests/integration/test_apply_checks_and_save_in_table.py
index 73fc12e47..0a2816784 100644
--- a/tests/integration/test_apply_checks_and_save_in_table.py
+++ b/tests/integration/test_apply_checks_and_save_in_table.py
@@ -2254,4 +2254,5 @@ def test_apply_checks_by_metadata_and_save_in_table_loads_checks_from_table(ws,
         ],
         schema=expected_schema,
     )
-    assert_df_equality(actual_df, expected_df, ignore_nullable=True)
\ No newline at end of file
+    assert_df_equality(actual_df, expected_df, ignore_nullable=True)
+    
\ No newline at end of file
diff --git a/tests/integration/test_apply_checks_variables.py b/tests/integration/test_apply_checks_variables.py
deleted file mode 100644
index 96837e372..000000000
--- a/tests/integration/test_apply_checks_variables.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import dataclasses
-from databricks.labs.dqx.engine import DQEngine, DQEngineCore
-from databricks.labs.dqx.config import FileChecksStorageConfig
-from tests.integration.conftest import (
-    REPORTING_COLUMNS,
-    EXTRA_PARAMS,
-)
-
-SCHEMA = "a: int, b: int, c: int"
-EXPECTED_SCHEMA = SCHEMA + REPORTING_COLUMNS
-
-
-def test_load_checks_by_metadata_with_variables(tmp_path):
-
-    checks_yaml = """
-        - criticality: error
-          check:
-            function: is_not_null_and_not_empty
-            arguments:
-              column: "{{ col }}"
-        """
-    checks_file = tmp_path / "checks.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"col": "b"})
-
-    assert checks == [
-        {
-            "criticality": "error",
-            "check": {
-                "function": "is_not_null_and_not_empty",
-                "arguments": {"column": "b"},
-            },
-        }
-    ]
-
-
-def test_load_checks_by_metadata_and_split_with_variables(tmp_path):
-
-    checks_yaml = """
-        - criticality: error
-          name: "{{ col }}_null_check"
-          check:
-            function: is_not_null_and_not_empty
-            arguments:
-              column: "{{ col }}"
-        - criticality: warn
-          check:
-            function: sql_expression
-            arguments:
-              expression: "{{ expr_col }} > {{ threshold }}"
-        """
-    checks_file = tmp_path / "checks.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-    checks = DQEngineCore.load_checks_from_local_file(
-        str(checks_file), variables={"col": "b", "expr_col": "a", "threshold": 1}
-    )
-
-    assert checks == [
-        {
-            "criticality": "error",
-            "name": "b_null_check",
-            "check": {
-                "function": "is_not_null_and_not_empty",
-                "arguments": {"column": "b"},
-            },
-        },
-        {
-            "criticality": "warn",
-            "check": {
-                "function": "sql_expression",
-                "arguments": {"expression": "a > 1"},
-            },
-        },
-    ]
-
-
-def test_load_checks_by_metadata_with_variables_name_and_filter(tmp_path):
-
-    checks_yaml = """
-        - criticality: error
-          name: "{{ col }}_greater_than_{{ threshold }}"
-          check:
-            function: sql_expression
-            arguments:
-              expression: "{{ col }} > {{ threshold }}"
-          filter: "{{ filter_col }} IS NOT NULL"
-        """
-    checks_file = tmp_path / "checks.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-    checks = DQEngineCore.load_checks_from_local_file(
-        str(checks_file), variables={"col": "a", "threshold": 1, "filter_col": "a"}
-    )
-
-    assert checks == [
-        {
-            "criticality": "error",
-            "name": "a_greater_than_1",
-            "check": {
-                "function": "sql_expression",
-                "arguments": {"expression": "a > 1"},
-            },
-            "filter": "a IS NOT NULL",
-        }
-    ]
-
-
-def test_validate_checks_with_variables(tmp_path):
-    checks_yaml = """
-        - criticality: "{{ crit }}"
-          check:
-            function: is_not_null
-            arguments:
-              column: "{{ col }}"
-        """
-    checks_file = tmp_path / "checks.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"crit": "error", "col": "b"})
-
-    status = DQEngine.validate_checks(checks)
-    assert not status.has_errors
-
-
-def test_validate_checks_with_variables_invalid_after_substitution(tmp_path):
-    checks_yaml = """
-        - criticality: "{{ crit }}"
-          check:
-            function: is_not_null
-            arguments:
-              column: b
-        """
-    checks_file = tmp_path / "checks.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"crit": "not_a_valid_criticality"})
-
-    status = DQEngine.validate_checks(checks)
-    expected_error = (
-        "Invalid 'criticality' value: 'not_a_valid_criticality'. Expected 'warn' or 'error'. "
-        "Check details: {'criticality': 'not_a_valid_criticality', "
-        "'check': {'function': 'is_not_null', 'arguments': {'column': 'b'}}}"
-    )
-    assert status.errors[0] == expected_error
-
-
-def test_validate_checks_without_variables_fails_on_placeholders():
-    checks = [
-        {
-            "criticality": "{{ crit }}",
-            "check": {
-                "function": "is_not_null",
-                "arguments": {"column": "b"},
-            },
-        },
-    ]
-
-    status = DQEngine.validate_checks(checks)
-    expected_error = (
-        "Invalid 'criticality' value: '{{ crit }}'. Expected 'warn' or 'error'. "
-        "Check details: {'criticality': '{{ crit }}', "
-        "'check': {'function': 'is_not_null', 'arguments': {'column': 'b'}}}"
-    )
-    assert status.errors[0] == expected_error
-
-
-def test_extra_params_variables_substitution_and_overrides(ws, spark, tmp_path):
-    # Define Checks with placeholders in nested structure (user_metadata)
-    # and deep inside check arguments
-    checks_yaml = """
-        - criticality: error
-          name: "id_check"
-          check:
-            function: is_not_null
-            arguments:
-              column: "{{ target_col }}"
-          user_metadata:
-            env: "{{ environment }}"
-            rule_id: "{{ nested_var }}"
-        """
-    checks_file = tmp_path / "checks_extra.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-
-    # Setup DQEngine with ExtraParams variables (Default values)
-    # Default variables: target_col=id, environment=dev, nested_var=old
-    extra_params = dataclasses.replace(
-        EXTRA_PARAMS,
-        variables={
-            "target_col": "id",
-            "environment": "dev",
-            "nested_var": "old",
-        },
-    )
-    dq_engine = DQEngine(ws, spark, extra_params=extra_params)
-
-    # Load Checks with overrides
-    # target_col: id (from ExtraParams default)
-    # environment: prod (per-call override wins)
-    # nested_var: new (per-call override wins)
-    config = FileChecksStorageConfig(location=str(checks_file))
-    checks = dq_engine.load_checks(config, variables={"environment": "prod", "nested_var": "new"})
-
-    # Verify substitution (Structural check)
-    assert checks[0]["check"]["arguments"]["column"] == "id"
-    assert checks[0]["user_metadata"]["env"] == "prod"
-    assert checks[0]["user_metadata"]["rule_id"] == "new"
-
-
-def test_extra_params_variables_conflict_resolution(ws, spark, tmp_path):
-    # Verify that a conflict where a variable is defined in both ExtraParams and per-call
-    # results in the per-call variable taking precedence.
-
-    # 1. Setup DQEngine with ExtraParams variables
-    extra_params = dataclasses.replace(EXTRA_PARAMS, variables={"my_var": "default"})
-    dq_engine = DQEngine(ws, spark, extra_params=extra_params)
-
-    # 2. File with placeholder
-    checks_yaml = """
-        - name: "check_{{ my_var }}"
-          check:
-            function: is_not_null
-            arguments:
-              column: id
-        """
-    checks_file = tmp_path / "checks_conflict.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-    config = FileChecksStorageConfig(location=str(checks_file))
-
-    # 3. Load with override
-    checks = dq_engine.load_checks(config, variables={"my_var": "override"})
-
-    # 4. Verify that "override" won
-    assert checks[0]["name"] == "check_override"
-
-
-def test_extra_params_variables_fallback_to_defaults(ws, spark, tmp_path):
-    # Verify that if a variable is NOT provided in the call, it falls back to ExtraParams.
-
-    # 1. Setup DQEngine with ExtraParams variables
-    extra_params = dataclasses.replace(EXTRA_PARAMS, variables={"my_var": "default"})
-    dq_engine = DQEngine(ws, spark, extra_params=extra_params)
-
-    # 2. File with placeholder
-    checks_yaml = """
-        - name: "check_{{ my_var }}"
-          check:
-            function: is_not_null
-            arguments:
-              column: id
-        """
-    checks_file = tmp_path / "checks_fallback.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-    config = FileChecksStorageConfig(location=str(checks_file))
-
-    # 3. Load WITHOUT specific variables in the call - should use engine defaults
-    checks = dq_engine.load_checks(config)
-
-    # 4. Verify that "default" was used
-    assert checks[0]["name"] == "check_default"
-
-
-def test_load_checks_with_missing_variable(tmp_path):
-
-    checks_yaml = """
-        - criticality: error
-          check:
-            function: is_not_null
-            arguments:
-              column: "{{ missing_col }}"
-        """
-    checks_file = tmp_path / "checks_missing.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-
-    # Load file, which will warn and leave the placeholder
-    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"different_var": "val"})
-
-    # Assert that the placeholder was left in the metadata (unresolved variable)
-    assert checks[0]["check"]["arguments"]["column"] == "{{ missing_col }}"
diff --git a/tests/unit/test_checks_validation.py b/tests/unit/test_checks_validation.py
index 936a2d814..2467b4a84 100644
--- a/tests/unit/test_checks_validation.py
+++ b/tests/unit/test_checks_validation.py
@@ -1,5 +1,5 @@
 from pyspark.sql.functions import col
-from databricks.labs.dqx.engine import DQEngine
+from databricks.labs.dqx.engine import DQEngine, DQEngineCore
 
 
 def dummy_func(column):
@@ -456,3 +456,60 @@ def test_is_in_range_float_arguments():
     ]
     status = DQEngine.validate_checks(checks)
     assert not status.has_errors
+
+
+def test_validate_checks_with_variables(tmp_path):
+    checks_yaml = """
+        - criticality: "{{ crit }}"
+          check:
+            function: is_not_null
+            arguments:
+              column: "{{ col }}"
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"crit": "error", "col": "b"})
+
+    status = DQEngine.validate_checks(checks)
+    assert not status.has_errors
+
+
+def test_validate_checks_with_variables_invalid_after_substitution(tmp_path):
+    checks_yaml = """
+        - criticality: "{{ crit }}"
+          check:
+            function: is_not_null
+            arguments:
+              column: b
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"crit": "not_a_valid_criticality"})
+
+    status = DQEngine.validate_checks(checks)
+    expected_error = (
+        "Invalid 'criticality' value: 'not_a_valid_criticality'. Expected 'warn' or 'error'. "
+        "Check details: {'criticality': 'not_a_valid_criticality', "
+        "'check': {'function': 'is_not_null', 'arguments': {'column': 'b'}}}"
+    )
+    assert status.errors[0] == expected_error
+
+
+def test_validate_checks_without_variables_fails_on_placeholders():
+    checks = [
+        {
+            "criticality": "{{ crit }}",
+            "check": {
+                "function": "is_not_null",
+                "arguments": {"column": "b"},
+            },
+        },
+    ]
+
+    status = DQEngine.validate_checks(checks)
+    expected_error = (
+        "Invalid 'criticality' value: '{{ crit }}'. Expected 'warn' or 'error'. "
+        "Check details: {'criticality': '{{ crit }}', "
+        "'check': {'function': 'is_not_null', 'arguments': {'column': 'b'}}}"
+    )
+    assert status.errors[0] == expected_error
diff --git a/tests/unit/test_load_checks.py b/tests/unit/test_load_checks.py
index 8b79cd14d..ef1e15d05 100644
--- a/tests/unit/test_load_checks.py
+++ b/tests/unit/test_load_checks.py
@@ -248,3 +248,92 @@ def test_load_checks_per_call_overrides_engine_defaults():
     assert checks == [
         {"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "default_col"}}},
     ]
+
+
+def test_load_checks_by_metadata_and_split_with_variables(tmp_path):
+
+    checks_yaml = """
+        - criticality: error
+          name: "{{ col }}_null_check"
+          check:
+            function: is_not_null_and_not_empty
+            arguments:
+              column: "{{ col }}"
+        - criticality: warn
+          check:
+            function: sql_expression
+            arguments:
+              expression: "{{ expr_col }} > {{ threshold }}"
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(
+        str(checks_file), variables={"col": "b", "expr_col": "a", "threshold": 1}
+    )
+
+    assert checks == [
+        {
+            "criticality": "error",
+            "name": "b_null_check",
+            "check": {
+                "function": "is_not_null_and_not_empty",
+                "arguments": {"column": "b"},
+            },
+        },
+        {
+            "criticality": "warn",
+            "check": {
+                "function": "sql_expression",
+                "arguments": {"expression": "a > 1"},
+            },
+        },
+    ]
+
+
+def test_load_checks_by_metadata_with_variables_name_and_filter(tmp_path):
+
+    checks_yaml = """
+        - criticality: error
+          name: "{{ col }}_greater_than_{{ threshold }}"
+          check:
+            function: sql_expression
+            arguments:
+              expression: "{{ col }} > {{ threshold }}"
+          filter: "{{ filter_col }} IS NOT NULL"
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+    checks = DQEngineCore.load_checks_from_local_file(
+        str(checks_file), variables={"col": "a", "threshold": 1, "filter_col": "a"}
+    )
+
+    assert checks == [
+        {
+            "criticality": "error",
+            "name": "a_greater_than_1",
+            "check": {
+                "function": "sql_expression",
+                "arguments": {"expression": "a > 1"},
+            },
+            "filter": "a IS NOT NULL",
+        }
+    ]
+
+
+def test_load_checks_with_missing_variable(tmp_path):
+
+    checks_yaml = """
+        - criticality: error
+          check:
+            function: is_not_null
+            arguments:
+              column: "{{ missing_col }}"
+        """
+    checks_file = tmp_path / "checks_missing.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+
+    # Load file, which will warn and leave the placeholder
+    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"different_var": "val"})
+
+    # Assert that the placeholder was left in the metadata (unresolved variable)
+    assert checks[0]["check"]["arguments"]["column"] == "{{ missing_col }}"

From 7fdd172fe45b779e4a274fd4efa3b3516c9979b7 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Sun, 5 Apr 2026 12:12:26 +0200
Subject: [PATCH 11/24] add docs for variable parametrization, fix dqx demo

---
 demos/dqx_quick_start_demo_library.py         | 18 -----
 .../docs/guide/additional_configuration.mdx   | 72 +++++++++++++++++++
 .../docs/guide/quality_checks_definition.mdx  |  6 ++
 3 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/demos/dqx_quick_start_demo_library.py b/demos/dqx_quick_start_demo_library.py
index c6901b2af..6b586428e 100644
--- a/demos/dqx_quick_start_demo_library.py
+++ b/demos/dqx_quick_start_demo_library.py
@@ -125,24 +125,6 @@
 print(f"Checks from YAML: {status}")
 
 # COMMAND ----------
- 
-# MAGIC %md
-# MAGIC ### Variable Substitution
-# MAGIC
-# MAGIC You can parameterize your YAML checks using `{{ variable }}` syntax and resolve them at load time.
-# MAGIC
-# MAGIC ```python
-# MAGIC # Example: Load checks with a dynamic age limit
-# MAGIC #
-# MAGIC # from databricks.labs.dqx.config import FileChecksStorageConfig
-# MAGIC #
-# MAGIC # resolved_checks = dq_engine.load_checks(
-# MAGIC #     config=FileChecksStorageConfig(location="checks.yml"),
-# MAGIC #     variables={"max_age": 120}
-# MAGIC # )
-# MAGIC ```
-# MAGIC
-# COMMAND ----------
 
 # MAGIC %md
 # MAGIC ### Setup `DQEngine`
diff --git a/docs/dqx/docs/guide/additional_configuration.mdx b/docs/dqx/docs/guide/additional_configuration.mdx
index 69701e6fb..fea9884f9 100644
--- a/docs/dqx/docs/guide/additional_configuration.mdx
+++ b/docs/dqx/docs/guide/additional_configuration.mdx
@@ -171,3 +171,75 @@ from pyspark.sql import functions as F
 
 skipped = checked_df.select(F.explode("_errors").alias("e")).filter(F.col("e.skipped") == True)
 ```
+
+## Defining default variables for substitution
+
+DQX allows you to define engine-level defaults for variables used in declarative check definitions (YAML, JSON, or Delta tables). These defaults are automatically applied during `load_checks` unless overridden by the per-call `variables` parameter.
+
+<Tabs>
+  <TabItem value="Python" label="Python" default>
+    ```python
+    from databricks.labs.dqx.engine import DQEngine
+    from databricks.labs.dqx.config import ExtraParams
+    from databricks.sdk import WorkspaceClient
+
+    # Initialize engine with default variables
+    dq_engine = DQEngine(
+        WorkspaceClient(),
+        extra_params=ExtraParams(
+            variables={
+                "min_temp": 0,
+                "max_temp": 50,
+                "region": "GLOBAL"
+            }
+        )
+    )
+
+    # Load checks - uses 'min_temp' and 'max_temp' from defaults,
+    # but overrides 'region' specifically for this call.
+    resolved_checks = dq_engine.load_checks(
+        config=FileChecksStorageConfig(location="checks.yml"),
+    )
+    ```
+  </TabItem>
+  <TabItem value="Workflows" label="Workflows">
+    You can set the following fields in the [configuration file](/docs/installation/#configuration-file) to define default variables for substitution when using DQX workflows:
+    ```yaml
+    extra_params:
+      variables:
+        min_temp: 0
+        max_temp: 50
+        region: GLOBAL
+    ```
+  </TabItem>
+</Tabs>
+
+## Overwriting run metadata
+
+By default, DQX automatically generates a unique `run_id` for each engine instance and uses the current timestamp as the `run_time`. You can manually overwrite these values using `ExtraParams` if you need to align DQX results with external systems or re-run checks for a specific historical point in time.
+
+<Tabs>
+  <TabItem value="Python" label="Python" default>
+    ```python
+    from databricks.labs.dqx.engine import DQEngine
+    from databricks.labs.dqx.config import ExtraParams
+    from databricks.sdk import WorkspaceClient
+
+    extra_params = ExtraParams(
+        run_id_overwrite="custom-execution-id-123",
+        run_time_overwrite="2024-01-01T12:00:00Z"
+    )
+
+    dq_engine = DQEngine(WorkspaceClient(), extra_params=extra_params)
+    ```
+  </TabItem>
+  <TabItem value="Workflows" label="Workflows">
+    You can set the following fields in the [configuration file](/docs/installation/#configuration-file) to overwrite the run metadata when using DQX workflows:
+    ```yaml
+    extra_params:
+      run_id_overwrite: custom-execution-id-123
+      run_time_overwrite: 2024-01-01T12:00:00Z
+    ```
+  </TabItem>
+</Tabs>
+
diff --git a/docs/dqx/docs/guide/quality_checks_definition.mdx b/docs/dqx/docs/guide/quality_checks_definition.mdx
index c2c97b133..83e8147eb 100644
--- a/docs/dqx/docs/guide/quality_checks_definition.mdx
+++ b/docs/dqx/docs/guide/quality_checks_definition.mdx
@@ -774,6 +774,12 @@ Variable substitution is only available when defining checks declaratively (as d
   </TabItem>
 </Tabs>
 
+## Default Variables
+
+In addition to specifying variables during the load process, you can define engine-level defaults using the `ExtraParams` class. These constants are automatically applied to all checks unless explicitly overridden.
+
+For technical details and configuration examples, see [Default Variables](/docs/guide/additional_configuration#defining-default-variables-for-substitution) in the Additional Configuration guide.
+
 ## Validating syntax of quality checks
 
 You can validate the syntax of checks loaded from a storage system or checks defined programmatically before applying them.

From a3a21a6272f6367ba31ecd3378a6d3ef86b3a35b Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Sun, 5 Apr 2026 12:27:26 +0200
Subject: [PATCH 12/24] fix tests duplication

---
 tests/unit/test_load_checks.py | 18 ------------------
 tests/unit/test_utils.py       | 13 -------------
 2 files changed, 31 deletions(-)

diff --git a/tests/unit/test_load_checks.py b/tests/unit/test_load_checks.py
index ef1e15d05..24a87e06f 100644
--- a/tests/unit/test_load_checks.py
+++ b/tests/unit/test_load_checks.py
@@ -319,21 +319,3 @@ def test_load_checks_by_metadata_with_variables_name_and_filter(tmp_path):
         }
     ]
 
-
-def test_load_checks_with_missing_variable(tmp_path):
-
-    checks_yaml = """
-        - criticality: error
-          check:
-            function: is_not_null
-            arguments:
-              column: "{{ missing_col }}"
-        """
-    checks_file = tmp_path / "checks_missing.yml"
-    checks_file.write_text(checks_yaml, encoding="utf-8")
-
-    # Load file, which will warn and leave the placeholder
-    checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"different_var": "val"})
-
-    # Assert that the placeholder was left in the metadata (unresolved variable)
-    assert checks[0]["check"]["arguments"]["column"] == "{{ missing_col }}"
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index b60bc1e41..119d7786c 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -544,13 +544,6 @@ def test_resolve_variables_replaces_all_string_fields():
     assert result[0]["filter"] == "status = 'active'"
 
 
-def test_resolve_variables_none_variables():
-    checks = [{"name": "{{ x }}"}]
-    result = resolve_variables(checks, None)
-    assert result is checks  # same object, no copy
-    assert result[0]["name"] == "{{ x }}"
-
-
 def test_resolve_variables_empty_variables():
     checks = [{"name": "{{ x }}"}]
     result = resolve_variables(checks, {})
@@ -818,12 +811,6 @@ def test_resolve_variables_none_vars_no_warning(caplog):
     assert not any("Unresolved placeholder" in msg for msg in caplog.messages)
 
 
-def test_resolve_variables_whitespace_in_key():
-    checks = [{"col": "{{col_a}}"}]
-    result = resolve_variables(checks, {"col_a": "replaced"})
-    assert result[0]["col"] == "replaced"
-
-
 def test_resolve_variables_unicode_values():
     checks = [{"col": "{{ col }}"}]
     result = resolve_variables(checks, {"col": "prénom"})

From 908873cee60137068b90d5a717b3ca72378baf1d Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Sun, 5 Apr 2026 14:32:17 +0200
Subject: [PATCH 13/24] fix tests: re-add
 test_extra_params_variables_substitution_and_overrides as unit test

---
 tests/unit/test_load_checks.py | 35 ++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tests/unit/test_load_checks.py b/tests/unit/test_load_checks.py
index 24a87e06f..d1d4d870a 100644
--- a/tests/unit/test_load_checks.py
+++ b/tests/unit/test_load_checks.py
@@ -250,6 +250,41 @@ def test_load_checks_per_call_overrides_engine_defaults():
     ]
 
 
+def test_extra_params_variables_substitution_and_overrides(tmp_path):
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    checks_yaml = """
+        - criticality: error
+          name: "id_check"
+          check:
+            function: is_not_null
+            arguments:
+              column: "{{ target_col }}"
+          user_metadata:
+            env: "{{ environment }}"
+            rule_id: "{{ nested_var }}"
+        """
+    checks_file = tmp_path / "checks_extra.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+
+    raw_checks = DQEngineCore.load_checks_from_local_file(str(checks_file))
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+    mock_handler.load.return_value = raw_checks
+
+    extra_params = ExtraParams(variables={"target_col": "id", "environment": "dev", "nested_var": "old"})
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory, extra_params=extra_params)
+    config = FileChecksStorageConfig(location=str(checks_file))
+
+    checks = engine.load_checks(config, variables={"environment": "prod", "nested_var": "new"})
+
+    assert checks[0]["check"]["arguments"]["column"] == "id"
+    assert checks[0]["user_metadata"]["env"] == "prod"
+    assert checks[0]["user_metadata"]["rule_id"] == "new"
+
+
 def test_load_checks_by_metadata_and_split_with_variables(tmp_path):
 
     checks_yaml = """

From 4446b52975ca533fdaff1419125ce3a7e60a5222 Mon Sep 17 00:00:00 2001
From: fedeflowers <fioriofederico99@gmail.com>
Date: Tue, 7 Apr 2026 00:19:07 +0200
Subject: [PATCH 14/24] add doc warning and test with empty dictionary

---
 docs/dqx/docs/guide/additional_configuration.mdx | 3 +++
 tests/unit/test_utils.py                         | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/docs/dqx/docs/guide/additional_configuration.mdx b/docs/dqx/docs/guide/additional_configuration.mdx
index fea9884f9..8ac2c8d09 100644
--- a/docs/dqx/docs/guide/additional_configuration.mdx
+++ b/docs/dqx/docs/guide/additional_configuration.mdx
@@ -203,6 +203,9 @@ DQX allows you to define engine-level defaults for variables used in declarative
     ```
   </TabItem>
   <TabItem value="Workflows" label="Workflows">
+    :::warning
+    Variable substitution is not currently supported in DQX installable workflows. Variables defined in the configuration file will be stored but not applied during workflow execution.
+    :::
     You can set the following fields in the [configuration file](/docs/installation/#configuration-file) to define default variables for substitution when using DQX workflows:
     ```yaml
     extra_params:
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 119d7786c..8b2aed714 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -810,6 +810,11 @@ def test_resolve_variables_none_vars_no_warning(caplog):
     assert result[0]["col"] == "{{ x }}"
     assert not any("Unresolved placeholder" in msg for msg in caplog.messages)
 
+    with caplog.at_level(logging.WARNING):
+        result = resolve_variables(checks, {})
+    assert result[0]["col"] == "{{ x }}"
+    assert not any("Unresolved placeholder" in msg for msg in caplog.messages)
+
 
 def test_resolve_variables_unicode_values():
     checks = [{"col": "{{ col }}"}]

From 8a9eb799062cd77b23aa23432d37eab09007a0b7 Mon Sep 17 00:00:00 2001
From: Greg Hansen <gregory.hansen@databricks.com>
Date: Tue, 7 Apr 2026 13:24:46 -0400
Subject: [PATCH 15/24] Update docs and fmt

---
 .../docs/guide/additional_configuration.mdx   | 17 +++--------
 src/databricks/labs/dqx/base.py               |  4 +--
 src/databricks/labs/dqx/engine.py             |  8 ++---
 src/databricks/labs/dqx/utils.py              | 29 ++++++++++---------
 .../test_apply_checks_and_save_in_table.py    |  1 -
 tests/unit/test_load_checks.py                |  1 -
 6 files changed, 25 insertions(+), 35 deletions(-)

diff --git a/docs/dqx/docs/guide/additional_configuration.mdx b/docs/dqx/docs/guide/additional_configuration.mdx
index 8ac2c8d09..f165e9498 100644
--- a/docs/dqx/docs/guide/additional_configuration.mdx
+++ b/docs/dqx/docs/guide/additional_configuration.mdx
@@ -202,21 +202,12 @@ DQX allows you to define engine-level defaults for variables used in declarative
     )
     ```
   </TabItem>
-  <TabItem value="Workflows" label="Workflows">
-    :::warning
-    Variable substitution is not currently supported in DQX installable workflows. Variables defined in the configuration file will be stored but not applied during workflow execution.
-    :::
-    You can set the following fields in the [configuration file](/docs/installation/#configuration-file) to define default variables for substitution when using DQX workflows:
-    ```yaml
-    extra_params:
-      variables:
-        min_temp: 0
-        max_temp: 50
-        region: GLOBAL
-    ```
-  </TabItem>
 </Tabs>
 
+<Admonition type="warning" title="Variable substitution in workflows">
+Variable substitution is not currently supported in DQX installable workflows. Variables cam be defined and stored as YAML in the configuration file but will not be applied during workflow execution.
+</Admonition> 
+
 ## Overwriting run metadata
 
 By default, DQX automatically generates a unique `run_id` for each engine instance and uses the current timestamp as the `run_time`. You can manually overwrite these values using `ExtraParams` if you need to align DQX results with external systems or re-run checks for a specific historical point in time.
diff --git a/src/databricks/labs/dqx/base.py b/src/databricks/labs/dqx/base.py
index 74d77f9e6..7a5c6a8d1 100644
--- a/src/databricks/labs/dqx/base.py
+++ b/src/databricks/labs/dqx/base.py
@@ -189,8 +189,8 @@ def load_checks_from_local_file(filepath: str, variables: dict[str, VariableValu
 
         Args:
             filepath: Path to a file containing checks definitions.
-            variables: Optional mapping of placeholder names to replacement values. Replaces **{{ key }}**
-                placeholders in all string values of the check definitions before returning.
+            variables: Optional mapping of placeholder names to replacement values. Replaces placeholders
+                in all string values of the check definitions before returning.
 
         Returns:
             List of DQ rules (checks).
diff --git a/src/databricks/labs/dqx/engine.py b/src/databricks/labs/dqx/engine.py
index 0c0d149e3..df6fc6402 100644
--- a/src/databricks/labs/dqx/engine.py
+++ b/src/databricks/labs/dqx/engine.py
@@ -349,8 +349,8 @@ def load_checks_from_local_file(filepath: str, variables: dict[str, VariableValu
 
         Args:
             filepath: Path to a file containing checks definitions.
-            variables: Optional mapping of placeholder names to replacement values. Replaces **{{ key }}**
-                placeholders in all string values of the check definitions before returning.
+            variables: Optional mapping of placeholder names to replacement values. Replaces placeholders
+                in all string values of the check definitions before returning.
 
         Returns:
             List of DQ rules.
@@ -1205,8 +1205,8 @@ def load_checks(
 
         Args:
             config: Configuration object describing the storage backend.
-            variables: Optional mapping of placeholder names to replacement values. Replaces **{{ key }}**
-                placeholders in all string values of the check definitions before returning.
+            variables: Optional mapping of placeholder names to replacement values. Replaces placeholders
+                in all string values of the check definitions before returning.
 
         Returns:
             List of DQ rules (checks) represented as dictionaries.
diff --git a/src/databricks/labs/dqx/utils.py b/src/databricks/labs/dqx/utils.py
index 6f302d5a3..c02f83cb2 100644
--- a/src/databricks/labs/dqx/utils.py
+++ b/src/databricks/labs/dqx/utils.py
@@ -635,27 +635,28 @@ def _validate_variable_types(variables: dict[str, VariableValue]) -> None:
 def resolve_variables(checks: list[dict], variables: dict[str, VariableValue] | None) -> list[dict]:
     """Resolve variable substitution in check definitions.
 
-    Replaces **{{ key }}** placeholders in all string values of *checks* with the
-    corresponding values from *variables*. The original *checks* list is never mutated.
+    Replaces placeholders in all string values of *checks* with the corresponding values
+    from *variables*.
 
-    Variable values must be scalar types (**str**, **int**, **float**, **bool**,
-    **Decimal**, **datetime.date**, **datetime.datetime**, **datetime.time**).
-    Non-string scalars are converted via **str()** — for example, **{"threshold": 10}** becomes **"10"** in
-    the substituted string. Collection types (**list**, **dict**, **set**, etc.) are
-    rejected with :class:`~databricks.labs.dqx.errors.InvalidParameterError` because
-    their **str()** representation is rarely meaningful in SQL or column expressions.
+    Variable values must be scalar types (e.g. *str*, *int*, *float*, *bool*, *Decimal*,
+    *datetime.date*, *datetime.datetime*, *datetime.time*). Non-string scalars are
+    converted to strings via *str()* in the substituted string. Collection type
+    variables (e.g. *list*, *dict*, *set*, etc.) are rejected with
+    *databricks.labs.dqx.errors.InvalidParameterError* because their string representation
+    is rarely meaningful in SQL or column expressions.
 
-    Logs a warning for any **{{ ... }}** placeholders that remain unresolved after
-    substitution (e.g. misspelled variable names).
+    Logs a warning for any placeholders that remain unresolved after substitution
+    (e.g. misspelled variable names).
 
-    **Security note:** variable values substituted into **sql_expression** checks are
-    not sanitized and are passed directly to **F.expr()**. Callers must ensure that
-    variable values come from trusted sources to prevent SQL injection.
+    Note:
+    Variable values substituted into *sql_expression* checks are not sanitized and are
+    passed directly to *F.expr()*. Callers must **ensure variable values come from trusted
+    sources** to prevent SQL injection.
 
     Args:
         checks: List of check definition dictionaries (metadata format).
         variables: Mapping of placeholder names to scalar replacement values.
-            If **None** or empty the checks are returned unchanged.
+            If *None* or empty the checks are returned unchanged.
 
     Returns:
         A new list of check dicts with placeholders resolved, or the original list
diff --git a/tests/integration/test_apply_checks_and_save_in_table.py b/tests/integration/test_apply_checks_and_save_in_table.py
index 0a2816784..3f0514b92 100644
--- a/tests/integration/test_apply_checks_and_save_in_table.py
+++ b/tests/integration/test_apply_checks_and_save_in_table.py
@@ -2255,4 +2255,3 @@ def test_apply_checks_by_metadata_and_save_in_table_loads_checks_from_table(ws,
         schema=expected_schema,
     )
     assert_df_equality(actual_df, expected_df, ignore_nullable=True)
-    
\ No newline at end of file
diff --git a/tests/unit/test_load_checks.py b/tests/unit/test_load_checks.py
index d1d4d870a..3e6c55cca 100644
--- a/tests/unit/test_load_checks.py
+++ b/tests/unit/test_load_checks.py
@@ -353,4 +353,3 @@ def test_load_checks_by_metadata_with_variables_name_and_filter(tmp_path):
             "filter": "a IS NOT NULL",
         }
     ]
-

From ffd982ab75d9e39a4fa7d9b754cd8975863e43e7 Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Tue, 14 Apr 2026 12:59:11 +0200
Subject: [PATCH 16/24] Apply suggestion from @mwojtyczka

---
 docs/dqx/docs/guide/additional_configuration.mdx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/dqx/docs/guide/additional_configuration.mdx b/docs/dqx/docs/guide/additional_configuration.mdx
index f165e9498..5d4b14645 100644
--- a/docs/dqx/docs/guide/additional_configuration.mdx
+++ b/docs/dqx/docs/guide/additional_configuration.mdx
@@ -205,7 +205,9 @@ DQX allows you to define engine-level defaults for variables used in declarative
 </Tabs>
 
 <Admonition type="warning" title="Variable substitution in workflows">
-Variable substitution is not currently supported in DQX installable workflows. Variables cam be defined and stored as YAML in the configuration file but will not be applied during workflow execution.
+Variable substitution is not currently supported in DQX installable workflows. Variables can be defined and stored as YAML in the configuration file but will not be applied during workflow execution.
+
+Variable substitution is only available when defining checks declaratively (as dictionaries or in files/tables). It is not supported when using DQX classes (e.g., `DQRowRule`) directly.
 </Admonition> 
 
 ## Overwriting run metadata

From 1eae942ff12b2a02b21aaf47a66fb3fb2190cbe0 Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Tue, 14 Apr 2026 13:01:23 +0200
Subject: [PATCH 17/24] Apply suggestions from code review

Code review feedback implementation

Co-authored-by: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
---
 docs/dqx/docs/guide/additional_configuration.mdx  | 2 +-
 docs/dqx/docs/guide/quality_checks_definition.mdx | 2 +-
 docs/dqx/docs/guide/quality_checks_storage.mdx    | 3 +--
 src/databricks/labs/dqx/engine.py                 | 2 +-
 4 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/docs/dqx/docs/guide/additional_configuration.mdx b/docs/dqx/docs/guide/additional_configuration.mdx
index 5d4b14645..12d1e6d50 100644
--- a/docs/dqx/docs/guide/additional_configuration.mdx
+++ b/docs/dqx/docs/guide/additional_configuration.mdx
@@ -198,7 +198,7 @@ DQX allows you to define engine-level defaults for variables used in declarative
     # Load checks - uses 'min_temp' and 'max_temp' from defaults,
     # but overrides 'region' specifically for this call.
     resolved_checks = dq_engine.load_checks(
-        config=FileChecksStorageConfig(location="checks.yml"),
+        config=FileChecksStorageConfig(location="checks.yml"),  variables={ "region": "EMEA" }
     )
     ```
   </TabItem>
diff --git a/docs/dqx/docs/guide/quality_checks_definition.mdx b/docs/dqx/docs/guide/quality_checks_definition.mdx
index 83e8147eb..93a4b11a4 100644
--- a/docs/dqx/docs/guide/quality_checks_definition.mdx
+++ b/docs/dqx/docs/guide/quality_checks_definition.mdx
@@ -722,7 +722,7 @@ If `run_config_name` is not provided, "default" is used. Typically, the input ta
 
 ## Variable Substitution
 
-DQX supports variable substitution in declarative check definitions (YAML, JSON, or Delta tables). This allows you to parameterize your quality rules and inject values at **load time** via the `variables` parameter in `load_checks`.
+DQX supports variable substitution in declarative check definitions (YAML, JSON, or Delta tables). This allows you to parameterize your quality rules and inject values at **load time** from engine-level defaults and/or via the `variables` parameter in `load_checks`.
 
 ### Syntax and Scope
 
diff --git a/docs/dqx/docs/guide/quality_checks_storage.mdx b/docs/dqx/docs/guide/quality_checks_storage.mdx
index ef8fe083f..a1ca120cb 100644
--- a/docs/dqx/docs/guide/quality_checks_storage.mdx
+++ b/docs/dqx/docs/guide/quality_checks_storage.mdx
@@ -181,7 +181,6 @@ If you create checks as a list of DQRule objects, you can convert them using the
     checks: list[dict] = dq_engine.load_checks(config=FileChecksStorageConfig(location="checks.yml"))
 
     # load checks from a local file with variable substitution
-    # see more on variable substitution [here](/docs/guide/quality_checks_definition/#variable-substitution)
     checks: list[dict] = dq_engine.load_checks(
         FileChecksStorageConfig(location="checks.yml"),
         variables={"threshold": 100, "column_name": "total_amount"}
@@ -215,7 +214,7 @@ If you create checks as a list of DQRule objects, you can convert them using the
 
     # validate loaded checks
     assert not dq_engine.validate_checks(checks).has_errors
-    ```
+    
   </TabItem>
   <TabItem value="Workflows" label="Workflows">
     When using the quality checker or e2e workflows to apply quality checks, they load checks from the `checks_location` field defined in the [configuration file](/docs/installation/#configuration-file).
diff --git a/src/databricks/labs/dqx/engine.py b/src/databricks/labs/dqx/engine.py
index df6fc6402..326aaaace 100644
--- a/src/databricks/labs/dqx/engine.py
+++ b/src/databricks/labs/dqx/engine.py
@@ -1217,7 +1217,7 @@ def load_checks(
         handler = self._checks_handler_factory.create(config)
         checks = handler.load(config)
         merged = self._merge_variables(variables)
-        return resolve_variables(checks=checks, variables=merged)
+        return resolve_variables(checks=checks, variables=merged_variables)
 
     def _merge_variables(self, per_call: dict[str, VariableValue] | None) -> dict[str, VariableValue] | None:
         """Merge engine-level default variables with per-call overrides.

From a30271bbdc6cabd89b8916541c819f23f2880bae Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Tue, 14 Apr 2026 13:40:19 +0200
Subject: [PATCH 18/24] added tests

---
 src/databricks/labs/dqx/engine.py |  2 +-
 src/databricks/labs/dqx/utils.py  |  5 ++-
 tests/unit/test_load_checks.py    | 66 +++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/src/databricks/labs/dqx/engine.py b/src/databricks/labs/dqx/engine.py
index 326aaaace..3db11b1c1 100644
--- a/src/databricks/labs/dqx/engine.py
+++ b/src/databricks/labs/dqx/engine.py
@@ -1216,7 +1216,7 @@ def load_checks(
         """
         handler = self._checks_handler_factory.create(config)
         checks = handler.load(config)
-        merged = self._merge_variables(variables)
+        merged_variables = self._merge_variables(variables)
         return resolve_variables(checks=checks, variables=merged_variables)
 
     def _merge_variables(self, per_call: dict[str, VariableValue] | None) -> dict[str, VariableValue] | None:
diff --git a/src/databricks/labs/dqx/utils.py b/src/databricks/labs/dqx/utils.py
index c02f83cb2..0aa57aa57 100644
--- a/src/databricks/labs/dqx/utils.py
+++ b/src/databricks/labs/dqx/utils.py
@@ -579,7 +579,10 @@ def _resolve(match_obj: re.Match[str]) -> str:
     unresolved: list[str] = []
     output = _UNRESOLVED_PLACEHOLDER_PATTERN.sub(_resolve, text)
     if unresolved:
-        logger.warning(f"Unresolved placeholders found: {unresolved}")
+        logger.warning(
+            f"Unresolved placeholders found: {unresolved}. "
+            f"They may be resolved at runtime for certain checks (e.g. sql_query)."
+        )
     return output
 
 
diff --git a/tests/unit/test_load_checks.py b/tests/unit/test_load_checks.py
index 3e6c55cca..7c05a3bb1 100644
--- a/tests/unit/test_load_checks.py
+++ b/tests/unit/test_load_checks.py
@@ -325,6 +325,72 @@ def test_load_checks_by_metadata_and_split_with_variables(tmp_path):
     ]
 
 
+def test_load_checks_sql_query_no_variables(tmp_path, caplog):
+    checks_yaml = """
+        - criticality: error
+          check:
+            function: sql_query
+            arguments:
+              query: "SELECT id, COUNT(*) > 0 AS condition FROM {{ input_view }} GROUP BY id"
+              merge_columns:
+                - id
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+
+    with caplog.at_level(logging.WARNING):
+        checks = DQEngineCore.load_checks_from_local_file(str(checks_file))
+
+    assert not any("input_view" in msg for msg in caplog.messages)
+
+    assert checks == [
+        {
+            "criticality": "error",
+            "check": {
+                "function": "sql_query",
+                "arguments": {
+                    "query": "SELECT id, COUNT(*) > 0 AS condition FROM {{ input_view }} GROUP BY id",
+                    "merge_columns": ["id"],
+                },
+            },
+        },
+    ]
+
+
+def test_load_checks_sql_query_with_variables(tmp_path, caplog):
+    checks_yaml = """
+        - criticality: "{{ crit }}"
+          name: "count_check"
+          check:
+            function: sql_query
+            arguments:
+              query: "SELECT id, COUNT(*) > 0 AS condition FROM {{ input_view }} GROUP BY id"
+              merge_columns:
+                - id
+        """
+    checks_file = tmp_path / "checks.yml"
+    checks_file.write_text(checks_yaml, encoding="utf-8")
+
+    with caplog.at_level(logging.WARNING):
+        checks = DQEngineCore.load_checks_from_local_file(str(checks_file), variables={"crit": "error"})
+
+    assert checks == [
+        {
+            "criticality": "error",
+            "name": "count_check",
+            "check": {
+                "function": "sql_query",
+                "arguments": {
+                    "query": "SELECT id, COUNT(*) > 0 AS condition FROM {{ input_view }} GROUP BY id",
+                    "merge_columns": ["id"],
+                },
+            },
+        },
+    ]
+    # {{ input_view }} is left unresolved — it is resolved at runtime by sql_query itself
+    assert any("input_view" in msg for msg in caplog.messages)
+
+
 def test_load_checks_by_metadata_with_variables_name_and_filter(tmp_path):
 
     checks_yaml = """

From 07a09682dece3195443e0bc82eaa017074d4b326 Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Tue, 14 Apr 2026 16:23:37 +0200
Subject: [PATCH 19/24] added vars resolution when saving checks and discourage
 using vars for criticality

---
 .../docs/guide/additional_configuration.mdx   | 15 +++-
 .../docs/guide/quality_checks_definition.mdx  | 37 +++++---
 docs/dqx/docs/reference/engine.mdx            |  2 +-
 src/databricks/labs/dqx/engine.py             | 18 +++-
 .../test_save_and_load_checks_from_table.py   | 53 +++++++++++
 tests/unit/test_load_checks.py                | 89 +++++++++++++++++++
 6 files changed, 198 insertions(+), 16 deletions(-)

diff --git a/docs/dqx/docs/guide/additional_configuration.mdx b/docs/dqx/docs/guide/additional_configuration.mdx
index 12d1e6d50..e078f9902 100644
--- a/docs/dqx/docs/guide/additional_configuration.mdx
+++ b/docs/dqx/docs/guide/additional_configuration.mdx
@@ -174,13 +174,13 @@ skipped = checked_df.select(F.explode("_errors").alias("e")).filter(F.col("e.ski
 
 ## Defining default variables for substitution
 
-DQX allows you to define engine-level defaults for variables used in declarative check definitions (YAML, JSON, or Delta tables). These defaults are automatically applied during `load_checks` unless overridden by the per-call `variables` parameter.
+DQX allows you to define engine-level defaults for variables used in declarative check definitions (YAML, JSON, or Delta tables). These defaults are automatically applied during `load_checks` and `save_checks` unless overridden by the per-call `variables` parameter.
 
 <Tabs>
   <TabItem value="Python" label="Python" default>
     ```python
     from databricks.labs.dqx.engine import DQEngine
-    from databricks.labs.dqx.config import ExtraParams
+    from databricks.labs.dqx.config import ExtraParams, FileChecksStorageConfig, TableChecksStorageConfig
     from databricks.sdk import WorkspaceClient
 
     # Initialize engine with default variables
@@ -198,7 +198,16 @@ DQX allows you to define engine-level defaults for variables used in declarative
     # Load checks - uses 'min_temp' and 'max_temp' from defaults,
     # but overrides 'region' specifically for this call.
     resolved_checks = dq_engine.load_checks(
-        config=FileChecksStorageConfig(location="checks.yml"),  variables={ "region": "EMEA" }
+        config=FileChecksStorageConfig(location="checks.yml"),
+        variables={"region": "EMEA"},
+    )
+
+    # Save checks - resolves variables before computing fingerprints and persisting.
+    # Uses 'min_temp' and 'max_temp' from defaults, overrides 'region' for this call.
+    dq_engine.save_checks(
+        checks=checks,
+        config=TableChecksStorageConfig(location="catalog.schema.checks_table"),
+        variables={"region": "EMEA"},
     )
     ```
   </TabItem>
diff --git a/docs/dqx/docs/guide/quality_checks_definition.mdx b/docs/dqx/docs/guide/quality_checks_definition.mdx
index 93a4b11a4..b585beb71 100644
--- a/docs/dqx/docs/guide/quality_checks_definition.mdx
+++ b/docs/dqx/docs/guide/quality_checks_definition.mdx
@@ -722,7 +722,7 @@ If `run_config_name` is not provided, "default" is used. Typically, the input ta
 
 ## Variable Substitution
 
-DQX supports variable substitution in declarative check definitions (YAML, JSON, or Delta tables). This allows you to parameterize your quality rules and inject values at **load time** from engine-level defaults and/or via the `variables` parameter in `load_checks`.
+DQX supports variable substitution in declarative check definitions (YAML, JSON, or Delta tables). This allows you to parameterize your quality rules and inject values at **load time** or **save time** from engine-level defaults and/or via the `variables` parameter in `load_checks` or `save_checks`.
 
 ### Syntax and Scope
 
@@ -732,9 +732,17 @@ Placeholders are defined using the `{{ variable_name }}` syntax. Variable substi
 - `check` function arguments (`arguments`) and column names (`for_each_column`)
 - any other top-level or nested string field
 
+<Admonition type="warning" title="Do not use variable substitution for criticality">
+The `criticality` field only accepts fixed values (`error` or `warn`). Do not use variable placeholders for `criticality` — the resolved value must still be a valid criticality, and parameterizing it defeats the purpose of having an explicit severity level in the check definition.
+</Admonition>
+
 ### Resolution
 
-Variables are resolved at **load time** when the checks are loaded from the storage backend. To resolve variables, pass a dictionary to the `variables` parameter of the `load_checks` method.
+Variables are resolved when checks are loaded or saved via the engine. To resolve variables, pass a dictionary to the `variables` parameter of `load_checks` or `save_checks`. You can choose whether to supply the variables at load time or at save time.
+
+<Admonition type="tip" title="Resolving variables at save time">
+When using `save_checks` with variables, placeholders are resolved **before** computing rule fingerprints and persisting. This ensures that stored checks and their fingerprints reflect the actual resolved check logic. Without resolving at save time, fingerprints would be computed on unresolved `{{ }}` placeholders, causing a mismatch between the fingerprints stored in the checks table and those recorded in the summary metrics and per-row detailed results tables.
+</Admonition>
 
 <Admonition type="info" title="Note">
 Variable substitution is only available when defining checks declaratively (as dictionaries or in files/tables). It is not supported when using DQX classes (e.g., `DQRowRule`) directly.
@@ -755,20 +763,29 @@ Variable substitution is only available when defining checks declaratively (as d
   </TabItem>
   <TabItem value="Python" label="Python">
     ```python
-    from databricks.labs.dqx.engine import DQEngine
-    from databricks.labs.dqx.config import FileChecksStorageConfig
+    from databricks.labs.dqx.engine import DQEngine, DQEngineCore
+    from databricks.labs.dqx.config import FileChecksStorageConfig, TableChecksStorageConfig
     from databricks.sdk import WorkspaceClient
 
     dq_engine = DQEngine(WorkspaceClient())
+    variables = {
+        "min_temp": 0,
+        "max_temp": 100,
+        "region": "EMEA"
+    }
 
     # Load checks with variable resolution
     resolved_checks = dq_engine.load_checks(
         config=FileChecksStorageConfig(location="checks.yml"),
-        variables={
-            "min_temp": 0,
-            "max_temp": 100,
-            "region": "EMEA"
-        }
+        variables=variables,
+    )
+
+    # Or resolve variables when saving checks (ensures fingerprints are consistent)
+    checks = DQEngineCore.load_checks_from_local_file("checks.yml")
+    dq_engine.save_checks(
+        checks=checks,
+        config=TableChecksStorageConfig(location="catalog.schema.checks_table"),
+        variables=variables,
     )
     ```
   </TabItem>
@@ -776,7 +793,7 @@ Variable substitution is only available when defining checks declaratively (as d
 
 ## Default Variables
 
-In addition to specifying variables during the load process, you can define engine-level defaults using the `ExtraParams` class. These constants are automatically applied to all checks unless explicitly overridden.
+In addition to specifying variables during the load or save process, you can define engine-level defaults using the `ExtraParams` class. These defaults are automatically applied to all checks unless explicitly overridden.
 
 For technical details and configuration examples, see [Default Variables](/docs/guide/additional_configuration#defining-default-variables-for-substitution) in the Additional Configuration guide.
 
diff --git a/docs/dqx/docs/reference/engine.mdx b/docs/dqx/docs/reference/engine.mdx
index 8866c2ce1..f4aced15c 100644
--- a/docs/dqx/docs/reference/engine.mdx
+++ b/docs/dqx/docs/reference/engine.mdx
@@ -63,7 +63,7 @@ The following table outlines the available methods of the `DQEngine` and their f
 | `get_invalid`                                  | Retrieves records from the DataFrame that violate data quality checks (records with warnings and errors).                                                                                                                                       | `df`: Input DataFrame.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Yes                                       |
 | `get_valid`                                    | Retrieves records from the DataFrame that pass all data quality checks.                                                                                                                                                                         | `df`: Input DataFrame.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Yes                                       |
 | `load_checks`                                  | Loads quality rules (checks) from storage backend. Multiple storage backends are supported including tables, files, workspace files, or installation-managed sources inferred from run config.                                                  | `config`: Configuration for loading checks from a storage backend, e.g., `FileChecksStorageConfig` (local YAML/JSON file or workspace file), `WorkspaceFileChecksStorageConfig` (workspace file with absolute path), `VolumeFileChecksStorageConfig` (Unity Catalog Volume YAML/JSON), `TableChecksStorageConfig` (table), `InstallationChecksStorageConfig` (installation-managed backend using `checks_location` in run config); `variables`: (optional) dictionary of variables for [variable substitution](/docs/guide/quality_checks_definition/#variable-substitution). | Yes (only with `FileChecksStorageConfig`) |
-| `save_checks`                                  | Saves quality rules (checks) to a storage backend. Multiple storage backends are supported including tables, files, workspace files, or installation-managed targets inferred from run config.                                                  | `checks`: List of checks defined as dictionary; `config`: Configuration for saving checks in a storage backend, e.g., `FileChecksStorageConfig` (local YAML/JSON file or workspace file), `WorkspaceFileChecksStorageConfig` (workspace file with absolute path), `VolumeFileChecksStorageConfig` (Unity Catalog Volume YAML/JSON), `TableChecksStorageConfig` (table), `InstallationChecksStorageConfig` (installation-managed backend using `checks_location` in run config).                                                                                                                                                                                                                                                                                                                                                                                                     | Yes (only with `FileChecksStorageConfig`) |
+| `save_checks`                                  | Saves quality rules (checks) to a storage backend. Multiple storage backends are supported including tables, files, workspace files, or installation-managed targets inferred from run config. Variables are resolved before computing fingerprints and persisting. | `checks`: List of checks defined as dictionary; `config`: Configuration for saving checks in a storage backend, e.g., `FileChecksStorageConfig` (local YAML/JSON file or workspace file), `WorkspaceFileChecksStorageConfig` (workspace file with absolute path), `VolumeFileChecksStorageConfig` (Unity Catalog Volume YAML/JSON), `TableChecksStorageConfig` (table), `InstallationChecksStorageConfig` (installation-managed backend using `checks_location` in run config); `variables`: (optional) dictionary of variables for [variable substitution](/docs/guide/quality_checks_definition/#variable-substitution). | Yes (only with `FileChecksStorageConfig`) |
 | `save_results_in_table`                        | Saves DataFrames as tables using Unity Catalog table references or storage paths. Supports both batch and streaming writes. For streaming DataFrames, returns a StreamingQuery that can be used to monitor or wait for completion. For batch DataFrames, data is written synchronously and None is returned. | `output_df`: (optional) DataFrame containing the output data (batch or streaming); `quarantine_df`: (optional) DataFrame containing invalid data (batch or streaming); `observation`: (optional) Spark Observation tracking summary metrics; `output_config`: `OutputConfig` with location (table name or storage path), mode, format, options, and optional trigger (supports `partition_by` or `cluster_by`, only one applies;); `quarantine_config`: (optional) `OutputConfig` with location (table name or storage path), mode, format, options, and optional trigger (supports `partition_by` or `cluster_by`, only one applies;); `metrics_config`: (optional) `OutputConfig` with location for summary metrics; `rule_set_fingerprint`: (optional) SHA-256 fingerprint of the rule set used for this run, included in summary metrics when metrics_config is provided; `run_config_name`: Name of the run config to use; `install_folder`: (optional) Installation folder where DQX is installed (only required for custom folder); `assume_user`: (optional) If True, assume user installation, otherwise global.                                                                                                                                  | No                                        |
 | `save_summary_metrics`                         | Saves quality checking summary metrics to a Delta table.                                                                                                                                                                                        | `observed_metrics`: `dict[str, Any]` Collected summary metrics from Spark Observation; `metrics_config`: `OutputConfig` object with the table name, output mode, and options for the summary metrics data; `input_config`: (optional) `InputConfig` object with the table name for reading the input data; `output_config`: (optional) `OutputConfig` object with the table name for the output data (supports `partition_by` or `cluster_by`, only one applies); `quarantine_config`: (optional) `OutputConfig` object with the table name for the quarantine data (supports `partition_by` or `cluster_by`, only one applies); `checks_location`: (optional) Location where checks are stored; `rule_set_fingerprint`: (optional) SHA-256 fingerprint of the rule set used for this run.                                                                                                                                                                                                                                                                                                                                                                            | No                                        |
 | `get_streaming_metrics_listener`               | Gets a streaming metrics listener for writing metrics to an output table. Only required when using streaming DataFrames.                                                                                                                        | `metrics_config`: `OutputConfig` object with the table name, output mode, and options for the summary metrics data; `input_config`: (optional) `InputConfig` object with the table name for reading the input data; `output_config`: (optional) `OutputConfig` object with the table name for the output data (supports `partition_by` or `cluster_by`, only one applies); `quarantine_config`: (optional) `OutputConfig` object with the table name for the quarantine data (supports `partition_by` or `cluster_by`, only one applies); `checks_location`: (optional) checks location; `rule_set_fingerprint`: (optional) SHA-256 fingerprint of the rule set used for this run; `target_query_id`: (optional) Query ID of the specific streaming query to monitor, if provided, metrics will be collected only for this query.                                                                                                                                                                                                                                                                     | No                                        |
diff --git a/src/databricks/labs/dqx/engine.py b/src/databricks/labs/dqx/engine.py
index 3db11b1c1..47d3eaf07 100644
--- a/src/databricks/labs/dqx/engine.py
+++ b/src/databricks/labs/dqx/engine.py
@@ -1234,7 +1234,12 @@ def _merge_variables(self, per_call: dict[str, VariableValue] | None) -> dict[st
         return {**defaults, **per_call}
 
     @telemetry_logger("engine", "save_checks")
-    def save_checks(self, checks: list[dict], config: BaseChecksStorageConfig) -> None:
+    def save_checks(
+        self,
+        checks: list[dict],
+        config: BaseChecksStorageConfig,
+        variables: dict[str, VariableValue] | None = None,
+    ) -> None:
         """Persist DQ rules (checks) to the storage backend described by *config*.
 
         The appropriate storage handler is resolved from the configuration
@@ -1250,9 +1255,16 @@ def save_checks(self, checks: list[dict], config: BaseChecksStorageConfig) -> No
         - *InstallationChecksStorageConfig* (installation directory);
         - *VolumeFileChecksStorageConfig* (Unity Catalog volume file);
 
+        Per-call *variables* are merged with engine-level defaults from
+        *ExtraParams.variables* (per-call values take precedence on conflict).
+        Variables are resolved before computing fingerprints and persisting,
+        ensuring that stored checks and their fingerprints are consistent.
+
         Args:
             checks: List of DQ rules (checks) to save (as dictionaries).
             config: Configuration object describing the storage backend and write options.
+            variables: Optional mapping of placeholder names to replacement values. Replaces placeholders
+                in all string values of the check definitions before saving.
 
         Returns:
             None
@@ -1260,8 +1272,10 @@ def save_checks(self, checks: list[dict], config: BaseChecksStorageConfig) -> No
         Raises:
             InvalidConfigError: If the configuration type is unsupported.
         """
+        merged_variables = self._merge_variables(variables)
+        resolved_checks = resolve_variables(checks=checks, variables=merged_variables)
         handler = self._checks_handler_factory.create(config)
-        handler.save(checks, config)
+        handler.save(resolved_checks, config)
 
     @telemetry_logger("engine", "save_summary_metrics")
     def save_summary_metrics(
diff --git a/tests/integration/test_save_and_load_checks_from_table.py b/tests/integration/test_save_and_load_checks_from_table.py
index 06794226f..523f249b1 100644
--- a/tests/integration/test_save_and_load_checks_from_table.py
+++ b/tests/integration/test_save_and_load_checks_from_table.py
@@ -12,6 +12,7 @@
     TableChecksStorageConfig,
     InstallationChecksStorageConfig,
     BaseChecksStorageConfig,
+    ExtraParams,
 )
 from databricks.labs.dqx.engine import DQEngine
 from databricks.labs.dqx.errors import InvalidConfigError, UnsafeSqlQueryError
@@ -677,3 +678,55 @@ def test_save_idempotency_overwrite_mode(ws, make_schema, make_random, spark):
 
     checks = engine.load_checks(config=TableChecksStorageConfig(location=table_name))
     assert checks == EXPECTED_CHECKS_FROM_TABLE_LOAD[1:], "Idempotency guard must prevent duplicate overwrite"
+
+
+def test_save_and_load_checks_from_table_with_variables(ws, make_schema, make_random, spark):
+    """Save checks with {{ }} placeholders resolved via engine-level + per-call variables, then load and apply."""
+    catalog_name = TEST_CATALOG
+    schema_name = make_schema(catalog_name=catalog_name).name
+    table_name = f"{catalog_name}.{schema_name}.{make_random(10).lower()}"
+
+    checks_with_placeholders = [
+        {
+            "criticality": "error",
+            "name": "{{ col1 }}_null_check",
+            "check": {
+                "function": "is_not_null",
+                "arguments": {"column": "{{ col1 }}"},
+            },
+        },
+        {
+            "criticality": "warn",
+            "name": "{{ col2 }}_not_empty_check",
+            "check": {
+                "function": "is_not_null_and_not_empty",
+                "arguments": {"column": "{{ col2 }}"},
+            },
+            "filter": "{{ filter_col }} IS NOT NULL",
+        },
+    ]
+
+    # Engine-level defaults; per-call override: crit "warn" -> "error"
+    extra_params = ExtraParams(variables={"crit": "warn", "col1": "a", "col2": "b", "filter_col": "a"})
+    engine = DQEngine(ws, spark, extra_params=extra_params)
+
+    config = TableChecksStorageConfig(location=table_name)
+    engine.save_checks(checks_with_placeholders, config=config, variables={"crit": "error"})
+
+    # Load — checks are already resolved, no variables needed
+    loaded = engine.load_checks(config=config)
+
+    expected = [
+        {
+            "name": "a_null_check",
+            "criticality": "error",
+            "check": {"function": "is_not_null", "arguments": {"column": "a"}},
+        },
+        {
+            "name": "b_not_empty_check",
+            "criticality": "warn",
+            "check": {"function": "is_not_null_and_not_empty", "arguments": {"column": "b"}},
+            "filter": "a IS NOT NULL",
+        },
+    ]
+    assert loaded == expected, "Variable substitution did not resolve correctly after table roundtrip."
diff --git a/tests/unit/test_load_checks.py b/tests/unit/test_load_checks.py
index 7c05a3bb1..22a5e111f 100644
--- a/tests/unit/test_load_checks.py
+++ b/tests/unit/test_load_checks.py
@@ -391,6 +391,95 @@ def test_load_checks_sql_query_with_variables(tmp_path, caplog):
     assert any("input_view" in msg for msg in caplog.messages)
 
 
+def test_save_checks_with_variables():
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    raw_checks = [
+        {"criticality": "{{ crit }}", "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}}}
+    ]
+
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory)
+    config = FileChecksStorageConfig(location="checks.yml")
+
+    engine.save_checks(raw_checks, config, variables={"crit": "error", "col": "id"})
+
+    mock_handler.save.assert_called_once_with(
+        [{"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "id"}}}],
+        config,
+    )
+
+
+def test_save_checks_variables_none():
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    raw_checks = [{"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "id"}}}]
+
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory)
+    config = FileChecksStorageConfig(location="checks.yml")
+
+    engine.save_checks(raw_checks, config, variables=None)
+
+    mock_handler.save.assert_called_once_with(raw_checks, config)
+
+
+def test_save_checks_with_engine_default_variables():
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    raw_checks = [
+        {"criticality": "{{ crit }}", "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}}}
+    ]
+
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+
+    extra_params = ExtraParams(variables={"crit": "error", "col": "default_col"})
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory, extra_params=extra_params)
+    config = FileChecksStorageConfig(location="checks.yml")
+
+    engine.save_checks(raw_checks, config)
+
+    mock_handler.save.assert_called_once_with(
+        [{"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "default_col"}}}],
+        config,
+    )
+
+
+def test_save_checks_per_call_overrides_engine_defaults():
+    ws = create_autospec(WorkspaceClient)
+    mock_spark = create_autospec(SparkSession)
+
+    raw_checks = [
+        {"criticality": "{{ crit }}", "check": {"function": "is_not_null", "arguments": {"column": "{{ col }}"}}}
+    ]
+
+    mock_factory = create_autospec(BaseChecksStorageHandlerFactory)
+    mock_handler = create_autospec(ChecksStorageHandler)
+    mock_factory.create.return_value = mock_handler
+
+    extra_params = ExtraParams(variables={"crit": "warn", "col": "default_col"})
+    engine = DQEngine(ws, spark=mock_spark, checks_handler_factory=mock_factory, extra_params=extra_params)
+    config = FileChecksStorageConfig(location="checks.yml")
+
+    engine.save_checks(raw_checks, config, variables={"crit": "error"})
+
+    mock_handler.save.assert_called_once_with(
+        [{"criticality": "error", "check": {"function": "is_not_null", "arguments": {"column": "default_col"}}}],
+        config,
+    )
+
+
 def test_load_checks_by_metadata_with_variables_name_and_filter(tmp_path):
 
     checks_yaml = """

From 55d4e1f7f1e99473c1425e4e14c3d00df203ebc0 Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Tue, 14 Apr 2026 16:26:25 +0200
Subject: [PATCH 20/24] updated tests

---
 tests/integration/test_save_and_load_checks_from_table.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_save_and_load_checks_from_table.py b/tests/integration/test_save_and_load_checks_from_table.py
index 523f249b1..d1b56b729 100644
--- a/tests/integration/test_save_and_load_checks_from_table.py
+++ b/tests/integration/test_save_and_load_checks_from_table.py
@@ -688,7 +688,7 @@ def test_save_and_load_checks_from_table_with_variables(ws, make_schema, make_ra
 
     checks_with_placeholders = [
         {
-            "criticality": "error",
+            "criticality": "{{ crit }}",
             "name": "{{ col1 }}_null_check",
             "check": {
                 "function": "is_not_null",
@@ -730,3 +730,7 @@ def test_save_and_load_checks_from_table_with_variables(ws, make_schema, make_ra
         },
     ]
     assert loaded == expected, "Variable substitution did not resolve correctly after table roundtrip."
+
+    # Verify the resolved checks are valid and can be applied end-to-end
+    assert not engine.validate_checks(loaded).has_errors
+

From 6b8687bb179dbe6a2414e145425d9b2483d98b26 Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Tue, 14 Apr 2026 16:42:34 +0200
Subject: [PATCH 21/24] fix docs

---
 .../docs/guide/quality_checks_definition.mdx  | 69 +++++++++----------
 .../dqx/docs/guide/quality_checks_storage.mdx |  2 +-
 2 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/docs/dqx/docs/guide/quality_checks_definition.mdx b/docs/dqx/docs/guide/quality_checks_definition.mdx
index b585beb71..71861dcd7 100644
--- a/docs/dqx/docs/guide/quality_checks_definition.mdx
+++ b/docs/dqx/docs/guide/quality_checks_definition.mdx
@@ -748,48 +748,45 @@ When using `save_checks` with variables, placeholders are resolved **before** co
 Variable substitution is only available when defining checks declaratively (as dictionaries or in files/tables). It is not supported when using DQX classes (e.g., `DQRowRule`) directly.
 </Admonition>
 
-<Tabs>
-  <TabItem value="YAML" label="YAML" default>
-    ```yaml
+```python
+import yaml
+from databricks.labs.dqx.engine import DQEngine
+from databricks.labs.dqx.config import FileChecksStorageConfig, TableChecksStorageConfig
+from databricks.sdk import WorkspaceClient
+
+dq_engine = DQEngine(WorkspaceClient())
+
+# Define checks with variable placeholders
+checks = yaml.safe_load("""
     - criticality: error
       check:
         function: is_in_range
         arguments:
           column: temperature
-          min_limit: {{ min_temp }}
-          max_limit: {{ max_temp }}
+          min_limit: "{{ min_temp }}"
+          max_limit: "{{ max_temp }}"
       filter: "region = '{{ region }}'"
-    ```
-  </TabItem>
-  <TabItem value="Python" label="Python">
-    ```python
-    from databricks.labs.dqx.engine import DQEngine, DQEngineCore
-    from databricks.labs.dqx.config import FileChecksStorageConfig, TableChecksStorageConfig
-    from databricks.sdk import WorkspaceClient
-
-    dq_engine = DQEngine(WorkspaceClient())
-    variables = {
-        "min_temp": 0,
-        "max_temp": 100,
-        "region": "EMEA"
-    }
-
-    # Load checks with variable resolution
-    resolved_checks = dq_engine.load_checks(
-        config=FileChecksStorageConfig(location="checks.yml"),
-        variables=variables,
-    )
-
-    # Or resolve variables when saving checks (ensures fingerprints are consistent)
-    checks = DQEngineCore.load_checks_from_local_file("checks.yml")
-    dq_engine.save_checks(
-        checks=checks,
-        config=TableChecksStorageConfig(location="catalog.schema.checks_table"),
-        variables=variables,
-    )
-    ```
-  </TabItem>
-</Tabs>
+""")
+
+variables = {
+    "min_temp": 0,
+    "max_temp": 100,
+    "region": "EMEA",
+}
+
+# Load checks from file with variable resolution
+resolved_checks = dq_engine.load_checks(
+    config=FileChecksStorageConfig(location="checks.yml"),
+    variables=variables,
+)
+
+# Or resolve variables when saving checks (ensures fingerprints are consistent)
+dq_engine.save_checks(
+    checks=checks,
+    config=TableChecksStorageConfig(location="catalog.schema.checks_table"),
+    variables=variables,
+)
+```
 
 ## Default Variables
 
diff --git a/docs/dqx/docs/guide/quality_checks_storage.mdx b/docs/dqx/docs/guide/quality_checks_storage.mdx
index a1ca120cb..eea90da0e 100644
--- a/docs/dqx/docs/guide/quality_checks_storage.mdx
+++ b/docs/dqx/docs/guide/quality_checks_storage.mdx
@@ -214,7 +214,7 @@ If you create checks as a list of DQRule objects, you can convert them using the
 
     # validate loaded checks
     assert not dq_engine.validate_checks(checks).has_errors
-    
+    ```
   </TabItem>
   <TabItem value="Workflows" label="Workflows">
     When using the quality checker or e2e workflows to apply quality checks, they load checks from the `checks_location` field defined in the [configuration file](/docs/installation/#configuration-file).

From aa4883cc3188beba943d081342c12e97bd86b0c8 Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Tue, 14 Apr 2026 17:22:41 +0200
Subject: [PATCH 22/24] fixed ci

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b5e1b2c17..7dd161dfd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -149,7 +149,7 @@ yq = [
 [tool.uv]
 required-version = "~=0.11.0"
 exclude-newer = "7 days"
-exclude-newer-package = { "databricks-sdk" = false, "databricks-connect" = false }
+exclude-newer-package = { "databricks-sdk" = false, "databricks-connect" = false, "setuptools" = false }
 
 [tool.pydoc-markdown]
 loaders = [

From b6fdb8cb010bfa211ba7353d7c33a638142d3910 Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Tue, 14 Apr 2026 17:29:53 +0200
Subject: [PATCH 23/24] fix CI

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7dd161dfd..e1557ed34 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -149,7 +149,7 @@ yq = [
 [tool.uv]
 required-version = "~=0.11.0"
 exclude-newer = "7 days"
-exclude-newer-package = { "databricks-sdk" = false, "databricks-connect" = false, "setuptools" = false }
+exclude-newer-package = { "databricks-sdk" = false, "databricks-connect" = false, "setuptools" = false, "hatchling" = false, "hatch-fancy-pypi-readme" = false }
 
 [tool.pydoc-markdown]
 loaders = [

From 68112581ed7f7a0ea31dd7febaecc6cacb40dfb2 Mon Sep 17 00:00:00 2001
From: Marcin Wojtyczka <marcin.wojtyczka@databricks.com>
Date: Wed, 22 Apr 2026 16:38:05 +0200
Subject: [PATCH 24/24] fmt

---
 tests/integration/test_save_and_load_checks_from_table.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integration/test_save_and_load_checks_from_table.py b/tests/integration/test_save_and_load_checks_from_table.py
index 257019cae..a506902fc 100644
--- a/tests/integration/test_save_and_load_checks_from_table.py
+++ b/tests/integration/test_save_and_load_checks_from_table.py
@@ -733,4 +733,3 @@ def test_save_and_load_checks_from_table_with_variables(ws, make_schema, make_ra
 
     # Verify the resolved checks are valid and can be applied end-to-end
     assert not engine.validate_checks(loaded).has_errors
-