Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions docs/dqx/docs/reference/quality_checks.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -4080,6 +4080,85 @@ Using DQX classes:
When using dataset-level checks, the top-level `filter` condition is pushed down as `row_filter` to the check function and applied before aggregation, ensuring that the check operates only on the relevant subset of rows rather than on the aggregated results.
</Admonition>

## Customizing check messages

Users can override the default failure message of any `DQRule` by specifying a custom message expression. Set `message_expr` to either a Spark SQL expression string or a Spark `Column` expression that returns a string-valued message when a check fails.

<Admonition type="tip" title="Null-safe dynamic messages">
When your expression references a column, wrap the column with `coalesce` to avoid null messages: in Spark SQL, `concat(..., null)` returns `null`. Also note that a SQL expression string is parsed, not treated as a literal — a static message must be wrapped in SQL quotes (e.g. `"'Email must not be null'"`), otherwise each word is interpreted as a column reference and the check fails at evaluation time.
</Admonition>

<Tabs>
<TabItem value="Python" label="Python" default>
```python
import pyspark.sql.functions as F
from databricks.labs.dqx import check_funcs
from databricks.labs.dqx.rule import DQRowRule

# static message: "Email must not be null"
checks = [
DQRowRule(
name="email_not_null",
criticality="error",
check_func=check_funcs.is_not_null,
column="email",
message_expr="'Email must not be null'",
)
]

# dynamic message using a SQL expression string: "age_positive: age <value> is not valid"
checks = [
DQRowRule(
name="age_positive",
criticality="error",
check_func=check_funcs.is_not_less_than,
column="age",
check_func_kwargs={"limit": 0},
message_expr="concat('age_positive: age ', coalesce(cast(age as string), 'null'), ' is not valid')",
)
]

# dynamic message using a Spark Column expression: "age_positive: age <value> is not valid"
checks = [
DQRowRule(
name="age_positive",
criticality="error",
check_func=check_funcs.is_not_less_than,
column="age",
check_func_kwargs={"limit": 0},
message_expr=F.concat(
F.lit("age_positive: age "),
F.coalesce(F.col("age").cast("string"), F.lit("null")),
F.lit(" is not valid"),
),
)
]
Comment thread
mwojtyczka marked this conversation as resolved.
```
</TabItem>
<TabItem value="YAML" label="YAML">
```yaml
# static message: "Email must not be null"
- name: email_not_null
criticality: error
message_expr: "'Email must not be null'"
check:
function: is_not_null
arguments:
column: email

# dynamic message using a SQL expression string: "age_positive: age <value> is not valid"
- name: age_positive
criticality: error
message_expr: "concat('age_positive: age ', coalesce(cast(age as string), 'null'), ' is not valid')"
check:
function: is_not_less_than
arguments:
column: age
limit: 0
```
</TabItem>
</Tabs>

## Converting checks between formats

In DQX, checks can be defined either as Python classes or YAML declarations. When using YAML, the files are first parsed into dictionaries and then transformed into DQX class instances under the hood. Since both formats share the same internal structure, they are interchangeable and can be safely converted between one another.
Expand Down
4 changes: 4 additions & 0 deletions src/databricks/labs/dqx/checks_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def deserialize(self, checks: list[dict]) -> list[DQRule]:
criticality = check_def.get("criticality", "error")
filter_str = check_def.get("filter")
user_metadata = check_def.get("user_metadata")
message_expr = check_def.get("message_expr")

# Exclude `column` and `columns` from check_func_kwargs
# as these are always included in the check function call
Expand All @@ -282,6 +283,7 @@ def deserialize(self, checks: list[dict]) -> list[DQRule]:
filter=filter_str,
check_func_kwargs=check_func_kwargs,
user_metadata=user_metadata,
message_expr=message_expr,
).get_rules()
else:
rule_type = CHECK_FUNC_REGISTRY.get(func_name)
Expand All @@ -296,6 +298,7 @@ def deserialize(self, checks: list[dict]) -> list[DQRule]:
criticality=criticality,
filter=filter_str,
user_metadata=user_metadata,
message_expr=message_expr,
)
)
else: # default to row-level rule
Expand All @@ -309,6 +312,7 @@ def deserialize(self, checks: list[dict]) -> list[DQRule]:
criticality=criticality,
filter=filter_str,
user_metadata=user_metadata,
message_expr=message_expr,
)
)

Expand Down
26 changes: 25 additions & 1 deletion src/databricks/labs/dqx/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,11 @@ def _build_result_struct(self, condition: Column, skipped: bool = False) -> Colu
# or use literal run time if explicitly overridden
run_time_expr = F.current_timestamp() if self.run_time_overwrite is None else F.lit(self.run_time_overwrite)

message_col = self._build_message_col(condition)

return F.struct(
F.lit(self.check.name).alias("name"),
condition.alias("message"),
message_col.alias("message"),
self.check.columns_as_string_expr.alias("columns"),
F.lit(self.check.filter or None).cast("string").alias("filter"),
F.lit(self.check.check_func.__name__).alias("function"),
Expand All @@ -167,6 +169,28 @@ def _build_result_struct(self, condition: Column, skipped: bool = False) -> Colu
F.lit(skipped or None).alias("skipped"),
Comment thread
mwojtyczka marked this conversation as resolved.
).cast(dq_result_item_schema)

def _build_message_col(self, condition: Column) -> Column:
    """
    Build the message column for the check result struct.

    Uses the user-supplied ``message_expr`` from the rule definition when present,
    otherwise falls back to the default condition message produced by evaluating the
    check function. The expression is evaluated as-is — DQX does not substitute
    placeholders. Accepts either a Spark SQL expression string or a Spark Column.

    Args:
        condition: Default DQX condition message returned by evaluating the DQX check function.

    Returns:
        The custom DQX condition message if ``message_expr`` is set on the rule, otherwise the
        default DQX condition message.
    """
    if self.check.message_expr is None:
        return condition

    # Dispatch on `str`, not on `Column`: under Spark Connect the Column class differs
    # from the classic pyspark Column, so an isinstance(Column) check would mis-route a
    # Connect Column into F.expr and fail. A string is parsed as a SQL expression; any
    # other value is assumed to already be a Column-like expression.
    custom_message = (
        F.expr(self.check.message_expr) if isinstance(self.check.message_expr, str) else self.check.message_expr
    )
    # Emit the custom message only on failing rows; passing rows get a typed NULL so
    # downstream consumers can rely on message presence signalling a violation.
    return F.when(condition.isNotNull(), custom_message).otherwise(F.lit(None).cast("string"))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No length cap on rendered message.

A pathological expression like concat(repeat('x', 100000)) produces a 100k-char message embedded in every result row's struct — bloating Delta files and breaking downstream consumers that assume short messages. Other parts of DQX cap at 500 chars (_LLM_FIELD_MAX_LEN in the anomaly explainer).

Consider wrapping with substring(message, 1, MAX_MESSAGE_LEN) here, or applying the cap as a Spark expression on custom_message before the F.when.


def _get_invalid_cols_message(self) -> str:
"""
Returns invalid columns message containing info about invalid columns to check should be applied to or filter.
Expand Down
13 changes: 13 additions & 0 deletions src/databricks/labs/dqx/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ class DQRule(abc.ABC, DQRuleTypeMixin, SingleColumnMixin, MultipleColumnsMixin):
* *check_func_args* (optional) - Positional arguments for the check function (excluding *column*).
* *check_func_kwargs* (optional) - Keyword arguments for the check function (excluding *column*).
* *user_metadata* (optional) - User-defined key-value pairs added to metadata generated by the check.
* *message_expr* (optional) - User-defined expression used as the check failure message. Accepts either
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No placeholder support for rule_name / check_func_name / column_value — document the limitation.

The PR description lists these as expected context, but the implementation evaluates the expression as-is. Users who want a message like "<rule_name> failed for value <column_value>" have to hand-type the rule name twice and inline the column reference (and the column reference doesn't generalise across DQForEachColRule instances).

This will be a frequent feature request as a follow-up. Worth either:

  • Documenting the limitation explicitly in the docstring ("placeholder substitution is not supported; reference column names directly via Spark SQL").
  • Considering whether to add {rule_name}, {column_value} placeholder substitution in this PR or as a tracked follow-up. If you choose to defer, link to a follow-up issue here so users searching the codebase find it.

a Spark SQL expression string or a Spark *Column* expression. The expression is evaluated as-is.
Any column references, casts, or rule-identifying literals must be supplied directly by the caller
(e.g., ``F.concat(F.lit('age_positive: value '), F.col('age').cast('string'))`` or
``"concat('age_positive: value ', cast(age as string))"``).
"""

check_func: Callable
Expand All @@ -176,6 +181,7 @@ class DQRule(abc.ABC, DQRuleTypeMixin, SingleColumnMixin, MultipleColumnsMixin):
check_func_args: list[Any] = field(default_factory=list)
check_func_kwargs: dict[str, Any] = field(default_factory=dict)
user_metadata: dict[str, str] | None = None
message_expr: str | Column | None = None
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Overloaded parameter type — consider splitting before this becomes public API.

message_expr: str | Column forces callers (and serialisers, and future API extensions) to runtime-dispatch on type. Cleaner alternatives:

  • message_sql: str | None and message_col: Column | None with mutual-exclusion validation.
  • A single message: Column | None plus a message_from_sql(s: str) -> Column helper that wraps F.expr with the validation/sanitisation from the prior comments.

Not a blocker — your current shape works — but worth deciding now since changing the field name later is a breaking change.


def __post_init__(self):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add best-effort message_expr validation in __post_init__.

A typo'd SQL string ("concat(invalid") currently fails only when the rule is applied to a DataFrame, far from where it was defined. A best-effort validation here surfaces the error at the call site:

if isinstance(self.message_expr, str) and self.message_expr:
    try:
        F.expr(self.message_expr)
    except Exception as exc:
        raise InvalidParameterError(
            f"message_expr is not a valid Spark SQL expression: {exc}. "
            f"Note: literal strings must be wrapped in SQL quotes, e.g. \"'my message'\"."
        ) from exc

Cheap (build-only — no DataFrame eval), and the error message hints at the unquoted-string trap in the previous comment.

self._validate_rule_type(self.check_func)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MEDIUM - Type annotation: The field is message: Callable[..., Column] | None = None here on DQRule, but on DQForEachColRule (line 441) it's message: Callable | None = None without the return type. These should be consistent — use Callable[..., Column] | None in both places.

Expand Down Expand Up @@ -259,6 +265,10 @@ def to_dict(self) -> dict:

if self.user_metadata:
metadata["user_metadata"] = self.user_metadata
# Only string expressions can be round-tripped through metadata; Column objects are
# in-process Spark expressions with no canonical YAML/JSON representation.
if isinstance(self.message_expr, str) and self.message_expr:
metadata["message_expr"] = self.message_expr
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to_dict() silently drops Column-typed message_expr — users won't notice.

A user who passes message_expr=F.concat(...) and calls to_dict() to persist their rules to YAML/Delta loses the message with no signal. They'll discover it only when re-loading and seeing the default message instead.

Options:

  • Log a WARNING when a Column-typed message_expr is encountered during serialisation: "Column message_expr cannot be serialised; falling back to default message on round-trip."
  • Or raise InvalidParameterError("Column-typed message_expr cannot be serialised; pass a SQL expression string if you need round-trip persistence").

Logging is friendlier; raising forces the user to make an explicit choice.

return metadata

def _initialize_column_if_missing(self):
Expand Down Expand Up @@ -428,6 +438,7 @@ class DQForEachColRule(DQRuleTypeMixin):
check_func_args: list[Any] = field(default_factory=list)
check_func_kwargs: dict[str, Any] = field(default_factory=dict)
user_metadata: dict[str, str] | None = None
message_expr: str | Column | None = None
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DQForEachColRule.message_expr is shared across all generated rules — flag in the field comment.

The integration test correctly notes this ("The same expression is used for every generated rule. To produce a per-column message, reference each column inline"), but the field itself has no docstring or warning. A user expecting per-column message substitution will be surprised when all generated rules emit identical messages — and won't know the workaround.

Add a one-line comment on the field describing the shared-expression behaviour, ideally with the inline-reference workaround pointer:

# Shared across every generated rule; reference column names inline (e.g. "concat('a=', cast(a as string))")
# if you need per-column messages, or construct rules individually instead of via DQForEachColRule.
message_expr: str | Column | None = None


def get_rules(self) -> list[DQRule]:
"""Build a list of rules for a set of columns.
Expand All @@ -453,6 +464,7 @@ def get_rules(self) -> list[DQRule]:
criticality=self.criticality,
filter=self.filter,
user_metadata=self.user_metadata,
message_expr=self.message_expr,
)
)
else: # default to row-level rule
Expand All @@ -467,6 +479,7 @@ def get_rules(self) -> list[DQRule]:
criticality=self.criticality,
filter=self.filter,
user_metadata=self.user_metadata,
message_expr=self.message_expr,
)
)
return rules
Expand Down
Loading
Loading