posit-dev
diff --git a/‎great-docs.yml‎
Lines changed: 6 additions & 0 deletions b/‎great-docs.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎pointblank/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎pointblank/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pointblank/_utils_ai.py‎
Lines changed: 267 additions & 20 deletions b/‎pointblank/_utils_ai.py‎
Lines changed: 267 additions & 20 deletions
@@ -131,6 +131,8 @@ reference:
           members: false
         - name: DraftValidation
           members: false
+        - name: EditValidation
+          members: true
         - name: MissingSpec
           members: true
 
@@ -260,6 +262,10 @@ reference:
         - Validate.get_step_report
         - Validate.get_json_report
         - Validate.get_dataframe_report
+        - Validate.to_code
+        - Validate.to_yaml
+        - Validate.suggest_improvements
+        - Validate.from_prompt
         - Validate.get_sundered_data
         - Validate.get_data_extracts
         - Validate.all_passed
 
@@ -34,6 +34,7 @@
 from pointblank.contract import Contract, Step
 from pointblank.datascan import DataScan, col_summary_tbl
 from pointblank.draft import DraftValidation
+from pointblank.edit import EditValidation
 from pointblank.field import (
     BoolField,
     DateField,
@@ -122,6 +123,7 @@
     "PipelineResult",
     "DataScan",
     "DraftValidation",
+    "EditValidation",
     "MissingSpec",
     "col",
     "ref",
 
@@ -45,8 +45,32 @@ class _LLMConfig:
     verify_ssl: bool = True
 
 
+_ROW_VALIDATION_SYSTEM_PROMPT = """You are a data validation assistant. Your task is to analyze rows of data and determine if they meet the specified validation criteria.
+
+INSTRUCTIONS:
+- Analyze each row in the provided data
+- For each row, determine if it meets the validation criteria (True) or not (False)
+- Return ONLY a JSON array with validation results
+- Each result should have: {"index": <row_index>, "result": <true_or_false>}
+- Do not include any explanatory text, only the JSON array
+- The row_index should match the "_pb_row_index" field from the input data
+
+EXAMPLE OUTPUT FORMAT:
+[
+  {"index": 0, "result": true},
+  {"index": 1, "result": false},
+  {"index": 2, "result": true}
+]
+
+If reference attachments (images or PDFs) are provided alongside the data, use them as context when evaluating each row."""
+
+
 def _create_chat_instance(
-    provider: str, model_name: str, api_key: Optional[str] = None, verify_ssl: bool = True
+    provider: str,
+    model_name: str,
+    api_key: Optional[str] = None,
+    verify_ssl: bool = True,
+    system_prompt: Optional[str] = None,
 ):
     """
     Create a chatlas chat instance for the specified provider.
@@ -61,6 +85,10 @@ def _create_chat_instance(
         Optional API key. If None, will be read from environment.
     verify_ssl
         Whether to verify SSL certificates when making requests. Defaults to True.
+    system_prompt
+        Optional system prompt to steer the model. If None, a row-validation system prompt is
+        used (the default for the AI validation flow). Other flows (e.g., drafting/editing
+        validation plans) supply their own prompt.
 
     Returns
     -------
@@ -82,25 +110,9 @@ def _create_chat_instance(
             f"Supported providers: {', '.join(MODEL_PROVIDERS)}"
         )
 
-    # System prompt with role definition and instructions
-    system_prompt = """You are a data validation assistant. Your task is to analyze rows of data and determine if they meet the specified validation criteria.
-
-INSTRUCTIONS:
-- Analyze each row in the provided data
-- For each row, determine if it meets the validation criteria (True) or not (False)
-- Return ONLY a JSON array with validation results
-- Each result should have: {"index": <row_index>, "result": <true_or_false>}
-- Do not include any explanatory text, only the JSON array
-- The row_index should match the "_pb_row_index" field from the input data
-
-EXAMPLE OUTPUT FORMAT:
-[
-  {"index": 0, "result": true},
-  {"index": 1, "result": false},
-  {"index": 2, "result": true}
-]
-
-If reference attachments (images or PDFs) are provided alongside the data, use them as context when evaluating each row."""
+    # Use the row-validation system prompt unless a caller supplies its own
+    if system_prompt is None:
+        system_prompt = _ROW_VALIDATION_SYSTEM_PROMPT
 
     # Create httpx client with SSL verification settings
     try:
@@ -989,3 +1001,238 @@ def validate_single_batch(
             for i in range(batch["start_row"], batch["end_row"]):
                 default_results.append({"index": i, "result": False})
             return default_results
+
+
+# ============================================================================
+# Validation-plan code helpers (shared by DraftValidation and EditValidation)
+# ============================================================================
+
+import ast as _ast
+import difflib as _difflib
+
+# Validation method names a generated or edited plan may legitimately call. Used by the
+# syntax guardrail to catch hallucinated methods before a plan is handed back to the user.
+_KNOWN_PLAN_METHODS = frozenset(
+    {
+        "col_vals_gt",
+        "col_vals_lt",
+        "col_vals_eq",
+        "col_vals_ne",
+        "col_vals_ge",
+        "col_vals_le",
+        "col_vals_between",
+        "col_vals_outside",
+        "col_vals_in_set",
+        "col_vals_not_in_set",
+        "col_vals_increasing",
+        "col_vals_decreasing",
+        "col_vals_null",
+        "col_vals_not_null",
+        "col_vals_regex",
+        "col_vals_within_spec",
+        "col_vals_expr",
+        "col_exists",
+        "col_pct_null",
+        "col_schema_match",
+        "col_count_match",
+        "row_count_match",
+        "rows_distinct",
+        "rows_complete",
+        "data_freshness",
+        "tbl_match",
+        "conjointly",
+        "specially",
+        "interrogate",
+    }
+)
+
+
+def _extract_code(text: str) -> str:
+    """Extract Python source from a model response or fenced code block.
+
+    Strips a single ```python ... ``` (or bare ``` ... ```) fence if present; otherwise returns
+    the text unchanged (trimmed).
+    """
+    stripped = text.strip()
+    if "```" not in stripped:
+        return stripped
+
+    fence_start = stripped.find("```")
+    after = stripped[fence_start + 3 :]
+    newline = after.find("\n")
+    if newline != -1:
+        first_line = after[:newline].strip()
+        if first_line.lower() in ("python", "py", ""):
+            after = after[newline + 1 :]
+    fence_end = after.find("```")
+    if fence_end != -1:
+        after = after[:fence_end]
+    return after.strip()
+
+
+def _check_syntax(code: str) -> Tuple[bool, str]:
+    """Parse `code` and flag unknown validation methods.
+
+    Returns `(ok, message)`. `ok` is False if the code fails to parse or calls a method that
+    looks like a validation method but is not in the known set; `message` describes the first
+    problem found.
+    """
+    try:
+        tree = _ast.parse(code)
+    except SyntaxError as e:
+        return False, f"SyntaxError: {e.msg} (line {e.lineno})"
+
+    unknown: List[str] = []
+    for node in _ast.walk(tree):
+        if isinstance(node, _ast.Call) and isinstance(node.func, _ast.Attribute):
+            name = node.func.attr
+            if name.startswith(("col_", "row_", "rows_")) and name not in _KNOWN_PLAN_METHODS:
+                unknown.append(name)
+    if unknown:
+        return False, f"Unknown validation method(s): {', '.join(sorted(set(unknown)))}"
+
+    return True, ""
+
+
+def _chain_base_is_validate(node: Any) -> bool:
+    """Whether an attribute-call chain ultimately starts from a `Validate(...)` call."""
+    cur = node
+    # Descend through chained method calls (whose receiver is itself a call) to the base call
+    while (
+        isinstance(cur, _ast.Call)
+        and isinstance(cur.func, _ast.Attribute)
+        and isinstance(cur.func.value, _ast.Call)
+    ):
+        cur = cur.func.value
+    if isinstance(cur, _ast.Call):
+        func = cur.func
+        if isinstance(func, _ast.Attribute):
+            return func.attr == "Validate"
+        if isinstance(func, _ast.Name):
+            return func.id == "Validate"
+    return False
+
+
+def _chain_depth(node: Any) -> int:
+    depth = 0
+    cur = node
+    while (
+        isinstance(cur, _ast.Call)
+        and isinstance(cur.func, _ast.Attribute)
+        and isinstance(cur.func.value, _ast.Call)
+    ):
+        depth += 1
+        cur = cur.func.value
+    return depth
+
+
+def _extract_chain_steps(code: str) -> List[Dict[str, Any]]:
+    """Parse a plan's code into an ordered list of step descriptors.
+
+    Each descriptor is `{"method": str, "text": str, "kwargs": {name: source}}`. The terminal
+    `.interrogate()` call, if present, is omitted. Returns an empty list if the code cannot be
+    parsed or contains no `Validate(...)` chain.
+    """
+    try:
+        tree = _ast.parse(code)
+    except SyntaxError:
+        return []
+
+    # Locate the deepest (outermost) Validate(...) method chain in the module
+    best: Optional[Tuple[int, Any]] = None
+    for node in _ast.walk(tree):
+        if isinstance(node, _ast.Call) and _chain_base_is_validate(node):
+            depth = _chain_depth(node)
+            if best is None or depth > best[0]:
+                best = (depth, node)
+    if best is None:
+        return []
+
+    calls: List[Any] = []
+    cur = best[1]
+    # Collect chained method calls, stopping before the base `Validate(...)` constructor
+    while (
+        isinstance(cur, _ast.Call)
+        and isinstance(cur.func, _ast.Attribute)
+        and isinstance(cur.func.value, _ast.Call)
+    ):
+        calls.append(cur)
+        cur = cur.func.value
+    calls.reverse()
+
+    steps: List[Dict[str, Any]] = []
+    for call in calls:
+        method = call.func.attr
+        if method == "interrogate":
+            continue
+        args = [_ast.unparse(a) for a in call.args]
+        kwargs: Dict[str, Any] = {}
+        for kw in call.keywords:
+            if kw.arg is not None:
+                kwargs[kw.arg] = _ast.unparse(kw.value)
+        rendered = ", ".join(args + [f"{k}={v}" for k, v in kwargs.items()])
+        steps.append({"method": method, "text": f"{method}({rendered})", "kwargs": kwargs})
+    return steps
+
+
+def _diff_plan_steps(
+    old_steps: List[Dict[str, Any]], new_steps: List[Dict[str, Any]]
+) -> List[Dict[str, Any]]:
+    """Compute a structured, step-level diff between two parsed plans.
+
+    Returns a list of change records, each `{"action": "add"|"remove"|"modify", "method": str,
+    ...}`. For "add"/"modify" the record includes `"new"` (the new step text); for
+    "remove"/"modify" it includes `"old"` (the old step text).
+    """
+    old_texts = [s["text"] for s in old_steps]
+    new_texts = [s["text"] for s in new_steps]
+    matcher = _difflib.SequenceMatcher(a=old_texts, b=new_texts, autojunk=False)
+
+    changes: List[Dict[str, Any]] = []
+    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+        if tag == "equal":
+            continue
+        if tag == "delete":
+            for k in range(i1, i2):
+                changes.append(
+                    {"action": "remove", "method": old_steps[k]["method"], "old": old_texts[k]}
+                )
+        elif tag == "insert":
+            for k in range(j1, j2):
+                changes.append(
+                    {"action": "add", "method": new_steps[k]["method"], "new": new_texts[k]}
+                )
+        else:  # "replace": pair same-method steps as modifications, leftovers as add/remove
+            old_block = list(range(i1, i2))
+            new_block = list(range(j1, j2))
+            used_new: set[int] = set()
+            for oi in list(old_block):
+                match = next(
+                    (
+                        nj
+                        for nj in new_block
+                        if nj not in used_new and new_steps[nj]["method"] == old_steps[oi]["method"]
+                    ),
+                    None,
+                )
+                if match is not None:
+                    used_new.add(match)
+                    old_block.remove(oi)
+                    changes.append(
+                        {
+                            "action": "modify",
+                            "method": old_steps[oi]["method"],
+                            "old": old_texts[oi],
+                            "new": new_texts[match],
+                        }
+                    )
+            for oi in old_block:
+                changes.append(
+                    {"action": "remove", "method": old_steps[oi]["method"], "old": old_texts[oi]}
+                )
+            for nj in new_block:
+                if nj not in used_new:
+                    changes.append(
+                        {"action": "add", "method": new_steps[nj]["method"], "new": new_texts[nj]}
+                    )
+    return changes