databricks
diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
Lines changed: 1 addition & 0 deletions
diff --git a/acceptance/bin/assert_exists.py b/acceptance/bin/assert_exists.py
Lines changed: 12 additions & 0 deletions
diff --git a/acceptance/bin/assert_not_exists.py b/acceptance/bin/assert_not_exists.py
Lines changed: 12 additions & 0 deletions
diff --git a/acceptance/bin/kill_after.py b/acceptance/bin/kill_after.py
Lines changed: 39 additions & 0 deletions
diff --git a/acceptance/bundle/deploy/wal/chain-3-jobs/databricks.yml b/acceptance/bundle/deploy/wal/chain-3-jobs/databricks.yml
Lines changed: 37 additions & 0 deletions
diff --git a/acceptance/bundle/deploy/wal/chain-3-jobs/out.test.toml b/acceptance/bundle/deploy/wal/chain-3-jobs/out.test.toml
Lines changed: 3 additions & 0 deletions
diff --git a/acceptance/bundle/deploy/wal/chain-3-jobs/output.txt b/acceptance/bundle/deploy/wal/chain-3-jobs/output.txt
Lines changed: 110 additions & 0 deletions
diff --git a/acceptance/bundle/deploy/wal/chain-3-jobs/script b/acceptance/bundle/deploy/wal/chain-3-jobs/script
Lines changed: 24 additions & 0 deletions
diff --git a/acceptance/bundle/deploy/wal/chain-3-jobs/test.py b/acceptance/bundle/deploy/wal/chain-3-jobs/test.py
Lines changed: 1 addition & 0 deletions
diff --git a/acceptance/bundle/deploy/wal/corrupted-wal-entry/databricks.yml b/acceptance/bundle/deploy/wal/corrupted-wal-entry/databricks.yml
Lines changed: 23 additions & 0 deletions
@@ -13,5 +13,6 @@
 * Make sure warnings asking for approval are understood by agents ([#5239](https://github.com/databricks/cli/pull/5239))
 * Support `replace_existing: true` on `postgres_branches` and `postgres_endpoints` so bundles can manage the implicitly-created production branch and primary read-write endpoint of a Lakebase project.
 * Add `postgres_catalogs` resource to bind a Unity Catalog catalog to a Postgres database on a Lakebase Autoscaling branch ([#5265](https://github.com/databricks/cli/pull/5265)).
+* engine/direct: Changes to the state file are now persisted to a .wal file right away instead of being saved at the end ([#5149](https://github.com/databricks/cli/pull/5149))
 
 ### Dependency updates
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+import os, sys
+
+errors = 0
+
+for filename in sys.argv[1:]:
+    if not os.path.exists(filename):
+        sys.stderr.write(f"Unexpected: {filename} does not exist.\n")
+        errors += 1
+
+if errors:
+    sys.exit(1)
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+import os, sys
+
+errors = 0
+
+for filename in sys.argv[1:]:
+    if os.path.exists(filename):
+        sys.stderr.write(f"Unexpected: {filename} exists.\n")
+        errors += 1
+
+if errors:
+    sys.exit(1)
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+"""Set up a kill rule on the testserver for the current test token.
+
+Usage: kill_after.py PATTERN OFFSET TIMES
+
+  PATTERN   HTTP method and path, e.g. "POST /api/2.2/jobs/create"
+  OFFSET    number of requests to let through before killing starts
+  TIMES     number of times to kill the caller
+
+The rule is scoped to the current DATABRICKS_TOKEN so it only affects
+the test that registers it, even when tests share a server.
+"""
+
+import json
+import os
+import sys
+import urllib.request
+
+host = os.environ.get("DATABRICKS_HOST", "")
+token = os.environ.get("DATABRICKS_TOKEN", "")
+
+if not host:
+    print("DATABRICKS_HOST not set", file=sys.stderr)
+    sys.exit(1)
+
+if len(sys.argv) != 4:
+    print(f"usage: {sys.argv[0]} PATTERN OFFSET TIMES", file=sys.stderr)
+    sys.exit(1)
+
+pattern, offset, times = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])
+
+data = json.dumps({"pattern": pattern, "offset": offset, "times": times}).encode()
+req = urllib.request.Request(
+    f"{host}/__testserver/kill",
+    data=data,
+    headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
+    method="POST",
+)
+urllib.request.urlopen(req)
@@ -0,0 +1,37 @@
+bundle:
+  name: wal-chain-test
+
+resources:
+  jobs:
+    # Linear chain: job_01 -> job_02 -> job_03
+    # Execution order: job_01 first, job_03 last
+    job_01:
+      name: "job-01"
+      description: "first in chain"
+      tasks:
+        - task_key: "task"
+          spark_python_task:
+            python_file: ./test.py
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
+    job_02:
+      name: "job-02"
+      description: "depends on ${resources.jobs.job_01.id}"
+      tasks:
+        - task_key: "task"
+          spark_python_task:
+            python_file: ./test.py
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
+    job_03:
+      name: "job-03"
+      description: "depends on ${resources.jobs.job_02.id}"
+      tasks:
+        - task_key: "task"
+          spark_python_task:
+            python_file: ./test.py
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
@@ -0,0 +1,110 @@
+=== First deploy (crashes on job_03) ===
+
+>>> errcode [CLI] bundle deploy
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/wal-chain-test/default/files...
+Deploying resources...
+[PROCESS_KILLED]
+
+Exit code: [KILLED]
+
+=== WAL content after crash ===
+{
+  "cli_version": "[DEV_VERSION]",
+  "lineage": "[UUID]",
+  "serial": 1,
+  "state_version": 2
+}
+{
+  "k": "resources.jobs.job_01",
+  "v": {
+    "__id__": "[JOB_01_ID]",
+    "state": {
+      "deployment": {
+        "kind": "BUNDLE",
+        "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/wal-chain-test/default/state/metadata.json"
+      },
+      "description": "first in chain",
+      "edit_mode": "UI_LOCKED",
+      "format": "MULTI_TASK",
+      "max_concurrent_runs": 1,
+      "name": "job-01",
+      "queue": {
+        "enabled": true
+      },
+      "tasks": [
+        {
+          "new_cluster": {
+            "node_type_id": "[NODE_TYPE_ID]",
+            "spark_version": "15.4.x-scala2.12"
+          },
+          "spark_python_task": {
+            "python_file": "/Workspace/Users/[USERNAME]/.bundle/wal-chain-test/default/files/test.py"
+          },
+          "task_key": "task"
+        }
+      ]
+    }
+  }
+}
+{
+  "k": "resources.jobs.job_02",
+  "v": {
+    "__id__": "[JOB_02_ID]",
+    "depends_on": [
+      {
+        "label": "${resources.jobs.job_01.id}",
+        "node": "resources.jobs.job_01"
+      }
+    ],
+    "state": {
+      "deployment": {
+        "kind": "BUNDLE",
+        "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/wal-chain-test/default/state/metadata.json"
+      },
+      "description": "depends on [JOB_01_ID]",
+      "edit_mode": "UI_LOCKED",
+      "format": "MULTI_TASK",
+      "max_concurrent_runs": 1,
+      "name": "job-02",
+      "queue": {
+        "enabled": true
+      },
+      "tasks": [
+        {
+          "new_cluster": {
+            "node_type_id": "[NODE_TYPE_ID]",
+            "spark_version": "15.4.x-scala2.12"
+          },
+          "spark_python_task": {
+            "python_file": "/Workspace/Users/[USERNAME]/.bundle/wal-chain-test/default/files/test.py"
+          },
+          "task_key": "task"
+        }
+      ]
+    }
+  }
+}
+
+=== Number of jobs saved in WAL ===
+2
+
+=== Bundle summary (reads from WAL) ===
+Name: wal-chain-test
+Target: default
+Workspace:
+  User: [USERNAME]
+  Path: /Workspace/Users/[USERNAME]/.bundle/wal-chain-test/default
+Resources:
+  Jobs:
+    job_01:
+      Name: job-01
+      URL:  [DATABRICKS_URL]/jobs/[JOB_01_ID]?o=[NUMID]
+    job_02:
+      Name: job-02
+      URL:  [DATABRICKS_URL]/jobs/[JOB_02_ID]?o=[NUMID]
+    job_03:
+      Name: job-03
+      URL:  (not deployed)
+
+=== WAL after successful deploy ===
+WAL deleted (expected)
@@ -0,0 +1,24 @@
+# Linear chain: job_01 -> job_02 -> job_03
+# Let the first 2 jobs/create requests succeed, then kill the caller on the 3rd
+kill_after.py "POST /api/2.2/jobs/create" 2 1
+
+echo "=== First deploy (crashes on job_03) ==="
+trace errcode $CLI bundle deploy
+
+echo ""
+echo "=== WAL content after crash ==="
+jq -S . .databricks/bundle/default/resources.json.wal 2>/dev/null || echo "No WAL file"
+
+echo ""
+echo "=== Number of jobs saved in WAL ==="
+grep -c '"k":"resources.jobs' .databricks/bundle/default/resources.json.wal 2>/dev/null || echo "0"
+
+echo ""
+echo "=== Bundle summary (reads from WAL) ==="
+$CLI bundle summary
+
+echo ""
+echo "=== WAL after successful deploy ==="
+cat .databricks/bundle/default/resources.json.wal 2>/dev/null || echo "WAL deleted (expected)"
+
+replace_ids.py
@@ -0,0 +1 @@
+print("test")
@@ -0,0 +1,23 @@
+bundle:
+  name: wal-corrupted-test
+
+resources:
+  jobs:
+    valid_job:
+      name: "valid-job"
+      tasks:
+        - task_key: "task-a"
+          spark_python_task:
+            python_file: ./test.py
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
+    another_valid:
+      name: "another-valid"
+      tasks:
+        - task_key: "task-b"
+          spark_python_task:
+            python_file: ./test.py
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge