From 0bce9496671825bbd4f5b5fe6bbe00d5f6e5ea1b Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Tue, 24 Feb 2026 10:07:44 -0500 Subject: [PATCH] fix: use persistent storage for Docker data-root instead of ephemeral /mnt The Azure ephemeral disk (/mnt) gets wiped on VM deallocate, causing Docker images to be lost and pool-resume to fail with WAA timeout. Move Docker data-root to /home/azureuser/docker (OS disk, persistent) and increase OS disk to 128GB to accommodate Docker images. Co-Authored-By: Claude Opus 4.6 --- .beads/issues.jsonl | 2 +- openadapt_evals/benchmarks/vm_cli.py | 9 +++++---- openadapt_evals/infrastructure/azure_vm.py | 1 + openadapt_evals/infrastructure/pool.py | 14 ++++++++------ 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index d818478..1cd2c45 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -13,5 +13,5 @@ {"id":"openadapt-evals-hvm","title":"VL model fix PR #18 ready to merge","notes":"2026-02-08: openadapt-ml PR #18 was already merged on 2026-01-29. VL model fix is done.","status":"closed","priority":0,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-01-29T16:17:03.491938-05:00","created_by":"Richard Abrich","updated_at":"2026-02-08T12:55:19.233249-05:00","closed_at":"2026-02-08T12:55:19.233249-05:00","close_reason":"PR #18 already merged 2026-01-29"} {"id":"openadapt-evals-mx8","title":"Analyze evaluation results and publish findings","description":"After demo-conditioned evaluation completes, analyze results: success rates, failure modes, demo impact. Create data-driven roadmap for improvements.","status":"open","priority":1,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-02-14T12:23:06.328838-05:00","created_by":"Richard Abrich","updated_at":"2026-02-14T12:23:06.328838-05:00"} {"id":"openadapt-evals-sz4","title":"RCA: Windows product key prompt recurring issue","status":"closed","priority":0,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-01-20T18:59:36.266286-05:00","created_by":"Richard Abrich","updated_at":"2026-01-20T20:32:06.493102-05:00","closed_at":"2026-01-20T20:32:06.493102-05:00","close_reason":"RCA complete - root cause is VERSION mismatch (CLI=11, Dockerfile=11e). Fix documented in RECURRING_ISSUES.md and WINDOWS_PRODUCT_KEY_RCA.md"} -{"id":"openadapt-evals-vcb","title":"Run demo-conditioned WAA evaluation","description":"Once demos are recorded, run WAA evaluation with demo-conditioned agents (RetrievalAugmentedAgent with real demos). Target: measure improvement over zero-shot baseline. Requires real demos from recording task.","notes":"Pipeline complete. 3 annotated demos produced. Need Azure VM to run eval. Anthropic credits depleted — use OpenAI.","status":"open","priority":0,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-02-14T12:23:04.624305-05:00","created_by":"Richard Abrich","updated_at":"2026-02-18T00:03:34.77925-05:00"} +{"id":"openadapt-evals-vcb","title":"Run demo-conditioned WAA evaluation","description":"Once demos are recorded, run WAA evaluation with demo-conditioned agents (RetrievalAugmentedAgent with real demos). Target: measure improvement over zero-shot baseline. Requires real demos from recording task.","notes":"PR #35 merged (v0.4.0): full pipeline implemented — record-waa (interactive WAA API recording via VNC), annotate (VLM annotation of screenshots), eval (delegates to eval-suite). 12 harder tasks defined (0/12 zero-shot). CI workflow added. PR #36 merged (v0.4.1): fixed PyPI README images. Next: spin up Azure VM, record demos for 12 harder tasks, annotate, run DC eval.","status":"open","priority":0,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-02-14T12:23:04.624305-05:00","created_by":"Richard Abrich","updated_at":"2026-02-24T02:00:07.491221-05:00"} {"id":"openadapt-evals-wis","title":"Add pre-flight check to detect Windows install issues","status":"closed","priority":1,"issue_type":"task","owner":"richard.abrich@gmail.com","created_at":"2026-01-20T18:59:36.865052-05:00","created_by":"Richard Abrich","updated_at":"2026-01-20T20:32:06.757261-05:00","closed_at":"2026-01-20T20:32:06.757261-05:00","close_reason":"Duplicate of openadapt-evals-0dt"} diff --git a/openadapt_evals/benchmarks/vm_cli.py b/openadapt_evals/benchmarks/vm_cli.py index 9a6a0e0..122dc31 100644 --- a/openadapt_evals/benchmarks/vm_cli.py +++ b/openadapt_evals/benchmarks/vm_cli.py @@ -349,15 +349,16 @@ def cmd_create(args): sudo systemctl enable docker sudo usermod -aG docker $USER -# Configure Docker to use /mnt (larger temp disk) +# Configure Docker to use persistent storage (NOT /mnt which is ephemeral +# and gets wiped on VM deallocate, breaking pool-resume) sudo systemctl stop docker -sudo mkdir -p /mnt/docker -sudo bash -c 'echo "{\\"data-root\\": \\"/mnt/docker\\"}" > /etc/docker/daemon.json' +sudo mkdir -p /home/azureuser/docker +sudo bash -c 'echo "{\\"data-root\\": \\"/home/azureuser/docker\\"}" > /etc/docker/daemon.json' sudo systemctl start docker # Verify docker --version -df -h /mnt +df -h /home """ result = ssh_run(ip, docker_setup, stream=True, step="CREATE") if result.returncode != 0: diff --git a/openadapt_evals/infrastructure/azure_vm.py b/openadapt_evals/infrastructure/azure_vm.py index 8a8d956..3608b3a 100644 --- a/openadapt_evals/infrastructure/azure_vm.py +++ b/openadapt_evals/infrastructure/azure_vm.py @@ -665,6 +665,7 @@ def _sdk_create_vm( "image_reference": {"id": image_id} if image_id else _UBUNTU_2204_IMAGE, "os_disk": { "create_option": "FromImage", + "disk_size_gb": 128, "managed_disk": {"storage_account_type": "Premium_LRS"}, }, }, diff --git a/openadapt_evals/infrastructure/pool.py b/openadapt_evals/infrastructure/pool.py index 1ebf4a6..6dcd8bb 100644 --- a/openadapt_evals/infrastructure/pool.py +++ b/openadapt_evals/infrastructure/pool.py @@ -77,10 +77,11 @@ class PoolRunResult: sudo systemctl enable docker sudo usermod -aG docker $USER -# Configure Docker to use /mnt (larger temp disk) +# Configure Docker to use persistent storage (NOT /mnt which is ephemeral +# and gets wiped on VM deallocate, breaking pool-resume) sudo systemctl stop docker -sudo mkdir -p /mnt/docker -sudo bash -c 'echo "{\\"data-root\\": \\"/mnt/docker\\"}" > /etc/docker/daemon.json' +sudo mkdir -p /home/azureuser/docker +sudo bash -c 'echo "{\\"data-root\\": \\"/home/azureuser/docker\\"}" > /etc/docker/daemon.json' sudo systemctl start docker # Pull base images (use sudo since usermod hasn't taken effect yet) @@ -110,10 +111,11 @@ class PoolRunResult: sudo systemctl enable docker sudo usermod -aG docker $USER -# Configure Docker to use /mnt (larger temp disk) +# Configure Docker to use persistent storage (NOT /mnt which is ephemeral +# and gets wiped on VM deallocate, breaking pool-resume) sudo systemctl stop docker -sudo mkdir -p /mnt/docker -sudo bash -c 'echo "{{\\"data-root\\": \\"/mnt/docker\\"}}" > /etc/docker/daemon.json' +sudo mkdir -p /home/azureuser/docker +sudo bash -c 'echo "{{\\"data-root\\": \\"/home/azureuser/docker\\"}}" > /etc/docker/daemon.json' sudo systemctl start docker # Pull pre-built image from ACR (faster than building)