4646# Constants (single source of truth)
4747# =============================================================================
4848
49- # VM sizes with nested virtualization support
50- # Standard: $0.19/hr, 4 vCPU, 16GB RAM - baseline
51- # Fast: $0.38/hr, 8 vCPU, 32GB RAM - ~30% faster install, ~40% faster eval
52- VM_SIZE_STANDARD = "Standard_D4ds_v4"
53- VM_SIZE_FAST = "Standard_D8ds_v5"
54- VM_SIZE = VM_SIZE_STANDARD # Default, can be overridden by --fast flag
55-
56- # Fallback sizes for --fast mode (in order of preference)
49+ # VM size: D8ds_v5 ($0.38/hr, 8 vCPU, 32GB RAM)
50+ # D4ds_v4 (16GB) OOMs with navi agent's GroundingDINO + SoM models — do not use.
51+ VM_SIZE = "Standard_D8ds_v5"
52+
53+ # Fallback VM sizes (in order of preference, all 8 vCPU / 32GB)
5754# D8ds_v5: First choice (v5 with local SSD)
5855# D8s_v5: v5 without local SSD
5956# D8ds_v4: v4 with local SSD
6057# D8as_v5: AMD version
61- VM_SIZE_FAST_FALLBACKS = [
58+ VM_SIZE_FALLBACKS = [
6259 ("Standard_D8ds_v5" , 0.38 ),
6360 ("Standard_D8s_v5" , 0.36 ),
6461 ("Standard_D8ds_v4" , 0.38 ),
@@ -258,19 +255,12 @@ def cmd_create(args):
258255 log ("CREATE" , "Use 'delete' first if you want to recreate" )
259256 return 0
260257
261- # Determine which sizes to try
262- use_fast = getattr (args , "fast" , False )
263- if use_fast :
264- # Try multiple fast sizes with fallbacks
265- sizes_to_try = VM_SIZE_FAST_FALLBACKS
266- log (
267- "CREATE" ,
268- f"Creating VM '{ VM_NAME } ' with --fast (trying multiple D8 sizes)..." ,
269- )
270- else :
271- # Standard mode: single size
272- sizes_to_try = [(VM_SIZE_STANDARD , 0.19 )]
273- log ("CREATE" , f"Creating VM '{ VM_NAME } ' ({ VM_SIZE_STANDARD } , $0.19/hr)..." )
258+ # Try multiple D8 sizes with fallbacks (all 32GB, required for navi agent)
259+ sizes_to_try = VM_SIZE_FALLBACKS
260+ log (
261+ "CREATE" ,
262+ f"Creating VM '{ VM_NAME } ' (trying D8 sizes with fallbacks)..." ,
263+ )
274264
275265 # Try size+region combinations until one works
276266 vm_created = False
@@ -627,7 +617,6 @@ def cmd_pool_create(args):
627617 from openadapt_evals .infrastructure .pool import PoolManager
628618
629619 num_workers = getattr (args , "workers" , 3 )
630- use_standard = getattr (args , "standard" , False )
631620 auto_shutdown_hours = getattr (args , "auto_shutdown_hours" , 4 )
632621
633622 vm_manager = AzureVMManager (resource_group = RESOURCE_GROUP )
@@ -636,7 +625,6 @@ def cmd_pool_create(args):
636625 try :
637626 manager .create (
638627 workers = num_workers ,
639- fast = not use_standard ,
640628 auto_shutdown_hours = auto_shutdown_hours ,
641629 )
642630 return 0
@@ -1428,18 +1416,10 @@ def cmd_start(args):
14281416 # - Downloads Windows 11 Enterprise if not present
14291417 # - Boots QEMU VM
14301418 # - Runs WAA server automatically via FirstLogonCommands
1431- # QEMU resource allocation (--fast uses more resources on D8ds_v5)
1432- if getattr (args , "fast" , False ):
1433- ram_size = "16G"
1434- cpu_cores = 6
1435- log (
1436- "START" ,
1437- "Starting container with VERSION=11e (FAST mode: 6 cores, 16GB RAM)..." ,
1438- )
1439- else :
1440- ram_size = "8G"
1441- cpu_cores = 4
1442- log ("START" , "Starting container with VERSION=11e..." )
1419+ # QEMU resource allocation — always use 16G/6 cores (D8ds_v5, 32GB host)
1420+ ram_size = "16G"
1421+ cpu_cores = 6
1422+ log ("START" , "Starting container with VERSION=11e (6 cores, 16GB RAM)..." )
14431423
14441424 # Get agent and model from args (defaults match WAA defaults)
14451425 getattr (args , "agent" , "navi" )
@@ -1568,11 +1548,10 @@ def cmd_test_golden_image(args):
15681548 # Start container from golden image (NOT fresh)
15691549 log ("TEST" , "Starting container from golden image..." )
15701550
1571- # Use fast mode for quicker boot
1572- ram_size = "16G" if args .fast else "8G"
1573- cpu_cores = 6 if args .fast else 4
1574- mode_str = "FAST mode" if args .fast else "standard mode"
1575- log ("TEST" , f" Using { mode_str } : { cpu_cores } cores, { ram_size } RAM" )
1551+ # QEMU allocation: 16GB RAM / 6 cores (D8ds_v5, 32GB host)
1552+ ram_size = "16G"
1553+ cpu_cores = 6
1554+ log ("TEST" , f" Using { cpu_cores } cores, { ram_size } RAM" )
15761555
15771556 docker_cmd = f"""docker run -d \\
15781557 --name winarena \\
@@ -1923,7 +1902,6 @@ class FakeArgs:
19231902 log ("TEST-ALL" , "-" * 30 )
19241903
19251904 class FakeArgs2 :
1926- fast = getattr (args , "fast" , False )
19271905 timeout = 120
19281906
19291907 results ["golden_image" ] = cmd_test_golden_image (FakeArgs2 ()) == 0
@@ -2091,8 +2069,10 @@ def cmd_run(args):
20912069 task_info .append (f"task={ task } " )
20922070 elif domain != "all" :
20932071 task_info .append (f"domain={ domain } " )
2094- else :
2072+ elif args . num_tasks :
20952073 task_info .append (f"{ args .num_tasks } task(s)" )
2074+ else :
2075+ task_info .append ("all tasks" )
20962076
20972077 log ("RUN" , f"Starting benchmark: { ', ' .join (task_info )} , model={ model } " )
20982078
@@ -2127,7 +2107,7 @@ def cmd_run(args):
21272107print(f"ERROR: Task {{task_id}} not found in test_all.json")
21282108sys.exit(1)
21292109FINDEOF
2130- python3 /tmp/find_task.py"""
2110+ python3 /tmp/find_task.py && """
21312111 run_args .append ("--test_all_meta_path evaluation_examples_windows/test_custom.json" )
21322112 pre_cmd = create_custom_test_cmd
21332113 elif args .num_tasks and args .num_tasks < 154 :
@@ -4341,13 +4321,11 @@ def cmd_run_azure_ml_auto(args):
43414321 probe_timeout = getattr (args , "probe_timeout" , 1800 ) # 30 min for WAA server
43424322 skip_upload = getattr (args , "skip_upload" , False )
43434323 skip_benchmark = getattr (args , "skip_benchmark" , False )
4344- fast_vm = getattr (args , "fast" , False )
43454324
43464325 log ("AUTO" , "Configuration:" )
43474326 log ("AUTO" , f" Workers: { num_workers } " )
43484327 log ("AUTO" , f" Setup timeout: { timeout_minutes } min" )
43494328 log ("AUTO" , f" Probe timeout: { probe_timeout } sec" )
4350- log ("AUTO" , f" Fast VM: { fast_vm } " )
43514329 log ("AUTO" , "" )
43524330
43534331 # =========================================================================
@@ -4390,7 +4368,6 @@ def cmd_run_azure_ml_auto(args):
43904368
43914369 # Build args for cmd_create
43924370 class CreateArgs :
4393- fast = fast_vm
43944371 workers = 1
43954372
43964373 result = cmd_create (CreateArgs ())
@@ -4447,8 +4424,8 @@ class CreateArgs:
44474424 ssh_run (ip , "docker stop winarena 2>/dev/null; docker rm -f winarena 2>/dev/null" )
44484425
44494426 # Start container with VERSION=11e
4450- ram_size = "16G" if fast_vm else "8G"
4451- cpu_cores = 6 if fast_vm else 4
4427+ ram_size = "16G"
4428+ cpu_cores = 6
44524429
44534430 docker_cmd = f"""docker run -d \\
44544431 --name winarena \\
@@ -7340,11 +7317,6 @@ def main():
73407317
73417318 # create
73427319 p_create = subparsers .add_parser ("create" , help = "Create Azure VM" )
7343- p_create .add_argument (
7344- "--fast" ,
7345- action = "store_true" ,
7346- help = "Use larger VM (D8ds_v5, $0.38/hr) for ~30%% faster install, ~40%% faster eval" ,
7347- )
73487320 p_create .add_argument (
73497321 "--workers" ,
73507322 type = int ,
@@ -7390,15 +7362,6 @@ def main():
73907362 default = 3 ,
73917363 help = "Number of worker VMs to create (default: 3)" ,
73927364 )
7393- p_pool_create .add_argument (
7394- "--fast" ,
7395- action = "store_true" ,
7396- default = True ,
7397- help = "Use D8 (8 vCPU) VMs for faster evaluation (default: True)" ,
7398- )
7399- p_pool_create .add_argument (
7400- "--standard" , action = "store_true" , help = "Use D4 (4 vCPU) VMs to save costs"
7401- )
74027365 p_pool_create .add_argument (
74037366 "--auto-shutdown-hours" ,
74047367 type = int ,
@@ -7527,11 +7490,6 @@ def main():
75277490 "--fresh" , action = "store_true" , help = "Clean storage for fresh Windows install"
75287491 )
75297492 p_start .add_argument ("--no-vnc" , action = "store_true" , help = "Don't auto-launch VNC viewer" )
7530- p_start .add_argument (
7531- "--fast" ,
7532- action = "store_true" ,
7533- help = "Allocate more CPU/RAM to QEMU (use with D8ds_v5 VM)" ,
7534- )
75357493 p_start .set_defaults (func = cmd_start )
75367494
75377495 # stop
@@ -7543,7 +7501,7 @@ def main():
75437501 p_probe = subparsers .add_parser ("probe" , help = "Check if WAA server is ready" )
75447502 p_probe .add_argument ("--wait" , action = "store_true" , help = "Wait until ready" )
75457503 p_probe .add_argument (
7546- "--timeout" , type = int , default = 1200 , help = "Timeout in seconds (default: 1200 )"
7504+ "--timeout" , type = int , default = 1800 , help = "Timeout in seconds (default: 1800 )"
75477505 )
75487506 p_probe .set_defaults (func = cmd_probe )
75497507
@@ -7557,11 +7515,6 @@ def main():
75577515 default = 180 ,
75587516 help = "Max wait time in seconds (default: 180)" ,
75597517 )
7560- p_test_golden .add_argument (
7561- "--fast" ,
7562- action = "store_true" ,
7563- help = "Use more CPU/RAM for faster boot (requires D8ds_v5 VM)" ,
7564- )
75657518 p_test_golden .set_defaults (func = cmd_test_golden_image )
75667519
75677520 # test-blob-access
@@ -7584,20 +7537,15 @@ def main():
75847537 "test-all" , help = "Run all pre-flight tests before Azure ML benchmark"
75857538 )
75867539 p_test_all .add_argument ("--api-key" , help = "OpenAI API key (or set OPENAI_API_KEY in .env)" )
7587- p_test_all .add_argument (
7588- "--fast" ,
7589- action = "store_true" ,
7590- help = "Use more CPU/RAM for faster boot (requires D8ds_v5 VM)" ,
7591- )
75927540 p_test_all .set_defaults (func = cmd_test_all )
75937541
75947542 # run
75957543 p_run = subparsers .add_parser ("run" , help = "Run benchmark tasks (uses vanilla WAA navi agent)" )
75967544 p_run .add_argument (
75977545 "--num-tasks" ,
75987546 type = int ,
7599- default = 1 ,
7600- help = "Number of tasks to run (ignored if --task specified)" ,
7547+ default = None ,
7548+ help = "Number of tasks to run (default: all; ignored if --task specified)" ,
76017549 )
76027550 p_run .add_argument ("--task" , help = "Specific task ID to run" )
76037551 p_run .add_argument (
@@ -7812,11 +7760,6 @@ def main():
78127760 default = 1800 ,
78137761 help = "WAA server probe timeout in seconds (default: 1800 = 30 min)" ,
78147762 )
7815- p_azure_ml_auto .add_argument (
7816- "--fast" ,
7817- action = "store_true" ,
7818- help = "Use larger VM (D8ds_v5, $0.38/hr) for faster setup" ,
7819- )
78207763 p_azure_ml_auto .add_argument (
78217764 "--skip-upload" ,
78227765 action = "store_true" ,
0 commit comments