Skip to content

Commit 2d53fe2

Browse files
committed
training-platform core: collapse duplicate tiled/non-tiled training branches in generate_network
The training path in execution.generate_network had two sibling branches (`config.training and config.tiling` vs. plain `config.training`) that were 90% identical: the same Step 1 (run the training codegen script with --n-steps / --n-accum / --num-data-inputs / -v / --debug / gen_args); the same training_meta.json read-back; and the same Step 2 optimizer loop with passthrough args and a --defaultMemLevel default. The only real differences were the two script names (testMVPTraining.py vs generateTrainingNetwork.py, and the corresponding optimizer pair), a 4-entry vs 8-entry passthrough list, and the "Tiled training" vs "Training" error-message prefix. Collapse into a single `if config.training:` branch that selects the three variants up front and reuses one body. The two inference branches (`elif config.tiling:` and `else:`) are left untouched. Verified on Siracusa: simplemlp_train passes 0/4 (diff=0.000000 at every step) in both non-tiled and tiled runs.
1 parent 55c91d0 commit 2d53fe2

1 file changed

Lines changed: 22 additions & 93 deletions

File tree

DeeployTest/testUtils/core/execution.py

Lines changed: 22 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -49,117 +49,49 @@ def generate_network(config: DeeployTestConfig, skip: bool = False) -> None:
4949

5050
script_dir = Path(__file__).parent.parent.parent
5151

52-
if config.training and config.tiling:
53-
# --- Tiled training: testMVPTraining.py (tiling pipeline + training init) ---
54-
generation_script = script_dir / "testMVPTraining.py"
55-
cmd = [
56-
sys.executable,
57-
str(generation_script),
58-
"-d",
59-
config.gen_dir,
60-
"-t",
61-
config.test_dir,
62-
"-p",
63-
config.platform,
64-
]
65-
if config.n_train_steps is not None:
66-
cmd.append(f"--n-steps={config.n_train_steps}")
67-
if config.n_accum_steps is not None:
68-
cmd.append(f"--n-accum={config.n_accum_steps}")
69-
if config.training_num_data_inputs is not None:
70-
cmd.append(f"--num-data-inputs={config.training_num_data_inputs}")
71-
if config.verbose > 0:
72-
cmd.append("-" + "v" * config.verbose)
73-
if config.debug:
74-
cmd.append("--debug")
75-
cmd.extend(config.gen_args)
76-
77-
log.debug(f"[Execution] Tiled training generation command: {' '.join(cmd)}")
78-
result = subprocess.run(cmd, check = False)
79-
if result.returncode != 0:
80-
raise RuntimeError(f"Tiled training network generation failed for {config.test_name}")
81-
82-
# Read back auto-detected values written by testMVPTraining.py
83-
meta_path = Path(config.gen_dir) / "training_meta.json"
84-
if meta_path.exists():
85-
with open(meta_path) as f:
86-
meta = json.load(f)
87-
config.n_train_steps = meta["n_train_steps"]
88-
config.n_accum_steps = meta["n_accum_steps"]
89-
config.training_num_data_inputs = meta["training_num_data_inputs"]
90-
log.info(f"[Execution] Training meta: {meta}")
91-
92-
# --- Step 2: Tiled optimizer network (SGD via testMVPOptimizer.py) ---
93-
opt_dir = _resolve_optimizer_dir(config)
94-
opt_script = script_dir / "testMVPOptimizer.py"
95-
96-
if not Path(opt_dir).exists():
97-
log.warning(f"Optimizer directory not found: {opt_dir} — skipping optimizer codegen")
98-
elif not opt_script.exists():
99-
log.warning(f"testMVPOptimizer.py not found — skipping optimizer codegen")
52+
if config.training:
53+
if config.tiling:
54+
training_script = script_dir / "testMVPTraining.py"
55+
optimizer_script = script_dir / "testMVPOptimizer.py"
56+
opt_passthrough = ("--cores", "--l1", "--l2", "--defaultMemLevel", "--memAllocStrategy",
57+
"--searchStrategy", "--plotMemAlloc", "--profileTiling")
58+
stage = "Tiled training"
10059
else:
101-
opt_cmd = [
102-
sys.executable,
103-
str(opt_script),
104-
"-d",
105-
config.gen_dir,
106-
"-t",
107-
opt_dir,
108-
"-p",
109-
config.platform,
110-
f"--training-dir={config.test_dir}",
111-
]
112-
_OPT_PASSTHROUGH = ("--cores", "--l1", "--l2", "--defaultMemLevel", "--memAllocStrategy",
113-
"--searchStrategy", "--plotMemAlloc", "--profileTiling")
114-
for arg in config.gen_args:
115-
if any(arg.startswith(p) for p in _OPT_PASSTHROUGH):
116-
opt_cmd.append(arg)
117-
# If no --defaultMemLevel was passed through, default to L2
118-
if not any(arg.startswith("--defaultMemLevel") for arg in opt_cmd):
119-
opt_cmd.append("--defaultMemLevel=L2")
120-
if config.verbose > 0:
121-
opt_cmd.append("-" + "v" * config.verbose)
60+
training_script = script_dir / "generateTrainingNetwork.py"
61+
optimizer_script = script_dir / "generateOptimizerNetwork.py"
62+
opt_passthrough = ("--cores", "--l1", "--l2", "--defaultMemLevel")
63+
stage = "Training"
12264

123-
log.debug(f"[Execution] Tiled optimizer generation command: {' '.join(opt_cmd)}")
124-
result = subprocess.run(opt_cmd, check = False)
125-
if result.returncode != 0:
126-
raise RuntimeError(f"Tiled optimizer network generation failed for {config.test_name}")
127-
128-
return # early return — tiled training path complete
129-
130-
elif config.training:
13165
# --- Step 1: Training network (forward + backward + accumulation) ---
132-
generation_script = script_dir / "generateTrainingNetwork.py"
13366
cmd = [
13467
sys.executable,
135-
str(generation_script),
68+
str(training_script),
13669
"-d",
13770
config.gen_dir,
13871
"-t",
13972
config.test_dir,
14073
"-p",
14174
config.platform,
14275
]
143-
# Only pass values when explicitly set; otherwise let the script auto-detect
76+
# Only pass values when explicitly set; otherwise let the script auto-detect.
14477
if config.n_train_steps is not None:
14578
cmd.append(f"--n-steps={config.n_train_steps}")
14679
if config.n_accum_steps is not None:
14780
cmd.append(f"--n-accum={config.n_accum_steps}")
14881
if config.training_num_data_inputs is not None:
14982
cmd.append(f"--num-data-inputs={config.training_num_data_inputs}")
150-
15183
if config.verbose > 0:
15284
cmd.append("-" + "v" * config.verbose)
15385
if config.debug:
15486
cmd.append("--debug")
15587
cmd.extend(config.gen_args)
15688

157-
log.debug(f"[Execution] Training generation command: {' '.join(cmd)}")
89+
log.debug(f"[Execution] {stage} generation command: {' '.join(cmd)}")
15890
result = subprocess.run(cmd, check = False)
15991
if result.returncode != 0:
160-
raise RuntimeError(f"Training network generation failed for {config.test_name}")
92+
raise RuntimeError(f"{stage} network generation failed for {config.test_name}")
16193

162-
# Read back auto-detected values written by generateTrainingNetwork.py
94+
# Read back auto-detected values written by the training generation script.
16395
meta_path = Path(config.gen_dir) / "training_meta.json"
16496
if meta_path.exists():
16597
with open(meta_path) as f:
@@ -171,16 +103,14 @@ def generate_network(config: DeeployTestConfig, skip: bool = False) -> None:
171103

172104
# --- Step 2: Optimizer network (SGD) ---
173105
opt_dir = _resolve_optimizer_dir(config)
174-
opt_script = script_dir / "generateOptimizerNetwork.py"
175-
176106
if not Path(opt_dir).exists():
177107
log.warning(f"Optimizer directory not found: {opt_dir} — skipping optimizer codegen")
178-
elif not opt_script.exists():
179-
log.warning(f"generateOptimizerNetwork.py not found — skipping optimizer codegen")
108+
elif not optimizer_script.exists():
109+
log.warning(f"{optimizer_script.name} not found — skipping optimizer codegen")
180110
else:
181111
opt_cmd = [
182112
sys.executable,
183-
str(opt_script),
113+
str(optimizer_script),
184114
"-d",
185115
config.gen_dir,
186116
"-t",
@@ -189,19 +119,18 @@ def generate_network(config: DeeployTestConfig, skip: bool = False) -> None:
189119
config.platform,
190120
f"--training-dir={config.test_dir}",
191121
]
192-
_OPT_PASSTHROUGH = ("--cores", "--l1", "--l2", "--defaultMemLevel")
193122
for arg in config.gen_args:
194-
if any(arg.startswith(p) for p in _OPT_PASSTHROUGH):
123+
if any(arg.startswith(p) for p in opt_passthrough):
195124
opt_cmd.append(arg)
196125
if not any(arg.startswith("--defaultMemLevel") for arg in opt_cmd):
197126
opt_cmd.append("--defaultMemLevel=L2")
198127
if config.verbose > 0:
199128
opt_cmd.append("-" + "v" * config.verbose)
200129

201-
log.debug(f"[Execution] Optimizer generation command: {' '.join(opt_cmd)}")
130+
log.debug(f"[Execution] {stage} optimizer generation command: {' '.join(opt_cmd)}")
202131
result = subprocess.run(opt_cmd, check = False)
203132
if result.returncode != 0:
204-
raise RuntimeError(f"Optimizer network generation failed for {config.test_name}")
133+
raise RuntimeError(f"{stage} optimizer network generation failed for {config.test_name}")
205134

206135
return # early return — training path complete
207136

0 commit comments

Comments (0)