Skip to content

Commit 833b796

Browse files
hackwaanurag
andauthored
sync internal (#6)
* sync internal Signed-off-by: anurag <anurag@amd.com> * fix elf Signed-off-by: anurag <anurag@amd.com> --------- Signed-off-by: anurag <anurag@amd.com> Co-authored-by: anurag <anurag@amd.com>
1 parent 414f3e1 commit 833b796

9 files changed

Lines changed: 70 additions & 19 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ src/cpp/build
1414
AIEDebugLibrary.dll
1515
*.lst
1616
*.Identifier
17+
ext/tests

src/mldebug/aie_util.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -243,12 +243,16 @@ def _filter_tiles(self, tile_type):
243243

244244
def read_control_instr(self):
245245
"""
246-
Read and return the value of the control instruction from the memory tile spare register.
246+
Read and return the value of the SPARE_REG control instruction from all memory tiles.
247247
248248
Returns:
249-
int: Value from the SPARE_REG of memory tile (col=0, row=1).
249+
dict[str, int]: Mapping of "MEM_TILE_{col}" to the SPARE_REG value for each memory tile.
250250
"""
251-
return self.impl.read_register(0, 1, self.aie_iface.Memory_tile_registers["SPARE_REG"])
251+
spare_reg = self.aie_iface.Memory_tile_registers["SPARE_REG"]
252+
return {
253+
f"MEM_TILE_{c}": self.impl.read_register(c, r, spare_reg)
254+
for c, r in self._filter_tiles(self.aie_iface.MEM_TILE_T)
255+
}
252256

253257
def initialize_stamp(self):
254258
"""

src/mldebug/batch_runner.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
InteractiveController builds on this for interactive stepping.
1010
"""
1111

12+
import dataclasses
13+
import json
1214
import pathlib
1315
import sys
1416
import time
@@ -242,13 +244,14 @@ def schedule_layer_start(self, next_layer):
242244
be.continue_aie()
243245

244246
# Poll stamps until breakpoint is hit
245-
max_attempts = 1200
246-
while max_attempts > 0:
247-
if all(be.poll_core_status() for be in bes_to_poll):
248-
break
247+
timeout = 10
248+
start_time = time.time()
249+
while time.time() - start_time < timeout:
249250
if self.args.backend == "test":
250251
break
251-
max_attempts -= 1
252+
time.sleep(0.1)
253+
if all(be.poll_core_status() for be in bes_to_poll):
254+
break
252255

253256
# When combo events are used, it takes a few cycles to
254257
# hit the breakpoint, so pc might have moved
@@ -298,6 +301,7 @@ def _process_err(self):
298301
self.status_handle.get(p + "/" + "aie_status_error.txt")
299302
else:
300303
self.status_handle.get("aie_status_error.txt")
304+
self._write_run_summary("FAIL")
301305
sys.exit(1)
302306

303307
def _process_end_breakpoint(self, layer, it, sid):
@@ -341,6 +345,7 @@ def _process_start_breakpoint(self, layer, it, sid=0):
341345

342346
if self.args.exit_at_layer and layer.layer_order >= self.args.exit_at_layer:
343347
LOGGER.log(f"[INFO] Exiting debugger at Layer: {layer.layer_order}")
348+
self._write_run_summary("SUCCESS")
344349
sys.exit(0)
345350

346351
if self.args.run_flags.layer_status and first_it:
@@ -470,6 +475,7 @@ def execute_and_dump(self):
470475
self.impls[sid].continue_aie()
471476
LOGGER.log("\nFinished Execution")
472477
self._handle_fsp()
478+
self._write_run_summary("SUCCESS")
473479

474480
def _handle_fsp(self):
475481
"""Handle end-of-run logic for VAIML Failsafe Partition mode."""
@@ -487,3 +493,17 @@ def _handle_fsp(self):
487493
"to load the next Failsafe Partition and wait for "
488494
"`waiting for user input`. Then press Enter here."
489495
)
496+
497+
def _write_run_summary(self, status):
498+
"""
499+
Record run state to run_summary.json
500+
"""
501+
rsf = self.args.top_output_dir + "/run_summary.json"
502+
flags_dict = dataclasses.asdict(self.args.run_flags)
503+
summary = {"status": status, "run_flags": flags_dict}
504+
505+
try:
506+
with open(rsf, "w", encoding="utf-8") as fh:
507+
json.dump(summary, fh, indent=2, default=str)
508+
except (IOError, OSError) as e:
509+
print(f"Unable to write run summary file. {e}")

src/mldebug/client_debug.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,19 @@ def read_all_core_pc(self):
263263
print(f"\n=== Stamp {sid} Core PC ===")
264264
impl.read_all_core_pc()
265265

266+
def read_control_instr(self):
267+
"""
268+
Read the SPARE_REG control instruction from all memory tiles across all stamps.
269+
270+
Returns:
271+
dict[str, int]: Merged mapping of "MEM_TILE_{col}" to SPARE_REG value, aggregated
272+
from each per-stamp AIEUtil. Stamps own disjoint columns, so keys do not collide.
273+
"""
274+
result = {}
275+
for utl in self.aie_utls:
276+
result.update(utl.read_control_instr())
277+
return result
278+
266279
#
267280
# START Advanced Mode Specific functionality
268281
#

src/mldebug/input_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class RunFlags:
3636
mock_hang: bool
3737
dump_temps: bool
3838
multistamp: bool
39-
enable_tg: bool
39+
disable_tg: bool
4040

4141

4242
@dataclass
@@ -121,7 +121,7 @@ def get_flag(s, default=False):
121121
get_flag("mock_hang"),
122122
get_flag("dump_temps"),
123123
get_flag("multistamp"),
124-
get_flag("enable_tg", default=True)
124+
get_flag("disable_tg")
125125
)
126126

127127

src/mldebug/interactive_prompt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def _build_shell_namespace(self):
111111
rreg = h.impl.read_register
112112
preg = h.impl.print_register
113113
wreg = h.impl.write_register
114-
control_instr = h.aie_utls[0].read_control_instr
114+
control_instr = h.read_control_instr
115115
add_brkpt = h.add_breakpoint
116116
status = h.status_handle.get
117117
uc_status = h.status_handle.get_uc_status

src/mldebug/layer_info.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ def __init__(self, args):
466466
self.mladf_report = None
467467

468468
has_bi = args.buffer_info and Path(args.buffer_info).is_file()
469-
use_mladf = args.mladf_report and Path(args.mladf_report).is_file() and args.run_flags.enable_tg
469+
use_mladf = args.mladf_report and Path(args.mladf_report).is_file() and not args.run_flags.disable_tg
470470
data = None
471471
# 1. Parse the buffer info to get Layout
472472
if has_bi:
@@ -877,14 +877,18 @@ def _initialize_layers_from_workdir(self, args):
877877
stamp.end_pc = f.final_lock_release_pc
878878

879879
# Under right conditions, we don't even go through iterations
880-
if args.run_flags.skip_iter and args.run_flags.enable_tg:
880+
if args.run_flags.skip_iter:
881881
for idx, layer in enumerate(self.layers):
882882
if idx >= len(self.layers) - 1:
883883
layer.lcp.num_iter = 1
884884
break
885885
next_layer_stamps = self.layers[idx+1].stamps
886-
if (layer.stamps[0].name != next_layer_stamps[0].name
887-
and len(layer.stamps) == len(next_layer_stamps)
888-
and all(layer.stamps[i].elf_name == next_layer_stamps[i].elf_name for i in range(len(layer.stamps)))
889-
):
886+
if args.run_flags.multistamp:
887+
if (layer.stamps[0].name != next_layer_stamps[0].name
888+
and len(layer.stamps) == len(next_layer_stamps)
889+
and all(layer.stamps[i].elf_name == next_layer_stamps[i].elf_name for i in range(len(layer.stamps)))
890+
):
891+
layer.lcp.num_iter = 1
892+
elif (layer.stamps[0].name != next_layer_stamps[0].name
893+
and layer.stamps[0].elf_name == next_layer_stamps[0].elf_name ):
890894
layer.lcp.num_iter = 1

src/mldebug/memory_dumper.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ def get_output_path(self, buffer=None, col=None, row=None, layer_order=None, bat
7373
self._dir_cache.add(p)
7474
return p
7575

76+
def get_base_output_dir(self):
77+
"""
78+
Get the base output directory. Used by the run summary.
79+
"""
80+
return self.output_dir
81+
7682
def write_data_to_file(self, data, fname):
7783
"""
7884
Write an array of data to file in text or binary format.

src/mldebug/mldebug_cli.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,14 @@ def debug(args, timestamp, subgraph_name=None, fsp="0", folder_name=None):
116116
print(f"Debugging New Failsafe Partition: {fsp}\n")
117117
output_dir = f"{folder_name}_{timestamp}/{subgraph_name}/{fsp}"
118118
args.subgraph_name = subgraph_name
119+
args.top_output_dir = f"{folder_name}_{timestamp}"
119120
else:
120121
output_dir = f"output_{time.strftime('%m%d%H%M%S')}"
122+
args.top_output_dir = output_dir
121123

122124
if args.output_dir is not None:
123125
output_dir = args.output_dir + "/" + output_dir
126+
args.top_output_dir = args.output_dir + "/" + args.top_output_dir
124127
launch_debug(args, output_dir)
125128

126129

@@ -329,7 +332,7 @@ def app():
329332
"skip_iter",
330333
"dump_temps",
331334
"multistamp",
332-
"enable_tg"
335+
"disable_tg"
333336
],
334337
help="Specify one or more runtime flags:\n"
335338
"skip_dump : Do not dump memory\n"
@@ -341,7 +344,7 @@ def app():
341344
"skip_iter : Skip iterations in batch mode when possible\n"
342345
#"dump_temps : Write intermediate (.lst) files to disk\n"
343346
"multistamp : Enable N Stamp/Batch mode\n",
344-
#"enable_tg : Enable Step to TG layers\n",
347+
#"disable_tg : Disable Step to TG layers\n",
345348
# 'mock_hang' : Simulate hang at one of the layers in test mode
346349
metavar="<flag1> <flag2>",
347350
)

0 commit comments

Comments
 (0)