@@ -1132,26 +1132,42 @@ def add_macro(
11321132 macro : parser .Macro , instructions : dict [str , Instruction ], uops : dict [str , Uop ]
11331133) -> None :
11341134 parts : list [Part ] = []
1135- first = True
1135+ # True when the next uop is allowed to be a recording uop.
1136+ # Starts True (position 0 is always valid), stays True after a
1137+ # specializing (tier-1) uop because specializing uops only exist in
1138+ # Tier 1 and recording uops only exist in Tier 2 — they are orthogonal
1139+ # at runtime, so no conflict arises from that ordering.
1140+ valid_recording_pos = True
11361141 for part in macro .uops :
11371142 match part :
11381143 case parser .OpName ():
11391144 if part .name == "flush" :
11401145 parts .append (Flush ())
1146+ # A flush does not alter the recording-position state;
1147+ # treat it as transparent.
11411148 else :
11421149 if part .name not in uops :
11431150 raise analysis_error (
11441151 f"No Uop named { part .name } " , macro .tokens [0 ]
11451152 )
11461153 uop = uops [part .name ]
1147- if uop .properties .records_value and not first :
1154+ if uop .properties .records_value and not valid_recording_pos :
11481155 raise analysis_error (
1149- f"Recording uop { part .name } must be first in macro" ,
1156+ f"Recording uop { part .name } must be first in macro "
1157+ f"or immediately follow a specializing uop" ,
11501158 macro .tokens [0 ])
11511159 parts .append (uop )
1152- first = False
1160+ # A specializing uop (tier == 1) keeps the gate open so
1161+ # that a recording uop may follow it. Any other concrete
1162+ # uop closes the gate.
1163+ if uop .properties .tier == 1 :
1164+ valid_recording_pos = True
1165+ else :
1166+ valid_recording_pos = False
11531167 case parser .CacheEffect ():
11541168 parts .append (Skip (part .size ))
1169+ # Cache-entry skips are transparent to the recording-position
1170+ # state (they carry no runtime semantics).
11551171 case _:
11561172 assert False
11571173 assert parts
0 commit comments