@@ -1132,25 +1132,47 @@ def add_macro(
11321132 macro : parser .Macro , instructions : dict [str , Instruction ], uops : dict [str , Uop ]
11331133) -> None :
11341134 parts : list [Part ] = []
1135- first = True
1135+ # Counts only real OpName entries (not CacheEffect/flush) so we
1136+ # know the exact position of each concrete uop inside the macro.
1137+ # CacheEffect → becomes Skip; flush → becomes Flush.
1138+ # Neither increments uop_index because neither is a "real" uop.
1139+ uop_index = 0
11361140 for part in macro .uops :
11371141 match part :
11381142 case parser .OpName ():
11391143 if part .name == "flush" :
1144+ # flush is structural, not a real uop; leave uop_index alone.
11401145 parts .append (Flush ())
11411146 else :
11421147 if part .name not in uops :
11431148 raise analysis_error (
11441149 f"No Uop named { part .name } " , macro .tokens [0 ]
11451150 )
11461151 uop = uops [part .name ]
1147- if uop .properties .records_value and not first :
1148- raise analysis_error (
1149- f"Recording uop { part .name } must be first in macro" ,
1150- macro .tokens [0 ])
1152+ if uop .properties .records_value :
1153+ # A recording uop is legal in exactly two positions:
1154+ # 1. It is the very first real uop (uop_index == 0).
1155+ # 2. It is at index 1 AND the last entry appended to `parts`
1156+ # is a specializing uop ("_SPECIALIZE_" name prefix).
1157+ # NOTE(review): an intervening Skip/Flush fails this check.
1158+ # (Specializing uops are Tier-1-only; recording
1159+ # uops are Tier-2-only — they are orthogonal at
1160+ # runtime, so this ordering is safe.)
1161+ preceding_is_specializing = (
1162+ uop_index == 1
1163+ and isinstance (parts [- 1 ], Uop )
1164+ and parts [- 1 ].name .startswith ("_SPECIALIZE_" )
1165+ )
1166+ if uop_index != 0 and not preceding_is_specializing :
1167+ raise analysis_error (
1168+ f"Recording uop { part .name } must be first in macro "
1169+ f"or immediately follow a specializing uop" ,
1170+ macro .tokens [0 ])
11511171 parts .append (uop )
1152- first = False
1172+ uop_index += 1
11531173 case parser .CacheEffect ():
1174+ # Cache-entry skips are structural; they do not occupy a uop
1175+ # slot, so uop_index is not incremented.
11541176 parts .append (Skip (part .size ))
11551177 case _:
11561178 assert False
0 commit comments