@@ -1076,6 +1076,25 @@ class Function : public Type {
10761076
10771077 This transformation just figures out what the dotting sequences are.
10781078 */
/*
Decode the co_names index carried by a bytecode instruction's argument.

For most opcodes the argument is the name index itself. Newer interpreters
pack a flag into the low bit of the argument for some opcodes, so the index
has to be recovered by shifting right by one:

    Python 3.11:  LOAD_GLOBAL encodes (namei << 1 | push_null);
                  LOAD_ATTR is NOT shifted.
    Python 3.12+: LOAD_GLOBAL as above, and LOAD_ATTR encodes
                  (namei << 1 | is_method).

opcode: the instruction's opcode byte.
arg:    the instruction's raw argument.

Returns the index into co_names. Opcodes that don't pack a flag bit (and all
opcodes on interpreters older than 3.11) get 'arg' back unchanged.

NOTE(review): the numeric opcode values below are the ones this file already
uses for 3.11/3.12. Opcode numbering is not guaranteed to be stable across
CPython releases -- confirm them before building against a newer interpreter.
*/
static int decodeNameIndex(uint8_t opcode, int arg) {
    #if PY_VERSION_HEX >= 0x030c0000
        // Python 3.12+: both LOAD_GLOBAL and LOAD_ATTR carry a flag in bit 0
        const uint8_t LOAD_ATTR = 106;
        const uint8_t LOAD_GLOBAL = 116;
        const bool argCarriesFlagBit = (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR);
    #elif PY_VERSION_HEX >= 0x030b0000
        // Python 3.11: only LOAD_GLOBAL carries a flag in bit 0
        const uint8_t LOAD_GLOBAL = 116;
        const bool argCarriesFlagBit = (opcode == LOAD_GLOBAL);
    #else
        // Older interpreters: the argument is always the plain name index
        (void)opcode;
        const bool argCarriesFlagBit = false;
    #endif

    // drop the flag bit, if there is one, to recover the name index
    return argCarriesFlagBit ? (arg >> 1) : arg;
}
1097+
10791098 static void extractDottedGlobalAccessesFromCode (PyCodeObject* code, std::vector<std::vector<PyObject*> >& outSequences) {
10801099 uint8_t * bytes;
10811100 Py_ssize_t bytecount;
@@ -1090,40 +1109,42 @@ class Function : public Type {
10901109 PyObject* names = PyCompat::codeGetNames (code);
10911110 PyCompat::NewRefIf311 namesGuard (names);
10921111
1093- long opcodeCount = bytecount / 2 ;
1094-
1095- // opcodes are encoded in the low byte
1096- auto opcodeFor = [&](int i) { return bytes[i * 2 ]; };
1112+ long wordCount = bytecount / 2 ;
10971113
1098- // opcode targets are encoded in the high byte
1099- auto opcodeTargetFor = [&](int i) { return bytes[i * 2 + 1 ]; };
1114+ // Each instruction word: low byte = opcode, high byte = arg
1115+ auto opcodeFor = [&](long i) -> uint8_t { return bytes[i * 2 ]; };
1116+ auto rawArgFor = [&](long i) -> uint8_t { return bytes[i * 2 + 1 ]; };
11001117
1118+ const uint8_t CACHE = 0 ;
11011119 const uint8_t LOAD_ATTR = 106 ;
11021120 const uint8_t LOAD_GLOBAL = 116 ;
11031121 const uint8_t DELETE_GLOBAL = 98 ;
11041122 const uint8_t STORE_GLOBAL = 97 ;
11051123 const uint8_t LOAD_METHOD = 160 ;
11061124
1107-
11081125 std::vector<PyObject*> curDotSequence;
1109- for (long ix = 0 ; ix < opcodeCount; ix++) {
1126+ for (long ix = 0 ; ix < wordCount; ix++) {
1127+ uint8_t op = opcodeFor (ix);
1128+
1129+ // Skip CACHE entries (opcode 0) introduced in Python 3.11+
1130+ if (op == CACHE) continue ;
1131+
1132+ int nameIdx = decodeNameIndex (op, rawArgFor (ix));
1133+
11101134 // if we're loading an attr on an existing sequence, just make it bigger
1111- if ((opcodeFor (ix) == LOAD_ATTR || opcodeFor (ix) == LOAD_METHOD) && curDotSequence.size ()) {
1112- curDotSequence.push_back (PyTuple_GetItem (names, opcodeTargetFor (ix) ));
1135+ if ((op == LOAD_ATTR || op == LOAD_METHOD) && curDotSequence.size ()) {
1136+ curDotSequence.push_back (PyTuple_GetItem (names, nameIdx ));
11131137 } else if (curDotSequence.size ()) {
11141138 // any other operation should flush the buffer
11151139 outSequences.push_back (curDotSequence);
11161140 curDotSequence.clear ();
11171141 }
11181142
11191143 // if we're loading a global, we start a new sequence
1120- if (opcodeFor (ix) == LOAD_GLOBAL) {
1121- curDotSequence.push_back (PyTuple_GetItem (names, opcodeTargetFor (ix)));
1122- } else if (
1123- opcodeFor (ix) == STORE_GLOBAL
1124- || opcodeFor (ix) == DELETE_GLOBAL
1125- ) {
1126- outSequences.push_back ({PyTuple_GetItem (names, opcodeTargetFor (ix))});
1144+ if (op == LOAD_GLOBAL) {
1145+ curDotSequence.push_back (PyTuple_GetItem (names, nameIdx));
1146+ } else if (op == STORE_GLOBAL || op == DELETE_GLOBAL) {
1147+ outSequences.push_back ({PyTuple_GetItem (names, nameIdx)});
11271148 }
11281149 }
11291150
@@ -1153,31 +1174,23 @@ class Function : public Type {
11531174 PyObject* names = PyCompat::codeGetNames (code);
11541175 PyCompat::NewRefIf311 namesGuard (names);
11551176
1156- long opcodeCount = bytecount / 2 ;
1157-
1158- // opcodes are encoded in the low byte
1159- auto opcodeFor = [&](int i) { return bytes[i * 2 ]; };
1177+ long wordCount = bytecount / 2 ;
11601178
1161- // opcode targets are encoded in the high byte
1162- auto opcodeTargetFor = [&](int i) { return bytes[i * 2 + 1 ]; };
1179+ auto opcodeFor = [&]( long i) -> uint8_t { return bytes[i * 2 ]; };
1180+ auto rawArgFor = [&](long i) -> uint8_t { return bytes[i * 2 + 1 ]; };
11631181
1182+ const uint8_t CACHE = 0 ;
11641183 const uint8_t LOAD_GLOBAL = 116 ;
11651184 const uint8_t DELETE_GLOBAL = 98 ;
11661185 const uint8_t STORE_GLOBAL = 97 ;
11671186
1168- for (long ix = 0 ; ix < opcodeCount; ix++) {
1169- // if we're loading a global, we start a new sequence
1170- if (opcodeFor (ix) == LOAD_GLOBAL) {
1171- PyObject* name = PyTuple_GetItem (names, opcodeTargetFor (ix));
1172- if (!PyUnicode_Check (name)) {
1173- throw std::runtime_error (" Function had a non-string object in co_names" );
1174- }
1175- outAccesses.insert (PyUnicode_AsUTF8 (name));
1176- } else if (
1177- opcodeFor (ix) == STORE_GLOBAL
1178- || opcodeFor (ix) == DELETE_GLOBAL
1179- ) {
1180- PyObject* name = PyTuple_GetItem (names, opcodeTargetFor (ix));
1187+ for (long ix = 0 ; ix < wordCount; ix++) {
1188+ uint8_t op = opcodeFor (ix);
1189+ if (op == CACHE) continue ;
1190+
1191+ if (op == LOAD_GLOBAL || op == STORE_GLOBAL || op == DELETE_GLOBAL) {
1192+ int nameIdx = decodeNameIndex (op, rawArgFor (ix));
1193+ PyObject* name = PyTuple_GetItem (names, nameIdx);
11811194 if (!PyUnicode_Check (name)) {
11821195 throw std::runtime_error (" Function had a non-string object in co_names" );
11831196 }
0 commit comments