perf: add type identity fast-paths for str/list/tuple/dict in comparator

KRRT7 · KRRT7 · commit fe39d40e1bec · 2026-04-10T01:25:05.000-05:00
Move the four most common return-value types (str, list, tuple, dict) to `orig_type is T` identity checks at the top of the dispatch chain, before the frozenset lookup. A single pointer comparison is cheaper than a frozenset hash lookup, and these types need special handling anyway (temp-path normalization for str, recursive element comparison for list/tuple, superset support for dict). Before: a dict passed through ~8 isinstance checks before being handled. After: a dict is handled at check #3 via `orig_type is dict`. The isinstance fallbacks remain as slow paths for subclasses and for other container types that are not exact matches (deque, ChainMap, defaultdict, scipy dok_matrix, etc.). Backported from codeflash-python dispatch ordering.
diff --git a/codeflash/verification/comparator.py b/codeflash/verification/comparator.py
@@ -210,23 +210,49 @@ def comparator(orig: Any, new: Any, superset_obj: bool = False) -> bool:
             # distinct type objects are created at runtime, even if the class code is exactly the same, so we can only compare the names
             if orig_type.__name__ != type(new).__name__ or orig_type.__qualname__ != type(new).__qualname__:
                 return False
-        # Fast-path: O(1) frozenset lookup for common types (avoids isinstance MRO traversal)
-        if orig_type in _IDENTITY_EQ_TYPES:
-            return orig == new
+
+        # Fast-path: type identity checks for the most common return-value types.
+        # `orig_type is T` is a single pointer comparison — cheaper than frozenset hash
+        # lookup or isinstance MRO traversal — and these 4 types dominate real workloads.
+        if orig_type is str:
+            if orig == new:
+                return True
+            if _is_temp_path(orig) and _is_temp_path(new):
+                return _normalize_temp_path(orig) == _normalize_temp_path(new)
+            return False
+        if orig_type is list or orig_type is tuple:
+            if len(orig) != len(new):
+                return False
+            return all(comparator(elem1, elem2, superset_obj) for elem1, elem2 in zip(orig, new))
+        if orig_type is dict:
+            if superset_obj:
+                return all(k in new and comparator(v, new[k], superset_obj) for k, v in orig.items())
+            if len(orig) != len(new):
+                return False
+            for key in orig:
+                if key not in new:
+                    return False
+                if not comparator(orig[key], new[key], superset_obj):
+                    return False
+            return True
         if orig_type is float:
             if math.isnan(orig) and math.isnan(new):
                 return True
             return math.isclose(orig, new)
+        # O(1) frozenset lookup for remaining common types (int, bool, None, Decimal, etc.)
+        if orig_type in _IDENTITY_EQ_TYPES:
+            return orig == new
+
+        # Slower isinstance path for list/tuple subclasses and other sequence-like containers (deque, ChainMap, etc.)
         if isinstance(orig, (list, tuple, deque, ChainMap)):
             if len(orig) != len(new):
                 return False
             return all(comparator(elem1, elem2, superset_obj) for elem1, elem2 in zip(orig, new))
 
-        # Handle strings separately to normalize temp paths
+        # Handle string subclasses separately to normalize temp paths
         if isinstance(orig, str):
             if orig == new:
                 return True
-            # If strings differ, check if they're temp paths that differ only in session number
             if _is_temp_path(orig) and _is_temp_path(new):
                 return _normalize_temp_path(orig) == _normalize_temp_path(new)
             return False