Skip to content

Commit fe39d40

Browse files
committed
perf: add type identity fast-paths for str/list/tuple/dict in comparator
Move the 4 most common return-value types (str, list, tuple, dict) to `orig_type is T` identity checks at the top of the dispatch chain, before the frozenset lookup. A single pointer comparison is cheaper than a frozenset hash lookup, and these types need special handling anyway (temp-path normalization, recursive comparison, superset support). Before: dict traversed ~8 isinstance checks before being handled. After: dict is handled at check #3 via `orig_type is dict`. The isinstance fallbacks remain as slow paths for subclasses of these types and for other container types (deque, ChainMap, defaultdict, scipy dok_matrix, etc.). Backported from the codeflash-python dispatch ordering.
1 parent 5a5b6e4 commit fe39d40

1 file changed

Lines changed: 31 additions & 5 deletions

File tree

codeflash/verification/comparator.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -210,23 +210,49 @@ def comparator(orig: Any, new: Any, superset_obj: bool = False) -> bool:
210210
# distinct type objects are created at runtime, even if the class code is exactly the same, so we can only compare the names
211211
if orig_type.__name__ != type(new).__name__ or orig_type.__qualname__ != type(new).__qualname__:
212212
return False
213-
# Fast-path: O(1) frozenset lookup for common types (avoids isinstance MRO traversal)
214-
if orig_type in _IDENTITY_EQ_TYPES:
215-
return orig == new
213+
214+
# Fast-path: type identity checks for the most common return-value types.
215+
# `orig_type is T` is a single pointer comparison — cheaper than frozenset hash
216+
# lookup or isinstance MRO traversal — and these 4 types dominate real workloads.
217+
if orig_type is str:
218+
if orig == new:
219+
return True
220+
if _is_temp_path(orig) and _is_temp_path(new):
221+
return _normalize_temp_path(orig) == _normalize_temp_path(new)
222+
return False
223+
if orig_type is list or orig_type is tuple:
224+
if len(orig) != len(new):
225+
return False
226+
return all(comparator(elem1, elem2, superset_obj) for elem1, elem2 in zip(orig, new))
227+
if orig_type is dict:
228+
if superset_obj:
229+
return all(k in new and comparator(v, new[k], superset_obj) for k, v in orig.items())
230+
if len(orig) != len(new):
231+
return False
232+
for key in orig:
233+
if key not in new:
234+
return False
235+
if not comparator(orig[key], new[key], superset_obj):
236+
return False
237+
return True
216238
if orig_type is float:
217239
if math.isnan(orig) and math.isnan(new):
218240
return True
219241
return math.isclose(orig, new)
242+
# O(1) frozenset lookup for remaining common types (int, bool, None, Decimal, etc.)
243+
if orig_type in _IDENTITY_EQ_TYPES:
244+
return orig == new
245+
246+
# Slower isinstance path for list/tuple subclasses and for deque/ChainMap
220247
if isinstance(orig, (list, tuple, deque, ChainMap)):
221248
if len(orig) != len(new):
222249
return False
223250
return all(comparator(elem1, elem2, superset_obj) for elem1, elem2 in zip(orig, new))
224251

225-
# Handle strings separately to normalize temp paths
252+
# Handle string subclasses separately to normalize temp paths
226253
if isinstance(orig, str):
227254
if orig == new:
228255
return True
229-
# If strings differ, check if they're temp paths that differ only in session number
230256
if _is_temp_path(orig) and _is_temp_path(new):
231257
return _normalize_temp_path(orig) == _normalize_temp_path(new)
232258
return False

0 commit comments

Comments
 (0)