|
| 1 | +import re |
| 2 | +from typing import List, Optional |
| 3 | + |
| 4 | +import lldb |
| 5 | + |
| 6 | +from .types.base import KOTLIN_CATEGORY |
| 7 | +from .types.summary import kotlin_objc_class_summary |
| 8 | +from .types.proxy import KonanObjcProxyTypeProvider |
| 9 | +from .cache import LLDBCache |
| 10 | +from .util.log import log |
| 11 | + |
# Matches the per-module Kotlin/Native initializer symbol, e.g.
# `_Konan_init_MyModule` (frameworks) or `_Konan_init_MyApp_kexe` (executables).
# Group 1 is the module name used to build the type-name specifier; the
# optional `_kexe` suffix is matched separately (group 2) so it is never part
# of the captured module name.
_KONAN_INIT_RE = re.compile(r'^_Konan_init_([0-9a-zA-Z_]+?)(_kexe)?$')

# A fixed Kotlin/Native runtime symbol present in every Kotlin binary. Used as a
# cheap exact-name probe (hashed symbol lookup) to decide whether a module is a
# Kotlin module before we iterate its full symbol table.
_KOTLIN_RUNTIME_MARKER = 'Kotlin_initRuntimeIfNeeded'

# ObjC RO-data symbol for the exported Kotlin base class. Its name field gives
# the ObjC class-name prefix (e.g. `SharedBase` -> `Shared`) used to format
# Kotlin objects bridged into Swift/ObjC.
_KOTLIN_BASE_OBJC_SYMBOL = '_OBJC_CLASS_RO_$_KotlinBase'

# Offset (in bytes) of the `name` pointer inside the ObjC class_ro_t struct.
# Valid for the 64-bit layout; all current Apple targets are 64-bit.
_OBJC_CLASS_RO_NAME_OFFSET = 6 * 4

# Module file paths under these roots are system/runtime modules that can never
# contain Kotlin/Native code (these also cover the dyld shared cache, whose
# modules report these paths). Skipping them is what avoids the launch-time
# slowdown: we never force LLDB to realize their symbol tables. Matched as a
# substring of the module's directory so simulator/device runtime roots (e.g.
# `.../RuntimeRoot/usr/lib`) are covered too. The `/usr/lib` marker has no
# trailing slash, so a directory that is exactly `/usr/lib` still matches.
_SYSTEM_PATH_MARKERS = ('/usr/lib', '/System/')

# Max stops to keep retrying the ObjC base-class read before giving up. The read
# can fail on the earliest stop(s), before dyld has rebased the class_ro_t name
# pointer; it succeeds once the module is fully mapped.
_MAX_REGISTER_ATTEMPTS = 16
| 43 | + |
| 44 | + |
def _module_key(module: lldb.SBModule) -> str:
    """Return an identity string for `module` that is stable across stops.

    The UUID string wins when LLDB reports one. Otherwise the module's full
    file path is used, so distinct UUID-less modules get distinct keys. An
    empty string is returned when neither is available."""
    uuid = module.GetUUIDString()
    if uuid:
        return uuid
    full_path = module.GetFileSpec().fullpath
    if full_path:
        return full_path
    return ''
| 51 | + |
| 52 | + |
def _is_candidate_module(module: lldb.SBModule) -> bool:
    """Return True unless the module lives under a known system path.

    Only the module's directory string is inspected, so this check does not
    touch the module's symbols. A missing directory is treated as an empty
    string, which matches no marker and therefore counts as a candidate."""
    module_dir = module.GetFileSpec().GetDirectory() or ''
    return not any(marker in module_dir for marker in _SYSTEM_PATH_MARKERS)
| 63 | + |
| 64 | + |
def _is_kotlin_module(module: lldb.SBModule) -> bool:
    """Return True when the module defines the Kotlin runtime marker symbol.

    Performs a single exact-name `FindSymbols` lookup and reports whether it
    produced at least one hit."""
    marker_hits = module.FindSymbols(_KOTLIN_RUNTIME_MARKER)
    return len(marker_hits) > 0
| 68 | + |
| 69 | + |
def _kotlin_module_names(module: lldb.SBModule) -> List[str]:
    """Return the Kotlin module names encoded in `_Konan_init_*` symbols.

    Walks every symbol of `module`, keeps those whose name matches
    `_KONAN_INIT_RE`, and returns the captured module names in symbol order.
    Unnamed symbols and the `stdlib` module are skipped."""
    symbol_names = (symbol.name for symbol in module.symbols)
    matches = (
        _KONAN_INIT_RE.match(symbol_name)
        for symbol_name in symbol_names
        if symbol_name is not None
    )
    return [
        found.group(1)
        for found in matches
        if found is not None and found.group(1) != 'stdlib'
    ]
| 85 | + |
| 86 | + |
def _read_objc_class_prefix(
    target: lldb.SBTarget,
    process: lldb.SBProcess,
    base_symbols: lldb.SBSymbolContextList,
) -> Optional[str]:
    """Read the exported Kotlin ObjC base class name and strip the `Base` suffix.

    For each symbol context in `base_symbols`, resolves the symbol's load
    address, reads the `name` pointer at `_OBJC_CLASS_RO_NAME_OFFSET`, and reads
    the C string it points to. The first name read successfully decides the
    result.

    Returns:
        The class name without a trailing `Base`, or None when no name could be
        read yet (symbol not loaded, memory not readable, e.g. not rebased on
        the earliest stop) or when stripping leaves an empty prefix (which
        would produce a `^` regex matching every type). None signals the
        caller to retry or skip."""
    suffix = 'Base'
    for symbol_context in base_symbols:
        symbol_addr = symbol_context.symbol.addr.GetLoadAddress(target)
        if symbol_addr == lldb.LLDB_INVALID_ADDRESS:
            # The symbol's section is not loaded in this target yet. Adding the
            # offset to the invalid-address sentinel would exceed the addr_t
            # range and make the read call raise instead of letting us retry.
            continue
        error = lldb.SBError()
        name_addr = process.ReadPointerFromMemory(symbol_addr + _OBJC_CLASS_RO_NAME_OFFSET, error)
        if not error.success or not name_addr:
            # Unreadable memory or a null name pointer: nothing to read here.
            continue
        base_class_name = process.ReadCStringFromMemory(name_addr, 128, error)
        if not error.success or not base_class_name:
            continue
        # Equivalent to str.removesuffix('Base'), but also works on Python
        # versions older than 3.9.
        if base_class_name.endswith(suffix):
            prefix = base_class_name[:-len(suffix)]
        else:
            prefix = base_class_name
        return prefix or None
    return None
| 109 | + |
| 110 | + |
def _register_specifiers(target: lldb.SBTarget, specifiers: List[lldb.SBTypeNameSpecifier]):
    """Attach the Kotlin summary and synthetic providers to each specifier.

    Every specifier in `specifiers` gets, in the `KOTLIN_CATEGORY` category of
    the target's debugger, a function-based type summary (with the
    hide-value option) and a class-based synthetic children provider."""
    category = target.debugger.GetCategory(KOTLIN_CATEGORY)
    # Fully qualified Python names that LLDB resolves when formatting a value.
    summary_function = '{}.{}'.format(
        kotlin_objc_class_summary.__module__, kotlin_objc_class_summary.__name__)
    synthetic_class = '{}.{}'.format(
        KonanObjcProxyTypeProvider.__module__, KonanObjcProxyTypeProvider.__name__)

    for spec in specifiers:
        summary = lldb.SBTypeSummary.CreateWithFunctionName(
            summary_function, lldb.eTypeOptionHideValue)
        category.AddTypeSummary(spec, summary)
        synthetic = lldb.SBTypeSynthetic.CreateWithClassName(synthetic_class)
        category.AddTypeSynthetic(spec, synthetic)
| 127 | + |
| 128 | + |
def _finalize(cache: 'LLDBCache', key: str):
    """Record `key` as fully handled so it is neither scanned nor retried.

    Drops any pending entry for the key and adds the key to the set of
    registered module keys."""
    if key in cache.pending:
        del cache.pending[key]
    cache.registered_module_keys.add(key)
| 133 | + |
| 134 | + |
def _classify_module(cache: 'LLDBCache', module: lldb.SBModule, key: str):
    """Decide what to do with a module seen for the first time.

    Checks run cheapest first: the path-based candidate test, then the Kotlin
    runtime marker probe, then the scan for module names. A module that fails
    any of them is recorded in `cache.registered_module_keys` and never looked
    at again. A module that passes all of them gets an entry in
    `cache.pending`, to be registered on this and later stops."""
    # Short-circuit order matters: the symbol probe only runs for candidates.
    if not (_is_candidate_module(module) and _is_kotlin_module(module)):
        cache.registered_module_keys.add(key)
        return

    module_names = _kotlin_module_names(module)
    if module_names:
        cache.pending[key] = dict(
            module=module,
            names=module_names,
            names_registered=False,
            attempts=0,
        )
        return

    # Has the Kotlin runtime marker but exposes no module init symbols
    # (e.g. only stdlib): nothing to register.
    log(lambda: 'Kotlin marker present but no module names for {}; skipping.'.format(
        module.GetFileSpec().GetFilename()))
    cache.registered_module_keys.add(key)
| 163 | + |
| 164 | + |
def _attempt_register(target: lldb.SBTarget, process: lldb.SBProcess, cache: 'LLDBCache', key: str):
    """Run one registration attempt for a pending Kotlin module.

    Registers the `^<module>\\.` specifiers once (on the first attempt), then
    tries to read the ObjC base-class prefix and register `^<prefix>`. The
    module is finalized when any of these holds:

    * it has no `_KOTLIN_BASE_OBJC_SYMBOL` symbol at all,
    * the prefix was read and registered,
    * `_MAX_REGISTER_ATTEMPTS` attempts have been made.

    Otherwise it stays in `cache.pending` and is retried on a later stop. Does
    nothing when `key` is not pending."""
    entry = cache.pending.get(key)
    if entry is None:
        return
    entry['attempts'] += 1
    module = entry['module']

    if not entry['names_registered']:
        _register_specifiers(target, [
            lldb.SBTypeNameSpecifier('^{}\\.'.format(name), lldb.eMatchTypeRegex)
            for name in entry['names']
        ])
        entry['names_registered'] = True

    base_symbols = module.FindSymbols(_KOTLIN_BASE_OBJC_SYMBOL)
    # Test the size explicitly: truthiness of an SB list object is not a
    # reliable emptiness check (the bindings can map it to IsValid(), which is
    # true even for an empty result). Without this, a module with no base class
    # would be retried until the attempt cap and logged as a failure.
    if len(base_symbols) == 0:
        # No exported ObjC base class in this module; module-name formatters are
        # all there is to register.
        _finalize(cache, key)
        log(lambda: 'Registered Kotlin formatters for {} (modules={}, no ObjC base class).'.format(
            module.GetFileSpec().GetFilename(), entry['names']))
        return

    objc_class_prefix = _read_objc_class_prefix(target, process, base_symbols)
    if objc_class_prefix:
        _register_specifiers(target, [
            lldb.SBTypeNameSpecifier('^{}'.format(objc_class_prefix), lldb.eMatchTypeRegex)
        ])
        _finalize(cache, key)
        log(lambda: 'Registered Kotlin formatters for {} (modules={}, objc_prefix={}).'.format(
            module.GetFileSpec().GetFilename(), entry['names'], objc_class_prefix))
        return

    if entry['attempts'] >= _MAX_REGISTER_ATTEMPTS:
        # Give up on the ObjC prefix; module-name formatters stay registered.
        _finalize(cache, key)
        log(lambda: 'Gave up reading ObjC prefix for {} after {} attempts; '
                    '^<prefix> formatting unavailable.'.format(
                        module.GetFileSpec().GetFilename(), entry['attempts']))
        return
    # Otherwise leave it pending and retry on a later stop.
| 211 | + |
| 212 | + |
def scan_and_register_modules(execution_context: lldb.SBExecutionContext):
    """Lazily register Kotlin type formatters for any loaded Kotlin module.

    Side effect only — never influences whether the process stops. Replaces the
    old global `_Konan_init_*` regex breakpoint, which forced LLDB to realize the
    symbol table of every loaded module at launch (the slow app launch under the
    debugger).

    When the target's module count equals the count seen on the previous call,
    no module is classified. When it differs, every module whose key is neither
    registered nor pending is classified. After that, each pending module gets
    one registration attempt. Returns immediately if the target or its process
    is invalid."""
    target = execution_context.target
    if not target.IsValid():
        return
    process = target.GetProcess()
    if not process.IsValid():
        return

    cache = LLDBCache.instance()
    module_count = target.GetNumModules()

    if cache.last_scanned_module_count != module_count:
        cache.last_scanned_module_count = module_count
        for index in range(module_count):
            module = target.GetModuleAtIndex(index)
            key = _module_key(module)
            if key not in cache.registered_module_keys and key not in cache.pending:
                _classify_module(cache, module, key)

    # Attempt (and retry) registration for everything pending. A snapshot of the
    # keys is taken because an attempt may remove its entry from `pending`.
    # Iterating `pending` keeps the retry window O(pending), never O(all modules).
    for pending_key in list(cache.pending):
        _attempt_register(target, process, cache, pending_key)
0 commit comments