|
| 1 | +"""Contains the class definitions outlining the schema of the test data. For LLDB conversion |
| 2 | +from/into these types, see `./from_lldb.py`""" |
| 3 | + |
| 4 | +import enum |
| 5 | +import json |
| 6 | +import os |
| 7 | +from dataclasses import asdict, dataclass, field, fields, is_dataclass |
| 8 | +from types import NoneType |
| 9 | +from typing import Any, Optional, get_origin, Final |
| 10 | + |
# `char` is a plain alias of `str`; it only exists to make `Primitive` read more clearly.
char = str
# The scalar kinds a `Variable.value` / `Child.value` may hold.
Primitive = int | float | bool | char
# Alias of `int` used to annotate sizes and offsets (presumably measured in bytes, going by the
# name -- confirm against the code that populates these fields).
ByteSize = int

# see: default json decoder docs https://docs.python.org/3/library/json.html#json.JSONDecoder
# The types we're dealing with can only be: int, str, float, list, dict, bool, and None
# The string forward references make the alias recursive (lists/dicts of `JsonType`).
JsonType = int | str | float | list["JsonType"] | bool | None | dict[str, "JsonType"]
| 18 | + |
| 19 | + |
class Target(enum.Enum):
    """Identifies which flavour of expected test data applies to the current run.

    PDB and DWARF debug info differ, so their output cannot be guaranteed to be identical. LLDB
    can handle both, which means the correct test data has to be selected conditionally.

    Some structs also differ internally depending on the OS (e.g. `PathBuf`/`OsString`), so we
    additionally need to know whether we're on Windows or not.

    A global var `TARGET` is set to the current variant upon `lldb_test.py`'s instantiation using
    an env var passed from `compiletest` and is not expected to change afterwards.
    """

    NonWindows = "non_windows"
    WindowsGnu = "windows_gnu"
    WindowsMsvc = "windows_msvc"
| 34 | + |
| 35 | + |
def get_target() -> Target:
    """Map the target triple in `LLDB_BATCHMODE_TARGET_TRIPLE` to a `Target` variant.

    Triples ending in `windows-msvc` map to `Target.WindowsMsvc`; triples ending in
    `windows-gnu` or `windows-gnullvm` map to `Target.WindowsGnu`; everything else is
    `Target.NonWindows`. Raises `KeyError` if the env var is not set.
    """

    # set by compiletest when launching LLDB
    triple: str = os.environ["LLDB_BATCHMODE_TARGET_TRIPLE"]

    # Checked in order; the first matching suffix group wins.
    suffix_table = (
        (("windows-msvc",), Target.WindowsMsvc),
        (("windows-gnu", "windows-gnullvm"), Target.WindowsGnu),
    )
    for suffixes, variant in suffix_table:
        if triple.endswith(suffixes):
            return variant

    return Target.NonWindows
| 45 | + |
| 46 | + |
# Evaluated once at import time. The env var is read by subscripting `os.environ`, so importing
# this module raises `KeyError` when it is unset. Only the exact string "1" enables blessing.
BLESS: Final[bool] = os.environ["LLDB_BATCHMODE_BLESS_TEST_DATA"] == "1"
"""Global constant set by `compiletest` that determines whether or not we are blessing the test
data."""

# Also evaluated at import time; `get_target()` reads `LLDB_BATCHMODE_TARGET_TRIPLE`.
TARGET: Final[Target] = get_target()
"""Global constant set by `compiletest`. Determines which target the tests were run for, thus which
set of test input we check."""
| 54 | + |
| 55 | + |
def annot_to_ty(annot: str) -> type[Any]:
    """Resolve a string type annotation to the type it names (e.g. `"Variable"` -> `Variable`).

    Only the part of the annotation before the first `[` is looked up, so generic parameters are
    ignored (`"list[Child]"` resolves to `list`). Names that are not in the lookup table resolve
    to `type[Any]`.
    """

    # Everything up to (not including) the first "[" -- the whole string if there is none.
    base_name, _, _ = annot.partition("[")

    known_types = {
        # builtins
        "int": int,
        "float": float,
        "bool": bool,
        "None": NoneType,
        "list": list,
        "dict": dict,
        "str": str,
        # module-level aliases
        "ByteSize": int,
        # schema dataclasses
        "TargetData": TargetData,
        "Variable": Variable,
        "Type": Type,
        "Field": Field,
        "Child": Child,
        "BlessMetadata": BlessMetadata,
    }

    return known_types.get(base_name, type[Any])
| 76 | + |
| 77 | + |
| 78 | +def from_dict(ty: type[Any], data: JsonType): |
| 79 | + """Translates a dictionary into an instance of the given dataclass type (with possibly nested |
| 80 | + dataclasses). |
| 81 | +
|
| 82 | + Relies on accurate type hints for the dataclass's fields, and the default `dataclass.__init__` |
| 83 | + definition.""" |
| 84 | + |
| 85 | + # Optional isn't a constructor, so we have to "unwrap" it. |
| 86 | + if get_origin(ty) is Optional: |
| 87 | + ty = ty.__args__[0] |
| 88 | + |
| 89 | + # recurse into lists |
| 90 | + if isinstance(data, list): |
| 91 | + # pulls the generic type from the list (e.g. `list[int]` -> `int`) |
| 92 | + inner = ty.__args__[0] |
| 93 | + if isinstance(inner, str): |
| 94 | + inner = annot_to_ty(inner) |
| 95 | + |
| 96 | + return [from_dict(inner, i) for i in data] |
| 97 | + |
| 98 | + if get_origin(ty) is dict and ty.__args__[0] is str: |
| 99 | + assert isinstance(data, dict) |
| 100 | + val_ty = ty.__args__[1] |
| 101 | + if isinstance(val_ty, str): |
| 102 | + val_ty = annot_to_ty(val_ty) |
| 103 | + |
| 104 | + if val_ty in [Variable, Child, Type, Field]: |
| 105 | + return {k: from_dict(val_ty, data[k]) for k in data.keys()} |
| 106 | + |
| 107 | + # map dict -> dataclass, recursing for each field |
| 108 | + if is_dataclass(ty): |
| 109 | + assert isinstance(data, dict) |
| 110 | + |
| 111 | + field_types = {f.name: f.type for f in fields(ty)} |
| 112 | + |
| 113 | + try: |
| 114 | + field_map = {} |
| 115 | + |
| 116 | + for f in data: |
| 117 | + f_type = field_types[f] |
| 118 | + |
| 119 | + # type annotations can be strings, so we need to resolve them to their actual type |
| 120 | + if isinstance(f_type, str): |
| 121 | + f_type = annot_to_ty(f_type) |
| 122 | + |
| 123 | + field_map[f] = from_dict(f_type, data[f]) |
| 124 | + |
| 125 | + # if you've never seen this before, `**` is the splat operator. It expands a mapping |
| 126 | + # type (in this case a dict) to keyword arguments. The ordering of the mapping does not |
| 127 | + # matter, only that the mapping's keys match the functions keyword args, and |
| 128 | + # `len(mapping)` == the number of keyword args. |
| 129 | + return ty(**field_map) |
| 130 | + except KeyError as e: |
| 131 | + print( |
| 132 | + f"Unable to convert dict to {ty}: Invalid field name {e}. If the test schema was \ |
| 133 | +changed intentionally, use the `--bless` option to update test data to the new schema." |
| 134 | + ) |
| 135 | + |
| 136 | + # for any other type, we don't need to do any processing |
| 137 | + return data |
| 138 | + |
| 139 | + |
| 140 | +@dataclass(slots=True) |
| 141 | +class Field: |
| 142 | + name: str |
| 143 | + type: str |
| 144 | + """The fully qualified name of the field's type. Full type information should be looked up |
| 145 | + via `TargetData.types`""" |
| 146 | + |
| 147 | + offset: ByteSize |
| 148 | + |
| 149 | + |
@dataclass(slots=True)
class Type:
    """Schema entry describing a single type; stored as the values of `TargetData.types`."""

    size: ByteSize

    # When GDB support is added to the test framework, basic_type and type_class will probably be
    # converted to a wrapper IntEnum that translates GDB's equivalent information.
    basic_type: int
    """The `lldb.eBasicType` value associated with this type. Tested due to our use of it in type
    recognizer functions."""

    type_class: int
    """The `lldb.eTypeClass` value associated with this type. Tested due to our use of it in type
    recognizer functions."""

    fields: list[Field]
    """Kept as a list (not a mapping) due to our reliance on `SBType.GetFieldAtIndex()`"""

    generic_params: list[str]
    """Kept as a list due to our reliance on `SBType.GetTemplateArgumentType()` and the sequential
    behavior of `lldb_providers.get_template_args`"""

    # FIXME the only way we can look up static fields is by name (as of lldb 22), so we need a way
    # to discover them. ATM only sum-type enums on MSVC use static fields, and those are fixed
    # values, so it's not super urgent.
    # static_fields: list[StaticField]
| 173 | + |
| 174 | + |
| 175 | +@dataclass(slots=True) |
| 176 | +class Child: |
| 177 | + """Similar to `Variable`, but carries less information since we primarily test top-level |
| 178 | + values (and assume values of these child types have been tested thoroughly elsewhere). |
| 179 | +
|
| 180 | + Note that if the type has a synthetic provider (lldb) or pretty printer (gdb), the child names |
| 181 | + and types can be set to anything at all, so we do need to test these separately from the |
| 182 | + parent's type's fields.""" |
| 183 | + |
| 184 | + name: str |
| 185 | + """The name used to access the child. If the parent object has a synthetic, the child name can |
| 186 | + be overridden.""" |
| 187 | + |
| 188 | + type: str |
| 189 | + """The fully qualified name of the child's type. Full type information should be looked up |
| 190 | + via `TargetData.types`""" |
| 191 | + |
| 192 | + value: Optional[Primitive] |
| 193 | + children: list["Child"] |
| 194 | + """Children are stored as a list because of our use of `GetChildAtIndex()`. Providers can also |
| 195 | + dictate the order that children populate, so it's important to ensure that stays consistent too. |
| 196 | + """ |
| 197 | + |
| 198 | + |
@dataclass(slots=True)
class Variable:
    """Expected data for one top-level variable; stored as the values of the per-breakpoint
    dictionaries in `TargetData.breakpoints`."""

    type: str
    """Fully qualified name of the variable's type. This is only the name; the full type
    information should be looked up via `TargetData.types`"""

    pretty_type_name: Optional[str]
    """Type names can be overridden by `SyntheticProvider.get_type_name()` in LLDB and by
    `type_printer` in GDB"""

    pretty_print: Optional[str]
    """The string-result of pretty printing the value (`SBValue.GetSummary` for LLDB,
    `pretty_printer.to_string` for GDB). `None` for aggregates with no summary provider."""

    value: Optional[Primitive]
    """`None` if the object does not have a primitive representation."""

    synthetic: Optional[str]
    """The class/function name of the synthetic provider (lldb) or pretty printer (gdb).
    `None` if the object does not have a synthetic provider"""

    summary: Optional[str]
    """The function name of the summary provider. `None` if the object does not have a summary
    provider, or if the test data is for GDB"""

    format: Optional[int]
    """The `lldb.eFormat` enum variant associated with this type (if applicable)."""

    # A list rather than a dict because child order matters
    children: list[Child]
    """The children provided by the object. If the object has a synthetic provider, the children
    are the result of the provider's `get_child_at_index` function"""
| 231 | + |
| 232 | + |
| 233 | +@dataclass(slots=True) |
| 234 | +class BlessMetadata: |
| 235 | + """ |
| 236 | + Contains additional context about the tools at the time the test data was generated |
| 237 | + """ |
| 238 | + |
| 239 | + python_version: str = "" |
| 240 | + debugger_version: str = "" |
| 241 | + # FIXME (todo) |
| 242 | + # feature_flags: str |
| 243 | + |
| 244 | + |
@dataclass(slots=True)
class TargetData:
    """
    Top-level container for all test data.

    Due to the differences between PDB and DWARF debug info, we cannot guarantee their output
    will be identical. Since LLDB can handle both, we need to conditionally select the correct
    test data to use.

    Additionally, since there are differences in the internals of some structs based on OS (e.g.
    `PathBuf`/`OsString`), we need to be aware of whether we're on Windows or not.

    A global var `TARGET` is set to the current variant upon `lldb_batchmode`'s instantiation using
    an env var passed from `compiletest` and is not expected to change afterwards.
    """

    bless_metadata: BlessMetadata = field(default_factory=BlessMetadata)
    """Miscellaneous data included to make diagnosing issues easier. This data is not intended to be
    tested against."""

    types: dict[str, Type] = field(default_factory=dict)
    """
    Maps type names to types. Contains all types present in the test's variables, including the
    types of fields and child objects.
    """

    # If we ever decide that it makes sense to check the same variable twice at the same breakpoint
    # this will need to be converted to a list
    breakpoints: list[dict[str, Variable]] = field(default_factory=list)
    """One element per stopping point in the test. Each element is a dictionary mapping variable
    names to their respective test data."""

    @staticmethod
    def initialize() -> "TargetData":
        """Load the test data from the file named by `LLDB_BATCHMODE_INPUT_DATA_PATH`.

        * If the path is not an existing file: returns an empty `TargetData` when `BLESS` is set,
          otherwise raises `Exception`.
        * If the file is not valid JSON: prints a warning and returns an empty `TargetData`.
        * Otherwise returns the result of `from_dict(TargetData, ...)` on the decoded JSON.
        """
        path = os.environ["LLDB_BATCHMODE_INPUT_DATA_PATH"]

        if os.path.isfile(path):
            with open(path, "r") as source:
                try:
                    return from_dict(TargetData, json.load(source))
                except json.decoder.JSONDecodeError:
                    print("Warning: Malformed input data, reverting to default")
                    return TargetData()

        # No data file yet: only acceptable when we're about to generate one.
        if BLESS:
            return TargetData()

        raise Exception(
            f"Invalid input data path: '{path}'\nIf test data has not been "
            "generated for this test yet, consider using the `--bless` option."
        )

    def save_blessing(self, metadata: BlessMetadata):
        """Writes the entirety of `self`, as JSON, to the file named by the env var
        `LLDB_BATCHMODE_INPUT_DATA_PATH`, which is set by `compiletest` before running
        `lldb_batchmode`. Used to finalize changes made by one or more `from_lldb.bless_variable`
        calls.

        This function should be called exactly once, right before
        `lldb_batchmode.runner.main` exits if the following conditions are met:

        1. No other exceptions or error states occurred
        2. `BLESS == True`
        3. At least one `repr` pseudo-command was processed

        This prevents us from saving incomplete data or invalid data. It also prevents us from
        creating input data files for tests that do not need it.
        """

        self.bless_metadata = metadata
        path = os.environ["LLDB_BATCHMODE_INPUT_DATA_PATH"]

        # Dumping directly to a file is somewhat unsafe. If the `Variable`/`Type` data ends up in
        # a state that cannot be serialized correctly, the json ends up malformed, and we could
        # end up overwriting valid test data with a complete mess. Since the in-memory data is
        # typically completely valid, the testing logic would pass and make it seem like nothing
        # is wrong.
        #
        # While git could help revert the test file, it's better to not save malformed json in
        # the first place. So: serialize to a string, parse that string back (which raises
        # `JSONDecodeError` if it is malformed), and only then touch the target file.
        serialized = json.dumps(asdict(self), indent=" ")
        json.loads(serialized)

        with open(path, "w") as sink:
            sink.write(serialized)
0 commit comments