Skip to content

Commit 7ff36ef

Browse files
authored
Write errors to a separate cache file (#21022)
This is needed for staggered interface/implementation processing in the parallel checking. The implementation is straightforward. A non-trivial part is that it looks like we need to always write/read the errors file, even if it is empty; otherwise we risk getting into a stale state in some edge cases. This adds a few milliseconds to warm cache runs, but it is still under the noise level for both self-check and `-c 'import torch'`.
1 parent 40ffeda commit 7ff36ef

File tree

7 files changed

+68
-38
lines changed

7 files changed

+68
-38
lines changed

misc/diff-cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def load(cache: MetadataStore, s: str) -> Any:
115115
return data
116116
normalize_meta(meta)
117117
return serialize_meta_ff(meta, version_prefix)
118-
if s.endswith(".data.ff"):
118+
if s.endswith((".data.ff", ".err.ff")):
119119
return data
120120
obj = json_loads(data)
121121
if s.endswith(".meta.json"):

mypy/build.py

Lines changed: 64 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,15 @@
6969
Tag,
7070
WriteBuffer,
7171
read_bytes,
72+
read_errors,
7273
read_int,
7374
read_int_list,
7475
read_int_opt,
7576
read_str,
7677
read_str_list,
7778
read_str_opt,
7879
write_bytes,
80+
write_errors,
7981
write_int,
8082
write_int_list,
8183
write_int_opt,
@@ -1626,6 +1628,13 @@ def create_metastore(options: Options, parallel_worker: bool = False) -> Metadat
16261628
return mds
16271629

16281630

1631+
def get_errors_name(meta_name: str) -> str:
1632+
# Convert e.g. foo.bar.meta.ff to foo.bar.err.ff
1633+
parts = meta_name.rsplit(".", maxsplit=2)
1634+
parts[1] = "err"
1635+
return ".".join(parts)
1636+
1637+
16291638
def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str | None]:
16301639
"""Return the file names for the cache files.
16311640
@@ -1688,7 +1697,7 @@ def options_snapshot(id: str, manager: BuildManager) -> dict[str, object]:
16881697

16891698
def find_cache_meta(
16901699
id: str, path: str, manager: BuildManager, skip_validation: bool = False
1691-
) -> CacheMeta | None:
1700+
) -> tuple[CacheMeta | None, list[ErrorTuple]]:
16921701
"""Find cache data for a module.
16931702
16941703
Args:
@@ -1705,29 +1714,21 @@ def find_cache_meta(
17051714
meta_file, data_file, _ = get_cache_names(id, path, manager.options)
17061715
if manager.tracing_enabled:
17071716
manager.trace(f"Looking for {id} at {meta_file}")
1708-
meta: bytes | dict[str, Any] | None
17091717
if manager.stats_enabled:
17101718
t0 = time.time()
17111719
if manager.options.fixed_format_cache:
17121720
meta = _load_ff_file(
17131721
meta_file, manager, log_error_fmt="Could not load cache for {}: ", id=id
17141722
)
1715-
if meta is None:
1716-
return None
17171723
else:
17181724
meta = _load_json_file(
17191725
meta_file,
17201726
manager,
17211727
log_success=f"Meta {id} ",
17221728
log_error=f"Could not load cache for {id}: ",
17231729
)
1724-
if meta is None:
1725-
return None
1726-
if not isinstance(meta, dict):
1727-
manager.log( # type: ignore[unreachable]
1728-
f"Could not load cache for {id}: meta cache is not a dict: {repr(meta)}"
1729-
)
1730-
return None
1730+
if meta is None:
1731+
return None, []
17311732
if manager.stats_enabled:
17321733
t1 = time.time()
17331734
if isinstance(meta, bytes):
@@ -1736,31 +1737,31 @@ def find_cache_meta(
17361737
# TODO: switch to something like librt.internal.read_byte() if this is slow.
17371738
if meta[0] != cache_version() or meta[1] != CACHE_VERSION:
17381739
manager.log(f"Metadata abandoned for {id}: incompatible cache format")
1739-
return None
1740+
return None, []
17401741
data_io = ReadBuffer(meta[2:])
17411742
m = CacheMeta.read(data_io, data_file)
17421743
else:
17431744
m = CacheMeta.deserialize(meta, data_file)
17441745
if m is None:
17451746
manager.log(f"Metadata abandoned for {id}: cannot deserialize data")
1746-
return None
1747+
return None, []
17471748
if manager.stats_enabled:
17481749
t2 = time.time()
17491750
manager.add_stats(
17501751
load_meta_time=t2 - t0, load_meta_load_time=t1 - t0, load_meta_from_dict_time=t2 - t1
17511752
)
17521753
if skip_validation:
1753-
return m
1754+
return m, []
17541755

17551756
# Ignore cache if generated by an older mypy version.
17561757
if m.version_id != manager.version_id and not manager.options.skip_version_check:
17571758
manager.log(f"Metadata abandoned for {id}: different mypy version")
1758-
return None
1759+
return None, []
17591760

17601761
total_deps = len(m.dependencies) + len(m.suppressed)
17611762
if len(m.dep_prios) != total_deps or len(m.dep_lines) != total_deps:
17621763
manager.log(f"Metadata abandoned for {id}: broken dependencies")
1763-
return None
1764+
return None, []
17641765

17651766
# Ignore cache if (relevant) options aren't the same.
17661767
# Note that it's fine to mutilate cached_options since it's only used here.
@@ -1782,12 +1783,12 @@ def find_cache_meta(
17821783
key, cached_options.get(key), current_options.get(key)
17831784
)
17841785
)
1785-
return None
1786+
return None, []
17861787
if manager.old_plugins_snapshot and manager.plugins_snapshot:
17871788
# Check if plugins are still the same.
17881789
if manager.plugins_snapshot != manager.old_plugins_snapshot:
17891790
manager.log(f"Metadata abandoned for {id}: plugins differ")
1790-
return None
1791+
return None, []
17911792
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=True))
17921793
if not manager.options.fixed_format_cache:
17931794
# So that plugins can return data with tuples in it without
@@ -1796,10 +1797,31 @@ def find_cache_meta(
17961797
plugin_data = json_loads(json_dumps(plugin_data))
17971798
if m.plugin_data != plugin_data:
17981799
manager.log(f"Metadata abandoned for {id}: plugin configuration differs")
1799-
return None
1800+
return None, []
18001801

1802+
# Load cached errors for this file, even if empty. This is needed to avoid
1803+
# invalid cache state after a crash/blocker/Ctrl+C etc.
1804+
errors_file = get_errors_name(meta_file)
1805+
if manager.options.fixed_format_cache:
1806+
errors = _load_ff_file(
1807+
errors_file, manager, log_error_fmt="Could not load errors for {}: ", id=id
1808+
)
1809+
else:
1810+
errors = _load_json_file(
1811+
errors_file,
1812+
manager,
1813+
log_success=f"Errors {id} ",
1814+
log_error=f"Could not load errors for {id}: ",
1815+
)
1816+
if errors is None:
1817+
return None, []
1818+
if isinstance(errors, bytes):
1819+
data_io = ReadBuffer(errors)
1820+
e = read_errors(data_io)
1821+
else:
1822+
e = [tuple(err) for err in errors["error_lines"]]
18011823
manager.add_stats(fresh_metas=1)
1802-
return m
1824+
return m, e
18031825

18041826

18051827
def validate_meta(
@@ -2078,9 +2100,8 @@ def write_cache(
20782100
version_id=manager.version_id,
20792101
ignore_all=ignore_all,
20802102
plugin_data=plugin_data,
2081-
# These two will be filled by the caller.
2103+
# This one will be filled by the caller.
20822104
dep_hashes=[],
2083-
error_lines=[],
20842105
)
20852106
return interface_hash, (meta, meta_file)
20862107

@@ -2104,6 +2125,23 @@ def write_cache_meta(meta: CacheMeta, manager: BuildManager, meta_file: str) ->
21042125
manager.log(f"Error writing cache meta file {meta_file}")
21052126

21062127

2128+
def write_errors_file(
2129+
meta_file: str, error_lines: list[ErrorTuple], manager: BuildManager
2130+
) -> None:
2131+
# Write errors cache file
2132+
errors_file = get_errors_name(meta_file)
2133+
metastore = manager.metastore
2134+
if manager.options.fixed_format_cache:
2135+
data_io = WriteBuffer()
2136+
write_errors(data_io, error_lines)
2137+
meta_bytes = data_io.getvalue()
2138+
else:
2139+
# Some generic JSON helpers require top-level to be a dict.
2140+
meta_bytes = json_dumps({"error_lines": error_lines}, manager.options.debug_cache)
2141+
if not metastore.write(errors_file, meta_bytes):
2142+
manager.log(f"Error writing errors file {errors_file}")
2143+
2144+
21072145
"""Dependency manager.
21082146
21092147
Design
@@ -2393,7 +2431,7 @@ def new_state(
23932431
interface_hash = b""
23942432
meta_source_hash = None
23952433
if path and source is None and manager.cache_enabled:
2396-
meta = find_cache_meta(id, path, manager)
2434+
meta, error_lines = find_cache_meta(id, path, manager)
23972435
# TODO: Get mtime if not cached.
23982436
if meta is not None:
23992437
interface_hash = meta.interface_hash
@@ -2420,7 +2458,7 @@ def new_state(
24202458
assert len(meta.dep_hashes) == len(meta.dependencies)
24212459
dep_hashes = {k: v for (k, v) in zip(meta.dependencies, meta.dep_hashes)}
24222460
# Only copy `error_lines` if the module is not silently imported.
2423-
error_lines = [] if ignore_all else meta.error_lines
2461+
error_lines = [] if ignore_all else error_lines
24242462
imports_ignored = meta.imports_ignored
24252463
else:
24262464
dependencies = []
@@ -2656,7 +2694,7 @@ def reload_meta(self) -> None:
26562694
the interface hash.
26572695
"""
26582696
assert self.path is not None
2659-
self.meta = find_cache_meta(self.id, self.path, self.manager, skip_validation=True)
2697+
self.meta, _ = find_cache_meta(self.id, self.path, self.manager, skip_validation=True)
26602698
assert self.meta is not None
26612699
self.interface_hash = self.meta.interface_hash
26622700

@@ -4355,8 +4393,8 @@ def process_stale_scc(
43554393
continue
43564394
meta, meta_file = meta_tuple
43574395
meta.dep_hashes = [graph[dep].interface_hash for dep in graph[id].dependencies]
4358-
meta.error_lines = errors_by_id.get(id, [])
43594396
write_cache_meta(meta, manager, meta_file)
4397+
write_errors_file(meta_file, errors_by_id.get(id, []), manager)
43604398
manager.done_sccs.add(ascc.id)
43614399
manager.add_stats(
43624400
load_missing_time=t1 - t0,

mypy/cache.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969
from mypy_extensions import u8
7070

7171
# High-level cache layout format
72-
CACHE_VERSION: Final = 6
72+
CACHE_VERSION: Final = 7
7373

7474
# Type used internally to represent errors:
7575
# (path, line, column, end_line, end_column, severity, message, code)
@@ -99,7 +99,6 @@ def __init__(
9999
dep_hashes: list[bytes],
100100
interface_hash: bytes,
101101
trans_dep_hash: bytes,
102-
error_lines: list[ErrorTuple],
103102
version_id: str,
104103
ignore_all: bool,
105104
plugin_data: Any,
@@ -123,7 +122,6 @@ def __init__(
123122
self.dep_hashes = dep_hashes # list of interface_hash for dependencies
124123
self.interface_hash = interface_hash # hash representing the public interface
125124
self.trans_dep_hash = trans_dep_hash # hash of import structure (transitive)
126-
self.error_lines = error_lines
127125
self.version_id = version_id # mypy version for cache invalidation
128126
self.ignore_all = ignore_all # if errors were ignored
129127
self.plugin_data = plugin_data # config data from plugins
@@ -146,7 +144,6 @@ def serialize(self) -> dict[str, Any]:
146144
"dep_hashes": [dep.hex() for dep in self.dep_hashes],
147145
"interface_hash": self.interface_hash.hex(),
148146
"trans_dep_hash": self.trans_dep_hash.hex(),
149-
"error_lines": self.error_lines,
150147
"version_id": self.version_id,
151148
"ignore_all": self.ignore_all,
152149
"plugin_data": self.plugin_data,
@@ -175,7 +172,6 @@ def deserialize(cls, meta: dict[str, Any], data_file: str) -> CacheMeta | None:
175172
dep_hashes=[bytes.fromhex(dep) for dep in meta["dep_hashes"]],
176173
interface_hash=bytes.fromhex(meta["interface_hash"]),
177174
trans_dep_hash=bytes.fromhex(meta["trans_dep_hash"]),
178-
error_lines=[tuple(err) for err in meta["error_lines"]],
179175
version_id=meta["version_id"],
180176
ignore_all=meta["ignore_all"],
181177
plugin_data=meta["plugin_data"],
@@ -203,7 +199,6 @@ def write(self, data: WriteBuffer) -> None:
203199
write_bytes_list(data, self.dep_hashes)
204200
write_bytes(data, self.interface_hash)
205201
write_bytes(data, self.trans_dep_hash)
206-
write_errors(data, self.error_lines)
207202
write_str(data, self.version_id)
208203
write_bool(data, self.ignore_all)
209204
# Plugin data may be not a dictionary, so we use
@@ -233,7 +228,6 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
233228
dep_hashes=read_bytes_list(data),
234229
interface_hash=read_bytes(data),
235230
trans_dep_hash=read_bytes(data),
236-
error_lines=read_errors(data),
237231
version_id=read_str(data),
238232
ignore_all=read_bool(data),
239233
plugin_data=read_json_value(data),

mypy/exportjson.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,6 @@ def convert_binary_cache_meta_to_json(data: bytes, data_file: str) -> Json:
572572
"dep_lines": meta.dep_lines,
573573
"dep_hashes": [dep.hex() for dep in meta.dep_hashes],
574574
"interface_hash": meta.interface_hash.hex(),
575-
"error_lines": meta.error_lines,
576575
"version_id": meta.version_id,
577576
"ignore_all": meta.ignore_all,
578577
"plugin_data": meta.plugin_data,

mypy/test/test_diff_cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def test_diff_cache_produces_valid_json(self) -> None:
127127
a_keys = {k for k in keys if "/a." in k or k.startswith("a.")}
128128
assert len(a_keys) == 0, f"Unexpected a.* entries in diff: {a_keys}"
129129
assert len(b_keys) == 2, f"Expected 2 b.* entries in diff, got: {b_keys}"
130-
assert len(c_keys) == 3, f"Expected 3 c.* entries in diff, got: {c_keys}"
130+
assert len(c_keys) == 4, f"Expected 3 c.* entries in diff, got: {c_keys}"
131131

132132
# The new access to a.x in b.py should create a fine-grained
133133
# dependency recorded in @root.deps.json.

mypy/test/testcheck.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def find_missing_cache_files(
293293
ignore_errors = True
294294
missing = {}
295295
for id, path in modules.items():
296-
meta = build.find_cache_meta(id, path, manager)
296+
meta, _ = build.find_cache_meta(id, path, manager)
297297
if not build.validate_meta(meta, id, path, ignore_errors, manager):
298298
missing[id] = path
299299
return set(missing.values())

test-data/unit/exportjson.test

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,6 @@ from typing_extensions import Final
317317
"<hash>"
318318
],
319319
"interface_hash": "<hash>",
320-
"error_lines": [],
321320
"version_id": ...,
322321
"ignore_all": false,
323322
"plugin_data": null

0 commit comments

Comments (0)