Skip to content

Commit ba3e064

Browse files
authored
Refactor State creation logic (#20691)
Ref #933 This should be a pure refactoring unless I messed something up. This is needed to prepare to make `State` serializable, so that we can send the graph over the network to build workers (which is required for parallel parsing). Also, not having tons of custom logic in `__init__()` is a better style anyway. Note I make the new state factory a classmethod to minimize the diff. If there is a preference, I can make it a regular function.
1 parent 185b2c9 commit ba3e064

2 files changed

Lines changed: 146 additions & 71 deletions

File tree

mypy/build.py

Lines changed: 138 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2122,7 +2122,7 @@ class State:
21222122
"""The state for a module.
21232123
21242124
The source is only used for the -c command line option; in that
2125-
case path is None. Otherwise source is None and path isn't.
2125+
case path is None. Otherwise, source is None and path isn't.
21262126
"""
21272127

21282128
manager: BuildManager
@@ -2136,7 +2136,6 @@ class State:
21362136
source_hash: str | None = None # Hash calculated based on the source code
21372137
meta_source_hash: str | None = None # Hash of the source given in the meta, if any
21382138
meta: CacheMeta | None = None
2139-
data: str | None = None
21402139
tree: MypyFile | None = None
21412140
# We keep both a list and set of dependencies. A set because it makes it efficient to
21422141
# prevent duplicates and the list because I am afraid of changing the order of
@@ -2152,10 +2151,10 @@ class State:
21522151
dep_line_map: dict[str, int]
21532152

21542153
# Map from dependency id to its last observed interface hash
2155-
dep_hashes: dict[str, bytes] = {}
2154+
dep_hashes: dict[str, bytes]
21562155

21572156
# List of errors reported for this file last time.
2158-
error_lines: list[SerializedError] = []
2157+
error_lines: list[SerializedError]
21592158

21602159
# Parent package, its parent, etc.
21612160
ancestors: list[str] | None = None
@@ -2196,8 +2195,9 @@ class State:
21962195
# we use file size as a proxy for complexity.
21972196
size_hint: int
21982197

2199-
def __init__(
2200-
self,
2198+
@classmethod
2199+
def new_state(
2200+
cls,
22012201
id: str | None,
22022202
path: str | None,
22032203
source: str | None,
@@ -2211,29 +2211,26 @@ def __init__(
22112211
# process it. With this flag, any changes to external state as well
22122212
# as error reporting should be avoided.
22132213
temporary: bool = False,
2214-
) -> None:
2214+
) -> State:
22152215
if not temporary:
22162216
assert id or path or source is not None, "Neither id, path nor source given"
2217-
self.manager = manager
22182217
State.order_counter += 1
2219-
self.order = State.order_counter
2220-
self.caller_line = caller_line
22212218
if caller_state:
2222-
self.import_context = caller_state.import_context.copy()
2223-
self.import_context.append((caller_state.xpath, caller_line))
2219+
import_context = caller_state.import_context.copy()
2220+
import_context.append((caller_state.xpath, caller_line))
22242221
else:
2225-
self.import_context = []
2226-
self.id = id or "__main__"
2227-
self.options = manager.options.clone_for_module(self.id)
2228-
self.early_errors = []
2229-
self._type_checker = None
2222+
import_context = []
2223+
id = id or "__main__"
2224+
options = manager.options.clone_for_module(id)
2225+
2226+
ignore_all = False
22302227
if not path and source is None:
22312228
assert id is not None
22322229
try:
22332230
path, follow_imports = find_module_and_diagnose(
22342231
manager,
22352232
id,
2236-
self.options,
2233+
options,
22372234
caller_state,
22382235
caller_line,
22392236
ancestor_for,
@@ -2245,47 +2242,72 @@ def __init__(
22452242
manager.missing_modules.add(id)
22462243
raise
22472244
if follow_imports == "silent":
2248-
self.ignore_all = True
2245+
ignore_all = True
22492246
elif path and is_silent_import_module(manager, path) and not root_source:
2250-
self.ignore_all = True
2251-
self.path = path
2252-
if path:
2253-
self.abspath = os.path.abspath(path)
2254-
self.xpath = path or "<string>"
2255-
if path and source is None and self.manager.cache_enabled:
2256-
self.meta = find_cache_meta(self.id, path, manager)
2247+
ignore_all = True
2248+
2249+
meta = None
2250+
interface_hash = b""
2251+
meta_source_hash = None
2252+
if path and source is None and manager.cache_enabled:
2253+
meta = find_cache_meta(id, path, manager)
22572254
# TODO: Get mtime if not cached.
2258-
if self.meta is not None:
2259-
self.interface_hash = self.meta.interface_hash
2260-
self.meta_source_hash = self.meta.hash
2261-
if path and source is None and self.manager.fscache.isdir(path):
2255+
if meta is not None:
2256+
interface_hash = meta.interface_hash
2257+
meta_source_hash = meta.hash
2258+
if path and source is None and manager.fscache.isdir(path):
22622259
source = ""
2263-
self.source = source
2264-
self.add_ancestors()
2265-
self.per_line_checking_time_ns = collections.defaultdict(int)
2260+
22662261
t0 = time.time()
2267-
self.meta = validate_meta(self.meta, self.id, self.path, self.ignore_all, manager)
2268-
self.manager.add_stats(validate_meta_time=time.time() - t0)
2269-
if self.meta:
2262+
meta = validate_meta(meta, id, path, ignore_all, manager)
2263+
manager.add_stats(validate_meta_time=time.time() - t0)
2264+
2265+
if meta:
22702266
# Make copies, since we may modify these and want to
22712267
# compare them to the originals later.
2272-
self.dependencies = list(self.meta.dependencies)
2273-
self.dependencies_set = set(self.dependencies)
2274-
self.suppressed = list(self.meta.suppressed)
2275-
self.suppressed_set = set(self.suppressed)
2276-
all_deps = self.dependencies + self.suppressed
2277-
assert len(all_deps) == len(self.meta.dep_prios)
2278-
self.priorities = {id: pri for id, pri in zip(all_deps, self.meta.dep_prios)}
2279-
assert len(all_deps) == len(self.meta.dep_lines)
2280-
self.dep_line_map = {id: line for id, line in zip(all_deps, self.meta.dep_lines)}
2281-
assert len(self.meta.dep_hashes) == len(self.meta.dependencies)
2282-
self.dep_hashes = {
2283-
k: v for (k, v) in zip(self.meta.dependencies, self.meta.dep_hashes)
2284-
}
2268+
dependencies = list(meta.dependencies)
2269+
suppressed = list(meta.suppressed)
2270+
all_deps = dependencies + suppressed
2271+
assert len(all_deps) == len(meta.dep_prios)
2272+
priorities = {id: pri for id, pri in zip(all_deps, meta.dep_prios)}
2273+
assert len(all_deps) == len(meta.dep_lines)
2274+
dep_line_map = {id: line for id, line in zip(all_deps, meta.dep_lines)}
2275+
assert len(meta.dep_hashes) == len(meta.dependencies)
2276+
dep_hashes = {k: v for (k, v) in zip(meta.dependencies, meta.dep_hashes)}
22852277
# Only copy `error_lines` if the module is not silently imported.
2286-
self.error_lines = [] if self.ignore_all else self.meta.error_lines
2278+
error_lines = [] if ignore_all else meta.error_lines
2279+
else:
2280+
dependencies = []
2281+
suppressed = []
2282+
priorities = {}
2283+
dep_line_map = {}
2284+
dep_hashes = {}
2285+
error_lines = []
2286+
2287+
state = cls(
2288+
manager=manager,
2289+
order=State.order_counter,
2290+
id=id,
2291+
path=path,
2292+
source=source,
2293+
options=options,
2294+
ignore_all=ignore_all,
2295+
caller_line=caller_line,
2296+
import_context=import_context,
2297+
meta=meta,
2298+
interface_hash=interface_hash,
2299+
meta_source_hash=meta_source_hash,
2300+
dependencies=dependencies,
2301+
suppressed=suppressed,
2302+
priorities=priorities,
2303+
dep_line_map=dep_line_map,
2304+
dep_hashes=dep_hashes,
2305+
error_lines=error_lines,
2306+
)
2307+
2308+
if meta:
22872309
if temporary:
2288-
self.load_tree(temporary=True)
2310+
state.load_tree(temporary=True)
22892311
if not manager.use_fine_grained_cache():
22902312
# Special case: if there were a previously missing package imported here
22912313
# and it is not present, then we need to re-calculate dependencies.
@@ -2296,10 +2318,10 @@ def __init__(
22962318
# suppressed dependencies. Therefore, when the package with module is added,
22972319
# we need to re-calculate dependencies.
22982320
# NOTE: see comment below for why we skip this in fine grained mode.
2299-
if exist_added_packages(self.suppressed, manager, self.options):
2300-
self.parse_file() # This is safe because the cache is anyway stale.
2301-
self.compute_dependencies()
2302-
self.size_hint = self.meta.size
2321+
if exist_added_packages(suppressed, manager, options):
2322+
state.parse_file() # This is safe because the cache is anyway stale.
2323+
state.compute_dependencies()
2324+
state.size_hint = meta.size
23032325
else:
23042326
# When doing a fine-grained cache load, pretend we only
23052327
# know about modules that have cache information and defer
@@ -2309,12 +2331,65 @@ def __init__(
23092331
raise ModuleNotFound
23102332

23112333
# Parse the file (and then some) to get the dependencies.
2312-
self.parse_file(temporary=temporary)
2313-
self.compute_dependencies()
2314-
if self.manager.workers:
2334+
state.parse_file(temporary=temporary)
2335+
state.compute_dependencies()
2336+
if manager.workers:
23152337
# We don't need parsed trees in coordinator process, we parse only to
23162338
# compute dependencies.
2317-
self.tree = None
2339+
state.tree = None
2340+
2341+
return state
2342+
2343+
def __init__(
2344+
self,
2345+
manager: BuildManager,
2346+
order: int,
2347+
id: str,
2348+
path: str | None,
2349+
source: str | None,
2350+
options: Options,
2351+
ignore_all: bool,
2352+
caller_line: int,
2353+
import_context: list[tuple[str, int]],
2354+
meta: CacheMeta | None,
2355+
interface_hash: bytes,
2356+
meta_source_hash: str | None,
2357+
dependencies: list[str],
2358+
suppressed: list[str],
2359+
priorities: dict[str, int],
2360+
dep_line_map: dict[str, int],
2361+
dep_hashes: dict[str, bytes],
2362+
error_lines: list[SerializedError],
2363+
size_hint: int = 0,
2364+
) -> None:
2365+
self.manager = manager
2366+
self.order = order
2367+
self.id = id
2368+
self.path = path
2369+
if path:
2370+
self.abspath = os.path.abspath(path)
2371+
self.xpath = path or "<string>"
2372+
self.source = source
2373+
self.options = options
2374+
self.ignore_all = ignore_all
2375+
self.caller_line = caller_line
2376+
self.import_context = import_context
2377+
self.meta = meta
2378+
self.interface_hash = interface_hash
2379+
self.meta_source_hash = meta_source_hash
2380+
self.dependencies = dependencies
2381+
self.suppressed = suppressed
2382+
self.dependencies_set = set(dependencies)
2383+
self.suppressed_set = set(suppressed)
2384+
self.priorities = priorities
2385+
self.dep_line_map = dep_line_map
2386+
self.dep_hashes = dep_hashes
2387+
self.error_lines = error_lines
2388+
self.per_line_checking_time_ns = collections.defaultdict(int)
2389+
self.early_errors = []
2390+
self._type_checker = None
2391+
self.add_ancestors()
2392+
self.size_hint = size_hint
23182393

23192394
def reload_meta(self) -> None:
23202395
"""Force reload of cache meta.
@@ -3070,7 +3145,7 @@ def in_partial_package(id: str, manager: BuildManager) -> bool:
30703145
else:
30713146
# Parent is not in build, try quickly if we can find it.
30723147
try:
3073-
parent_st = State(
3148+
parent_st = State.new_state(
30743149
id=parent, path=None, source=None, manager=manager, temporary=True
30753150
)
30763151
except (ModuleNotFound, CompileError):
@@ -3419,7 +3494,7 @@ def load_graph(
34193494
# Seed the graph with the initial root sources.
34203495
for bs in sources:
34213496
try:
3422-
st = State(
3497+
st = State.new_state(
34233498
id=bs.module,
34243499
path=bs.path,
34253500
source=bs.text,
@@ -3497,11 +3572,11 @@ def load_graph(
34973572
if dep in st.ancestors:
34983573
# TODO: Why not 'if dep not in st.dependencies' ?
34993574
# Ancestors don't have import context.
3500-
newst = State(
3575+
newst = State.new_state(
35013576
id=dep, path=None, source=None, manager=manager, ancestor_for=st
35023577
)
35033578
else:
3504-
newst = State(
3579+
newst = State.new_state(
35053580
id=dep,
35063581
path=None,
35073582
source=None,

mypy/test/testgraph.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,21 +60,21 @@ def _make_manager(self) -> BuildManager:
6060
def test_sorted_components(self) -> None:
6161
manager = self._make_manager()
6262
graph = {
63-
"a": State("a", None, "import b, c", manager),
64-
"d": State("d", None, "pass", manager),
65-
"b": State("b", None, "import c", manager),
66-
"c": State("c", None, "import b, d", manager),
63+
"a": State.new_state("a", None, "import b, c", manager),
64+
"d": State.new_state("d", None, "pass", manager),
65+
"b": State.new_state("b", None, "import c", manager),
66+
"c": State.new_state("c", None, "import b, d", manager),
6767
}
6868
res = [scc.mod_ids for scc in sorted_components(graph)]
6969
assert_equal(res, [{"d"}, {"c", "b"}, {"a"}])
7070

7171
def test_order_ascc(self) -> None:
7272
manager = self._make_manager()
7373
graph = {
74-
"a": State("a", None, "import b, c", manager),
75-
"d": State("d", None, "def f(): import a", manager),
76-
"b": State("b", None, "import c", manager),
77-
"c": State("c", None, "import b, d", manager),
74+
"a": State.new_state("a", None, "import b, c", manager),
75+
"d": State.new_state("d", None, "def f(): import a", manager),
76+
"b": State.new_state("b", None, "import c", manager),
77+
"c": State.new_state("c", None, "import b, d", manager),
7878
}
7979
res = [scc.mod_ids for scc in sorted_components(graph)]
8080
assert_equal(res, [frozenset({"a", "d", "c", "b"})])

0 commit comments

Comments
 (0)