Skip to content

Commit 7ea4657

Browse files
committed
Write serialized options to a file to broadcast to workers
1 parent d99e2c9 commit 7ea4657

3 files changed

Lines changed: 38 additions & 22 deletions

File tree

mypy/build.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import gc
1919
import json
2020
import os
21-
import pickle
2221
import platform
2322
import re
2423
import stat
@@ -44,7 +43,6 @@
4443
final,
4544
)
4645

47-
from librt.base64 import b64encode
4846
from librt.internal import (
4947
cache_version,
5048
read_bool,
@@ -281,7 +279,7 @@ def __init__(self, status_file: str, options_data: str, env: Mapping[str, str])
281279
"-m",
282280
"mypy.build_worker",
283281
f"--status-file={status_file}",
284-
f'--options-data="{options_data}"',
282+
f"--options-data={options_data}",
285283
]
286284
# Return early without waiting, caller must call connect() before using the client.
287285
self.proc = subprocess.Popen(command, env=env)
@@ -389,10 +387,11 @@ def default_flush_errors(
389387
connect_threads = []
390388
# A quasi-unique ID for this specific mypy invocation.
391389
build_id = os.urandom(4).hex()
390+
options_data = None
392391
if options.num_workers > 0:
393-
# TODO: switch to something more efficient than pickle (also in the daemon).
394-
pickled_options = pickle.dumps(options.snapshot())
395-
options_data = b64encode(pickled_options).decode()
392+
options_data = f".mypy_worker_options.{build_id}.data"
393+
with open(options_data, "wb") as f:
394+
f.write(options.to_bytes())
396395
workers = [
397396
WorkerClient(
398397
f".mypy_worker.{build_id}.{idx}.json", options_data, worker_env or os.environ
@@ -446,6 +445,8 @@ def connect(wc: WorkerClient, data: bytes) -> None:
446445
finally:
447446
# In case of an early crash it is better to wait for workers to become ready, and
448447
# shut them down cleanly. Otherwise, they will linger until connection timeout.
448+
if options_data is not None:
449+
os.unlink(options_data)
449450
for thread in connect_threads:
450451
thread.join()
451452
for worker in workers:
@@ -560,10 +561,11 @@ def build_inner(
560561
def warn_unused_configs(
561562
options: Options, flush_errors: Callable[[str | None, list[str], bool], None]
562563
) -> None:
563-
if options.warn_unused_configs and options.unused_configs and not options.non_interactive:
564+
unused_configs = options.get_unused_configs()
565+
if options.warn_unused_configs and unused_configs and not options.non_interactive:
564566
unused = get_config_module_names(
565567
options.config_file,
566-
[glob for glob in options.per_module_options.keys() if glob in options.unused_configs],
568+
[glob for glob in options.per_module_options.keys() if glob in unused_configs],
567569
)
568570
flush_errors(
569571
None, ["{}: note: unused section(s): {}".format(options.config_file, unused)], False

mypy/build_worker/worker.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* In a loop:
1212
- Receive an SCC id from coordinator, and start processing it.
1313
- SCC is processed in two phases: interface and implementation, send a response after each.
14-
- When prompted by coordinator (with a scc_id=None message), cleanup and shutdown.
14+
- When prompted by coordinator (with a scc_ids=[] message), cleanup and shutdown.
1515
"""
1616

1717
from __future__ import annotations
@@ -20,13 +20,11 @@
2020
import gc
2121
import json
2222
import os
23-
import pickle
2423
import platform
2524
import sys
2625
import time
2726
from typing import NamedTuple
2827

29-
from librt.base64 import b64decode
3028
from librt.internal import ReadBuffer, read_tag
3129

3230
from mypy import util
@@ -47,7 +45,7 @@
4745
process_stale_scc_implementation,
4846
process_stale_scc_interface,
4947
)
50-
from mypy.cache import Tag, read_int_list
48+
from mypy.cache import Tag, read_int_list, read_json
5149
from mypy.defaults import RECURSION_LIMIT, WORKER_CONNECTION_TIMEOUT, WORKER_IDLE_TIMEOUT
5250
from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
5351
from mypy.fscache import FileSystemCache
@@ -60,7 +58,7 @@
6058

6159
parser = argparse.ArgumentParser(prog="mypy_worker", description="Mypy build worker")
6260
parser.add_argument("--status-file", help="status file to communicate worker details")
63-
parser.add_argument("--options-data", help="serialized mypy options")
61+
parser.add_argument("--options-data", help="file with serialized mypy options")
6462

6563
CONNECTION_NAME = "build_worker"
6664

@@ -84,11 +82,12 @@ def main(argv: list[str]) -> None:
8482
# This mimics how daemon receives the options. Note we need to postpone
8583
# processing error codes after plugins are loaded, because plugins can add
8684
# custom error codes.
87-
options_dict = pickle.loads(b64decode(args.options_data))
88-
options_obj = Options()
85+
with open(args.options_data, "rb") as f:
86+
buf = ReadBuffer(f.read())
87+
options_dict = read_json(buf)
8988
disable_error_code = options_dict.pop("disable_error_code", [])
9089
enable_error_code = options_dict.pop("enable_error_code", [])
91-
options = options_obj.apply_changes(options_dict)
90+
options = Options().apply_changes(options_dict)
9291

9392
status_file = args.status_file
9493
server = IPCServer(CONNECTION_NAME, WORKER_CONNECTION_TIMEOUT)

mypy/options.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from librt.internal import WriteBuffer, write_bool, write_str
1212

1313
from mypy import defaults
14+
from mypy.cache import write_json
1415
from mypy.errorcodes import ErrorCode, error_codes
1516
from mypy.util import get_class_descriptors, replace_object_state
1617

@@ -343,7 +344,7 @@ def __init__(self) -> None:
343344
# Per-module options (raw)
344345
self.per_module_options: dict[str, dict[str, object]] = {}
345346
self._glob_options: list[tuple[str, Pattern[str]]] = []
346-
self.unused_configs: set[str] = set()
347+
self._unused_configs: set[str] = set()
347348

348349
# -- development options --
349350
self.verbosity = 0 # More verbose messages (for troubleshooting)
@@ -451,6 +452,17 @@ def snapshot(self) -> dict[str, object]:
451452
d = {k: v for k, v in d.items() if not k.startswith("_")}
452453
return d
453454

455+
def to_bytes(self) -> bytes:
456+
"""Serialize this options object to binary data."""
457+
assert self.transform_source is None, "Source transform cannot be serialized"
458+
snapshot = self.snapshot()
459+
# Caller will need to use process_error_codes() to re-compute these.
460+
del snapshot["disabled_error_codes"]
461+
del snapshot["enabled_error_codes"]
462+
buf = WriteBuffer()
463+
write_json(buf, snapshot)
464+
return buf.getvalue()
465+
454466
def __repr__(self) -> str:
455467
return f"Options({pprint.pformat(self.snapshot())})"
456468

@@ -560,7 +572,7 @@ def build_per_module_cache(self) -> None:
560572
# sections as used if any real modules use them or if any
561573
# concrete config sections use them. This means we need to
562574
# track which get used while constructing.
563-
self.unused_configs = set(unstructured_glob_keys)
575+
self._unused_configs = set(unstructured_glob_keys)
564576

565577
for key in wildcards + concrete:
566578
# Find what the options for this key would be, just based
@@ -571,7 +583,7 @@ def build_per_module_cache(self) -> None:
571583

572584
# Add the more structured sections into unused configs, since
573585
# they only count as used if actually used by a real module.
574-
self.unused_configs.update(structured_keys)
586+
self._unused_configs.update(structured_keys)
575587

576588
def clone_for_module(self, module: str) -> Options:
577589
"""Create an Options object that incorporates per-module options.
@@ -585,7 +597,7 @@ def clone_for_module(self, module: str) -> Options:
585597

586598
# If the module just directly has a config entry, use it.
587599
if module in self._per_module_cache:
588-
self.unused_configs.discard(module)
600+
self._unused_configs.discard(module)
589601
return self._per_module_cache[module]
590602

591603
# If not, search for glob paths at all the parents. So if we are looking for
@@ -598,7 +610,7 @@ def clone_for_module(self, module: str) -> Options:
598610
for i in range(len(path), 0, -1):
599611
key = ".".join(path[:i] + ["*"])
600612
if key in self._per_module_cache:
601-
self.unused_configs.discard(key)
613+
self._unused_configs.discard(key)
602614
options = self._per_module_cache[key]
603615
break
604616

@@ -607,7 +619,7 @@ def clone_for_module(self, module: str) -> Options:
607619
if not module.endswith(".*"):
608620
for key, pattern in self._glob_options:
609621
if pattern.match(module):
610-
self.unused_configs.discard(key)
622+
self._unused_configs.discard(key)
611623
options = options.apply_changes(self.per_module_options[key])
612624

613625
# We could update the cache to directly point to modules once
@@ -616,6 +628,9 @@ def clone_for_module(self, module: str) -> Options:
616628

617629
return options
618630

631+
def get_unused_configs(self) -> set[str]:
632+
return self._unused_configs.copy()
633+
619634
def compile_glob(self, s: str) -> Pattern[str]:
620635
# Compile one of the glob patterns to a regex so that '.*' can
621636
# match *zero or more* module sections. This means we compile

0 commit comments

Comments
 (0)