Skip to content

Commit 548a729

Browse files
authored
[mypyc] Do not preemptively diff & union op level dataflow sets (#20897)
Mypyc is currently unable to compile [SQLGlot's AST](https://github.com/tobymao/sqlglot/pull/7143/changes) (~950 classes in a single file): it runs out of memory (OOM) even on a 64 GB machine. Upon investigation, one bottleneck appears to be the line that computes the per-`Op` dataflow sets within each basic block (`cur = (cur - opkill) | opgen`). Because most instructions generate empty `kill` and `gen` sets, eagerly executing the difference and union creates a new, identical copy of `cur` for every such op. I have verified locally that this fix unblocks compilation. Note that mypyc still consumes 6-7 GB of RAM, so I'm still looking for further improvements.
1 parent 7dad0c6 commit 548a729

File tree

2 files changed

+49
-30
lines changed

2 files changed

+49
-30
lines changed

mypyc/analysis/dataflow.py

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
from __future__ import annotations
44

55
from abc import abstractmethod
6-
from collections.abc import Iterable, Iterator
7-
from typing import Generic, TypeVar
6+
from collections.abc import Iterable, Iterator, Set as AbstractSet
7+
from typing import Any, Generic, TypeVar
88

99
from mypyc.ir.ops import (
1010
Assign,
@@ -174,12 +174,14 @@ def __str__(self) -> str:
174174
return f"before: {self.before}\nafter: {self.after}\n"
175175

176176

177-
GenAndKill = tuple[set[T], set[T]]
177+
GenAndKill = tuple[AbstractSet[T], AbstractSet[T]]
178+
179+
_EMPTY: tuple[frozenset[Any], frozenset[Any]] = (frozenset(), frozenset())
178180

179181

180182
class BaseAnalysisVisitor(OpVisitor[GenAndKill[T]]):
181183
def visit_goto(self, op: Goto) -> GenAndKill[T]:
182-
return set(), set()
184+
return _EMPTY
183185

184186
@abstractmethod
185187
def visit_register_op(self, op: RegisterOp) -> GenAndKill[T]:
@@ -317,16 +319,16 @@ def __init__(self, strict_errors: bool = False) -> None:
317319
self.strict_errors = strict_errors
318320

319321
def visit_branch(self, op: Branch) -> GenAndKill[Value]:
320-
return set(), set()
322+
return _EMPTY
321323

322324
def visit_return(self, op: Return) -> GenAndKill[Value]:
323-
return set(), set()
325+
return _EMPTY
324326

325327
def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
326-
return set(), set()
328+
return _EMPTY
327329

328330
def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
329-
return set(), set()
331+
return _EMPTY
330332

331333
def visit_assign(self, op: Assign) -> GenAndKill[Value]:
332334
# Loading an error value may undefine the register.
@@ -337,10 +339,10 @@ def visit_assign(self, op: Assign) -> GenAndKill[Value]:
337339

338340
def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
339341
# Array registers are special and we don't track the definedness of them.
340-
return set(), set()
342+
return _EMPTY
341343

342344
def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
343-
return set(), set()
345+
return _EMPTY
344346

345347

346348
def analyze_maybe_defined_regs(
@@ -392,27 +394,27 @@ def __init__(self, args: set[Value]) -> None:
392394
self.args = args
393395

394396
def visit_branch(self, op: Branch) -> GenAndKill[Value]:
395-
return set(), set()
397+
return _EMPTY
396398

397399
def visit_return(self, op: Return) -> GenAndKill[Value]:
398-
return set(), set()
400+
return _EMPTY
399401

400402
def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
401-
return set(), set()
403+
return _EMPTY
402404

403405
def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
404-
return set(), set()
406+
return _EMPTY
405407

406408
def visit_assign(self, op: Assign) -> GenAndKill[Value]:
407409
if op.dest in self.args:
408410
return set(), {op.dest}
409-
return set(), set()
411+
return _EMPTY
410412

411413
def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
412-
return set(), set()
414+
return _EMPTY
413415

414416
def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
415-
return set(), set()
417+
return _EMPTY
416418

417419

418420
def analyze_borrowed_arguments(
@@ -435,13 +437,13 @@ def analyze_borrowed_arguments(
435437

436438
class UndefinedVisitor(BaseAnalysisVisitor[Value]):
437439
def visit_branch(self, op: Branch) -> GenAndKill[Value]:
438-
return set(), set()
440+
return _EMPTY
439441

440442
def visit_return(self, op: Return) -> GenAndKill[Value]:
441-
return set(), set()
443+
return _EMPTY
442444

443445
def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
444-
return set(), set()
446+
return _EMPTY
445447

446448
def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
447449
return set(), {op} if not op.is_void else set()
@@ -453,7 +455,7 @@ def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
453455
return set(), {op.dest}
454456

455457
def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
456-
return set(), set()
458+
return _EMPTY
457459

458460

459461
def non_trivial_sources(op: Op) -> set[Value]:
@@ -472,10 +474,10 @@ def visit_return(self, op: Return) -> GenAndKill[Value]:
472474
if not isinstance(op.value, (Integer, Float)):
473475
return {op.value}, set()
474476
else:
475-
return set(), set()
477+
return _EMPTY
476478

477479
def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
478-
return set(), set()
480+
return _EMPTY
479481

480482
def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
481483
gen = non_trivial_sources(op)
@@ -494,10 +496,10 @@ def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
494496
return non_trivial_sources(op), set()
495497

496498
def visit_inc_ref(self, op: IncRef) -> GenAndKill[Value]:
497-
return set(), set()
499+
return _EMPTY
498500

499501
def visit_dec_ref(self, op: DecRef) -> GenAndKill[Value]:
500-
return set(), set()
502+
return _EMPTY
501503

502504

503505
def analyze_live_regs(blocks: list[BasicBlock], cfg: CFG) -> AnalysisResult[Value]:
@@ -559,8 +561,16 @@ def run_analysis(
559561
ops = list(reversed(ops))
560562
for op in ops:
561563
opgen, opkill = op.accept(gen_and_kill)
562-
gen = (gen - opkill) | opgen
563-
kill = (kill - opgen) | opkill
564+
if opkill:
565+
gen -= opkill
566+
567+
if opgen:
568+
gen |= opgen
569+
kill -= opgen
570+
571+
if opkill:
572+
kill |= opkill
573+
564574
block_gen[block] = gen
565575
block_kill[block] = kill
566576

@@ -624,7 +634,10 @@ def run_analysis(
624634
for idx, op in ops_enum:
625635
op_before[label, idx] = cur
626636
opgen, opkill = op.accept(gen_and_kill)
627-
cur = (cur - opkill) | opgen
637+
if opkill:
638+
cur = cur - opkill
639+
if opgen:
640+
cur = cur | opgen
628641
op_after[label, idx] = cur
629642
if backward:
630643
op_after, op_before = op_before, op_after

mypyc/analysis/selfleaks.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
from __future__ import annotations
22

3-
from mypyc.analysis.dataflow import CFG, MAYBE_ANALYSIS, AnalysisResult, run_analysis
3+
from mypyc.analysis.dataflow import (
4+
CFG,
5+
MAYBE_ANALYSIS,
6+
AnalysisResult,
7+
GenAndKill as _DataflowGenAndKill,
8+
run_analysis,
9+
)
410
from mypyc.ir.ops import (
511
Assign,
612
AssignMulti,
@@ -49,7 +55,7 @@
4955
)
5056
from mypyc.ir.rtypes import RInstance
5157

52-
GenAndKill = tuple[set[None], set[None]]
58+
GenAndKill = _DataflowGenAndKill[None]
5359

5460
CLEAN: GenAndKill = (set(), set())
5561
DIRTY: GenAndKill = ({None}, {None})

0 commit comments

Comments
 (0)