Skip to content

Commit 5043b84

Browse files
authored
[mypyc] Add prototype feature to generate annotated html of compiled code (#18828)
Now you can use `mypyc -a x.html foo.py` to compile `foo.py` and generate `x.html`, which contains annotated source code for `foo.py` with some inefficient operations highlighted with a red background. Right now this is close to minimal and only detects a few inefficient operations. I will improve this in follow-up PRs. The overall idea is similar to the Cython `-a` flag or `annotate=True`. Here is an example (scroll down a bit): https://cython.readthedocs.io/en/latest/src/tutorial/cython_tutorial.html#primes The approach here differs in some ways from Cython. First, we only generate a single html file with annotations for all compiled files. I think this will make things easier when compiling a large number of modules. We'll probably need to add some navigation aids to the generated html eventually. Second, instead of showing the C code when encountering inefficient operations, we will generate (hopefully) easy-to-understand hints that explain what is slow (and perhaps why), without requiring any understanding of C.
1 parent e5546fe commit 5043b84

File tree

8 files changed

+233
-4
lines changed

8 files changed

+233
-4
lines changed

mypy/main.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,11 @@ def add_invertible_flag(
11231123
dest=f"special-opts:{report_type}_report",
11241124
)
11251125

1126+
# Undocumented mypyc feature: generate annotated HTML source file
1127+
report_group.add_argument(
1128+
"-a", dest="mypyc_annotation_file", type=str, default=None, help=argparse.SUPPRESS
1129+
)
1130+
11261131
other_group = parser.add_argument_group(title="Miscellaneous")
11271132
other_group.add_argument("--quickstart-file", help=argparse.SUPPRESS)
11281133
other_group.add_argument("--junit-xml", help="Write junit.xml to the given file")

mypy/options.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,9 @@ def __init__(self) -> None:
406406
# Sets custom output format
407407
self.output: str | None = None
408408

409+
# Output html file for mypyc -a
410+
self.mypyc_annotation_file: str | None = None
411+
409412
def use_lowercase_names(self) -> bool:
410413
if self.python_version >= (3, 9):
411414
return not self.force_uppercase_builtins

mypyc/annotate.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
from __future__ import annotations
2+
3+
import os.path
4+
import sys
5+
from html import escape
6+
7+
from mypy.build import BuildResult
8+
from mypy.nodes import MypyFile
9+
from mypy.util import FancyFormatter
10+
from mypyc.ir.func_ir import FuncIR
11+
from mypyc.ir.module_ir import ModuleIR
12+
from mypyc.ir.ops import CallC, LoadLiteral, Value
13+
14+
CSS = """\
15+
.collapsible {
16+
cursor: pointer;
17+
}
18+
19+
.content {
20+
display: block;
21+
margin-top: 10px;
22+
margin-bottom: 10px;
23+
}
24+
25+
.hint {
26+
display: inline;
27+
border: 1px solid #ccc;
28+
padding: 5px;
29+
}
30+
"""
31+
32+
JS = """\
33+
document.querySelectorAll('.collapsible').forEach(function(collapsible) {
34+
collapsible.addEventListener('click', function() {
35+
const content = this.nextElementSibling;
36+
if (content.style.display === 'none') {
37+
content.style.display = 'block';
38+
} else {
39+
content.style.display = 'none';
40+
}
41+
});
42+
});
43+
"""
44+
45+
46+
class AnnotatedSource:
    """Annotations collected for a single source file.

    Attributes are plain public fields:
      * path: path of the source file the annotations refer to
      * annotations: maps a 1-based source line number to the hint
        messages recorded for that line
    """

    def __init__(self, path: str, annotations: dict[int, list[str]]) -> None:
        # The dict is stored as-is (no copy), so the caller's object is shared.
        self.annotations = annotations
        self.path = path
50+
51+
52+
def generate_annotated_html(
    html_fnam: str, result: BuildResult, modules: dict[str, ModuleIR]
) -> None:
    """Write an annotated HTML report covering all compiled modules.

    Args:
        html_fnam: Path of the HTML file to create (overwritten if present).
        result: Build result; its graph supplies each module's path and tree.
        modules: Compiled modules, keyed by module name.

    After writing the file, a short message with the absolute path of the
    report is printed to stdout.
    """
    annotations = []
    for mod, mod_ir in modules.items():
        state = result.graph[mod]
        tree = state.tree
        assert tree is not None
        annotations.append(generate_annotations(state.path or "<source>", tree, mod_ir))
    html = generate_html_report(annotations)
    # The report embeds the source text verbatim. Write it as UTF-8 explicitly:
    # with the locale's default encoding, non-ASCII source characters can raise
    # UnicodeEncodeError (for example on Windows code pages).
    with open(html_fnam, "w", encoding="utf-8") as f:
        f.write(html)

    formatter = FancyFormatter(sys.stdout, sys.stderr, False)
    formatted = formatter.style(os.path.abspath(html_fnam), "none", underline=True, bold=True)
    print(f"\nWrote {formatted} -- open in browser to view\n")
68+
69+
70+
def generate_annotations(path: str, tree: MypyFile, ir: ModuleIR) -> AnnotatedSource:
    """Collect the annotations of every function in a module.

    Args:
        path: Source path to record in the returned AnnotatedSource.
        tree: AST of the module (currently unused by this function).
        ir: Module IR whose functions are scanned.

    Returns:
        An AnnotatedSource mapping line numbers to hint messages.
    """
    anns: dict[int, list[str]] = {}
    for func_ir in ir.functions:
        # Merge per line instead of dict.update(): several functions can have
        # ops on the same source line (e.g. a function and a lambda defined on
        # that line), and update() would replace the earlier hints wholesale.
        for line, messages in function_annotations(func_ir).items():
            anns.setdefault(line, []).extend(messages)
    return AnnotatedSource(path, anns)
75+
76+
77+
def function_annotations(func_ir: FuncIR) -> dict[int, list[str]]:
    """Return hints for the recognized slow operations in one function.

    Every CallC op in every basic block is inspected; calls to
    "CPyObject_GetAttr" and "PyNumber_Add" produce a message. The result
    maps op.line to the messages for that line, in op order.
    """
    # TODO: check if func_ir.line is -1
    hints: dict[int, list[str]] = {}
    for basic_block in func_ir.blocks:
        for op in basic_block.ops:
            if not isinstance(op, CallC):
                continue
            callee = op.function_name
            if callee == "PyNumber_Add":
                message = 'Generic "+" operation.'
            elif callee == "CPyObject_GetAttr":
                # The second argument is the attribute name; it is only
                # reported when it is a string literal.
                literal = get_str_literal(op.args[1])
                if literal:
                    message = f'Get non-native attribute "{literal}".'
                else:
                    message = "Dynamic attribute lookup."
            else:
                # Any other C call is not flagged.
                continue
            hints.setdefault(op.line, []).append(message)
    return hints
96+
97+
98+
def get_str_literal(v: Value) -> str | None:
    """Return the string held by a LoadLiteral value, or None otherwise.

    None is returned both when v is not a LoadLiteral and when the
    literal it loads is not a str.
    """
    if not isinstance(v, LoadLiteral):
        return None
    literal = v.value
    return literal if isinstance(literal, str) else None
102+
103+
104+
def generate_html_report(sources: list[AnnotatedSource]) -> str:
    """Render annotated sources as a single self-contained HTML document.

    For each source, the file at src.path is read and emitted line by line
    inside a <pre> block with a right-aligned line number. Lines that have
    annotations are highlighted and followed by their hints.

    Args:
        sources: Annotated source files to include, in output order.

    Returns:
        The complete HTML document as a string.

    Raises:
        OSError: If a source file cannot be opened.
    """
    html = []
    html.append("<html>\n<head>\n")
    html.append(f"<style>\n{CSS}\n</style>")
    html.append("</head>\n")
    html.append("<body>\n")
    for src in sources:
        # Paths are arbitrary text (including the "<source>" placeholder), so
        # escape them to keep them from being parsed as markup.
        html.append(f"<h2><tt>{escape(src.path)}</tt></h2>\n")
        html.append("<pre>")
        anns = src.annotations
        # Python source defaults to UTF-8; don't depend on the locale encoding.
        with open(src.path, encoding="utf-8") as f:
            lines = f.readlines()
        for line, s in enumerate(lines, start=1):
            s = escape(s)
            linenum = "%5d" % line
            if line in anns:
                # Hints are plain text that may embed names taken from the
                # compiled program; escape them before inserting as HTML.
                hint = " ".join(escape(ann) for ann in anns[line])
                s = colorize_line(linenum, s, hint_html=hint)
            else:
                s = linenum + " " + s
            html.append(s)
        html.append("</pre>")

    html.append("<script>")
    html.append(JS)
    html.append("</script>")

    html.append("</body></html>\n")
    return "".join(html)
134+
135+
136+
def colorize_line(linenum: str, s: str, hint_html: str) -> str:
    """Wrap a source line and its hint in the report's highlight markup.

    Args:
        linenum: Already formatted line-number text.
        s: The source line text to place after the line number.
        hint_html: Hint content, inserted verbatim into the hint box.

    Returns:
        A <span> holding the highlighted, clickable line followed by a
        content row with the hint.
    """
    # Indent the hint so it starts under the source text, not the number.
    indent = " " * (len(linenum) + 1)
    pieces = [
        "<span>",
        '<div class="collapsible" style="background-color: #fcc">',
        linenum,
        " ",
        s,
        "</div>",
        '<div class="content">',
        indent,
        '<div class="hint">',
        hint_html,
        "</div>",
        "</div>",
        "</span>",
    ]
    return "".join(pieces)

mypyc/build.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from mypy.main import process_options
3535
from mypy.options import Options
3636
from mypy.util import write_junit_xml
37+
from mypyc.annotate import generate_annotated_html
3738
from mypyc.codegen import emitmodule
3839
from mypyc.common import RUNTIME_C_FILES, shared_lib_name
3940
from mypyc.errors import Errors
@@ -253,6 +254,9 @@ def generate_c(
253254
if compiler_options.verbose:
254255
print(f"Compiled to C in {t2 - t1:.3f}s")
255256

257+
if options.mypyc_annotation_file:
258+
generate_annotated_html(options.mypyc_annotation_file, result, modules)
259+
256260
return ctext, "\n".join(format_modules(modules))
257261

258262

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[case testAnnotateNonNativeAttribute]
2+
def f(x):
3+
return x.foo
4+
5+
class C:
6+
foo: int
7+
8+
def g(x: C) -> int:
9+
return x.foo
10+
[out]
11+
2: Get non-native attribute "foo".
12+
13+
[case testAnnotateGenericAdd]
14+
def f(x):
15+
return x + 1
16+
17+
def g(x: int) -> int:
18+
return x + 1
19+
[out]
20+
2: Generic "+" operation.
21+
22+
[case testAnnotateTwoOperationsOnLine]
23+
def f(x):
24+
return x.foo + 1
25+
[out]
26+
2: Get non-native attribute "foo". Generic "+" operation.

mypyc/test/test_alwaysdefined.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
3131
return
3232
with use_custom_builtins(os.path.join(self.data_prefix, ICODE_GEN_BUILTINS), testcase):
3333
try:
34-
ir = build_ir_for_single_file2(testcase.input, options)
34+
ir = build_ir_for_single_file2(testcase.input, options)[0]
3535
except CompileError as e:
3636
actual = e.messages
3737
else:

mypyc/test/test_annotate.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""Test cases for annotating source code to highlight inefficiencies."""
2+
3+
from __future__ import annotations
4+
5+
import os.path
6+
7+
from mypy.errors import CompileError
8+
from mypy.test.config import test_temp_dir
9+
from mypy.test.data import DataDrivenTestCase
10+
from mypyc.annotate import generate_annotations
11+
from mypyc.test.testutil import (
12+
ICODE_GEN_BUILTINS,
13+
MypycDataSuite,
14+
assert_test_output,
15+
build_ir_for_single_file2,
16+
infer_ir_build_options_from_test_name,
17+
remove_comment_lines,
18+
use_custom_builtins,
19+
)
20+
21+
files = ["annotate-basic.test"]
22+
23+
24+
class TestReport(MypycDataSuite):
    """Data-driven tests for the annotations produced by mypyc.annotate."""

    files = files
    base_path = test_temp_dir
    optional_out = True

    def run_case(self, testcase: DataDrivenTestCase) -> None:
        """Build IR for the test case and compare the generated annotations.

        The actual output has one "<line>: <hints>" entry per annotated
        line. If compilation fails, the error messages are compared with
        the expected output instead.
        """
        options = infer_ir_build_options_from_test_name(testcase.name)
        if options is None:
            # Skipped test case
            return
        with use_custom_builtins(os.path.join(self.data_prefix, ICODE_GEN_BUILTINS), testcase):
            expected_output = remove_comment_lines(testcase.output)
            try:
                ir, tree = build_ir_for_single_file2(testcase.input, options)
            except CompileError as e:
                actual = e.messages
            else:
                annotations = generate_annotations("native.py", tree, ir)
                # Sort by line number so the output doesn't depend on the
                # order in which functions appear in the IR.
                actual = [
                    f"{line}: {' '.join(line_anns)}"
                    for line, line_anns in sorted(annotations.annotations.items())
                ]

            assert_test_output(testcase, actual, "Invalid source code output", expected_output)

mypyc/test/testutil.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from mypy import build
1414
from mypy.errors import CompileError
15+
from mypy.nodes import MypyFile
1516
from mypy.options import Options
1617
from mypy.test.config import test_temp_dir
1718
from mypy.test.data import DataDrivenTestCase, DataSuite
@@ -93,12 +94,12 @@ def perform_test(
9394
def build_ir_for_single_file(
9495
input_lines: list[str], compiler_options: CompilerOptions | None = None
9596
) -> list[FuncIR]:
96-
return build_ir_for_single_file2(input_lines, compiler_options).functions
97+
return build_ir_for_single_file2(input_lines, compiler_options)[0].functions
9798

9899

99100
def build_ir_for_single_file2(
100101
input_lines: list[str], compiler_options: CompilerOptions | None = None
101-
) -> ModuleIR:
102+
) -> tuple[ModuleIR, MypyFile]:
102103
program_text = "\n".join(input_lines)
103104

104105
# By default generate IR compatible with the earliest supported Python C API.
@@ -137,7 +138,9 @@ def build_ir_for_single_file2(
137138
module = list(modules.values())[0]
138139
for fn in module.functions:
139140
assert_func_ir_valid(fn)
140-
return module
141+
tree = result.graph[module.fullname].tree
142+
assert tree is not None
143+
return module, tree
141144

142145

143146
def update_testcase_output(testcase: DataDrivenTestCase, output: list[str]) -> None:

0 commit comments

Comments
 (0)