Skip to content

Commit abce326

Browse files
authored
Merge pull request #88 from klauer/html_output
ENH: first pass at html output
2 parents a39b3f1 + 4cd1d06 commit abce326

3 files changed

Lines changed: 183 additions & 1 deletion

File tree

blark/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pathlib
22

3-
from . import _version, plain
3+
from . import _version, html, plain
44
from .parse import get_parser, new_parser, parse_project, parse_source_code
55
from .solution import Project, Solution, TwincatTsProject
66
from .transform import GrammarTransformer
@@ -13,6 +13,7 @@
1313
GRAMMAR_FILENAME = MODULE_PATH / "iec.lark"
1414

1515
plain._register()
16+
html._register()
1617

1718
__all__ = [
1819
"GRAMMAR_FILENAME",

blark/html.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
"""Syntax-highlighted HTML file writer."""
2+
from __future__ import annotations
3+
4+
import collections
5+
import dataclasses
6+
import pathlib
7+
from typing import Any, DefaultDict, Dict, Generator, List, Optional
8+
9+
import lark
10+
11+
from .output import OutputBlock, register_output_handler
12+
13+
14+
@dataclasses.dataclass(frozen=True)
class HighlighterAnnotation:
    """One HTML tag (opening or closing) attached to a source code position."""

    # Grammar name; rendered as one of the CSS classes of the opening tag.
    name: str
    # True selects the "term" CSS class, False selects "rule".
    terminal: bool
    # True renders an opening tag, False renders a closing tag.
    is_open_tag: bool
    # Source position of the matching tag of this pair.
    other_tag_pos: int

    def __str__(self) -> str:
        return self.as_string()

    def as_string(self, tag: str = "span") -> str:
        """
        Render this annotation as HTML markup.

        Parameters
        ----------
        tag : str, optional
            The HTML element name to use.  Defaults to ``span``.

        Returns
        -------
        str
            ``</tag>`` for a closing annotation; otherwise an opening tag
            whose ``class`` attribute is ``term`` or ``rule`` followed by
            the annotation name.
        """
        if self.is_open_tag:
            kind = "term" if self.terminal else "rule"
            css_classes = " ".join((kind, self.name))
            return f'<{tag} class="{css_classes}">'

        return f'</{tag}>'
37+
38+
39+
def _add_annotation_pair(
    annotations: DefaultDict[int, List[HighlighterAnnotation]],
    name: str,
    start_pos: int,
    end_pos: int,
    terminal: bool,
) -> None:
    """
    Record the opening and closing tag annotations for one source range.

    The opening annotation is appended at ``start_pos`` and the closing
    annotation at ``end_pos``; each one stores the position of its partner.

    Parameters
    ----------
    annotations : DefaultDict[int, List[HighlighterAnnotation]]
        Annotations keyed by 0-indexed string position.  Modified in place.
    name : str
        Name of the annotation.
    start_pos : int
        String index position at which the annotation begins.
    end_pos : int
        String index position at which the annotation ends.
    terminal : bool
        Whether this is a TERMINAL (True) or a rule (False).
    """
    # (position of this tag, is it the opening tag?, position of its partner)
    endpoints = (
        (start_pos, True, end_pos),
        (end_pos, False, start_pos),
    )
    for position, opens, partner_pos in endpoints:
        annotations[position].append(
            HighlighterAnnotation(
                name=name,
                terminal=terminal,
                is_open_tag=opens,
                other_tag_pos=partner_pos,
            )
        )
78+
79+
80+
def get_annotations(tree: lark.Tree) -> DefaultDict[int, List[HighlighterAnnotation]]:
    """
    Collect tag annotations for every rule and token of a parse tree.

    Parameters
    ----------
    tree : lark.Tree
        The parse tree to walk.

    Returns
    -------
    DefaultDict[int, List[HighlighterAnnotation]]
        Annotations keyed by 0-indexed string position.
    """
    found: DefaultDict[int, List[HighlighterAnnotation]] = collections.defaultdict(
        list
    )

    for node in tree.iter_subtrees():
        meta = node.meta
        # Only subtrees whose meta carries position information are annotated.
        if hasattr(meta, "start_pos"):
            _add_annotation_pair(
                found,
                name=node.data,
                terminal=False,
                start_pos=meta.start_pos,
                end_pos=meta.end_pos,
            )

        tokens = (child for child in node.children if isinstance(child, lark.Token))
        for token in tokens:
            # Tokens without a known position cannot be placed in the text.
            if token.start_pos is None or token.end_pos is None:
                continue
            _add_annotation_pair(
                found,
                name=token.type,
                terminal=True,
                start_pos=token.start_pos,
                end_pos=token.end_pos,
            )

    return found
106+
107+
108+
def apply_annotations_to_code(
    code: str,
    annotations: Dict[int, List[HighlighterAnnotation]]
) -> str:
    """
    Interleave HTML tag annotations with HTML-escaped source code.

    Parameters
    ----------
    code : str
        The raw source code to annotate.
    annotations : Dict[int, List[HighlighterAnnotation]]
        Annotations keyed by 0-indexed string position.  Tags stored at
        position ``len(code)`` are emitted after the last character.

    Returns
    -------
    str
        The source code with spaces replaced by ``&nbsp;``, the characters
        ``&``, ``<`` and ``>`` replaced by character references, and the
        string form of each annotation inserted at its position.
    """
    # Characters that must not appear verbatim in HTML text content, plus
    # the non-breaking space used to preserve source indentation.
    escapes = {" ": "&nbsp;", "&": "&amp;", "<": "&lt;", ">": "&gt;"}

    def tags_at(pos: int) -> Generator[str, None, None]:
        """Yield the tags for ``pos`` in an order that nests correctly."""
        at_pos = annotations.get(pos, [])
        if not at_pos:
            return

        # 1. Close elements that began earlier, innermost (latest start)
        #    first.  On equal spans a terminal is nested inside its rule;
        #    remaining ties keep insertion order.
        closing = sorted(
            (
                ann for ann in at_pos
                if not ann.is_open_tag and ann.other_tag_pos < pos
            ),
            key=lambda ann: (-ann.other_tag_pos, not ann.terminal),
        )
        for ann in closing:
            yield str(ann)

        # 2. Open elements starting here, widest (latest end) first.  On
        #    equal spans a rule wraps its terminal; remaining ties use
        #    reversed insertion order (later-added annotations open first).
        opening = sorted(
            (ann for ann in reversed(at_pos) if ann.is_open_tag),
            key=lambda ann: (-ann.other_tag_pos, ann.terminal),
        )
        for ann in opening:
            yield str(ann)

        # 3. Zero-width elements opened in step 2 close immediately.
        for ann in at_pos:
            if not ann.is_open_tag and ann.other_tag_pos >= pos:
                yield str(ann)

    def annotate() -> Generator[str, None, None]:
        for pos, ch in enumerate(code):
            yield from tags_at(pos)
            yield escapes.get(ch, ch)

        # Tags that end exactly at the end of the text.
        yield from tags_at(len(code))

    return "".join(annotate())
126+
127+
128+
@dataclasses.dataclass
class HtmlWriter:
    """Renders the source code of one output block as tag-annotated HTML."""

    # Value passed through from the output handler call; not read by this class.
    user: Any
    # Stored from the output handler call; not read by this class.
    source_filename: Optional[pathlib.Path]
    # The block whose ``origin`` supplies the source code, tree and comments.
    block: OutputBlock

    @property
    def source_code(self) -> str:
        """The source code associated with the block."""
        origin = self.block.origin
        assert origin is not None
        return origin.source_code

    def to_html(self) -> str:
        """
        Build the HTML tag-annotated source code for the block.

        Annotations come from the block's parse tree, plus one terminal
        annotation per comment that has both a start and an end position.
        """
        origin = self.block.origin
        assert origin is not None
        assert origin.tree is not None
        tags = get_annotations(origin.tree)

        positioned_comments = (
            comment
            for comment in origin.comments
            if comment.start_pos is not None and comment.end_pos is not None
        )
        for comment in positioned_comments:
            _add_annotation_pair(
                tags,
                name=comment.type,
                start_pos=comment.start_pos,
                end_pos=comment.end_pos,
                terminal=True,
            )

        return apply_annotations_to_code(self.source_code, tags)

    @staticmethod
    def save(
        user: Any,
        source_filename: Optional[pathlib.Path],
        parts: List[OutputBlock],
    ) -> str:
        """
        Convert each source code block to HTML and return the combined text.

        The per-block results are joined with a blank line between them.
        """
        return "\n\n".join(
            HtmlWriter(user, source_filename, part).to_html()
            for part in parts
        )
171+
172+
173+
def _register():
    """Register ``HtmlWriter.save`` as the output handler for the HTML keys."""
    for key in ("html", ".htm", ".html"):
        register_output_handler(key, HtmlWriter.save)

blark/tests/test_cli.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ def test_blark_main_help(monkeypatch, args: List[str]):
154154
["format", "--debug", "filename"],
155155
id="format-debug",
156156
),
157+
param(
158+
["format", "-o", "html", "filename"],
159+
id="format-html",
160+
),
157161
]
158162
)
159163
def test_blark_main(monkeypatch, input_filename: str, args: List[str], skip_summary: bool):

0 commit comments

Comments
 (0)