|
29 | 29 | # Stores {puml_basename: (docname, anchor_id_or_None)} |
30 | 30 | _ENV_PUML_DOCNAMES = "clickable_plantuml_puml_docnames" |
31 | 31 |
|
32 | | -# Characters allowed in PlantUML alias identifiers. |
33 | | -_ALIAS_SAFE_RE = re.compile(r"^[\w.]+$") |
| 32 | +# Simple PlantUML alias: alphanumeric + underscore only (matches ALIAS_ID grammar rule). |
| 33 | +_ALIAS_SIMPLE_RE = re.compile(r"^[\w]+$") |
| 34 | +# Extended simple alias: also allows hyphens and dots (common in FQN-style identifiers). |
| 35 | +_ALIAS_EXTENDED_RE = re.compile(r"^[\w][\w.\-]*$") |
| 36 | +# Quoted PlantUML alias: word chars, spaces, hyphens, and dots |
| 37 | +# (injected as "name" in url-of directives). |
| 38 | +_ALIAS_QUOTED_RE = re.compile(r"^[\w\s.\-]+$") |
34 | 39 |
|
35 | 40 |
|
36 | 41 | def _find_parent_section_id(node: nodes.Node) -> str | None: |
@@ -100,20 +105,45 @@ def _collect_link_data(source_dir: Path) -> dict[str, dict[str, Any]]: |
100 | 105 | # --------------------------------------------------------------------------- |
101 | 106 |
|
102 | 107 |
|
def _format_alias_part(alias: str) -> str | None:
    """Return the PlantUML-safe representation of *alias*, or ``None`` if unsafe.

    The result is meant to be interpolated into a single-line
    ``url of <alias> is [[...]]`` directive, so it must never contain a line
    break or a double quote.

    * Simple identifiers (word characters only) and extended identifiers
      (word characters, hyphens, dots, starting with a word character) are
      returned unchanged and unquoted.
    * Any other name made only of word characters, whitespace, hyphens and
      dots has its surrounding whitespace stripped and is wrapped in double
      quotes.
    * Everything else -- including empty / whitespace-only names and names
      with a line break inside them -- is rejected.

    Args:
        alias: Raw alias or element name taken from the link data.

    Returns:
        The text to place after ``url of``, or ``None`` when the alias cannot
        be represented safely and the link should be skipped.
    """
    # ``fullmatch`` rather than ``match``: the patterns may be anchored with
    # ``$``, which also matches just before a trailing newline and would
    # otherwise let "name\n" be returned verbatim.
    if _ALIAS_SIMPLE_RE.fullmatch(alias) or _ALIAS_EXTENDED_RE.fullmatch(alias):
        return alias

    # Validate the stripped form, since that is what actually gets emitted.
    stripped = alias.strip()
    if not stripped or not _ALIAS_QUOTED_RE.fullmatch(stripped):
        return None

    # The quoted pattern may accept any whitespace; an interior line break
    # would split the directive and allow injecting extra PlantUML lines.
    if len(stripped.splitlines()) > 1:
        return None

    return f'"{stripped}"'
| 125 | + |
| 126 | + |
103 | 127 | def _inject_links_into_uml(uml_content: str, links: dict[str, str]) -> str: |
104 | | - """Append ``url of <alias> is [[url]]`` directives before ``@enduml``.""" |
| 128 | + """Append ``url of <alias> is [[url{}{_top}]]`` directives before ``@enduml``. |
| 129 | +
|
| 130 | + The ``{_top}`` window target ensures that clickable links inside an SVG |
| 131 | + embedded via ``<object>`` navigate the top-level browser frame rather than |
| 132 | + the object's own browsing context. |
| 133 | + """ |
105 | 134 | if not links: |
106 | 135 | return uml_content |
107 | | - safe_links = { |
108 | | - alias: url |
109 | | - for alias, url in links.items() |
110 | | - if _ALIAS_SAFE_RE.match(alias) and "]]" not in url |
111 | | - } |
112 | | - if not safe_links: |
| 136 | + url_directives_list = [] |
| 137 | + for alias, url in links.items(): |
| 138 | + if "]]" in url: |
| 139 | + continue |
| 140 | + alias_part = _format_alias_part(alias) |
| 141 | + if alias_part is None: |
| 142 | + continue |
| 143 | + url_directives_list.append(f"url of {alias_part} is [[{url}{{}}{{_top}}]]") |
| 144 | + if not url_directives_list: |
113 | 145 | return uml_content |
114 | | - url_directives = "\n".join( |
115 | | - f"url of {alias} is [[{url}]]" for alias, url in safe_links.items() |
116 | | - ) |
| 146 | + url_directives = "\n".join(url_directives_list) |
117 | 147 | enduml_match = re.search(r"^\s*@enduml\s*$", uml_content, re.MULTILINE) |
118 | 148 | if enduml_match: |
119 | 149 | prefix = uml_content[: enduml_match.start()] |
|
0 commit comments