Skip to content

Commit a50fb85

Browse files
committed
Harden diagram editor network and file security
- Add diagram_net_utils.py with SSRF prevention: block file/ftp schemes, reject private/loopback/link-local IPs, enforce 20 MB download limit and connection timeout - URL image download now routes through safe_download_image() - Image reload from saved JSON validates file extension allowlist - Add Security section to CLAUDE.md with full checklist
1 parent 987252e commit a50fb85

4 files changed

Lines changed: 121 additions & 7 deletions

File tree

CLAUDE.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,35 @@ python -m pybreeze # launch the IDE
8585
- Logging: use `pybreeze_logger` from `pybreeze.utils.logging.logger`
8686
- Plugin API: `register_programming_language()` and `register_natural_language()` from `je_editor.plugins`
8787

88+
## Security
89+
90+
All code must follow secure-by-default principles. Review every change against the checklist below before committing.
91+
92+
### General rules
93+
- Never use `eval()`, `exec()`, or `pickle.loads()` on untrusted data
94+
- Never use `subprocess.Popen(..., shell=True)` — always pass argument lists
95+
- Never log or display secrets, tokens, passwords, or API keys
96+
- Use `json.loads()` / `json.dumps()` for serialisation — never pickle
97+
- Validate all user input at system boundaries (file dialogs, URL inputs, network data)
98+
99+
### Network requests (SSRF prevention)
100+
- All outbound HTTP requests must go through `diagram_net_utils.safe_download_image()` or equivalent guards
101+
- Only `http://` and `https://` schemes are allowed — block `file://`, `ftp://`, `data:`, `gopher://`
102+
- Resolved IP addresses must be checked against private/loopback/link-local ranges (`ipaddress.is_private`, `is_loopback`, `is_link_local`, `is_reserved`)
103+
- Enforce download size limits (default: 20 MB) and connection timeouts (default: 15s)
104+
- Never pass user-supplied URLs directly to `urlopen()` without validation
105+
106+
### File I/O
107+
- File read/write paths from user dialogs (`QFileDialog`) are trusted (user-initiated)
108+
- File paths loaded from saved data (`.diagram.json`) must be validated before access:
109+
- Local paths: check `path.is_file()` and verify extension is in an allowlist
110+
- URLs: pass through the same SSRF validation as user-entered URLs
111+
- Never construct file paths by string concatenation with user input — use `pathlib.Path` with validation
112+
113+
### Qt / UI
114+
- `QGraphicsTextItem` with `TextEditorInteraction` must not be enabled by default — use double-click-to-edit pattern to prevent unintended text selection issues in themed environments
115+
- Plugin loading (`jeditor_plugins/`) uses auto-discovery — only load `.py` files, skip files starting with `_` or `.`
116+
88117
## Commit & PR rules
89118

90119
- Commit messages: short imperative sentence (e.g., "Update stable version", "Fix github actions")

pybreeze/pybreeze_ui/diagram_editor/diagram_editor_widget.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,8 +535,8 @@ def _add_image_from_url(self) -> None:
535535
return
536536
url = url.strip()
537537
try:
538-
from urllib.request import urlopen
539-
data = urlopen(url, timeout=15).read()
538+
from pybreeze.pybreeze_ui.diagram_editor.diagram_net_utils import safe_download_image
539+
data = safe_download_image(url)
540540
from PySide6.QtGui import QPixmap
541541
pix = QPixmap()
542542
pix.loadFromData(data)
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""Network utilities for the diagram editor with security hardening.
2+
3+
Security measures:
4+
- Only ``http`` and ``https`` schemes are allowed (blocks ``file://``, ``ftp://``, etc.)
5+
- Resolved IPs are checked against private/loopback ranges to prevent SSRF
6+
- Downloads are capped at ``MAX_DOWNLOAD_BYTES`` to prevent memory exhaustion
7+
- Connection timeout is enforced
8+
"""
9+
from __future__ import annotations
10+
11+
import ipaddress
12+
import socket
13+
from urllib.parse import urlparse
14+
from urllib.request import Request, urlopen
15+
16+
MAX_DOWNLOAD_BYTES = 20 * 1024 * 1024 # 20 MB
17+
TIMEOUT_SECONDS = 15
18+
_ALLOWED_SCHEMES = {"http", "https"}
19+
20+
21+
class ImageDownloadError(Exception):
22+
pass
23+
24+
25+
def _validate_url(url: str) -> str:
26+
"""Validate URL scheme and resolve hostname to block private/loopback IPs."""
27+
parsed = urlparse(url)
28+
29+
# Scheme check
30+
if parsed.scheme.lower() not in _ALLOWED_SCHEMES:
31+
raise ImageDownloadError(
32+
f"Scheme '{parsed.scheme}' is not allowed. Use http or https."
33+
)
34+
35+
# Hostname check
36+
hostname = parsed.hostname
37+
if not hostname:
38+
raise ImageDownloadError("URL has no hostname.")
39+
40+
# Resolve and check for private/loopback IPs (SSRF prevention)
41+
try:
42+
infos = socket.getaddrinfo(hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM)
43+
except socket.gaierror as e:
44+
raise ImageDownloadError(f"Cannot resolve hostname '{hostname}': {e}") from e
45+
46+
for family, _, _, _, sockaddr in infos:
47+
ip = ipaddress.ip_address(sockaddr[0])
48+
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
49+
raise ImageDownloadError(
50+
f"Access to private/internal address {ip} is blocked."
51+
)
52+
53+
return url
54+
55+
56+
def safe_download_image(url: str) -> bytes:
57+
"""Download image data from *url* with security and size guards.
58+
59+
Raises ``ImageDownloadError`` on validation failure or oversized response.
60+
"""
61+
url = _validate_url(url)
62+
63+
req = Request(url, headers={"User-Agent": "PyBreeze-DiagramEditor/1.0"})
64+
resp = urlopen(req, timeout=TIMEOUT_SECONDS) # noqa: S310 — URL validated above
65+
66+
# Check Content-Length header if available
67+
content_length = resp.headers.get("Content-Length")
68+
if content_length is not None and int(content_length) > MAX_DOWNLOAD_BYTES:
69+
raise ImageDownloadError(
70+
f"Image too large ({int(content_length)} bytes, max {MAX_DOWNLOAD_BYTES})."
71+
)
72+
73+
# Read with size limit
74+
data = resp.read(MAX_DOWNLOAD_BYTES + 1)
75+
if len(data) > MAX_DOWNLOAD_BYTES:
76+
raise ImageDownloadError(
77+
f"Image exceeds {MAX_DOWNLOAD_BYTES // (1024 * 1024)} MB limit."
78+
)
79+
80+
return data

pybreeze/pybreeze_ui/diagram_editor/diagram_scene.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -574,19 +574,24 @@ def _load_items(self, data: dict) -> None:
574574
self._try_load_image_source(img, source)
575575

576576
def _try_load_image_source(self, img: DiagramImage, source: str) -> None:
577-
"""Load pixmap from local path or URL into a DiagramImage."""
577+
"""Load pixmap from local path or URL into a DiagramImage.
578+
579+
Local paths are restricted to existing image files.
580+
URLs are validated and size-limited via ``safe_download_image``.
581+
"""
578582
from pathlib import Path
579583
path = Path(source)
580-
if path.is_file():
584+
# Only load if the file actually exists and has an image extension
585+
_IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".bmp", ".gif", ".svg", ".webp", ".ico"}
586+
if path.is_file() and path.suffix.lower() in _IMAGE_SUFFIXES:
581587
pix = QPixmap(str(path))
582588
if not pix.isNull():
583589
img.set_pixmap(pix, source)
584590
return
585-
# Try as URL (non-blocking would be better, but keep it simple)
586591
if source.startswith(("http://", "https://")):
587592
try:
588-
from urllib.request import urlopen
589-
data = urlopen(source, timeout=10).read()
593+
from pybreeze.pybreeze_ui.diagram_editor.diagram_net_utils import safe_download_image
594+
data = safe_download_image(source)
590595
pix = QPixmap()
591596
pix.loadFromData(data)
592597
if not pix.isNull():

0 commit comments

Comments
 (0)