Skip to content

Commit 4a6fb52

Browse files
authored
fix(directory-scanner): detect marimo notebooks with long module docstrings (#9647) (#9652)
1 parent 70e9a6a commit 4a6fb52

2 files changed

Lines changed: 194 additions & 210 deletions

File tree

marimo/_server/files/directory_scanner.py

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,42 +19,49 @@ def is_marimo_app(full_path: str) -> bool:
1919
Detect whether a file is a marimo app.
2020
2121
Rules:
22-
- Markdown (`.md`/`.qmd`) files are marimo apps if the first 512 bytes
23-
contain `marimo-version:`.
24-
- Python (`.py`) files are marimo apps if the header (first 512 bytes)
25-
contains both `marimo.App` and `import marimo`.
26-
- If the header contains `# /// script`, read the full file and check for
27-
the same Python markers, to handle large script headers.
22+
- Markdown (`.md`/`.qmd`) files are marimo apps if they contain
23+
`marimo-version:` (frontmatter marker).
24+
- Python (`.py`) files are marimo apps if they contain both
25+
`marimo.App` and `import marimo`.
26+
- In both cases the first 512 bytes are checked first (fast path);
27+
on a miss we read up to 1 MB of the file looking for the markers.
28+
Above `import marimo` there's only ever a shebang, comments, a
29+
module docstring, and/or a `# /// script` block — none of which
30+
realistically exceed a few hundred KB.
2831
- Any errors while reading result in `False`.
2932
"""
30-
READ_LIMIT = 512
31-
32-
def contains_marimo_app(content: bytes) -> bool:
33-
return b"marimo.App" in content and b"import marimo" in content
33+
FAST_PATH_BYTES = 512
34+
# Cap on how far we'll read looking for markers. Marimo notebooks
35+
# put `import marimo` near the top of the file, so this is just a
36+
# guard against scanning huge unrelated files in full.
37+
MAX_SCAN_BYTES = 1 * 1024 * 1024 # 1 MB
3438

3539
try:
3640
path = MarimoPath(full_path)
3741

38-
# Fast extension check to avoid I/O for unrelated files
39-
if not path.is_python() and not path.is_markdown():
42+
# Fast extension check to avoid I/O for unrelated files.
43+
if path.is_markdown():
44+
markers: tuple[bytes, ...] = (b"marimo-version:",)
45+
elif path.is_python():
46+
markers = (b"import marimo", b"marimo.App")
47+
else:
4048
return False
4149

42-
with open(full_path, "rb") as f:
43-
header = f.read(READ_LIMIT)
44-
45-
if path.is_markdown():
46-
return b"marimo-version:" in header
50+
def matches(content: bytes) -> bool:
51+
return all(m in content for m in markers)
4752

48-
if path.is_python():
49-
if contains_marimo_app(header):
53+
with open(full_path, "rb") as f:
54+
header = f.read(FAST_PATH_BYTES)
55+
if matches(header):
5056
return True
51-
52-
if b"# /// script" in header:
53-
full_content = path.read_bytes()
54-
if contains_marimo_app(full_content):
55-
return True
56-
57-
return False
57+
# Fast path missed. If the file is smaller than the window,
58+
# we've already seen everything.
59+
if len(header) < FAST_PATH_BYTES:
60+
return False
61+
# Read further, bounded by MAX_SCAN_BYTES. If markers are
62+
# past that, the file isn't shaped like a marimo notebook.
63+
rest = f.read(MAX_SCAN_BYTES - FAST_PATH_BYTES)
64+
return matches(header + rest)
5865
except Exception as e:
5966
LOGGER.debug("Error reading file %s: %s", full_path, e)
6067
return False

0 commit comments

Comments
 (0)