@@ -19,42 +19,49 @@ def is_marimo_app(full_path: str) -> bool:
1919 Detect whether a file is a marimo app.
2020
2121 Rules:
22- - Markdown (`.md`/`.qmd`) files are marimo apps if the first 512 bytes
23- contain `marimo-version:`.
24- - Python (`.py`) files are marimo apps if the header (first 512 bytes)
25- contains both `marimo.App` and `import marimo`.
26- - If the header contains `# /// script`, read the full file and check for
27- the same Python markers, to handle large script headers.
22+ - Markdown (`.md`/`.qmd`) files are marimo apps if they contain
23+ `marimo-version:` (frontmatter marker).
24+ - Python (`.py`) files are marimo apps if they contain both
25+ `marimo.App` and `import marimo`.
26+ - In both cases the first 512 bytes are scanned first (fast path);
27+ on a miss we read up to 1 MB of the file looking for the markers.
28+ Above `import marimo` there's only ever a shebang, comments, a
29+ module docstring, and/or a `# /// script` block — none of which
30+ realistically exceed a few hundred KB.
2831 - Any errors while reading result in `False`.
2932 """
30- READ_LIMIT = 512
31-
32- def contains_marimo_app (content : bytes ) -> bool :
33- return b"marimo.App" in content and b"import marimo" in content
33+ FAST_PATH_BYTES = 512
34+ # Cap on how far we'll read looking for markers. Marimo notebooks
35+ # put `import marimo` near the top of the file, so this is just a
36+ # guard against scanning huge unrelated Python files in full.
37+ MAX_SCAN_BYTES = 1 * 1024 * 1024 # 1 MB
3438
3539 try :
3640 path = MarimoPath (full_path )
3741
38- # Fast extension check to avoid I/O for unrelated files
39- if not path .is_python () and not path .is_markdown ():
42+ # Pick the markers to look for by extension; unrelated files return early without any I/O.
43+ if path .is_markdown ():
44+ markers : tuple [bytes , ...] = (b"marimo-version:" ,)
45+ elif path .is_python ():
46+ markers = (b"import marimo" , b"marimo.App" )
47+ else :
4048 return False
4149
42- with open (full_path , "rb" ) as f :
43- header = f .read (READ_LIMIT )
44-
45- if path .is_markdown ():
46- return b"marimo-version:" in header
50+ def matches (content : bytes ) -> bool :
51+ return all (m in content for m in markers )
4752
48- if path .is_python ():
49- if contains_marimo_app (header ):
53+ with open (full_path , "rb" ) as f :
54+ header = f .read (FAST_PATH_BYTES )
55+ if matches (header ):
5056 return True
51-
52- if b"# /// script" in header :
53- full_content = path .read_bytes ()
54- if contains_marimo_app (full_content ):
55- return True
56-
57- return False
57+ # Fast path missed. If the file is smaller than the window,
58+ # we've already seen everything.
59+ if len (header ) < FAST_PATH_BYTES :
60+ return False
61+ # Read further, bounded by MAX_SCAN_BYTES. If markers are
62+ # past that, the file isn't shaped like a marimo notebook.
63+ rest = f .read (MAX_SCAN_BYTES - FAST_PATH_BYTES )
64+ return matches (header + rest )
5865 except Exception as e :
5966 LOGGER .debug ("Error reading file %s: %s" , full_path , e )
6067 return False
0 commit comments