-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdaft_file_code.py
More file actions
68 lines (56 loc) · 1.86 KB
/
Copy pathdaft_file_code.py
File metadata and controls
68 lines (56 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# /// script
# description = "Extract Python functions from code files using daft.File"
# requires-python = ">=3.12, <3.13"
# dependencies = ["daft>=0.7.10"]
# ///
import daft
from daft import DataType, col
from daft.functions import file as daft_file
from daft.functions import unnest
@daft.func(
    return_dtype=DataType.list(
        DataType.struct(
            {
                "name": DataType.string(),
                "signature": DataType.string(),
                "docstring": DataType.string(),
                "start_line": DataType.int64(),
                "end_line": DataType.int64(),
            }
        )
    ),
    # NOTE(review): presumably makes daft log a per-row failure instead of
    # aborting the whole query -- confirm against the daft.func documentation.
    on_error="log",
)
def extract_functions(file: daft.File):
    """Extract all function definitions from a Python file.

    The file is read in full, decoded as UTF-8 and parsed with ``ast``.
    Every ``def`` and ``async def`` found anywhere in the tree is reported,
    which includes methods and nested functions because ``ast.walk`` visits
    all descendants. ``ast.walk`` does not promise any particular order, so
    callers that need source order should sort on ``start_line``.

    Args:
        file: Handle to the Python source file; it is opened with
            ``file.open()`` and read as bytes.

    Returns:
        A list with one dict per function, holding:
        ``name`` (the function name),
        ``signature`` (``def``/``async def``, name, arguments and, when
        annotated, the ``-> return`` annotation),
        ``docstring`` (cleaned docstring, or ``None`` if the function has none),
        ``start_line`` and ``end_line`` (1-based line numbers of the definition).

    Raises:
        UnicodeDecodeError: If the file content is not valid UTF-8.
        SyntaxError: If the content cannot be parsed as Python source.
    """
    import ast

    with file.open() as f:
        # "utf-8-sig" is plain UTF-8 that also drops a leading byte-order
        # mark, so a BOM written by some editors never reaches the parser.
        file_content = f.read().decode("utf-8-sig")

    tree = ast.parse(file_content)

    results = []
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        # Coroutines must be reported as "async def", not as a plain "def".
        keyword = "async def" if isinstance(node, ast.AsyncFunctionDef) else "def"
        signature = f"{keyword} {node.name}({ast.unparse(node.args)})"
        if node.returns is not None:
            signature += f" -> {ast.unparse(node.returns)}"

        results.append(
            {
                "name": node.name,
                "signature": signature,
                "docstring": ast.get_docstring(node),
                "start_line": node.lineno,
                "end_line": node.end_lineno,
            }
        )
    return results
if __name__ == "__main__":
    # `col` is already imported at the top of the file, so it is not
    # re-imported here.

    # Discover Python files from this repo's examples, then build one row per
    # extracted function.
    df = (
        # List every .py file matching the glob (recursively under examples/).
        daft.from_glob_path("examples/**/*.py")
        # Wrap each path in a daft file handle that extract_functions can open.
        .with_column("file", daft_file(col("path")))
        # Parse each file into a list of function-description structs.
        .with_column("functions", extract_functions(col("file")))
        # Turn the per-file list into one row per list element.
        .explode("functions")
        # Keep the file path and size, and spread the struct's fields
        # (name, signature, docstring, start_line, end_line) into columns.
        .select("path", "size", unnest(col("functions")))
    )

    # Print only the first three rows as a quick smoke test.
    df.show(3)