Skip to content

Commit df345a2

Browse files
refactor query graph and enable profiling for a connection (#140)
The idea of this PR is to make this tool a bit more useful (and visually appealing). I think it is generally good to have better profiling tools to explore queries.

**EDIT**: Initially it was only about the query graph visuals, but I decided to expose the profiling functions enabled in the C++ API (similar to what the Go client does with the C API).

To use the tool:

1. Build this branch (see https://duckdb.org/docs/stable/dev/building/python)
2. Run the following in DuckDB:

```sql
PRAGMA enable_profiling = 'json';
PRAGMA profiling_output = './tmp/profile.json';
SELECT ... FROM ...;
```

3. Run the script (I do it with uv):

```bash
uv run -m duckdb.query_graph --profile_input profile.json
```

If you want to use the profiling within the client:

```python
import duckdb
from duckdb.query_graph import ProfilingInfo

con = duckdb.connect()
con.enable_profiling()
con.execute("select 42").fetchall()  # or some other eager operation
profiling_info = ProfilingInfo(con)

# then the options are
profiling_info.to_json()
profiling_info.to_pydict()
profiling_info.to_html()

# optionally
con.disable_profiling()
```

It is also possible to just provide the profiling info as a file:

```python
# we create profiling info from a file instead of binding it to a connection
profiling_info = ProfilingInfo(from_file = 'profile.json')

# then use it the same way, e.g. to render the HTML report
profiling_info.to_html(output_file='profile.html')
```

Which should yield something like the following screenshots:

<img width="1023" height="814" alt="Screenshot 2026-01-09 at 11 09 21" src="https://github.com/user-attachments/assets/44de8776-18b2-4320-8c90-4b9cfec40cd1" />
<img width="993" height="701" alt="Screenshot 2026-01-09 at 11 09 44" src="https://github.com/user-attachments/assets/d17ffc91-0354-4ed1-824c-b44018be1a74" />
<img width="208" height="568" alt="Screenshot 2026-01-09 at 11 10 05" src="https://github.com/user-attachments/assets/307f5558-9a6e-4c31-b290-a90a84283014" />
2 parents 510ebd8 + 34ef0d7 commit df345a2

11 files changed

Lines changed: 726 additions & 245 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
.sw?
1515
#OS X specific files.
1616
.DS_store
17+
#VSCode specifics
18+
.vscode/
1719

1820
#==============================================================================#
1921
# Build artifacts
@@ -45,6 +47,7 @@ cmake-build-release
4547
cmake-build-relwithdebinfo
4648
duckdb_packaging/duckdb_version.txt
4749
test.db
50+
tmp/
4851

4952
#==============================================================================#
5053
# Python

_duckdb-stubs/__init__.pyi

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,11 @@ __all__: list[str] = [
8686
"default_connection",
8787
"description",
8888
"df",
89+
"disable_profiling",
8990
"distinct",
9091
"dtype",
9192
"duplicate",
93+
"enable_profiling",
9294
"enum_type",
9395
"execute",
9496
"executemany",
@@ -109,6 +111,7 @@ __all__: list[str] = [
109111
"from_df",
110112
"from_parquet",
111113
"from_query",
114+
"get_profiling_information",
112115
"get_table_names",
113116
"install_extension",
114117
"interrupt",
@@ -313,6 +316,9 @@ class DuckDBPyConnection:
313316
repository_url: str | None = None,
314317
version: str | None = None,
315318
) -> None: ...
319+
def get_profiling_information(self, format: str = "json") -> str: ...
320+
def enable_profiling(self) -> None: ...
321+
def disable_profiling(self) -> None: ...
316322
def interrupt(self) -> None: ...
317323
def list_filesystems(self) -> list[str]: ...
318324
def list_type(self, type: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ...
@@ -1250,6 +1256,9 @@ def limit(
12501256
*,
12511257
connection: DuckDBPyConnection | None = None,
12521258
) -> DuckDBPyRelation: ...
1259+
def get_profiling_information(*, connection: DuckDBPyConnection | None = None, format: str = "json") -> str: ...
1260+
def enable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ...
1261+
def disable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ...
12531262
def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> list[str]: ...
12541263
def list_type(
12551264
type: sqltypes.DuckDBPyType, *, connection: DuckDBPyConnection | None = None

duckdb/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,11 @@
8484
default_connection,
8585
description,
8686
df,
87+
disable_profiling,
8788
distinct,
8889
dtype,
8990
duplicate,
91+
enable_profiling,
9092
enum_type,
9193
execute,
9294
executemany,
@@ -107,6 +109,7 @@
107109
from_df,
108110
from_parquet,
109111
from_query,
112+
get_profiling_information,
110113
get_table_names,
111114
install_extension,
112115
interrupt,
@@ -310,9 +313,11 @@
310313
"default_connection",
311314
"description",
312315
"df",
316+
"disable_profiling",
313317
"distinct",
314318
"dtype",
315319
"duplicate",
320+
"enable_profiling",
316321
"enum_type",
317322
"execute",
318323
"executemany",
@@ -333,6 +338,7 @@
333338
"from_df",
334339
"from_parquet",
335340
"from_query",
341+
"get_profiling_information",
336342
"get_table_names",
337343
"install_extension",
338344
"interrupt",

duckdb/query_graph/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .__main__ import ProfilingInfo # noqa: D104
2+
3+
__all__ = ["ProfilingInfo"]

0 commit comments

Comments
 (0)