Skip to content

Commit d0f1d3a

Browse files
authored
feat!: Add column statistics to query inspection (#18)
1 parent 460ea15 commit d0f1d3a

4 files changed

Lines changed: 72 additions & 32 deletions

File tree

sqlcompyre/analysis/query_inspection.py

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
# Copyright (c) QuantCo 2024-2025
22
# SPDX-License-Identifier: BSD-3-Clause
33

4+
from __future__ import annotations
45

6+
from dataclasses import dataclass
57
from functools import cached_property, lru_cache
8+
from typing import Any
69

710
import sqlalchemy as sa
811

@@ -15,21 +18,21 @@ class QueryInspection:
1518
or :meth:`~sqlcompyre.api.inspect_table` functions instead.
1619
"""
1720

18-
def __init__(self, engine: sa.Engine, selectable: sa.Select):
21+
def __init__(self, engine: sa.Engine, query: sa.FromClause):
1922
"""
2023
Args:
2124
engine: The engine to use for connecting to the database.
2225
query: The query whose results to inspect.
2326
"""
2427
self.engine = engine
25-
self.query = selectable
28+
self.query = query
2629

2730
@cached_property
2831
def row_count(self) -> int:
2932
"""Get the number of rows returned by the query."""
3033
with self.engine.connect() as conn:
3134
return conn.execute(
32-
sa.select(sa.func.count()).select_from(self.query.subquery())
35+
sa.select(sa.func.count()).select_from(self.query)
3336
).scalar_one()
3437

3538
@lru_cache
@@ -45,14 +48,53 @@ def distinct_row_count(self, *columns: str) -> int:
4548
"""
4649

4750
if len(columns) == 0:
48-
data_query = self.query.distinct()
51+
data_query = sa.select(self.query).distinct()
4952

5053
else:
51-
subquery = self.query.subquery()
5254
data_query = (
53-
sa.select(sa.text(", ".join(columns))).distinct().select_from(subquery)
55+
sa.select(sa.text(", ".join(columns)))
56+
.distinct()
57+
.select_from(self.query)
5458
)
5559

5660
count_query = sa.select(sa.func.count()).select_from(data_query.subquery())
5761
with self.engine.connect() as conn:
5862
return conn.execute(count_query).scalar_one()
63+
64+
def column_stats(self, column: str) -> ColumnStats:
    """Obtain statistics about a single column.

    The returned object is created once per column name and reused on later calls, so
    statistics it has already computed are not queried again.

    Args:
        column: The name of the column to obtain information about.

    Returns:
        An object providing access to column statistics.
    """
    # Cache on the instance instead of decorating the method with ``lru_cache``: a
    # method-level ``lru_cache`` keys a shared cache on ``self``, which keeps every
    # inspection (and the engine it references) alive for the lifetime of the cache.
    cache = self.__dict__.setdefault("_column_stats_cache", {})
    if column not in cache:
        cache[column] = ColumnStats(self.engine, self.query.c[column])
    return cache[column]
75+
76+
77+
# ----------------------------------------- COLUMN STATS ---------------------------------------- #
78+
79+
80+
@dataclass(eq=False)
class ColumnStats:
    """Obtain statistics about column values in a table.

    Each statistic runs its own query the first time it is accessed and is cached on the
    instance afterwards.

    Args:
        engine: The engine to use for connecting to the database.
        column: The column whose values to compute statistics for.
    """

    # Declaring the fields lets ``@dataclass`` generate ``__init__`` and a ``__repr__``
    # that shows them. ``eq=False`` keeps identity-based comparison and hashing; a
    # field-less dataclass with the default ``eq=True`` would compare all instances as
    # equal and make them unhashable.
    engine: sa.Engine
    column: sa.ColumnElement

    @cached_property
    def min(self) -> Any | None:
        """The minimum value in the column."""
        query = sa.select(sa.func.min(self.column))
        with self.engine.connect() as conn:
            return conn.execute(query).scalar()

    @cached_property
    def max(self) -> Any | None:
        """The maximum value in the column."""
        query = sa.select(sa.func.max(self.column))
        with self.engine.connect() as conn:
            return conn.execute(query).scalar()

sqlcompyre/api.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,14 @@
1212
# ---------------------------------------------------------------------------------------------
1313

1414

15-
def inspect(
16-
engine: sa.Engine, query: sa.Select | sa.FromClause | str
17-
) -> QueryInspection:
15+
def inspect(engine: sa.Engine, query: sa.Select | sa.FromClause) -> QueryInspection:
1816
"""Inspect the results of a query in the database.
1917
2018
Args:
2119
engine: The engine to use to access the database.
2220
query: The query whose results to inspect. This can either be a SQLAlchemy ``SELECT``
23-
statement, a ``FROM`` clause (which includes plain :class:`sqlalchemy.Table` objects),
24-
or a SQL query specified as string.
21+
statement or a ``FROM`` clause (which includes plain :class:`sqlalchemy.Table`
22+
objects).
2523
2624
Returns:
2725
A query inspection object that can be used to easily gain insights into the query
@@ -31,10 +29,8 @@ def inspect(
3129
:meth:`inspect_table` if you want to inspect the results of ``SELECT * FROM table`` and
3230
specify the table as a string.
3331
"""
34-
if isinstance(query, str):
35-
query = sa.select("*").select_from(sa.text(f"({query}) as anon_1"))
36-
elif isinstance(query, sa.FromClause):
37-
query = sa.select(query)
32+
if isinstance(query, sa.Select):
33+
return QueryInspection(engine, query.subquery())
3834
return QueryInspection(engine, query)
3935

4036

@@ -63,7 +59,7 @@ def inspect_table(engine: sa.Engine, table: sa.Table | str) -> QueryInspection:
6359
sa_table = meta.tables[table]
6460
else:
6561
sa_table = table
66-
return inspect(engine, sa.select(sa_table))
62+
return inspect(engine, sa_table)
6763

6864

6965
# ---------------------------------------------------------------------------------------------
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) QuantCo 2024-2025
2+
# SPDX-License-Identifier: BSD-3-Clause
3+
4+
import sqlalchemy as sa
5+
6+
import sqlcompyre as sc
7+
8+
9+
def test_column_stats_min(engine: sa.Engine, table_characters: sa.Table):
    """Check the reported minimum of the ``age`` and ``first_name`` columns."""
    inspection = sc.inspect_table(engine, table_characters)
    expectations = (("age", 6), ("first_name", "Daisy"))
    for column, expected in expectations:
        assert inspection.column_stats(column).min == expected
13+
14+
15+
def test_column_stats_max(engine: sa.Engine, table_characters: sa.Table):
    """Check the reported maximum of the ``age`` and ``first_name`` columns."""
    inspection = sc.inspect_table(engine, table_characters)
    expectations = (("age", 65), ("first_name", "Scrooge"))
    for column, expected in expectations:
        assert inspection.column_stats(column).max == expected

tests/analysis/query_inspection/test_row_counts.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,22 +28,6 @@ def test_row_count_query(engine: sa.Engine, table_characters: sa.Table):
2828
assert inspection.row_count == 6
2929

3030

31-
def test_row_count_raw_query(engine: sa.Engine, table_characters: sa.Table):
32-
inspection = sc.inspect(
33-
engine,
34-
f"""
35-
SELECT characters.first_name, characters.last_name, characters.age
36-
FROM {str(table_characters)}
37-
WHERE characters.last_name = 'Duck'
38-
""",
39-
)
40-
41-
assert inspection.row_count == 6
42-
assert inspection.distinct_row_count() == 5
43-
assert inspection.distinct_row_count("last_name") == 1
44-
assert inspection.distinct_row_count("last_name", "age") == 3
45-
46-
4731
@pytest.mark.parametrize(
4832
("columns", "expected"),
4933
[([], 7), (["last_name"], 3), (["last_name", "age"], 5)],

0 commit comments

Comments
 (0)