11# Copyright (c) QuantCo 2024-2025
22# SPDX-License-Identifier: BSD-3-Clause
33
4+ from __future__ import annotations
45
6+ from dataclasses import dataclass
57from functools import cached_property , lru_cache
8+ from typing import Any
69
710import sqlalchemy as sa
811
@@ -15,21 +18,21 @@ class QueryInspection:
1518 or :meth:`~sqlcompyre.api.inspect_table` functions instead.
1619 """
1720
def __init__(self, engine: sa.Engine, query: sa.FromClause) -> None:
    """Create an inspection for the results of a query.

    Args:
        engine: The engine to use for connecting to the database.
        query: The query whose results to inspect.
    """
    self.engine = engine
    self.query = query
@cached_property
def row_count(self) -> int:
    """Get the number of rows returned by the query.

    The count is computed on first access and cached on the instance afterwards.
    """
    # ``self.query`` is used directly as the FROM clause of the count statement.
    count_query = sa.select(sa.func.count()).select_from(self.query)
    with self.engine.connect() as conn:
        return conn.execute(count_query).scalar_one()
3437
3538 @lru_cache
@@ -45,14 +48,53 @@ def distinct_row_count(self, *columns: str) -> int:
4548 """
4649
4750 if len (columns ) == 0 :
48- data_query = self .query .distinct ()
51+ data_query = sa . select ( self .query ) .distinct ()
4952
5053 else :
51- subquery = self .query .subquery ()
5254 data_query = (
53- sa .select (sa .text (", " .join (columns ))).distinct ().select_from (subquery )
55+ sa .select (sa .text (", " .join (columns )))
56+ .distinct ()
57+ .select_from (self .query )
5458 )
5559
5660 count_query = sa .select (sa .func .count ()).select_from (data_query .subquery ())
5761 with self .engine .connect () as conn :
5862 return conn .execute (count_query ).scalar_one ()
63+
def column_stats(self, column: str) -> ColumnStats:
    """Obtain statistics about a single column.

    Repeated calls with the same column name on the same inspection return the
    same object, so statistics it has already computed are reused.

    Args:
        column: The name of the column to obtain information about.

    Returns:
        An object providing access to column statistics.
    """
    # Memoize per instance rather than with ``functools.lru_cache``: a cache on
    # the method itself is keyed on ``self`` and would keep every inspection
    # (and its engine) alive for as long as the cache exists.
    cache = self.__dict__.setdefault("_column_stats_cache", {})
    try:
        return cache[column]
    except KeyError:
        stats = cache[column] = ColumnStats(self.engine, self.query.c[column])
        return stats
75+
76+
77+ # ----------------------------------------- COLUMN STATS ---------------------------------------- #
78+
79+
@dataclass(eq=False)
class ColumnStats:
    """Obtain statistics about column values in a table.

    Each statistic is queried from the database the first time it is accessed
    and cached on the instance afterwards.

    Args:
        engine: The engine to use for connecting to the database.
        column: The column whose values to compute statistics for.
    """

    # ``eq=False`` keeps identity-based equality and hashing. A field-less
    # dataclass would otherwise consider all instances equal and make them
    # unhashable.
    engine: sa.Engine
    column: sa.ColumnElement

    def _aggregate(self, function: Any) -> Any | None:
        """Select ``function(column)`` and return the scalar result."""
        query = sa.select(function(self.column))
        with self.engine.connect() as conn:
            return conn.execute(query).scalar()

    @cached_property
    def min(self) -> Any | None:
        """The minimum value in the column."""
        return self._aggregate(sa.func.min)

    @cached_property
    def max(self) -> Any | None:
        """The maximum value in the column."""
        return self._aggregate(sa.func.max)
0 commit comments