Skip to content

Commit fc176a8

Browse files
authored
Merge pull request #27 from kdkavanagh/column-meta
Add column metadata panel with stats
2 parents b1ed84b + db9a8d7 commit fc176a8

15 files changed

Lines changed: 1532 additions & 276 deletions

src/dt_browser/browser.py

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
SelectFromTable,
2929
)
3030
from dt_browser.bookmarks import Bookmarks
31+
from dt_browser.column_metadata import ColumnMetadata
3132
from dt_browser.column_selector import ColumnSelector
3233
from dt_browser.custom_table import CustomTable, _color_name, polars_list_to_string
3334
from dt_browser.expression_box import ExpressionBox
@@ -248,9 +249,8 @@ def compute_active_search_idx_display(self):
248249
class RowDetail(Widget, can_focus=False, can_focus_children=False):
249250
DEFAULT_CSS = """
250251
RowDetail {
251-
width: auto;
252-
max_width: 50%;
253-
min_width: 30%;
252+
width: 100%;
253+
height: 1fr;
254254
padding: 0 1;
255255
border: tall $primary;
256256
}
@@ -288,14 +288,43 @@ def watch_row_df(self):
288288
assert self._schema is not None
289289
display_df = display_df.join(self._schema, on=["Field"]).select(["Field", "dtype", "Value"])
290290
self._dt.set_dt(display_df, display_df.with_row_index(name=INDEX_COL).select([INDEX_COL]))
291-
self.styles.width = self._dt.virtual_size.width + self.gutter.width + 1
292291
self._dt.refresh()
292+
if isinstance(self.parent, DetailPanel):
293+
self.parent.update_width()
293294
# self._dt.go_to_cell(coord)
294295

296+
@property
def content_width(self) -> int:
    """Return the inner table's virtual width plus this widget's gutter width, plus one."""
    table_width = self._dt.virtual_size.width
    gutter_width = self.gutter.width
    return table_width + gutter_width + 1
299+
295300
def compose(self):
    """Yield the single child widget: the inner detail table."""
    yield from (self._dt,)
297302

298303

304+
class DetailPanel(Widget, can_focus=False):
    """Vertically laid-out container holding the row detail view and the column metadata view."""

    DEFAULT_CSS = """
    DetailPanel {
        max-width: 50%;
        min-width: 30%;
        layout: vertical;
    }
    """

    def __init__(self, row_detail: RowDetail, column_metadata: ColumnMetadata, *args, **kwargs):
        """Keep references to the two child widgets; remaining arguments go to ``Widget``."""
        super().__init__(*args, **kwargs)
        self._column_metadata = column_metadata
        self._row_detail = row_detail

    def update_width(self) -> None:
        """Set this panel's width to the larger of the two children's widths.

        The row detail contributes 0 while its ``row_df`` is empty; the column metadata
        contributes its current content width plus its gutter width.
        """
        if self._row_detail.row_df.is_empty():
            detail_width = 0
        else:
            detail_width = self._row_detail.content_width
        metadata = self._column_metadata
        metadata_width = metadata.content_size.width + metadata.gutter.width
        self.styles.width = max(detail_width, metadata_width)

    def compose(self):
        """Yield the row detail first, then the column metadata."""
        yield from (self._row_detail, self._column_metadata)
326+
327+
299328
def from_file_path(path: pathlib.Path, has_header: bool = True) -> pl.DataFrame:
300329

301330
if path.suffix in [".arrow", ".feather"]:
@@ -343,6 +372,7 @@ class DtBrowser(Widget): # pylint: disable=too-many-public-methods,too-many-ins
343372
current_filter = reactive[str | None](None)
344373

345374
cur_row = reactive(0)
375+
cur_col = reactive(0)
346376
cur_total_rows = reactive(0)
347377
total_rows = reactive(0)
348378

@@ -401,6 +431,9 @@ def __init__(
401431
self._ts_col_selector.styles.width = 1
402432

403433
self._row_detail = RowDetail()
434+
self._column_metadata = ColumnMetadata()
435+
self._column_metadata.set_source_df(self._filtered_dt)
436+
self._detail_panel = DetailPanel(self._row_detail, self._column_metadata)
404437

405438
self._color_by_cache: LRUCache[tuple[str, ...], pl.Series] = LRUCache(5)
406439
self._last_message_ts = time.time()
@@ -629,10 +662,10 @@ async def action_show_save(self):
629662

630663
async def watch_show_row_detail(self):
    """React to ``show_row_detail``: mount the detail panel when on, remove it when off.

    When switched on, the panel is only mounted if the displayed frame is not empty.
    """
    if self.show_row_detail:
        if not self._display_dt.is_empty():
            container = self.query_one("#main_hori", Horizontal)
            await container.mount(self._detail_panel)
        return
    panels = self.query(DetailPanel)
    if panels:
        panels.remove()
636669

637670
async def action_show_bookmarks(self):
638671
await self.mount(self._bookmarks, before=self.query_one(TableFooter))
@@ -650,6 +683,8 @@ async def action_timestamp_selector(self):
650683
def _set_filtered_dt(self, filtered_dt: pl.DataFrame, filtered_meta: pl.DataFrame, **kwargs):
    """Store the filtered frame and its metadata frame, then make the filtered frame active.

    The column metadata widget is pointed at the new frame and its cache is cleared
    before the active frame is switched; ``kwargs`` are forwarded to ``_set_active_dt``.
    """
    self._filtered_dt = filtered_dt
    self._meta_dt = filtered_meta

    column_meta = self._column_metadata
    column_meta.set_source_df(self._filtered_dt)
    column_meta.invalidate_cache()

    self._set_active_dt(self._filtered_dt, **kwargs)
654689

655690
def _set_active_dt(self, active_dt: pl.DataFrame, new_row: int | None = None):
@@ -720,10 +755,20 @@ def enable_select_from_table(self, event: SelectFromTable):
720755
@on(CustomTable.CellHighlighted, selector="#main_table")
async def handle_cell_highlight(self, event: CustomTable.CellHighlighted):
    """Track the highlighted cell of the main table in ``cur_row`` / ``cur_col``.

    ``cur_col`` is only assigned when the highlighted column actually differs.
    """
    self.cur_row = event.coordinate.row
    highlighted_col = event.coordinate.column
    if self.cur_col != highlighted_col:
        self.cur_col = highlighted_col
723761

724762
def watch_cur_row(self):
    """Hand the ``cur_row`` row of the displayed frame to the row detail widget."""
    selected_row = self._display_dt[self.cur_row]
    self._row_detail.row_df = selected_row
726764

765+
def watch_cur_col(self):
    """Publish the ``cur_col`` column's name and dtype to the column metadata widget.

    Does nothing when the displayed frame is empty or ``cur_col`` is past its last column.
    """
    displayed = self._display_dt
    if displayed.is_empty():
        return
    names = displayed.columns
    if self.cur_col >= len(names):
        return
    name = names[self.cur_col]
    self._column_metadata.column_info = (name, displayed.schema[name])
771+
727772
@on(CustomTable.CellSelected, selector="#main_table")
728773
def handle_cell_select(self, event: CustomTable.CellSelected):
729774
if self._select_interest:
@@ -863,6 +908,9 @@ def on_mount(self):
863908
self.cur_total_rows = len(self._display_dt)
864909
self.total_rows = len(self._original_dt)
865910
self._row_detail.row_df = self._display_dt[0]
911+
if not self._display_dt.is_empty():
912+
col_name = self._display_dt.columns[0]
913+
self._column_metadata.column_info = (col_name, self._display_dt.schema[col_name])
866914
if self.removed_cols:
867915
err_str = ", ".join(f"{k}: {v}" for k, v in self.removed_cols.items())
868916
self.notify(

src/dt_browser/column_metadata.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import polars as pl
2+
from rich.table import Table as RichTable
3+
from textual import work
4+
from textual.reactive import reactive
5+
from textual.widget import Widget
6+
from textual.widgets import Static
7+
8+
9+
def _categorical_stats(series: pl.Series) -> list[tuple[str, str]]:
    """Summarise a categorical column as (label, value) display pairs.

    The first pair is the number of unique values. It is followed by up to ten
    (value, count) pairs, ordered by descending count and then ascending value.

    Args:
        series: Column to summarise.

    Returns:
        Display pairs; each per-value label is the value prefixed with a space.
    """
    stats: list[tuple[str, str]] = [("Unique values", str(series.n_unique()))]
    # value_counts() puts the counts in a column named "count" next to the value
    # column. Give the values a fixed internal name first so a source column that
    # is itself called "count" cannot clash with the counts column.
    value_col = "__value"
    top_values = (
        series.alias(value_col)
        .value_counts()
        .sort(["count", value_col], descending=[True, False])
        .head(10)
    )
    stats.extend((f" {row[value_col]}", str(row["count"])) for row in top_values.iter_rows(named=True))
    return stats
17+
18+
19+
def _numeric_stats(series: pl.Series) -> list[tuple[str, str]]:
    """Return min, Q1, median, Q3 and max of the non-null values as display pairs.

    A single ("", "No data") pair is returned when no non-null values remain.
    For float dtypes a "NaN" pair is appended when the NaN count is positive.
    """
    values = series.drop_nulls()
    if values.is_empty():
        return [("", "No data")]

    summary = (
        ("Min", values.min()),
        ("Q1", values.quantile(0.25)),
        ("Median", values.median()),
        ("Q3", values.quantile(0.75)),
        ("Max", values.max()),
    )
    stats = [(label, str(value)) for label, value in summary]

    if values.dtype.is_float() and (nan_total := values.is_nan().sum()) > 0:
        stats.append(("NaN", str(nan_total)))
    return stats
35+
36+
37+
def _temporal_stats(series: pl.Series) -> list[tuple[str, str]]:
    """Return the min and max of the non-null values as display pairs.

    A single ("", "No data") pair is returned when no non-null values remain.
    """
    values = series.drop_nulls()
    if values.is_empty():
        return [("", "No data")]
    bounds = (("Min", values.min()), ("Max", values.max()))
    return [(label, str(bound)) for label, bound in bounds]
45+
46+
47+
def _boolean_stats(series: pl.Series) -> list[tuple[str, str]]:
    """Return the counts of True and False values as display pairs.

    Nulls are excluded from both counts: False is the series length minus the
    True count minus the null count.

    Args:
        series: Boolean column to summarise.

    Returns:
        ``[("True", <count>), ("False", <count>)]`` with counts rendered as strings.
    """
    # sum() is treated as possibly returning None (e.g. nothing to add up).
    # Normalise once so the displayed "True" count and the arithmetic below
    # agree, instead of showing the text "None".
    true_count = series.sum() or 0
    false_count = len(series) - true_count - series.null_count()
    return [
        ("True", str(true_count)),
        ("False", str(false_count)),
    ]
55+
56+
57+
def compute_column_stats(series: pl.Series) -> list[tuple[str, str]]:
    """Build the (label, value) display pairs for a column according to its dtype.

    Categorical, numeric, temporal and boolean columns are each handled by their own
    helper; any other dtype yields an empty list. For handled dtypes a trailing
    "Null" pair is appended when the column contains nulls.
    """
    dtype = series.dtype
    if dtype == pl.Categorical:
        summarise = _categorical_stats
    elif dtype.is_numeric():
        summarise = _numeric_stats
    elif dtype.is_temporal():
        summarise = _temporal_stats
    elif dtype.is_(pl.Boolean):
        summarise = _boolean_stats
    else:
        return []

    stats = summarise(series)
    nulls = series.null_count()
    if nulls > 0:
        stats.append(("Null", str(nulls)))
    return stats
73+
74+
75+
class ColumnMetadata(Widget, can_focus=False, can_focus_children=False):
    """Panel that shows summary statistics for one column of a source DataFrame.

    The column to describe is chosen by assigning ``column_info``. Statistics are
    computed by ``compute_column_stats`` and cached per column name until
    ``invalidate_cache`` is called.
    """

    DEFAULT_CSS = """
    ColumnMetadata {
        width: 100%;
        height: auto;
        padding: 0 1;
        border: tall $primary;
    }
    """
    # (column name, dtype) of the column to describe; None until one is assigned.
    column_info: reactive[tuple[str, pl.DataType] | None] = reactive(None)

    def __init__(self, *args, **kwargs):
        """Create the panel with an empty source frame and an empty stats cache."""
        super().__init__(*args, **kwargs)
        self.border_title = "Column Metadata"
        self._source_df: pl.DataFrame = pl.DataFrame()
        # Column name -> display pairs already computed for the current source frame.
        self._cache: dict[str, list[tuple[str, str]]] = {}
        self._static = Static("")

    def set_source_df(self, df: pl.DataFrame) -> None:
        """Replace the frame statistics are computed from (the cache is left untouched)."""
        self._source_df = df

    def invalidate_cache(self) -> None:
        """Drop all cached statistics and refresh whatever column is currently shown.

        Clearing the cache alone would leave the old figures on screen: assigning an
        equal ``column_info`` value does not re-run the watcher, so the display would
        only catch up once a different column was selected.
        """
        self._cache.clear()
        if self.column_info is None:
            return
        col_name, _ = self.column_info
        if self._source_df.is_empty() or col_name not in self._source_df.columns:
            # The column can no longer be described; do not keep stale figures visible.
            self._static.update("")
            return
        self.watch_column_info()

    def _render_stats(self, col_name: str, stats: list[tuple[str, str]]) -> None:
        """Show ``stats`` for ``col_name`` as a two-column table and title the border.

        With no stats the content is blanked. Otherwise, after updating the table,
        the parent is asked to recompute its width if it offers ``update_width``.
        """
        self.border_title = f"Column: {col_name}"
        if not stats:
            self._static.update("")
            return
        table = RichTable(show_header=False, box=None, padding=(0, 1), expand=True)
        table.add_column("Stat", no_wrap=True)
        table.add_column("Value", no_wrap=True, justify="right")
        for label, value in stats:
            table.add_row(label, value)
        self._static.update(table)
        if self.parent is not None and hasattr(self.parent, "update_width"):
            self.parent.update_width()

    def watch_column_info(self) -> None:
        """Render the selected column's statistics, computing them if not cached.

        Does nothing when no column is selected, the source frame is empty, or the
        column is not part of the source frame.
        """
        if self.column_info is None or self._source_df.is_empty():
            return
        col_name, _ = self.column_info
        if col_name not in self._source_df.columns:
            return
        if col_name in self._cache:
            self._render_stats(col_name, self._cache[col_name])
        else:
            # Show a placeholder until the worker has produced the figures.
            self.border_title = f"Column: {col_name}"
            self._static.update("Computing...")
            self._compute_stats(col_name)

    @work(exclusive=True)
    async def _compute_stats(self, col_name: str) -> None:
        """Worker: compute, cache and render the statistics for ``col_name``."""
        series = self._source_df[col_name]
        stats = compute_column_stats(series)
        self._cache[col_name] = stats
        self._render_stats(col_name, stats)

    def compose(self):
        """Yield the ``Static`` that holds the rendered statistics."""
        yield self._static

0 commit comments

Comments
 (0)