Skip to content

Commit 0ad2621

Browse files
committed
Add boundary condition tests for HTML formatter memory limits and resolve max_rows logic
1 parent 168eda8 commit 0ad2621

File tree

2 files changed

+76
-8
lines changed

2 files changed

+76
-8
lines changed

python/tests/test_dataframe.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,6 +1458,55 @@ def test_html_formatter_memory(df, clean_formatter_state):
14581458
assert "data truncated" not in html_output.lower()
14591459

14601460

1461+
def test_html_formatter_memory_boundary_conditions(df, clean_formatter_state):
    """Exercise the HTML formatter's memory cap at its extremes.

    Renders the same frame under tiny, default-sized and very large
    ``max_memory_bytes`` settings and checks that the ``min_rows_display``
    floor is still met and that the truncation notice appears whenever the
    byte budget is far too small for the data.
    """

    def render(**formatter_options):
        # Apply the options, render once, and return (html, row count).
        configure_formatter(**formatter_options)
        html = df._repr_html_()
        return html, count_table_rows(html)

    # Baseline: generous memory and row caps, used as the "everything shown"
    # reference for the large-limit scenario below.
    _, unrestricted_rows = render(
        max_memory_bytes=10 * MB, min_rows_display=1, max_rows=100
    )

    # Scenario 1: a 10-byte budget must still yield the minimum row count.
    html_output, tr_count = render(max_memory_bytes=10, min_rows_display=1)
    assert tr_count >= 2  # At least header + 1 data row (minimum)
    # The budget is so small that the output has to be flagged as truncated.
    assert "data truncated" in html_output.lower()

    # Scenario 2: a 2 MB budget renders at least the minimum.
    html_output, tr_count = render(max_memory_bytes=2 * MB, min_rows_display=1)
    assert tr_count >= 2  # At least header + min_rows

    # Scenario 3: a very large budget shows as many rows as the baseline.
    html_output, tr_count = render(max_memory_bytes=100 * MB, min_rows_display=1)
    assert tr_count == unrestricted_rows  # Should show all rows

    # Scenario 4: with a tiny budget and a larger floor, the floor wins.
    html_output, tr_count = render(max_memory_bytes=10, min_rows_display=2)
    assert tr_count >= 3  # At least header + 2 data rows (min_rows)
    # Meeting the floor does not suppress the truncation notice.
    assert "data truncated" in html_output.lower()

    # Scenario 5: 2 MB budget with min_rows and max_rows both pinned to 2.
    html_output, tr_count = render(
        max_memory_bytes=2 * MB, min_rows_display=2, max_rows=2
    )
    assert tr_count == 3  # header + 2 data rows
1508+
1509+
14611510
def test_html_formatter_max_rows(df, clean_formatter_state):
14621511
configure_formatter(min_rows_display=2, max_rows=2)
14631512
html_output = df._repr_html_()

src/dataframe.rs

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -148,18 +148,31 @@ where
148148
.unwrap_or_else(|_| default_value.clone())
149149
}
150150

151+
/// Resolve the max_rows value, preferring repr_rows if it differs from the default.
152+
///
153+
/// This function handles the transition from the deprecated `repr_rows` parameter
154+
/// to the new `max_rows` parameter. It checks both attributes and uses `repr_rows`
155+
/// if it has been explicitly set to a different value than `max_rows`.
156+
fn resolve_max_rows(formatter: &Bound<'_, PyAny>, default: usize) -> usize {
157+
let max_rows = get_attr(formatter, "max_rows", default);
158+
let repr_rows = get_attr(formatter, "repr_rows", default);
159+
160+
// If repr_rows differs from the default, it was explicitly set by the user
161+
// (Python-side validation ensures only one is used, but we prefer repr_rows
162+
// for backward compatibility in case it was set)
163+
if repr_rows != default && repr_rows != max_rows {
164+
repr_rows
165+
} else {
166+
max_rows
167+
}
168+
}
169+
151170
/// Helper function to create a FormatterConfig from a Python formatter object
152171
fn build_formatter_config_from_python(formatter: &Bound<'_, PyAny>) -> PyResult<FormatterConfig> {
153172
let default_config = FormatterConfig::default();
154173
let max_bytes = get_attr(formatter, "max_memory_bytes", default_config.max_bytes);
155174
let min_rows = get_attr(formatter, "min_rows_display", default_config.min_rows);
156-
let max_rows = get_attr(formatter, "max_rows", default_config.max_rows);
157-
let repr_rows = get_attr(formatter, "repr_rows", max_rows);
158-
let max_rows = if repr_rows != max_rows {
159-
repr_rows
160-
} else {
161-
max_rows
162-
};
175+
let max_rows = resolve_max_rows(formatter, default_config.max_rows);
163176

164177
let config = FormatterConfig {
165178
max_bytes,
@@ -1360,7 +1373,10 @@ async fn collect_record_batches_to_display(
13601373
let mut record_batches = Vec::default();
13611374
let mut has_more = false;
13621375

1363-
// ensure minimum rows even if memory/row limits are hit
1376+
// Keep collecting while both limits (memory and max_rows) still have headroom, OR while fewer than min_rows rows have been gathered.
1377+
// The minimum rows constraint overrides both memory and row limits to ensure a baseline
1378+
// of data is always displayed, even if it temporarily exceeds those limits.
1379+
// This provides better UX by guaranteeing users see at least min_rows rows.
13641380
while (size_estimate_so_far < max_bytes && rows_so_far < max_rows) || rows_so_far < min_rows {
13651381
let mut rb = match stream.next().await {
13661382
None => {
@@ -1374,11 +1390,14 @@ async fn collect_record_batches_to_display(
13741390
if rows_in_rb > 0 {
13751391
size_estimate_so_far += rb.get_array_memory_size();
13761392

1393+
// When memory limit is exceeded, scale back row count proportionally to stay within budget
13771394
if size_estimate_so_far > max_bytes {
13781395
let ratio = max_bytes as f32 / size_estimate_so_far as f32;
13791396
let total_rows = rows_in_rb + rows_so_far;
13801397

1398+
// Calculate reduced rows maintaining the memory/data proportion
13811399
let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize;
1400+
// Ensure we always respect the minimum rows guarantee
13821401
if reduced_row_num < min_rows {
13831402
reduced_row_num = min_rows.min(total_rows);
13841403
}

0 commit comments

Comments
 (0)