Skip to content

Commit e2c94da

Browse files
3mei and johnnygreco authored
fix: include plugin column types in display_sample_record() (#365)
* fix: include plugin column types in display_sample_record()

  Replace hardcoded column type list with dynamic iteration over get_column_display_order(), which already includes plugin-registered types. Column types with dedicated display sections (SEED_DATASET, IMAGE, LLM_CODE, VALIDATION, LLM_JUDGE) are excluded from the "Generated Columns" table as before.

  Also display side_effect_columns for plugin column types, matching the existing behavior for CUSTOM columns.

  Fixes #345

  Signed-off-by: Yev Meyer <ymeyer@nvidia.com>
  Made-with: Cursor
  Signed-off-by: Yev Meyer <ymeyer@nvidia.com>
  Made-with: Cursor

* fix: use is_plugin_column_type() for side-effect column display

  Add is_plugin_column_type() helper to column_types.py to avoid redundant plugin_manager.get_plugin_column_types() calls. Use it in display_sample_record() to show side_effect_columns for plugin column types, matching existing CUSTOM behavior.

  Signed-off-by: Yev Meyer <ymeyer@nvidia.com>
  Made-with: Cursor

* fix: handle string column_type in is_plugin_column_type()

  Column configs store column_type as a Literal string, not a DataDesignerColumnType enum. Accept both str and enum to avoid AttributeError when calling .value on a plain string.

  Signed-off-by: Yev Meyer <ymeyer@nvidia.com>
  Made-with: Cursor

* test: improve plugin column display tests

  - Move test imports to module level per project convention
  - Replace plumbing-only test with one that renders a fake plugin column (with side-effect columns) to HTML and asserts the values appear in the output
  - Add parametrized test for is_plugin_column_type() verifying all built-in types return False for both enum and string forms

  Signed-off-by: Yev Meyer <ymeyer@nvidia.com>
  Made-with: Cursor

* fix: preserve original display order for side-effect columns

  Render primary column before side-effect columns, matching the existing CUSTOM column behavior. Avoids introducing display ordering discrepancies.

  Signed-off-by: Yev Meyer <ymeyer@nvidia.com>
  Made-with: Cursor

---------

Signed-off-by: Yev Meyer <ymeyer@nvidia.com>
Co-authored-by: Johnny Greco <jogreco@nvidia.com>
1 parent ddd6eb4 commit e2c94da

4 files changed

Lines changed: 78 additions & 11 deletions

File tree

packages/data-designer-config/src/data_designer/config/column_types.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,12 @@ def get_column_display_order() -> list[DataDesignerColumnType]:
9898
return display_order
9999

100100

101+
def is_plugin_column_type(column_type: str | DataDesignerColumnType) -> bool:
102+
"""Check whether a column type was registered by a plugin."""
103+
type_value = column_type.value if isinstance(column_type, DataDesignerColumnType) else column_type
104+
return plugin_manager.get_column_generator_plugin_if_exists(type_value) is not None
105+
106+
101107
def get_column_emoji_from_type(column_type: DataDesignerColumnType) -> str:
102108
"""Get the emoji for a column type."""
103109
config_cls = get_column_config_cls_from_type(resolve_string_enum(column_type, DataDesignerColumnType))

packages/data-designer-config/src/data_designer/config/utils/visualization.py

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626

2727
import data_designer.lazy_heavy_imports as lazy
2828
from data_designer.config.base import ConfigBase
29-
from data_designer.config.column_types import DataDesignerColumnType
29+
from data_designer.config.column_types import DataDesignerColumnType, get_column_display_order, is_plugin_column_type
3030
from data_designer.config.models import ModelConfig, ModelProvider
3131
from data_designer.config.sampler_params import SamplerType
3232
from data_designer.config.utils.code_lang import code_lang_to_syntax_lexer
@@ -53,6 +53,14 @@
5353
console = Console()
5454
logger = logging.getLogger(__name__)
5555

56+
_DEDICATED_DISPLAY_COL_TYPES = {
57+
DataDesignerColumnType.SEED_DATASET,
58+
DataDesignerColumnType.IMAGE,
59+
DataDesignerColumnType.LLM_CODE,
60+
DataDesignerColumnType.VALIDATION,
61+
DataDesignerColumnType.LLM_JUDGE,
62+
}
63+
5664

5765
def _display_image_if_in_notebook(image_data: str, col_name: str) -> bool:
5866
"""Display image with caption in Jupyter notebook if available.
@@ -287,14 +295,10 @@ def display_sample_record(
287295
table.add_row(col_name, convert_to_row_element(record[col_name]))
288296
render_list.append(pad_console_element(table))
289297

290-
non_code_columns = (
291-
config_builder.get_columns_of_type(DataDesignerColumnType.SAMPLER)
292-
+ config_builder.get_columns_of_type(DataDesignerColumnType.EXPRESSION)
293-
+ config_builder.get_columns_of_type(DataDesignerColumnType.LLM_TEXT)
294-
+ config_builder.get_columns_of_type(DataDesignerColumnType.LLM_STRUCTURED)
295-
+ config_builder.get_columns_of_type(DataDesignerColumnType.EMBEDDING)
296-
+ config_builder.get_columns_of_type(DataDesignerColumnType.CUSTOM)
297-
)
298+
non_code_columns = []
299+
for col_type in get_column_display_order():
300+
if col_type not in _DEDICATED_DISPLAY_COL_TYPES:
301+
non_code_columns.extend(config_builder.get_columns_of_type(col_type))
298302
if len(non_code_columns) > 0:
299303
table = Table(title="Generated Columns", **table_kws)
300304
table.add_column("Name")
@@ -306,8 +310,7 @@ def display_sample_record(
306310
get_truncated_list_as_string(embd) for embd in record[col.name].get("embeddings")
307311
]
308312
table.add_row(col.name, convert_to_row_element(record[col.name]))
309-
# Also display side_effect_columns for custom generators
310-
if col.column_type == DataDesignerColumnType.CUSTOM:
313+
if col.column_type == DataDesignerColumnType.CUSTOM or is_plugin_column_type(col.column_type):
311314
for output_col in col.side_effect_columns:
312315
if output_col in record:
313316
table.add_row(output_col, convert_to_row_element(record[output_col]))

packages/data-designer-config/tests/config/test_columns.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
DataDesignerColumnType,
2424
get_column_config_from_kwargs,
2525
get_column_display_order,
26+
is_plugin_column_type,
2627
)
2728
from data_designer.config.errors import InvalidConfigError
2829
from data_designer.config.sampler_params import (
@@ -60,6 +61,12 @@ def test_data_designer_column_type_get_display_order():
6061
]
6162

6263

64+
@pytest.mark.parametrize("col_type", list(DataDesignerColumnType))
65+
def test_is_plugin_column_type_false_for_builtins(col_type: DataDesignerColumnType) -> None:
66+
assert is_plugin_column_type(col_type) is False
67+
assert is_plugin_column_type(col_type.value) is False
68+
69+
6370
def test_sampler_column_config():
6471
sampler_column_config = SamplerColumnConfig(
6572
name="test_sampler",

packages/data-designer-config/tests/config/utils/test_visualization.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,12 @@
55

66
from pathlib import Path
77
from typing import TYPE_CHECKING
8+
from unittest.mock import MagicMock, patch
89

910
import pytest
1011

1112
import data_designer.lazy_heavy_imports as lazy
13+
from data_designer.config.column_types import get_column_display_order
1214
from data_designer.config.config_builder import DataDesignerConfigBuilder
1315
from data_designer.config.utils.code_lang import CodeLang
1416
from data_designer.config.utils.errors import DatasetSampleDisplayError
@@ -268,6 +270,55 @@ def test_convert_to_row_element_renders_non_scalar_types() -> None:
268270
table.add_row(result)
269271

270272

273+
def test_display_sample_record_includes_plugin_column_types(
274+
validation_output: dict,
275+
config_builder_with_validation: DataDesignerConfigBuilder,
276+
tmp_path: Path,
277+
) -> None:
278+
"""Plugin columns and their side-effect columns should appear in the rendered output (fixes #345)."""
279+
from types import SimpleNamespace
280+
281+
fake_plugin_type = "fake-plugin-type"
282+
fake_col = SimpleNamespace(
283+
name="plugin_output",
284+
column_type=fake_plugin_type,
285+
drop=False,
286+
side_effect_columns=["plugin_side_effect"],
287+
)
288+
289+
record = lazy.pd.Series(
290+
{
291+
"code": "print('hello world')",
292+
"code_validation_result": validation_output,
293+
"plugin_output": "primary plugin value",
294+
"plugin_side_effect": "side effect value",
295+
}
296+
)
297+
298+
extended_order = get_column_display_order() + [fake_plugin_type]
299+
original_get_columns = config_builder_with_validation.get_columns_of_type
300+
mock_get_columns = MagicMock(
301+
side_effect=lambda ct: [fake_col] if ct == fake_plugin_type else original_get_columns(ct)
302+
)
303+
config_builder_with_validation.get_columns_of_type = mock_get_columns
304+
305+
save_path = tmp_path / "output.html"
306+
with (
307+
patch("data_designer.config.utils.visualization.get_column_display_order", return_value=extended_order),
308+
patch(
309+
"data_designer.config.utils.visualization.is_plugin_column_type",
310+
side_effect=lambda ct: ct == fake_plugin_type,
311+
),
312+
):
313+
display_sample_record(record, config_builder_with_validation, save_path=save_path)
314+
315+
content = save_path.read_text()
316+
assert "plugin_output" in content, "Plugin column name missing from rendered output"
317+
assert "primary plugin value" in content, "Plugin column value missing from rendered output"
318+
assert "plugin_side_effect" in content, "Plugin side-effect column name missing from rendered output"
319+
assert "side effect value" in content, "Plugin side-effect column value missing from rendered output"
320+
321+
271322
def test_mixin_out_of_bounds_raises_display_error(
272323
validation_output: dict, config_builder_with_validation: DataDesignerConfigBuilder
273324
) -> None:

0 commit comments

Comments
 (0)