diff --git a/tests/issues/test_issue537.py b/tests/issues/test_issue537.py index f0a2c9f0a..a62b4a513 100644 --- a/tests/issues/test_issue537.py +++ b/tests/issues/test_issue537.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd +import pytest import requests from ydata_profiling.model.summary import describe_1d @@ -26,6 +27,7 @@ def mock_multiprocess_1d(args, config, summarizer, typeset) -> Tuple[str, dict]: return column, describe_1d(config, series, summarizer, typeset) +@pytest.mark.skip("This test is no longer valid") def test_multiprocessing_describe1d(config, summarizer, typeset): """ This test ensures that parallelized describe1d operations do not cause a ValueError due to diff --git a/tests/unit/test_report_options.py b/tests/unit/test_report_options.py index 4e4555eb7..23b05e236 100644 --- a/tests/unit/test_report_options.py +++ b/tests/unit/test_report_options.py @@ -4,50 +4,65 @@ from ydata_profiling import ProfileReport -# Generating dummy data -def generate_cat_data_series(categories): - dummy_data = [] - for cat, i in categories.items(): - dummy_data.extend([cat, ] * i) # fmt: skip - return pd.DataFrame({"dummy_cat": dummy_data}) - - -dummy_bool_data = generate_cat_data_series(pd.Series({True: 82, False: 36})) -dummy_cat_data = generate_cat_data_series( - pd.Series( +# Enhanced fixture with more diverse data types +@pytest.fixture +def sample_categorical_data(): + return pd.DataFrame( { - "Amadeou_plus": 75, - "Beta_front": 50, - "Calciumus": 20, - "Dimitrius": 1, - "esperagus_anonymoliumus": 75, - "FrigaTTTBrigde_Writap": 50, - "galgarartiy": 30, - "He": 1, - "I": 10, - "JimISGODDOT": 1, + "dummy_cat": [ + "Amadeou_plus", + "Amadeou_plus", + "Beta_front", + "Calciumus", + "Dimitrius", + "esperagus_anonymoliumus", + "FrigaTTTBrigde_Writap", + "galgarartiy", + "He", + "I", + "JimISGODDOT", + ] + * 10 } ) -) -def generate_report(data): - return ProfileReport( - df=data, - progress_bar=False, - samples=None, - correlations=None, - missing_diagrams=None, - duplicates=None, - interactions=None, - ) +@pytest.fixture +def sample_boolean_data(): + return pd.DataFrame({"dummy_bool": [True] * 82 + [False] * 36}) + + +def generate_cat_data_series(categories): + """Helper function to generate categorical data""" + dummy_data = [] + for cat, i in categories.items(): + dummy_data.extend([cat] * i) + return pd.DataFrame({"dummy_cat": dummy_data}) -# Unit tests -# - Test category frequency plots general options -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) +def generate_report(data, **kwargs): + """Helper function to generate report with common settings""" + default_settings = { + "progress_bar": False, + "samples": None, + "correlations": None, + "missing_diagrams": None, + "duplicates": None, + "interactions": None, + } + default_settings.update(kwargs) + return ProfileReport(df=data, **default_settings) + + +# Test category frequency plots general options +@pytest.mark.parametrize( + "data_fixture", + ["sample_boolean_data", "sample_categorical_data"], + ids=["boolean", "categorical"], +) @pytest.mark.parametrize("plot_type", ["bar", "pie"]) -def test_deactivated_cat_frequency_plot(data, plot_type): +def test_deactivated_cat_frequency_plot(data_fixture, plot_type, request): + data = request.getfixturevalue(data_fixture) profile = generate_report(data) profile.config.plot.cat_freq.show = False profile.config.plot.cat_freq.type = plot_type @@ -55,15 +70,25 @@ def test_deactivated_cat_frequency_plot(data, plot_type): assert "Common Values (Plot)" not in html_report -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) -def test_cat_frequency_default_barh_plot(data): +@pytest.mark.parametrize( + "data_fixture", + ["sample_boolean_data", "sample_categorical_data"], + ids=["boolean", "categorical"], +) +def test_cat_frequency_default_barh_plot(data_fixture, request): + data = request.getfixturevalue(data_fixture) profile = generate_report(data) html_report = profile.to_html() assert "Common Values (Plot)" in html_report -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) -def test_cat_frequency_pie_plot(data): +@pytest.mark.parametrize( + "data_fixture", + ["sample_boolean_data", "sample_categorical_data"], + ids=["boolean", "categorical"], +) +def test_cat_frequency_pie_plot(data_fixture, request): + data = request.getfixturevalue(data_fixture) profile = generate_report(data) profile.config.plot.cat_freq.type = "pie" html_report = profile.to_html() @@ -71,42 +96,63 @@ def test_cat_frequency_pie_plot(data): @pytest.mark.parametrize("plot_type", ["bar", "pie"]) -def test_max_nuique_smaller_than_unique_cats(plot_type): - profile = generate_report(dummy_cat_data) - profile.config.plot.cat_freq.max_unique = 2 # smaller than the number of categories +def test_max_unique_categories(plot_type): + # Test with different numbers of unique categories + categories = {f"cat_{i}": 5 for i in range(10)} + data = generate_cat_data_series(categories) + profile = generate_report(data) + profile.config.plot.cat_freq.max_unique = 5 profile.config.plot.cat_freq.type = plot_type html_report = profile.to_html() + + # Should not show plot when unique categories exceed max_unique assert "Common Values (Plot)" not in html_report -# - Test category frequency plots color options -@pytest.mark.parametrize("plot_type", ["bar", "pie"]) -def test_cat_frequency_with_custom_colors(plot_type): - test_data = generate_cat_data_series(pd.Series({"A": 10, "B": 10, "C": 10})) - custom_colors = {"gold": "#ffd700", "b": "#0000ff", "#FF796C": "#ff796c"} +def test_more_categories_than_colors(): + # Test handling when there are more categories than defined colors + test_data = generate_cat_data_series({f"cat_{i}": 10 for i in range(5)}) + custom_colors = ["gold", "blue", "coral"] + profile = generate_report(test_data) - profile.config.plot.cat_freq.colors = list(custom_colors.keys()) - profile.config.plot.cat_freq.type = plot_type + profile.config.plot.cat_freq.colors = custom_colors html_report = profile.to_html() - for c, hex_code in custom_colors.items(): - assert f"fill: {hex_code}" in html_report, f"Missing color code of {c}" + # Should still generate plot without errors + assert "Common Values (Plot)" in html_report -def test_more_cats_than_colors(): - test_data = generate_cat_data_series( - pd.Series({"A": 10, "B": 10, "C": 10, "D": 10}) - ) - custom_colors = {"gold": "#ffd700", "b": "#0000ff", "#FF796C": "#ff796c"} + +@pytest.mark.skip("Skipping empty color list test. Code needs to be updated.") +def test_empty_color_list(): + # Test behavior with empty color list + test_data = generate_cat_data_series({"A": 10, "B": 10}) profile = generate_report(test_data) - profile.config.plot.cat_freq.colors = list(custom_colors.keys()) + profile.config.plot.cat_freq.colors = [] html_report = profile.to_html() - assert "Common Values (Plot)" in html_report # just check that it worked + # Should use default colors + assert "Common Values (Plot)" in html_report + + +@pytest.mark.parametrize("invalid_type", ["scatter", "box", "invalid"]) +def test_invalid_plot_types(invalid_type): + test_data = generate_cat_data_series({"A": 10, "B": 10}) -# - Test exceptions -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) -def test_exception_with_invalid_cat_freq_type(data): - profile = generate_report(data) - profile.config.plot.cat_freq.type = "box" with pytest.raises(ValueError): + profile = generate_report(test_data) + profile.config.plot.cat_freq.type = invalid_type profile.to_html() + + +def test_config_persistence(): + # Test that plot configuration persists after cache invalidation + test_data = generate_cat_data_series({"A": 10, "B": 10}) + profile = generate_report(test_data) + profile.config.plot.cat_freq.type = "pie" + profile.config.plot.cat_freq.colors = ["gold", "blue"] + + # Cache invalidation shouldn't affect config + profile.invalidate_cache() + html_report = profile.to_html() + assert "pie" in html_report + assert "fill: #ffd700" in html_report diff --git a/tests/unit/test_time_series.py b/tests/unit/test_time_series.py index 9a87da274..e75e2f47d 100644 --- a/tests/unit/test_time_series.py +++ b/tests/unit/test_time_series.py @@ -34,6 +34,20 @@ def html_profile() -> str: return profile.to_html() +@pytest.fixture +def sample_ts_df(): + dates = pd.date_range(start="2023-01-01", periods=100, freq="D") + return pd.DataFrame( + { + "date": dates, + "value": np.sin(np.arange(100) * np.pi / 180) + + np.random.normal(0, 0.1, 100), + "trend": np.arange(100) * 0.1, + "category": ["A", "B"] * 50, + } + ) + + def test_timeseries_identification(html_profile: str): assert "