diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..7bb15bf5d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY . .
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" wheel && \
+    pip install --no-cache-dir . && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" && \
+    pip install --no-cache-dir jupyter
+
+EXPOSE 8888
+
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]
+
+
diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index 992c1840c..bcca12a1c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -60,22 +60,4 @@ def handle(self, dtype: str, *args, **kwargs) -> dict:
         return summary
 
 
-def get_render_map() -> Dict[str, Callable]:
-    import ydata_profiling.report.structure.variables as render_algorithms
 
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index d733a7d36..54d839915 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -27,7 +27,7 @@
 from ydata_profiling.model.pandas.describe_supported_pandas import (
     pandas_describe_supported,
 )
-from ydata_profiling.model.summary_algorithms import (  # Check what is this method used for
+from ydata_profiling.model.summary_algorithms import (
     describe_file_1d,
     describe_image_1d,
     describe_path_1d,
@@ -50,9 +50,8 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
-# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer for Pandas DataFrames."""
+    """A summarizer supporting both Pandas and Spark DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index 482b410b2..0f027f23f 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -7,7 +7,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.model.handler import get_render_map
+from ydata_profiling.report.structure.variables import get_render_map
 from ydata_profiling.report.presentation.core import (
     HTML,
     Collapse,
diff --git a/src/ydata_profiling/report/structure/variables/__init__.py b/src/ydata_profiling/report/structure/variables/__init__.py
index 64f1d6d54..a8aa301b5 100644
--- a/src/ydata_profiling/report/structure/variables/__init__.py
+++ b/src/ydata_profiling/report/structure/variables/__init__.py
@@ -1,3 +1,5 @@
+from typing import Callable, Dict
+
 from ydata_profiling.report.structure.variables.render_boolean import render_boolean
 from ydata_profiling.report.structure.variables.render_categorical import (
     render_categorical,
@@ -17,6 +19,26 @@
 )
 from ydata_profiling.report.structure.variables.render_url import render_url
 
+
+def get_render_map() -> Dict[str, Callable]:
+    render_map = {
+        "Boolean": render_boolean,
+        "Numeric": render_real,
+        "Complex": render_complex,
+        "Text": render_text,
+        "DateTime": render_date,
+        "Categorical": render_categorical,
+        "URL": render_url,
+        "Path": render_path,
+        "File": render_file,
+        "Image": render_image,
+        "Unsupported": render_generic,
+        "TimeSeries": render_timeseries,
+    }
+
+    return render_map
+
+
 __all__ = [
     "render_boolean",
     "render_categorical",
@@ -32,4 +54,5 @@
     "render_text",
     "render_timeseries",
     "render_url",
+    "get_render_map",
 ]