Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Image that installs the project from the build context and serves a Jupyter
# notebook on port 8888.
FROM python:3.10-slim

# All following relative paths (COPY target, "pip install .") resolve to /app.
WORKDIR /app

# Install build-essential without recommended extras and drop the apt package
# lists in the same layer.
# NOTE(review): presumably needed to compile native dependencies - confirm.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
&& rm -rf /var/lib/apt/lists/*

# Copy the whole build context into /app.
COPY . .

# Steps, in order: upgrade pip; install wheel and setuptools pinned to
# >=72.0.0,<80.0.0; install the project from the copied sources; apply the same
# setuptools pin again; install Jupyter.
# NOTE(review): the second pin presumably restores the setuptools version in
# case the project install changed it - confirm before removing it.
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" wheel && \
pip install --no-cache-dir . && \
pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" && \
pip install --no-cache-dir jupyter

# Port the notebook server below is told to listen on.
EXPOSE 8888

# Start Jupyter Notebook on all interfaces, port 8888, without opening a
# browser and with running as root allowed.
# NOTE(review): no token/password option is set here - confirm that Jupyter's
# default authentication is acceptable for the intended deployment.
CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]


178 changes: 97 additions & 81 deletions src/ydata_profiling/model/handler.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,97 @@
"""
Auxiliary handler methods for data summary extraction
"""
from typing import Any, Callable, Dict, List, Sequence

import networkx as nx
from visions import VisionsTypeset


def compose(functions: Sequence[Callable]) -> Callable:
    """
    Compose a sequence of functions.

    The positional arguments given to the returned function are passed to the
    first function. Each following function receives the previous result:
    unpacked as positional arguments when that result is a tuple, as a single
    argument otherwise.

    :param functions: sequence of functions
    :return: combined function applying all functions in order. It returns the
        result of the last function, or the argument tuple unchanged when
        ``functions`` is empty.
    """

    def composed_function(*args: Any) -> Any:
        # The result is the argument tuple or whatever the last function
        # returned, so it is typed as Any rather than as a list.
        result: Any = args  # Start with the input arguments
        for func in functions:
            result = func(*result) if isinstance(result, tuple) else func(result)
        return result

    return composed_function


class Handler:
    """A generic handler

    Allows any custom mapping between data types and functions
    """

    def __init__(
        self,
        mapping: Dict[str, List[Callable]],
        typeset: VisionsTypeset,
        *args,
        **kwargs
    ):
        """
        :param mapping: lists of functions keyed by data type name. The dict is
            stored as-is and updated in place by ``_complete_dag``.
        :param typeset: typeset whose ``base_graph`` supplies the type edges
        :param args: accepted but not used
        :param kwargs: accepted but not used
        """
        self.mapping = mapping
        self.typeset = typeset
        self._complete_dag()

    def _complete_dag(self) -> None:
        """
        Extend every type's function list with the functions of the types it
        is reached from in the typeset graph.

        The edges of ``typeset.base_graph`` are visited in topological order of
        its line graph; for each edge the functions of the source type are
        placed in front of the functions of the target type.
        """
        for from_type, to_type in nx.topological_sort(
            nx.line_graph(self.typeset.base_graph)
        ):
            from_key = str(from_type)
            to_key = str(to_type)

            # A source type without registered functions has nothing to pass
            # on; indexing the mapping directly would raise KeyError.
            if from_key not in self.mapping:
                continue

            # A target type without an entry starts from an empty list. The
            # concatenation always builds a new list, so the source type's
            # list is never shared with the target.
            self.mapping[to_key] = self.mapping[from_key] + self.mapping.get(
                to_key, []
            )

    def handle(self, dtype: str, *args, **kwargs) -> dict:
        """
        Run the functions registered for ``dtype`` on the given arguments.

        NOTE(review): the registered functions presumably pass along a
        (config, series, summary) tuple - confirm against the callers.

        :param dtype: data type name used as key into the mapping
        :param args: positional arguments handed to the first function
        :param kwargs: accepted but not used
        :return: the last element of the composed result. When no function is
            registered for ``dtype`` this is the last positional argument.
        """
        funcs = self.mapping.get(dtype, [])
        op = compose(funcs)
        summary = op(*args)[-1]
        return summary


def get_render_map() -> Dict[str, Callable]:
    """Return the render function to use for each variable type name."""
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    import ydata_profiling.report.structure.variables as renderers

    return {
        "Boolean": renderers.render_boolean,
        "Numeric": renderers.render_real,
        "Complex": renderers.render_complex,
        "Text": renderers.render_text,
        "DateTime": renderers.render_date,
        "Categorical": renderers.render_categorical,
        "URL": renderers.render_url,
        "Path": renderers.render_path,
        "File": renderers.render_file,
        "Image": renderers.render_image,
        "Unsupported": renderers.render_generic,
        "TimeSeries": renderers.render_timeseries,
    }
"""
Auxiliary handler methods for data summary extraction
"""
from typing import Any, Callable, Dict, List, Sequence, Tuple, TypeVar, cast

import networkx as nx
from visions import VisionsTypeset

# Generic type variable.
# NOTE(review): not referenced by any definition visible in this module -
# confirm it is still needed.
T = TypeVar("T")
# Signature of the composable functions: any arguments in, a tuple of values out.
SummaryFunction = Callable[..., Tuple[Any, ...]]


def compose(functions: Sequence[SummaryFunction]) -> SummaryFunction:
    """
    Chain a sequence of functions into a single callable.

    The returned callable passes its positional arguments to the first
    function; every later function receives the previous output unpacked as
    positional arguments. An output that is not a tuple is wrapped in a
    one-element tuple first.

    :param functions: sequence of functions
    :return: callable applying all functions in order and returning a tuple
        (the unchanged arguments when ``functions`` is empty).
    """

    def composed_function(*args: Any) -> Tuple[Any, ...]:
        current: Tuple[Any, ...] = args
        for step in functions:
            output = step(*current)
            # Normalise to a tuple so the next step can always be called
            # with unpacked arguments.
            current = output if isinstance(output, tuple) else (output,)
        return current

    return composed_function


class Handler:
    """A generic handler

    Maps data type names to lists of functions and runs the list registered
    for a given type.
    """

    def __init__(
        self,
        mapping: Dict[str, List[SummaryFunction]],
        typeset: VisionsTypeset,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """
        :param mapping: lists of functions keyed by data type name. The dict is
            stored as-is and updated in place by ``_complete_dag``.
        :param typeset: typeset whose ``base_graph`` supplies the type edges
        :param args: accepted but not used
        :param kwargs: accepted but not used
        """
        self.typeset = typeset
        self.mapping: Dict[str, List[SummaryFunction]] = mapping
        self._complete_dag()

    def _complete_dag(self) -> None:
        """
        Extend every type's function list with the functions of the types it
        is reached from in the typeset graph.

        Edges of ``typeset.base_graph`` are visited in topological order of its
        line graph. Edges whose source type has no mapping entry are skipped.
        """
        ordered_edges = nx.topological_sort(nx.line_graph(self.typeset.base_graph))
        for source, target in ordered_edges:
            source_key = str(source)
            target_key = str(target)

            if source_key in self.mapping:
                inherited = self.mapping[source_key]
                # Concatenation yields a new list, so a target without its
                # own entry gets an independent copy of the source's list.
                own = self.mapping.get(target_key, [])
                self.mapping[target_key] = inherited + own

    def handle(self, dtype: str, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Run the functions registered for ``dtype`` on the given arguments.

        NOTE(review): the registered functions presumably pass along a
        (config, series, summary) tuple - confirm against the callers.

        :param dtype: data type name used as key into the mapping
        :param args: positional arguments handed to the first function
        :param kwargs: accepted but not used
        :return: the last element of the composed result. When no function is
            registered for ``dtype`` this is the last positional argument.
        """
        pipeline = compose(self.mapping.get(dtype, []))
        outputs = pipeline(*args)
        return cast(Dict[str, Any], outputs[-1])


def get_render_map() -> Dict[str, Callable]:
    """Build the lookup from variable type name to its render function."""
    # The render module is imported at call time, not at module import time.
    # NOTE(review): presumably to avoid a circular import - confirm.
    import ydata_profiling.report.structure.variables as renderers

    return {
        "Boolean": renderers.render_boolean,
        "Numeric": renderers.render_real,
        "Complex": renderers.render_complex,
        "Text": renderers.render_text,
        "DateTime": renderers.render_date,
        "Categorical": renderers.render_categorical,
        "URL": renderers.render_url,
        "Path": renderers.render_path,
        "File": renderers.render_file,
        "Image": renderers.render_image,
        "Unsupported": renderers.render_generic,
        "TimeSeries": renderers.render_timeseries,
    }
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def get_character_counts_vc(vc: pd.Series) -> pd.Series:
if len(counts) > 0:
counts = counts.groupby(level=0, sort=False).sum()
counts = counts.sort_values(ascending=False)
# FIXME: correct in split, below should be zero: print(counts.loc[''])
counts = counts[counts.index.str.len() > 0]
return counts

Expand Down
1 change: 0 additions & 1 deletion src/ydata_profiling/model/spark/missing_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def __len__(self) -> Optional[int]:
def missing_bar(config: Settings, df: DataFrame) -> str:
import pyspark.sql.functions as F

# FIXME: move to univariate
data_nan_counts = (
df.agg(
*[F.count(F.when(F.isnull(c) | F.isnan(c), c)).alias(c) for c in df.columns]
Expand Down
2 changes: 1 addition & 1 deletion src/ydata_profiling/report/presentation/core/collapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


class Collapse(ItemRenderer):
def __init__(self, button: ToggleButton, item: Renderable, **kwargs):
def __init__(self, button: ToggleButton, item: Renderable, **kwargs: Any):
super().__init__("collapse", {"button": button, "item": item}, **kwargs)

def __repr__(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion src/ydata_profiling/report/presentation/core/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(
anchor_id: Optional[str] = None,
classes: Optional[str] = None,
oss: Optional[bool] = None,
**kwargs,
**kwargs: Any,
):
args = {"items": items, "nested": nested}
args.update(**kwargs)
Expand Down
2 changes: 1 addition & 1 deletion src/ydata_profiling/report/presentation/core/dropdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(
anchor_id: str,
classes: list,
is_row: bool,
**kwargs
**kwargs: Any,
):
super().__init__(
"dropdown",
Expand Down
6 changes: 3 additions & 3 deletions src/ydata_profiling/report/presentation/core/renderable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from typing import Any, Callable, Dict, Optional


class Renderable(ABC):
Expand Down Expand Up @@ -34,9 +34,9 @@ def classes(self) -> str:
def render(self) -> Any:
pass

def __str__(self):
def __str__(self) -> str:
return self.__class__.__name__

@classmethod
def convert_to_class(cls, obj: "Renderable", flavour_func) -> None: # noqa: ANN001
def convert_to_class(cls, obj: "Renderable", flavour_func: Callable) -> None:
obj.__class__ = cls
4 changes: 2 additions & 2 deletions src/ydata_profiling/report/presentation/core/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class Root(ItemRenderer):
"""

def __init__(
self, name: str, body: Renderable, footer: Renderable, style: Style, **kwargs
self, name: str, body: Renderable, footer: Renderable, style: Style, **kwargs: Any
):
super().__init__(
"report",
Expand All @@ -23,7 +23,7 @@ def __init__(
def __repr__(self) -> str:
return "Root"

def render(self, **kwargs) -> Any:
def render(self, **kwargs: Any) -> Any:
raise NotImplementedError()

@classmethod
Expand Down
6 changes: 3 additions & 3 deletions src/ydata_profiling/report/presentation/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ def __init__(
top: Renderable,
bottom: Optional[Renderable] = None,
ignore: bool = False,
**kwargs,
**kwargs: Any,
):
super().__init__(
"variable", {"top": top, "bottom": bottom, "ignore": ignore}, **kwargs
)

def __str__(self):
def __str__(self) -> str:
top_text = str(self.content["top"]).replace("\n", "\n\t")
bottom_text = str(self.content["bottom"]).replace("\n", "\n\t")

Expand All @@ -25,7 +25,7 @@ def __str__(self):
text += f"- bottom: {bottom_text}"
return text

def __repr__(self):
def __repr__(self) -> str:
return "Variable"

def render(self) -> Any:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@
HTMLVariableInfo,
)

html_mapping = {
from typing import cast
from ydata_profiling.report.presentation.flavours.flavours import _FlavourMapping

html_mapping = cast(_FlavourMapping, {
Container: HTMLContainer,
Variable: HTMLVariable,
VariableInfo: HTMLVariableInfo,
Expand All @@ -59,6 +62,6 @@
Collapse: HTMLCollapse,
CorrelationTable: HTMLCorrelationTable,
Scores: HTMLScores,
}
})

register_flavour("html", html_mapping)
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@
WidgetVariableInfo,
)

widget_mapping = {
from typing import cast
from ydata_profiling.report.presentation.flavours.flavours import _FlavourMapping

widget_mapping = cast(_FlavourMapping, {
Container: WidgetContainer,
Variable: WidgetVariable,
VariableInfo: WidgetVariableInfo,
Expand All @@ -56,6 +59,6 @@
ToggleButton: WidgetToggleButton,
Collapse: WidgetCollapse,
CorrelationTable: WidgetCorrelationTable,
}
})

register_flavour("widget", widget_mapping)
20 changes: 13 additions & 7 deletions src/ydata_profiling/report/presentation/flavours/flavours.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,32 @@
"""
Flavours registry information
"""
from typing import Callable, Dict, Type

from ydata_profiling.report.presentation.core import Root
from ydata_profiling.report.presentation.core.renderable import Renderable

_FLAVOUR_REGISTRY: dict = {}
_FlavourMapping = Dict[Type[Renderable], Type[Renderable]]
_FLAVOUR_REGISTRY: Dict[str, _FlavourMapping] = {}


def register_flavour(name: str, mapping: dict) -> None:
def register_flavour(name: str, mapping: _FlavourMapping) -> None:
_FLAVOUR_REGISTRY[name] = mapping


def get_flavour_mapping(name: str) -> dict:
def get_flavour_mapping(name: str) -> _FlavourMapping:
if name not in _FLAVOUR_REGISTRY:
raise ValueError(f"Flavour '{name}' is not registered.")
return _FLAVOUR_REGISTRY[name]


_FlavourFunc = Callable[[Renderable], Renderable]


def apply_renderable_mapping(
mapping: dict,
mapping: _FlavourMapping,
structure: Renderable,
flavour_func, # noqa: ANN001
flavour_func: _FlavourFunc,
) -> None:
mapping[type(structure)].convert_to_class(structure, flavour_func)

Expand All @@ -29,7 +35,7 @@ def HTMLReport(structure: Root) -> Root:
from ydata_profiling.report.presentation.flavours import flavour_html # noqa: F401

mapping = get_flavour_mapping("html")
apply_renderable_mapping(mapping, structure, flavour_func=HTMLReport)
apply_renderable_mapping(mapping, structure, flavour_func=HTMLReport) # type: ignore
return structure


Expand All @@ -39,5 +45,5 @@ def WidgetReport(structure: Root) -> Root:
)

mapping = get_flavour_mapping("widget")
apply_renderable_mapping(mapping, structure, flavour_func=WidgetReport)
apply_renderable_mapping(mapping, structure, flavour_func=WidgetReport) # type: ignore
return structure