diff --git a/.github/ISSUE_TEMPLATE/bug_report_form.yaml b/.github/ISSUE_TEMPLATE/bug_report_form.yaml index 691fdc9c8..318d2820d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report_form.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report_form.yaml @@ -41,7 +41,7 @@ body: description: A minimal standalone code sample that reproduces the bug placeholder: | import pandas as pd - from ydata_profiling import ProfileReport + from data_profiling import ProfileReport df = pd.read_parquet(r"") report = ProfileReport(df, title="bug report") diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8d137c3b3..83a660af6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: hooks: - id: nbqa-black - id: nbqa-isort - args: [ --profile=black, --project=ydata_profiling ] + args: [ --profile=black, --project=data_profiling ] - id: nbqa-pyupgrade args: [ --py36-plus ] - repo: https://github.com/asottile/pyupgrade @@ -29,12 +29,12 @@ repos: hooks: - id: isort files: '.*' - args: [ --profile=black, --project=ydata_profiling ] + args: [ --profile=black, --project=data_profiling ] - repo: https://github.com/mgedmin/check-manifest rev: "0.47" hooks: - id: check-manifest - args: [ "--ignore=src/ydata_profiling/version.py" ] + args: [ "--ignore=src/data_profiling/version.py" ] stages: [manual] - repo: https://github.com/PyCQA/flake8 rev: "4.0.1" @@ -60,8 +60,8 @@ repos: (?x)( ^tests/| ^docsrc/| - ^src/ydata_profiling/utils/common.py| - ^src/ydata_profiling/utils/imghdr_patch.py + ^src/data_profiling/utils/common.py| + ^src/data_profiling/utils/imghdr_patch.py ) - repo: https://github.com/asottile/blacken-docs rev: v1.12.1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3e0c29b3e..74b951a52 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,8 +1,8 @@ -## How to contribute to YData-Profiling +## How to contribute to Data-Profiling -YData-profiling aims to ease exploratory data analysis for structured datasets, including time-series. 
+Data-profiling aims to ease exploratory data analysis for structured datasets, including time-series. Our focus is to provide users with useful and robust statistics for such datasets encountered in industry, academia and elsewhere. -YData-profiling is open-source and stimulates contributions from passionate community users. +Data-profiling is open-source and stimulates contributions from passionate community users. #### Themes to contribute @@ -17,23 +17,23 @@ In line with our aim, we identify the following themes: time series analysis, or even images (e.g. dimensions, EXIF). - _Related_: [#7][i7], [#129][i129], [#190][i190], [#204][i204] or [create one](https://github.com/ydataai/ydata-profiling/issues/new/choose). + _Related_: [#7][i7], [#129][i129], [#190][i190], [#204][i204] or [create one](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/new/choose). - **Stability, Performance and Restricted environment compatibility:** Data exploration takes place in all kinds of conditions, on the latest machine learning platforms with enormous dataset to managed environments in large corporations. - `ydata-profiling` helps analysts, researchers and engineers alike in these cases. + `fg-data-profiling` helps analysts, researchers and engineers alike in these cases. We do this by fixing bugs, improving performance on big datasets and adding environment compatibility. _Suggestions for contribution (Performance)_: - Perform concurrency analysis or profile execution times and leverage the gained insights for improved performance (e.g. multiprocessing, cython, numba) or test the performance of `ydata-profiling` with [big data sets](https://www.stats.govt.nz/large-datasets/csv-files-for-download/) and corresponding commonly used data formats (such as parquet). + Perform concurrency analysis or profile execution times and leverage the gained insights for improved performance (e.g. 
multiprocessing, cython, numba) or test the performance of `fg-data-profiling` with [big data sets](https://www.stats.govt.nz/large-datasets/csv-files-for-download/) and corresponding commonly used data formats (such as parquet). _Suggestions for contribution (Stability)_: - Either review the code and add tests or watch the [issues page](https://github.com/ydataai/ydata-profiling/issues) and [Stackoverflow tag](https://stackoverflow.com/questions/tagged/ydata-profiling) to find current issues. + Either review the code and add tests or watch the [issues page](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues) and [Stackoverflow tag](https://stackoverflow.com/questions/tagged/ydata-profiling) to find current issues. - _Related_: [#98][i98], [#122][i122] or [create one](https://github.com/ydataai/ydata-profiling/issues/new/choose). + _Related_: [#98][i98], [#122][i122] or [create one](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/new/choose). - **Interaction, presentation and user experience**: - As `ydata-profiling` eases exploratory data analysis, working with the package should reflect that. + As `fg-data-profiling` eases exploratory data analysis, working with the package should reflect that. Interaction and user experience plays a central role in working with the package. Working on interactive and static features is possible through the modular nature of the package: the user can configure which features to use. @@ -46,7 +46,7 @@ In line with our aim, we identify the following themes: Other forms of distribution than HTML (for example PDF or packaged as an GUI application via [PyQt](https://riverbankcomputing.com/software/pyqt/intro)) Users should be able to share reports (improve size of labels in graph, add explanations to correlation matrices and allow for styling/branding). - _Related_: [#161][i161], [#175][i175], [#191][i191] or [create one](https://github.com/ydataai/ydata-profiling/issues/new/choose). 
+ _Related_: [#161][i161], [#175][i175], [#191][i191] or [create one](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/new/choose). - **Community**: The success of this package demonstrates the power of sharing and working together. @@ -54,22 +54,22 @@ In line with our aim, we identify the following themes: _Suggestions for contribution_: Share with us if this package is of value to you, let us know [in our community](https://discord.com/invite/mw7xjJ7b7s). - We are interested in how you use `ydata-profiling` in your work. + We are interested in how you use `fg-data-profiling` in your work. - _Related_: [#87][i87] or [create one](https://github.com/ydataai/ydata-profiling/issues/new/choose). + _Related_: [#87][i87] or [create one](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/new/choose). - **Machine learning:** - `ydata-profiling` is not a machine learning package, even though many of our users use EDA as a step prior to developing their models. + `fg-data-profiling` is not a machine learning package, even though many of our users use EDA as a step prior to developing their models. Our focus lies in the exploratory data analysis. Any functionality that enables machine learning applications by more effective data profiling, is welcome. - _Related_: [#124][i124], [#173][i173], [#198][i198] or [create one](https://github.com/ydataai/ydata-profiling/issues/new/choose). + _Related_: [#124][i124], [#173][i173], [#198][i198] or [create one](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/new/choose). #### **Did you find a bug?** -* **Ensure the bug was not already reported** by searching on Github under [Issues](https://github.com/ydataai/ydata-profiling/issues). +* **Ensure the bug was not already reported** by searching on Github under [Issues](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues). 
-* If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/ydataai/ydata-profiling/issues/new/choose). +* If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/new/choose). If possible, use the relevant bug report templates to create the issue. #### **Did you write a patch that fixes a bug?** @@ -84,19 +84,19 @@ Include the relevant issue number if applicable. We would like to thank everyone who has helped getting us to where we are now. -See the [Contributor Graph](https://github.com/ydataai/ydata-profiling/graphs/contributors) - -[i7]: https://github.com/ydataai/ydata-profiling/issues/7 -[i129]: https://github.com/ydataai/ydata-profiling/issues/129 -[i190]: https://github.com/ydataai/ydata-profiling/issues/190 -[i204]: https://github.com/ydataai/ydata-profiling/issues/204 -[i98]: https://github.com/ydataai/ydata-profiling/issues/98 -[i122]: https://github.com/ydataai/ydata-profiling/issues/122 -[i124]: https://github.com/ydataai/ydata-profiling/issues/24 -[i173]: https://github.com/ydataai/ydata-profiling/issues/173 -[i198]: https://github.com/ydataai/ydata-profiling/issues/198 -[i87]: https://github.com/ydataai/ydata-profiling/issues/87 -[i161]: https://github.com/ydataai/ydata-profiling/issues/161 -[i175]: https://github.com/ydataai/ydata-profiling/issues/175 -[i191]: https://github.com/ydataai/ydata-profiling/issues/191 +See the [Contributor Graph](https://github.com/Data-Centric-AI-Community/fg-data-profiling/graphs/contributors) + +[i7]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/7 +[i129]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/129 +[i190]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/190 +[i204]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/204 +[i98]: 
https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/98 +[i122]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/122 +[i124]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/124 +[i173]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/173 +[i198]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/198 +[i87]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/87 +[i161]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/161 +[i175]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/175 +[i191]: https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues/191 diff --git a/MANIFEST.in b/MANIFEST.in index 36a019687..0fdd346f3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,10 +6,10 @@ include LICENSE include *.md # Templates and static resources -recursive-include src/ydata_profiling/report/presentation/flavours/html/templates *.html *.js *.css +recursive-include src/data_profiling/report/presentation/flavours/html/templates *.html *.js *.css # Configuration -include src/ydata_profiling/*.yaml +include src/data_profiling/*.yaml # Spark Dev venv recursive-include venv *.yml diff --git a/Makefile b/Makefile index 22a69c758..67f494366 100644 --- a/Makefile +++ b/Makefile @@ -7,17 +7,17 @@ test: pytest tests/unit/ pytest tests/issues/ pytest --nbval tests/notebooks/ - ydata_profiling -h + data_profiling -h test_spark: pytest tests/backends/spark_backend/ - ydata_profiling -h + data_profiling -h test_cov: pytest --cov=. tests/unit/ pytest --cov=. --cov-append tests/issues/ pytest --cov=. 
--cov-append --nbval tests/notebooks/ - ydata_profiling -h + data_profiling -h examples: find ./examples -maxdepth 2 -type f -name "*.py" -execdir python {} \; diff --git a/README.md b/README.md index 3db9a617b..602bfcf88 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,24 @@ -# ydata-profiling +# fg-data-profiling > **`ydata-profiling` is now `fg-data-profiling`.** This package has been renamed to `fg-data-profiling`. Please follow the [Migration Guide](#migration-guide) as soon as possible — the old package will no longer receive updates or bug fixes. [![Build Status](https://github.com/ydataai/pandas-profiling/actions/workflows/tests.yml/badge.svg?branch=master)](https://github.com/ydataai/pandas-profiling/actions/workflows/tests.yml) -[![PyPI download month](https://img.shields.io/pypi/dm/ydata-profiling.svg)](https://pypi.python.org/pypi/ydata-profiling/) -[![](https://pepy.tech/badge/pandas-profiling)](https://pypi.org/project/ydata-profiling/) +[![PyPI download month](https://img.shields.io/pypi/dm/fg-data-profiling.svg)](https://pypi.python.org/pypi/fg-data-profiling/) +[![](https://pepy.tech/badge/pandas-profiling)](https://pypi.org/project/fg-data-profiling/) [![Code Coverage](https://codecov.io/gh/ydataai/pandas-profiling/branch/master/graph/badge.svg?token=gMptB4YUnF)](https://codecov.io/gh/ydataai/pandas-profiling) [![Release Version](https://img.shields.io/github/release/ydataai/pandas-profiling.svg)](https://github.com/ydataai/pandas-profiling/releases) -[![Python Version](https://img.shields.io/pypi/pyversions/ydata-profiling)](https://pypi.org/project/ydata-profiling/) +[![Python Version](https://img.shields.io/pypi/pyversions/fg-data-profiling)](https://pypi.org/project/fg-data-profiling/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) -

YData Profiling Logo

+

Data Profiling Logo

Documentation | - Discord + Discord | - Stack Overflow + Stack Overflow | Latest changelog @@ -28,12 +28,12 @@ Do you like this project? Show us your love and give feedback!

-`ydata-profiling` primary goal is to provide a one-line Exploratory Data Analysis (EDA) experience in a consistent and fast solution. Like pandas `df.describe()` function, that is so handy, ydata-profiling delivers an extended analysis of a DataFrame while allowing the data analysis to be exported in different formats such as **html** and **json**. +`fg-data-profiling` primary goal is to provide a one-line Exploratory Data Analysis (EDA) experience in a consistent and fast solution. Like pandas `df.describe()` function, that is so handy, fg-data-profiling delivers an extended analysis of a DataFrame while allowing the data analysis to be exported in different formats such as **html** and **json**. The package outputs a simple and digested analysis of a dataset, including **time-series** and **text**. > **Looking for a scalable solution that can fully integrate with your database systems?**
-> Leverage YData Fabric Data Catalog to connect to different databases and storages (Oracle, snowflake, PostGreSQL, GCS, S3, etc.) and leverage an interactive and guided profiling experience in Fabric. Check out the [Community Version](http://ydata.ai/register?utm_source=ydata-profiling&utm_medium=documentation&utm_campaign=YData%20Fabric%20Community). +> Leverage YData Fabric Data Catalog to connect to different databases and storages (Oracle, snowflake, PostGreSQL, GCS, S3, etc.) and leverage an interactive and guided profiling experience in Fabric. Check out the [Community Version](http://ydata.ai/register?utm_source=data-profiling&utm_medium=documentation&utm_campaign=YData%20Fabric%20Community). ## Migration Guide @@ -73,11 +73,11 @@ grep -r "ydata_profiling" . --include="*.py" ### Install ```cmd -pip install ydata-profiling +pip install fg-data-profiling ``` or ```cmd -conda install -c conda-forge ydata-profiling +conda install -c conda-forge fg-data-profiling ``` ### Start profiling @@ -86,7 +86,7 @@ Start by loading your pandas `DataFrame` as you normally would, e.g. by using: ```python import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport df = pd.DataFrame(np.random.rand(100, 5), columns=["a", "b", "c", "d", "e"]) ``` @@ -124,16 +124,16 @@ The report contains three additional sections: ### ✨ Spark Spark support has been released, but we are always looking for an extra pair of hands 👐. -[Check current work in progress!](https://github.com/ydataai/ydata-profiling/projects/3). +[Check current work in progress!](https://github.com/Data-Centric-AI-Community/fg-data-profiling/projects/3). ## 📝 Use cases -YData-profiling can be used to deliver a variety of different use-case. The documentation includes guides, tips and tricks for tackling them: +fg-data-profiling can be used to deliver a variety of different use-case. 
The documentation includes guides, tips and tricks for tackling them: | Use case | Description | |----------|---------------------------------------------------------------------------------------------| | [Comparing datasets](https://docs.profiling.ydata.ai/latest/features/comparing_datasets) | Comparing multiple version of the same dataset | | [Profiling a Time-Series dataset](https://docs.profiling.ydata.ai/latest/features/time_series_datasets) | Generating a report for a time-series dataset with a single line of code | -|[Profiling large datasets](https://docs.profiling.ydata.ai/latest/features/big_data) | Tips on how to prepare data and configure `ydata-profiling` for working with large datasets | +|[Profiling large datasets](https://docs.profiling.ydata.ai/latest/features/big_data) | Tips on how to prepare data and configure `fg-data-profiling` for working with large datasets | | [Handling sensitive data](https://docs.profiling.ydata.ai/latest/features/sensitive_data) | Generating reports which are mindful about sensitive data in the input dataset | | [Dataset metadata and data dictionaries](https://docs.profiling.ydata.ai/latest/features/metadata) | Complementing the report with dataset details and column-specific data dictionaries | | [Customizing the report's appearance](https://docs.profiling.ydata.ai/latest/features/custom_reports) | Changing the appearance of the report's page and of the contained visualizations | @@ -142,7 +142,7 @@ YData-profiling can be used to deliver a variety of different use-case. The docu There are two interfaces to consume the report inside a Jupyter notebook: through widgets and through an embedded HTML report. -Notebook Widgets +Notebook Widgets The above is achieved by simply displaying the report as a set of widgets. 
In a Jupyter Notebook, run: @@ -178,10 +178,10 @@ profile.to_file("your_report.json") ### Using in the command line -For standard formatted CSV files (which can be read directly by pandas without additional settings), the `ydata_profiling` executable can be used in the command line. The example below generates a report named *Example Profiling Report*, using a configuration file called `default.yaml`, in the file `report.html` by processing a `data.csv` dataset. +For standard formatted CSV files (which can be read directly by pandas without additional settings), the `data_profiling` executable can be used in the command line. The example below generates a report named *Example Profiling Report*, using a configuration file called `default.yaml`, in the file `report.html` by processing a `data.csv` dataset. ```sh -ydata_profiling --title "Example Profiling Report" --config_file default.yaml data.csv report.html +data_profiling --title "Example Profiling Report" --config_file default.yaml data.csv report.html ``` Additional details on the CLI are available [on the documentation](https://ydata-profiling.ydata.ai/docs/master/pages/getting_started/quickstart.html#command-line-usage). 
@@ -202,21 +202,21 @@ The following example reports showcase the potentialities of the package across * [Website Inaccessibility](https://ydata-profiling.ydata.ai/examples/master/features/website_inaccessibility_report.html) (website accessibility analysis, showcasing support for URL data) * [Orange prices](https://ydata-profiling.ydata.ai/examples/master/features/united_report.html) and * [Coal prices](https://ydata-profiling.ydata.ai/examples/master/features/flatly_report.html) (simple pricing evolution datasets, showcasing the theming options) -* [USA Air Quality](https://github.com/ydataai/pandas-profiling/tree/master/examples/usaairquality) (Time-series air quality dataset EDA example) -* [HCC](https://github.com/ydataai/pandas-profiling/tree/master/examples/hcc) (Open dataset from healthcare, showcasing compare between two sets of data, before and after preprocessing) +* [USA Air Quality](https://github.com/Data-Centric-AI-Community/fg-data-profiling/tree/master/examples/usaairquality) (Time-series air quality dataset EDA example) +* [HCC](https://github.com/Data-Centric-AI-Community/fg-data-profiling/tree/master/examples/hcc) (Open dataset from healthcare, showcasing compare between two sets of data, before and after preprocessing) ## 🛠️ Installation Additional details, including information about widget support, are available [on the documentation](https://ydata-profiling.ydata.ai/docs/master/pages/getting_started/installation.html). 
### Using pip -[![PyPi Downloads](https://pepy.tech/badge/ydata-profiling)](https://pepy.tech/project/ydata-profiling) -[![PyPi Monthly Downloads](https://pepy.tech/badge/pandas-profiling/month)](https://pepy.tech/project/ydata-profiling/month) -[![PyPi Version](https://badge.fury.io/py/ydata-profiling.svg)](https://pypi.org/project/ydata-profiling/) +[![PyPi Downloads](https://pepy.tech/badge/fg-data-profiling)](https://pepy.tech/project/fg-data-profiling) +[![PyPi Monthly Downloads](https://pepy.tech/badge/pandas-profiling/month)](https://pepy.tech/project/fg-data-profiling/month) +[![PyPi Version](https://badge.fury.io/py/fg-data-profiling.svg)](https://pypi.org/project/fg-data-profiling/) You can install using the `pip` package manager by running: ```sh -pip install -U ydata-profiling +pip install -U fg-data-profiling ``` #### Extras @@ -230,7 +230,7 @@ The package declares "extras", sets of additional dependencies. Install these with e.g. ```sh -pip install -U ydata-profiling[notebook,unicode,pyspark] +pip install -U fg-data-profiling[notebook,unicode,pyspark] ``` @@ -242,7 +242,7 @@ pip install -U ydata-profiling[notebook,unicode,pyspark] You can install using the `conda` package manager by running: ```sh -conda install -c conda-forge ydata-profiling +conda install -c conda-forge fg-data-profiling ``` ### From source (development) @@ -268,7 +268,7 @@ You need [Python 3](https://python3statement.github.io/) to run the package. 
Oth ## 🔗 Integrations -To maximize its usefulness in real world contexts, `ydata-profiling` has a set of implicit and explicit integrations with a variety of other actors in the Data Science ecosystem: +To maximize its usefulness in real world contexts, `fg-data-profiling` has a set of implicit and explicit integrations with a variety of other actors in the Data Science ecosystem: | Integration type | Description | |---|---| @@ -276,15 +276,15 @@ To maximize its usefulness in real world contexts, `ydata-profiling` has a set o | [Great Expectations](https://ydata-profiling.ydata.ai/docs/master/pages/integrations/great_expectations.html) | Generating [Great Expectations](https://greatexpectations.io) expectations suites directly from a profiling report | | [Interactive applications](https://docs.profiling.ydata.ai/latest/integrations/interactive_applications) | Embedding profiling reports in [Streamlit](http://streamlit.io), [Dash](http://dash.plotly.com) or [Panel](https://panel.holoviz.org) applications | | [Pipelines](https://ydata-profiling.ydata.ai/docs/master/pages/integrations/pipelines.html) | Integration with DAG workflow execution tools like [Airflow](https://airflow.apache.org) or [Kedro](https://kedro.org) | -| [Cloud services](https://ydata-profiling.ydata.ai/docs/master/pages/integrations/cloud_services.html) | Using `ydata-profiling` in hosted computation services like [Lambda](https://lambdalabs.com), [Google Cloud](https://github.com/GoogleCloudPlatform/analytics-componentized-patterns/blob/master/retail/propensity-model/bqml/bqml_kfp_retail_propensity_to_purchase.ipynb) or [Kaggle](https://www.kaggle.com/code) | -| [IDEs](https://ydata-profiling.ydata.ai/docs/master/pages/integrations/ides.html) | Using `ydata-profiling` directly from integrated development environments such as [PyCharm](https://www.jetbrains.com/pycharm/) | +| [Cloud services](https://ydata-profiling.ydata.ai/docs/master/pages/integrations/cloud_services.html) | Using 
`fg-data-profiling` in hosted computation services like [Lambda](https://lambdalabs.com), [Google Cloud](https://github.com/GoogleCloudPlatform/analytics-componentized-patterns/blob/master/retail/propensity-model/bqml/bqml_kfp_retail_propensity_to_purchase.ipynb) or [Kaggle](https://www.kaggle.com/code) | +| [IDEs](https://ydata-profiling.ydata.ai/docs/master/pages/integrations/ides.html) | Using `fg-data-profiling` directly from integrated development environments such as [PyCharm](https://www.jetbrains.com/pycharm/) | ## 🙋 Support Need help? Want to share a perspective? Report a bug? Ideas for collaborations? Reach out via the following channels: -- [Stack Overflow](https://stackoverflow.com/questions/tagged/pandas-profiling+or+ydata-profiling): ideal for asking questions on how to use the package -- [GitHub Issues](https://github.com/ydataai/ydata-profiling/issues): bugs, proposals for changes, feature requests -- [Discord](https://tiny.ydata.ai/dcai-ydata-profiling): ideal for projects discussions, ask questions, collaborations, general chat +- [Stack Overflow](https://stackoverflow.com/questions/tagged/pandas-profiling+or+ydata-profiling): ideal for asking questions on how to use the package +- [GitHub Issues](https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues): bugs, proposals for changes, feature requests +- [Discord](https://tiny.ydata.ai/dcai-data-profiling): ideal for projects discussions, ask questions, collaborations, general chat > **Need Help?**
> Get your questions answered with a product owner by [booking a Pawsome chat](https://meetings.hubspot.com/fabiana-clemente)! 🐼 @@ -294,13 +294,13 @@ Need help? Want to share a perspective? Report a bug? Ideas for collaborations? ## 🤝🏽 Contributing Learn how to get involved in the [Contribution Guide](https://ydata-profiling.ydata.ai/docs/master/pages/support_contrib/contribution_guidelines.html). -A low-threshold place to ask questions or start contributing is the [Data Centric AI Community's Discord](https://tiny.ydata.ai/dcai-ydata-profiling). +A low-threshold place to ask questions or start contributing is the [Data Centric AI Community's Discord](https://tiny.ydata.ai/dcai-data-profiling). A big thank you to all our amazing contributors! - - + + Contributors wall made with [contrib.rocks](https://contrib.rocks). diff --git a/docs/advanced_settings/analytics.md b/docs/advanced_settings/analytics.md index e21913b1d..f0c154c06 100644 --- a/docs/advanced_settings/analytics.md +++ b/docs/advanced_settings/analytics.md @@ -2,14 +2,14 @@ ## Overview -`ydata-profiling` is a powerful library designed to generate profile reports from pandas and Spark Dataframe objects. -As part of our ongoing efforts to improve user experience and functionality, `ydata-profiling` +`data-profiling` is a powerful library designed to generate profile reports from pandas and Spark Dataframe objects. +As part of our ongoing efforts to improve user experience and functionality, `data-profiling` includes a telemetry feature. This feature collects anonymous usage data, helping us understand how the library is used and identify areas for improvement. 
The primary goal of collecting telemetry data is to: -- Enhance the functionality and performance of the ydata-profiling library +- Enhance the functionality and performance of the data-profiling library - Prioritize new features based on user engagement - Identify common issues and bugs to improve overall user experience @@ -18,15 +18,15 @@ The primary goal of collecting telemetry data is to: The telemetry system collects non-personal, anonymous information such as: - Python version -- `ydata-profiling` version -- Frequency of use of `ydata-profiling` features +- `data-profiling` version +- Frequency of use of `data-profiling` features - Errors or exceptions thrown within the library ## Disabling usage analytics We respect your choice to not participate in our telemetry collection. If you prefer to disable telemetry, you can do so by setting an environment variable on your system. Disabling telemetry will not affect the functionality -of the ydata-profiling library, except for the ability to contribute to its usage analytics. +of the data-profiling library, except for the ability to contribute to its usage analytics. ### Set an Environment Variable diff --git a/docs/advanced_settings/available_settings.md b/docs/advanced_settings/available_settings.md index ff9917958..b82cfe3df 100644 --- a/docs/advanced_settings/available_settings.md +++ b/docs/advanced_settings/available_settings.md @@ -1,6 +1,6 @@ # Available Settings -A set of options is available in order to customize the behaviour of ``ydata-profiling`` and the appearance of the generated report. The depth of customization allows the creation of behaviours highly targeted at the specific dataset being analysed. The available settings are listed below. To learn how to change them, check :doc:`changing_settings`. +A set of options is available in order to customize the behaviour of ``data-profiling`` and the appearance of the generated report. 
The depth of customization allows the creation of behaviours highly targeted at the specific dataset being analysed. The available settings are listed below. To learn how to change them, check :doc:`changing_settings`. ## General settings @@ -45,8 +45,8 @@ Configure the schema type for a given dataset. import json import pandas as pd - from ydata_profiling import ProfileReport - from ydata_profiling.utils.cache import cache_file + from data_profiling import ProfileReport + from data_profiling.utils.cache import cache_file file_name = cache_file( "titanic.csv", diff --git a/docs/advanced_settings/changing_settings.md b/docs/advanced_settings/changing_settings.md index 30be08564..7d840f1fd 100644 --- a/docs/advanced_settings/changing_settings.md +++ b/docs/advanced_settings/changing_settings.md @@ -44,21 +44,21 @@ r = ProfileReport( ## Through a custom configuration file -To control `ydata-profiling` through a custom file, you can start with +To control `data-profiling` through a custom file, you can start with one of the sample configuration files below: - [default configuration - file](https://github.com/ydataai/ydata-profiling/blob/master/src/ydata_profiling/config_default.yaml) + file](https://github.com/Data-Centric-AI-Community/fg-data-profiling/blob/master/src/data_profiling/config_default.yaml) (default) - [minimal configuration - file](https://github.com/ydataai/ydata-profiling/blob/master/src/ydata_profiling/config_minimal.yaml) + file](https://github.com/Data-Centric-AI-Community/fg-data-profiling/blob/master/src/data_profiling/config_minimal.yaml) (minimal computation, optimized for performance) Change the configuration to your liking and point towards that configuration file when computing the report: ``` python linenums="1" title="Custom configuration file" -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport profile = ProfileReport(df, config_file="your_config.yml") profile.to_file("report.html") @@ -70,7 +70,7 @@ Any 
configuration setting can also be read from environment variables. For example: ```python linenums="1" title="Setting title for the report with parameters" -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport profile = ProfileReport(df, title="My Custom Profiling Report") ``` @@ -79,7 +79,7 @@ is equivalent to setting the title as an environment variable ```python linenums="1" title="Set title through environment variables" import os -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport os.environ("PROFILE_TITLE")='My Custom Profiling Report' diff --git a/docs/advanced_settings/collaborative_data_profiling.md b/docs/advanced_settings/collaborative_data_profiling.md index 5c09e789e..88a75f5e7 100644 --- a/docs/advanced_settings/collaborative_data_profiling.md +++ b/docs/advanced_settings/collaborative_data_profiling.md @@ -4,7 +4,7 @@ [YData Fabric](https://ydata.ai/products/fabric) is a Data-Centric AI development platform. YData Fabric provides all capabilities of -ydata-profiling in a hosted environment combined with a guided UI +data-profiling in a hosted environment combined with a guided UI experience. [Fabric\'s Data Catalog](https://ydata.ai/products/data_catalog) diff --git a/docs/advanced_settings/tables/config_general.csv b/docs/advanced_settings/tables/config_general.csv index ea2d4566f..4758814f4 100644 --- a/docs/advanced_settings/tables/config_general.csv +++ b/docs/advanced_settings/tables/config_general.csv @@ -1,4 +1,4 @@ Parameter,Type,Default,Description ``title``,string,"YData Profiling Report","Title for the report, shown in the header and title bar." ``pool_size``,integer,0,"Number of workers in thread pool. When set to zero, it is set to the number of CPUs available." -``progress_bar``,boolean,``True``,"If ``True``, ``ydata-profiling`` will display a progress bar." +``progress_bar``,boolean,``True``,"If ``True``, ``data-profiling`` will display a progress bar." 
diff --git a/docs/features/big_data.md b/docs/features/big_data.md index a95c2c083..3a7d41c5a 100644 --- a/docs/features/big_data.md +++ b/docs/features/big_data.md @@ -2,7 +2,7 @@ -By default, `ydata-profiling` comprehensively summarizes the input +By default, `data-profiling` comprehensively summarizes the input dataset in a way that gives the most insights for data analysis. For small datasets, these computations can be performed in *quasi* real-time. For larger datasets, deciding upfront which calculations to @@ -10,7 +10,7 @@ make might be required. Whether a computation scales to a large datasets not only depends on the exact size of the dataset, but also on its complexity and on whether fast computations are available. If the computation time of the profiling becomes a bottleneck, -`ydata-profiling` offers several alternatives to overcome it. +`data-profiling` offers several alternatives to overcome it. !!! info "Scale in a fully managed system" @@ -27,9 +27,9 @@ computation time of the profiling becomes a bottleneck, This mode was introduced in version v4.0.0 -`ydata-profiling` now supports Spark Dataframes profiling. You can find +`data-profiling` now supports Spark Dataframes profiling. You can find an example of the integration -[here](https://github.com/ydataai/ydata-profiling/blob/master/examples/features/spark_example.py). +[here](https://github.com/Data-Centric-AI-Community/data-profiling/blob/master/examples/features/spark_example.py). 
**Features supported:** - Univariate variables' analysis - Head and Tail dataset sample - Correlation matrices: Pearson and Spearman @@ -38,7 +38,7 @@ dataset sample - Correlation matrices: Pearson and Spearman histogram computation Keep an eye on the -[GitHub](https://github.com/ydataai/ydata-profiling/issues) page to +[GitHub](https://github.com/Data-Centric-AI-Community/data-profiling/issues) page to follow the updates on the implementation of [Pyspark Dataframes support](https://github.com/orgs/ydataai/projects/16/views/2). @@ -48,7 +48,7 @@ support](https://github.com/orgs/ydataai/projects/16/views/2). This mode was introduced in version v2.4.0 -`ydata-profiling` includes a minimal configuration file where the most +`data-profiling` includes a minimal configuration file where the most expensive computations are turned off by default. This is the recommended starting point for larger datasets. @@ -58,7 +58,7 @@ profile.to_file("output.html") ``` This configuration file can be found here: -[config_minimal.yaml](https://github.com/ydataai/ydata-profiling/blob/master/src/ydata_profiling/config_minimal.yaml). +[config_minimal.yaml](https://github.com/Data-Centric-AI-Community/data-profiling/blob/master/src/data_profiling/config_minimal.yaml). More details on settings and configuration are available in `../advanced_usage/available_settings`{.interpreted-text role="doc"}. @@ -103,7 +103,7 @@ that only the interactions with these variables in specific are computed. ``` python linenums="1" title="Disable expensive computations" -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport import pandas as pd # Reading the data @@ -127,14 +127,14 @@ role="doc"}. # Concurrency -`ydata-profiling` is a project under active development. One of the +`data-profiling` is a project under active development. 
One of the highly desired features is the addition of a scalable backend such as [Modin](https://github.com/modin-project/modin) or [Dask](https://dask.org/). Keep an eye on the -[GitHub](https://github.com/ydataai/ydata-profiling/issues) page to +[GitHub](https://github.com/Data-Centric-AI-Community/data-profiling/issues) page to follow the updates on the implementation of a concurrent and highly scalable backend. Specifically, development of a Spark backend is [currently -underway](https://github.com/ydataai/ydata-profiling/projects/3). +underway](https://github.com/Data-Centric-AI-Community/data-profiling/projects/3). diff --git a/docs/features/collaborative_data_profiling.md b/docs/features/collaborative_data_profiling.md index 90de5d61c..68f4527d4 100644 --- a/docs/features/collaborative_data_profiling.md +++ b/docs/features/collaborative_data_profiling.md @@ -9,11 +9,11 @@ A collaborative experience to profile datasets & relational databases [YData Fabric](https://ydata.ai/products/fabric) is a Data-Centric AI development platform. YData Fabric provides all capabilities of -ydata-profiling in a hosted environment combined with a guided UI +data-profiling in a hosted environment combined with a guided UI experience. [Fabric's Data Catalog](https://ydata.ai/products/data_catalog), -a scalable and interactive version of ydata-profiling, +a scalable and interactive version of data-profiling, provides a comprehensive and powerful tool designed to enable data professionals, including data scientists and data engineers, to manage and understand data within an organization. The Data Catalog act as a diff --git a/docs/features/comparing_datasets.md b/docs/features/comparing_datasets.md index d55100d28..8209d4cf2 100644 --- a/docs/features/comparing_datasets.md +++ b/docs/features/comparing_datasets.md @@ -4,10 +4,10 @@ !!! note "Dataframes compare support" Profiling compare is supported from - ydata-profiling version 3.5.0 onwards. 
+ data-profiling version 3.5.0 onwards. Profiling compare is not *(yet!)* available for Spark Dataframes -`ydata-profiling` can be used to compare multiple version of the same +`data-profiling` can be used to compare multiple version of the same dataset. This is useful when comparing data from multiple time periods, such as two years. Another common scenario is to view the dataset profile for training, validation and test sets in machine learning. @@ -15,7 +15,7 @@ profile for training, validation and test sets in machine learning. The following syntax can be used to compare two datasets: ``` python linenums="1" title="Comparing 2 datasets" -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport train_df = pd.read_csv("train.csv") train_report = ProfileReport(train_df, title="Train") @@ -37,7 +37,7 @@ In order to compare more than two reports, the following syntax can be used: ``` python linenums="1" title="Comparing more than 2 datasets" -from ydata_profiling import ProfileReport, compare +from data_profiling import ProfileReport, compare comparison_report = compare([train_report, validation_report, test_report]) diff --git a/docs/features/custom_reports.md b/docs/features/custom_reports.md index 362bb1a26..9014c4a6b 100644 --- a/docs/features/custom_reports.md +++ b/docs/features/custom_reports.md @@ -2,7 +2,7 @@ In some situations, a user might want to customize the appearance of the report to match personal preferences or a corporate brand. -``ydata-profiling`` offers two major customization dimensions: +``data-profiling`` offers two major customization dimensions: the **styling of the HTML report** and the **styling of the visualizations and plots** contained within. @@ -64,7 +64,7 @@ values overview can also be customized via the ``plot`` argument. 
To customize the palette used by the correlation matrix, use the ``correlation`` key: ``` python linenums="1" title="Changing visualizations color palettes" - from ydata_profiling import ProfileReport + from data_profiling import ProfileReport profile = ProfileReport( df, @@ -77,7 +77,7 @@ Similarly, the palette for *Missing values* can be changed using ``missing`` arg ``` python linenums="1" python - from ydata_profiling import ProfileReport + from data_profiling import ProfileReport profile = ProfileReport( df, @@ -87,7 +87,7 @@ Similarly, the palette for *Missing values* can be changed using ``missing`` arg ) ``` -``ydata-profiling`` accepts all ``cmap`` values (colormaps) accepted by ``matplotlib``. +``data-profiling`` accepts all ``cmap`` values (colormaps) accepted by ``matplotlib``. The list of available colour maps can [be accessed here](https://matplotlib.org/stable/tutorials/colors/colormaps.html>). Alternatively, it is possible to create [custom palettes](https://matplotlib.org/stable/gallery/color/custom_cmap.html>). diff --git a/docs/features/metadata.md b/docs/features/metadata.md index d830d0d2d..66d849f2e 100644 --- a/docs/features/metadata.md +++ b/docs/features/metadata.md @@ -4,7 +4,7 @@ When sharing reports with coworkers or publishing online, it might be important to include metadata of the dataset, such as author, copyright -holder or descriptions. `ydata-profiling` allows complementing a report +holder or descriptions. `data-profiling` allows complementing a report with that information. Inspired by [schema.org\'s Dataset](https://schema.org/Dataset), the currently supported properties are *description*, *creator*, *author*, *url*, *copyright_year* and @@ -33,7 +33,7 @@ report.to_file(Path("stata_auto_report.html")) In addition to providing dataset details, often users want to include column-specific descriptions when sharing reports with team members and -stakeholders. `ydata-profiling` supports creating these descriptions, so +stakeholders. 
`data-profiling` supports creating these descriptions, so that the report includes a built-in data dictionary. By default, the descriptions are presented in the *Overview* section of the report, next to each variable. @@ -64,7 +64,7 @@ Alternatively, column descriptions can be loaded from a JSON file: ``` python linenums="1" title="Generate a report with descriptions per variable from a JSON definitions file" import json import pandas as pd -import ydata_profiling +import data_profiling definition_file = dataset_column_definition.json @@ -87,8 +87,8 @@ report.to_file("report.html") In addition to providing dataset details, users often want to include set type schemas. This is particularly important when integrating -`ydata-profiling` generation with the information already in a data -catalog. When using `ydata-profiling` ProfileReport, users can set the +`data-profiling` generation with the information already in a data +catalog. When using `data-profiling` ProfileReport, users can set the type_schema property to control the generated profiling data types. By default, the `type_schema` is automatically inferred with [visions](https://github.com/dylan-profiler/visions). @@ -96,8 +96,8 @@ default, the `type_schema` is automatically inferred with [visions](https://gith import json import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file file_name = cache_file( "titanic.csv", diff --git a/docs/features/pii_identification_management.md b/docs/features/pii_identification_management.md index b3cb55ed4..0c3f04f55 100644 --- a/docs/features/pii_identification_management.md +++ b/docs/features/pii_identification_management.md @@ -12,7 +12,7 @@ This includes but is not limited to, names, addresses, phone numbers, social sec and financial information. 
PII is crucial in today's digital age, where data is extensively collected, stored, and processed. -[YData Fabric Data Catalog](https://ydata.ai/products/data_catalog), a scalable and interactive version of ydata-profiling, +[YData Fabric Data Catalog](https://ydata.ai/products/data_catalog), a scalable and interactive version of data-profiling, integrates into the data profiling experience, an advanced machine learning solutions based on a Named Entity Recognition (NER) model combine with traditional rule-based patterns identification, allowing to efficiently detect PII. diff --git a/docs/features/sensitive_data.md b/docs/features/sensitive_data.md index 9ad80c7a6..41e970a08 100644 --- a/docs/features/sensitive_data.md +++ b/docs/features/sensitive_data.md @@ -10,7 +10,7 @@ report and no individual records are shown: report = df.profile_report(sensitive=True) ``` -Additionally, `ydata-profiling` does not send data to external +Additionally, `data-profiling` does not send data to external services, making it suitable for private data. ## Sample and duplicates diff --git a/docs/features/time_series_datasets.md b/docs/features/time_series_datasets.md index b06dc0cf6..7198ae339 100644 --- a/docs/features/time_series_datasets.md +++ b/docs/features/time_series_datasets.md @@ -1,6 +1,6 @@ # Time-Series data -`ydata-profiling` can be used for a quick Exploratory Data Analysis on +`data-profiling` can be used for a quick Exploratory Data Analysis on time-series data. This is useful for a quick understanding on the behaviour of time dependent variables regarding behaviours such as time plots, seasonality, trends, stationary and data gaps. @@ -17,7 +17,7 @@ values or by entries missing in the time index. check out [blog content here](https://ydata.ai/resources/how-to-do-an-eda-for-time-series). You can find the a [otebook with the - [full code in our examples folder](https://github.com/ydataai/ydata-profiling/tree/develop/examples/usaairquality). 
+ [full code in our examples folder](https://github.com/Data-Centric-AI-Community/data-profiling/tree/develop/examples/usaairquality).
![Time-series profiling](../_static/img/time-series_profiling.gif){width="800"} @@ -32,8 +32,8 @@ assumption that the dataset includes time dependent features: ``` python linenums="1" title="Setting the configurations for time-series profiling" import pandas as pd -from ydata_profiling.utils.cache import cache_file -from ydata_profiling import ProfileReport +from data_profiling.utils.cache import cache_file +from data_profiling import ProfileReport file_name = cache_file( "pollution_us_2000_2016.csv", @@ -63,7 +63,7 @@ validation by setting the x configuration. ### Warnings and validations -Specific to time-series analysis, 2 new warnings were added to the `ydata-profiling` +Specific to time-series analysis, 2 new warnings were added to the `data-profiling` warnings family: **NON_STATIONARY** and **SEASONAL**. #### Stationarity @@ -74,7 +74,7 @@ forecasting and modeling techniques because they often assume that the underlyin data is stationary. Stationarity simplifies the modeling process by making it easier to detect patterns and trends. -`ydata-profiling` stationary warning is based on an **Augmented Dickey-Fuller(ADF)** test. +`data-profiling` stationary warning is based on an **Augmented Dickey-Fuller(ADF)** test. Nevertheless, you should always combine the output of this warning with a visual inspection to your time-series behaviour and search for variance of the rolling statistics analysis. @@ -87,7 +87,7 @@ are known as seasonality and are often observed in data associated with yearly, monthly, weekly, or daily cycles. Seasonal time-series data can be challenging to model accurately without addressing the underlying seasonality. -`ydata-profiling` seasonality warning is based on an **Augmented Dickey-Fuller(ADF)** test. +`data-profiling` seasonality warning is based on an **Augmented Dickey-Fuller(ADF)** test. 
Nevertheless, you should always combine the output of this warning with a seasonal decomposition PACF and ACF plots (also computed in your time-series profiling). @@ -104,7 +104,7 @@ intervals within your time-series data where observations are missing or incompl While these gaps might seem like inconveniences, they hold valuable information and can significantly impact the quality and reliability of your analyses and predictions. -`ydata-profiling` automated identification of potential time-series gaps is based +`data-profiling` automated identification of potential time-series gaps is based on time intervals analysis. By analyzing the time intervals between data points, the gaps are expected to be reflected as larger intervals in the distribution. @@ -119,8 +119,8 @@ variables that you want to analyze as time-series are profiled as such: ``` python linenums="1" title="Setting what variables are time-series" import pandas as pd -from ydata_profiling.utils.cache import cache_file -from ydata_profiling import ProfileReport +from data_profiling.utils.cache import cache_file +from data_profiling import ProfileReport file_name = cache_file( "pollution_us_2000_2016.csv", diff --git a/docs/getting-started/concepts.md b/docs/getting-started/concepts.md index aa38fcfba..f8bc2fd6f 100644 --- a/docs/getting-started/concepts.md +++ b/docs/getting-started/concepts.md @@ -2,7 +2,7 @@ !!! question "Text/corpus data - your input is needed!" - `ydata-profiling` team is considering the support of a new set of features for corpus data + `data-profiling` team is considering the support of a new set of features for corpus data and we want to hear from you! We're particularly interested in understanding why you think these features would be useful, and your input will help us prioritize and refine this development. 
@@ -19,7 +19,7 @@ It can reveal information about data volatility, periodicity, and anomalies, fac - **Text:** when it comes to text data, such as strings or documents, the profiling offers insightful statistics on the distribution of word frequencies, common phrases, and unique words. ## Data types -Types, when going beyond the logical data types such as integer, floats, etc, are a powerful abstraction for effective data analysis, allowing analysis under higher level lenses. ``ydata-profiling`` is backed by a powerful type system developed specifically for data analysis: `visions `_. Currently, ``ydata-profiling`` recognizes the following types: +Types, when going beyond the logical data types such as integer, floats, etc, are a powerful abstraction for effective data analysis, allowing analysis under higher level lenses. ``data-profiling`` is backed by a powerful type system developed specifically for data analysis: `visions `_. Currently, ``data-profiling`` recognizes the following types: - Boolean - Numerical @@ -33,12 +33,12 @@ Types, when going beyond the logical data types such as integer, floats, etc, a Appropriate typesets can both improve the overall expressiveness and reduce the complexity of the analysis/code. User customized summarizations and type definitions are fully supported, with PRs supporting new data types -for specific use cases more than welcome. For reference, you can check the implementation of ``ydata-profiling``'s -default typeset [here](https://github.com/ydataai/ydata-profiling/blob/develop/src/ydata_profiling/model/typeset.py). +for specific use cases more than welcome. For reference, you can check the implementation of ``data-profiling``'s +default typeset [here](https://github.com/Data-Centric-AI-Community/data-profiling/blob/develop/src/data_profiling/model/typeset.py). ## Data quality alerts
- ![Data quality warnings ydata-profiling](../_static/img/warnings_section.png){width="700"} + ![Data quality warnings data-profiling](../_static/img/warnings_section.png){width="700"}
Data quality warnings
@@ -54,7 +54,7 @@ as well as settings to disable specific ones, can be consulted in the [documenta ## Univariate profiling
- ![Univariate profiling ydata-profiling](../_static/img/univariate_profiling.png){width="700"} + ![Univariate profiling data-profiling](../_static/img/univariate_profiling.png){width="700"}
Univariate profiling metrics and visualization
@@ -67,7 +67,7 @@ For more details about the different metrics and visualizations check the Univar ## Multivariate profiling
- ![Multivariate profiling ydata-profiling](../_static/img/multivariate_profiling.png){width="700"} + ![Multivariate profiling data-profiling](../_static/img/multivariate_profiling.png){width="700"}
Multivariate profiling metrics and visualization
@@ -102,7 +102,7 @@ The identification of outliers allows the data analyst or scientist to assess wh Feature limited to user of the [cloud hosted solution](http://ydata.ai/register?utm_source=ydata-profiling&utm_medium=documentation&utm_campaign=YData%20Fabric%20Community). ## Preview data -For a quick overview of the data, ydata-profiling provides the following sections that can be easily configure by the user: +For a quick overview of the data, data-profiling provides the following sections that can be easily configure by the user: - First n records of a given dataset - Last n records of a given dataset - A table containing observed duplicates (exact matches) diff --git a/docs/getting-started/examples.md b/docs/getting-started/examples.md index 98ca106c1..2fdbdf451 100644 --- a/docs/getting-started/examples.md +++ b/docs/getting-started/examples.md @@ -11,12 +11,12 @@ across a wide range of dataset and data types: Meteorites](../examples/meteorites/meteorites_report.html) (comprehensive set of meteorite landing - object properties and locations) - [![Open in Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ydataai/ydata-profiling/master?filepath=examples%2Fmeteorites%2Fmeteorites.ipynb) - [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ydataai/ydata-profiling/blob/master/examples/meteorites/meteorites.ipynb) + [![Open in Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Data-Centric-AI-Community/data-profiling/master?filepath=examples%2Fmeteorites%2Fmeteorites.ipynb) + [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Data-Centric-AI-Community/data-profiling/blob/master/examples/meteorites/meteorites.ipynb) - [Titanic](../examples/titanic/titanic_report.html) (the \"Wonderwall\" of datasets) - [![Open in 
Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ydataai/ydata-profiling/master?filepath=examples%2Ftitanic%2Ftitanic.ipynb) - [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ydataai/ydata-profiling/blob/master/examples/titanic/titanic.ipynb) + [![Open in Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Data-Centric-AI-Community/data-profiling/master?filepath=examples%2Ftitanic%2Ftitanic.ipynb) + [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Data-Centric-AI-Community/data-profiling/blob/master/examples/titanic/titanic.ipynb) - [NZA](../examples/nza/nza_report.html) (open data from the Dutch Healthcare Authority) - [Stata @@ -41,9 +41,9 @@ across a wide range of dataset and data types: prices](../examples/features/flatly_report.html) (simple pricing evolution datasets, showcasing the theming options) - [USA Air - Quality](https://github.com/ydataai/ydata-profiling/tree/master/examples/usaairquality) + Quality](https://github.com/Data-Centric-AI-Community/data-profiling/tree/master/examples/usaairquality) (Time-series air quality dataset EDA example) -- [HCC](https://github.com/ydataai/ydata-profiling/tree/master/examples/hcc) +- [HCC](https://github.com/Data-Centric-AI-Community/data-profiling/tree/master/examples/hcc) (Open dataset from healthcare, showcasing compare between two sets of data, before and after preprocessing) diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index d271e725d..b002ec51b 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -5,13 +5,13 @@ You can install using the ``pip`` package manager by running: ```console - pip install -U ydata-profiling + pip install -U data-profiling ``` If you are in a notebook (locally, LambdaLabs, Google Colab or Kaggle), you can run: ```python 
linenums="1" import sys - !{sys.executable} -m pip install -U ydata-profiling[notebook] + !{sys.executable} -m pip install -U data-profiling[notebook] !pip install jupyter-contrib-nbextensions ``` Afterwards you can run the following command @@ -23,14 +23,14 @@ You may have to restart the kernel or runtime for the package to work. ## Using conda -[ydata-profiling through Conda](https://anaconda.org/conda-forge/ydata-profiling) +[data-profiling through Conda](https://anaconda.org/conda-forge/data-profiling) A new conda environment containing the module can be created via: ```console - conda env create -n ydata-profiling - conda activate ydata-profiling - conda install -c conda-forge ydata-profiling + conda env create -n data-profiling + conda activate data-profiling + conda install -c conda-forge data-profiling ``` !!! tip @@ -43,7 +43,7 @@ For the Jupyter widgets extension (used for progress bars and the interactive wi This can be done via ``pip``: ```console - pip install ydata-profiling[notebook] + pip install data-profiling[notebook] jupyter nbextension enable --py widgetsnbextension ``` @@ -57,7 +57,7 @@ environment configurations, refer to [the official ipywidgets documentation](htt ## From source -Download the source code by cloning the repository or by clicking on [Download ZIP](https://github.com/ydataai/ydata-profiling/archive/master.zip). +Download the source code by cloning the repository or by clicking on [Download ZIP](https://github.com/Data-Centric-AI-Community/data-profiling/archive/master.zip). 
Install it by navigating to the uncompressed directory and running: ```console @@ -67,7 +67,7 @@ Install it by navigating to the uncompressed directory and running: This can also be done via the following one-liner: ```console - pip install https://github.com/ydataai/ydata-profiling/archive/master.zip + pip install https://github.com/Data-Centric-AI-Community/data-profiling/archive/master.zip ``` @@ -79,8 +79,8 @@ The package declares some "extras", sets of additional dependencies. * ``[pyspark]``: support for pyspark engine to run the profile on big datasets Install these with e.g. -````console - pip install -U ydata-profiling[notebook,unicode, pyspark] -```` +```console + pip install -U data-profiling[notebook,unicode, pyspark] +``` \ No newline at end of file diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 635bd1323..f425a17f0 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -6,7 +6,7 @@ using: ``` python linenums="1" import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport df = pd.DataFrame(np.random.rand(100, 5), columns=["a", "b", "c", "d", "e"]) ``` @@ -23,7 +23,7 @@ There are two interfaces to consume the report inside a Jupyter notebook (see animations below): through widgets and through an embedded HTML report. -![Running ydata-proling inside a Jupyter Notebook](../_static/img/widgets.gif) +![Running data-profiling inside a Jupyter Notebook](../_static/img/widgets.gif) This is achieved by simply displaying the report as a set of widgets. 
In a Jupyter Notebook, run: @@ -38,7 +38,7 @@ The HTML report can be directly embedded in a cell in a similar fashion: profile.to_notebook_iframe() ``` -![ydata-profiling widgets](../_static/img/iframe.gif) +![data-profiling widgets](../_static/img/iframe.gif) ## Exporting the report to a file @@ -62,14 +62,14 @@ profile.to_file("your_report.json") ## Command line usage For standard formatted CSV files (which can be read directly by pandas -without additional settings), the `ydata_profiling` executable can be +without additional settings), the `data_profiling` executable can be used in the command line. The example below generates a report named *Example Profiling Report*, using a configuration file called `default.yaml`, in the file `report.html` by processing a `data.csv` dataset. ``` bash -ydata_profiling --title "Example Profiling Report" --config_file default.yaml data.csv report.html +data_profiling --title "Example Profiling Report" --config_file default.yaml data.csv report.html ``` Information about all available options and arguments can be viewed @@ -79,7 +79,7 @@ filenames, setting a custom report title, specifying role="doc"} and control other advanced aspects of the experience. ``` bash -ydata_profiling -h +data_profiling -h ```
![Image title](../_static/img/cli.png){width="500"} @@ -96,8 +96,8 @@ data profiling option. profile = ProfileReport(df, title="Profiling Report", explorative=True) ``` -On the CLI utility `ydata_profiling`, this mode can be activated with -the `-e` flag. Learn more about configuring `ydata-profiling` on the +On the CLI utility `data_profiling`, this mode can be activated with +the `-e` flag. Learn more about configuring `data-profiling` on the `../advanced_usage/available_settings`{.interpreted-text role="doc"}. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index be0e7ee0f..16268d25d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,30 +2,30 @@ Data quality profiling and exploratory data analysis are crucial steps in the process of Data Science and Machine Learning development. -YData-profiling is a leading tool in the data understanding step of the data science workflow as a pioneering Python package. +Data-profiling is a leading tool in the data understanding step of the data science workflow as a pioneering Python package. -`ydata-profiling` is a leading package for data profiling, that automates and standardizes the generation of detailed reports, +`data-profiling` is a leading package for data profiling, that automates and standardizes the generation of detailed reports, complete with statistics and visualizations. The significance of the package lies in how it streamlines the process of understanding and preparing data for analysis in a single line of code! If you're ready to get started see the [quickstart](getting-started/quickstart.md)! !!! tip "Profiling and scale and for databases" - Take your data profiling to the next level - try ydata-profiling at scale and for databases! + Take your data profiling to the next level - try data-profiling at scale and for databases! Experience enterprise-level scalability and database support while enjoying the familiar open-source features you love. 
- Dive into large datasets with ease and ensure data quality like never before. Try [YData Fabric community version](https://ydata.ai/register)! + Dive into large datasets with ease and ensure data quality like never before. Try [YData Fabric community version](https://ydata.ai/register)! -![ydata-profiling report](_static/img/ydata-profiling.gif) +![data-profiling report](_static/img/data-profiling.gif) -## Why use ydata-profiling? +## Why use data-profiling? -`ydata-profiling` is a valuable tool for data scientists and analysts because it streamlines EDA, provides comprehensive insights, enhances data quality, +`data-profiling` is a valuable tool for data scientists and analysts because it streamlines EDA, provides comprehensive insights, enhances data quality, and promotes data science best practices. - **Simple to user**: It is so **simple to use** - a single line of code is what you need to get you started. *Do you really need more to convince you?* 😛 ```python linenums="1" import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport df = pd.read_csv('data.csv') profile = ProfileReport(df, title="Profiling Report") @@ -35,13 +35,13 @@ providing a holistic view of your data. The report is shareable as a html file o - **Data quality assessment**: excel at the identification of missing data, duplicate entries and outliers. These insights are essential for data cleaning and preparation, ensuring the reliability of your analysis and leading to early problems' identification. - **Ease of integration with other flows**: all metrics of the data profiling can be consumed in a standard JSON format. 
-- **Data exploration for large datasets**: even with dataset with a large number of rows, `ydata-profiling` will be able to help you +- **Data exploration for large datasets**: even with dataset with a large number of rows, `data-profiling` will be able to help you as it supports both Pandas Dataframes and [Spark Dataframes](integrations/pyspark.md). To learn more about the package check out [concepts overview](getting-started/concepts.md). ## 📝 Features, functionalities & integrations -YData-profiling can be used to deliver a variety of different applications. The documentation includes guides, tips and tricks for tackling them: +Data-profiling can be used to deliver a variety of different applications. The documentation includes guides, tips and tricks for tackling them: !!! question "Data Catalog with data profiling for databases & storages" @@ -55,7 +55,7 @@ YData-profiling can be used to deliver a variety of different applications. The |----------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------| | [Comparing datasets](features/comparing_datasets.md) | Comparing multiple version of the same dataset | | [Profiling a Time-Series dataset](features/time_series_datasets.md) | Generating a report for a time-series dataset with a single line of code | -| [Profiling large datasets](features/big_data.md) | Tips on how to prepare data and configure `ydata-profiling` for working with large datasets | +| [Profiling large datasets](features/big_data.md) | Tips on how to prepare data and configure `data-profiling` for working with large datasets | | [Handling sensitive data](features/sensitive_data.md) | Generating reports which are mindful about sensitive data in the input dataset | | [Dataset metadata and data dictionaries](features/metadata.md) | Complementing the report with dataset details and column-specific data dictionaries | | [Customizing the 
report's appearance](features/custom_reports.md ) | Changing the appearance of the report's page and of the contained visualizations | @@ -64,10 +64,10 @@ YData-profiling can be used to deliver a variety of different applications. The ### Tutorials -Looking for how to use certain features or how to integrate `ydata-profiling` in your currect stack and workflows, +Looking for how to use certain features or how to integrate `data-profiling` in your current stack and workflows, check our step-by-step tutorials. -- **How to master exploratory data analysis with ydata-profiling?** Check this [step-by-step tutorial](https://medium.com/ydata-ai/auditing-data-quality-with-pandas-profiling-b1bf1919f856). +- **How to master exploratory data analysis with data-profiling?** Check this [step-by-step tutorial](https://medium.com/ydata-ai/auditing-data-quality-with-pandas-profiling-b1bf1919f856). - **Looking on how to do exploratory data analysis for Time-series 🕛?** Check how to in this [blogpost](https://towardsdatascience.com/how-to-do-an-eda-for-time-series-cbb92b3b1913). To learn more about this feature [check the documentation](features/time_series_datasets.md). @@ -80,7 +80,7 @@ For more information about spark integration [check the documentation](integrati Need help? Want to share a perspective? Report a bug? Ideas for collaborations? Reach out via the following channels: - [Stack Overflow](https://stackoverflow.com/questions/tagged/pandas-profiling+or+ydata-profiling): ideal for asking questions on how to use the package -- [GitHub Issues](https://github.com/ydataai/ydata-profiling/issues): bugs, proposals for changes, feature requests +- [GitHub Issues](https://github.com/Data-Centric-AI-Community/data-profiling/issues): bugs, proposals for changes, feature requests - [Discord](https://tiny.ydata.ai/dcai-ydata-profiling): ideal for projects discussions, ask questions, collaborations, general chat !!!
tip "Help us prioritizing - before reporting, double check, it is always better to upvote!" @@ -100,4 +100,4 @@ A big thank you to all our amazing contributors! ### ⚡ We need your help - Spark! Spark support has been released, but we are always looking for an extra pair of hands 👐. -[Check current work in progress!](https://github.com/ydataai/ydata-profiling/projects/3). +[Check current work in progress!](https://github.com/Data-Centric-AI-Community/data-profiling/projects/3). diff --git a/docs/integrations/bytewax.md b/docs/integrations/bytewax.md index 783e9dbae..5fe1d624e 100644 --- a/docs/integrations/bytewax.md +++ b/docs/integrations/bytewax.md @@ -9,7 +9,7 @@ applications with capabilities similar to Flink, Spark, and Kafka Streams, while providing a friendly and familiar interface and 100% compatibility with the Python ecosystem. -## Stream processing with Bytewax and ydata-profiling +## Stream processing with Bytewax and data-profiling Data Profiling is key to a successful start of any machine learning task, and refers to the step of [thoroughly understanding our data](https://ydata.ai/resources/advanced-eda-made-simple-using-pandas-profiling): its structure, behavior, and quality. @@ -22,7 +22,7 @@ collection or processing (e.g., erroneous values or inconsistent features). !!! note "Package versions" - The integration with bytewax is available for ydata-profiling with + The integration with bytewax is available for data-profiling with any version >=3.0.0 ### Simulating a streaming @@ -65,10 +65,10 @@ flow.map(lambda reading_data: (reading_data["device"], reading_data)) Now we will take advantage of the stateful capabilities of bytewax to gather data for each device over a duration of time that we have -defined. ydata-profiling expects a snapshot of the data over time, which +defined. data-profiling expects a snapshot of the data over time, which makes the window operator the perfect method to use to do this. 
-In ydata-profiling, we are able to produce summarizing statistics for a +In data-profiling, we are able to produce summarizing statistics for a dataframe which is specified for a particular context. For instance, in this example, we can produce snapshots of data referring to each IoT device or to particular time frames: @@ -101,13 +101,13 @@ flow.fold_window("running_average", cc, wc, list, acc_values) flow.inspect(print) ``` -After the snapshots are defined, leveraging ydata-profiling is as simple +After the snapshots are defined, leveraging data-profiling is as simple as calling the ProfileReport for each of the dataframes we would like to analyze: ``` python import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def profile(device_id__readings): @@ -158,7 +158,7 @@ Assuming we are in the same directory as the file with the dataflow definition, we can run it using: ``` linenums="1" -python -m bytewax.run ydata-profiling-streaming:flow +python -m bytewax.run data-profiling-streaming:flow ``` We can then use the profiling reports to validate the data quality, @@ -185,6 +185,6 @@ comparison_report.to_file("comparison_report.html") Now you're all set to start exploring your data streams! Bytewax takes care of all the processes necessary to handle and structure data streams into snapshots, which can then be summarized and compared with -ydata-profiling through a comprehensive report of data characteristics. +data-profiling through a comprehensive report of data characteristics. 
\ No newline at end of file diff --git a/docs/integrations/great_expectations.md b/docs/integrations/great_expectations.md index 0707f0d6e..5f41da49a 100644 --- a/docs/integrations/great_expectations.md +++ b/docs/integrations/great_expectations.md @@ -6,7 +6,7 @@ You can recreate the integration with the following packages versions: - - ydata-profiling==2.1.0 + - data-profiling==2.1.0 - great-expectations==0.13.4 [Great Expectations](https://www.greatexpectations.io) is a Python-based @@ -15,7 +15,7 @@ data. It helps you to maintain data quality and improve communication about data between teams. With Great Expectations, you can assert what you expect from the data you load and transform, and catch data issues quickly -- Expectations are basically *unit tests for your data*. -`ydata-profiling` features a method to create a suite of Expectations +`data-profiling` features a method to create a suite of Expectations based on the results of your `ProfileReport`! ## About Great Expectations @@ -47,13 +47,13 @@ documentation](https://docs.greatexpectations.io/en/latest/) and join the [Great Expectations Slack channel](https://www.greatexpectations.io/slack) for help. -## Creating Expectation Suites with ydata-profiling +## Creating Expectation Suites with data-profiling An *Expectation Suite* is simply a set of Expectations. You can create Expectation Suites by writing out individual statements, such as the one above, or by automatically generating them based on profiler results. -`ydata-profiling` provides a simple `to_expectation_suite()` method that +`data-profiling` provides a simple `to_expectation_suite()` method that returns a Great Expectations `ExpectationSuite` object which contains a set of Expectations. @@ -68,11 +68,11 @@ project\'s directory.
``` python linenums="1" title="Get your set of expectations" import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport df = pd.read_csv("titanic.csv") -profile = ProfileReport(df, title="YData Profiling Report", explorative=True) +profile = ProfileReport(df, title="Data Profiling Report", explorative=True) # Obtain an Expectation Suite with a set of default Expectations # By default, this also profiles the dataset, saves the suite, runs validation, and builds Data Docs @@ -106,7 +106,7 @@ suite = profile.to_expectation_suite( ``` See [the Great Expectations -Examples](https://github.com/ydataai/ydata-profiling/blob/master/examples/integrations/great_expectations/great_expectations_example.py) +Examples](https://github.com/Data-Centric-AI-Community/data-profiling/blob/master/examples/integrations/great_expectations/great_expectations_example.py) for complete examples. diff --git a/docs/integrations/ides.md b/docs/integrations/ides.md index e40ba6a9c..f54b0d52f 100644 --- a/docs/integrations/ides.md +++ b/docs/integrations/ides.md @@ -5,22 +5,22 @@ Environments, such as [PyCharm](https://www.jetbrains.com/pycharm/). ## PyCharm -1. Install `ydata-profiling` via +1. Install `data-profiling` via `../getting_started/installation`{.interpreted-text role="doc"} -2. Locate your `ydata-profiling` executable. +2. Locate your `data-profiling` executable. On macOS / Linux / BSD: ``` console - $ which ydata_profiling - (example) /usr/local/bin/ydata_profiling + $ which data_profiling + (example) /usr/local/bin/data_profiling ``` On Windows: ``` console - $ where ydata_profiling - (example) C:\ProgramData\Anaconda3\Scripts\ydata_profiling.exe + $ where data_profiling + (example) C:\ProgramData\Anaconda3\Scripts\data_profiling.exe ``` 3. 
In PyCharm, go to *Settings* (or *Preferences* on macOS) \ *Tools* diff --git a/docs/integrations/interactive_applications.md b/docs/integrations/interactive_applications.md index 53801a2e7..36ab47eb9 100644 --- a/docs/integrations/interactive_applications.md +++ b/docs/integrations/interactive_applications.md @@ -1,6 +1,6 @@ # Interactive applications -The `ydata-profiling` report, through several of its interfaces, can be +The `data-profiling` report, through several of its interfaces, can be integrated in interactive data applications such as those developed with [Streamlit](https://streamlit.io) or [Panel](https://panel.holoviz.org). @@ -11,14 +11,14 @@ made to build web-apps for machine learning and data science. !!! note - This feature is only available for versions previous to ydata-profiling + This feature is only available for versions previous to data-profiling (<=3.6.2). ![image](https://user-images.githubusercontent.com/9756388/140196751-69b0a361-99ed-4fc3-8282-cb0cd1fb0d59.gif) -``` python linenums="1" title="Creating a simple Streamlit app with ydata-profiling" +``` python linenums="1" title="Creating a simple Streamlit app with data-profiling" import pandas as pd -import ydata_profiling +import data_profiling import streamlit as st from streamlit_pandas_profiling import st_profile_report df = pd.read_csv( @@ -31,7 +31,7 @@ st.write(df) st_profile_report(pr) ``` -You can install the [ydata-profiling +You can install the [data-profiling component](https://github.com/Ghasel/streamlit-pandas-profiling) for Streamlit with pip. @@ -44,8 +44,8 @@ pip install streamlit-pandas-profiling [Dash](hhttps://github.com/plotly/dash) is a Python framework for building machine learning & data science web apps, built on top of Plotly.js, React and Flask. It is commonly used for interactive data -exploration, precisely where `ydata-profiling` also focuses. 
Inline -access to the insights provided by `ydata-profiling` can help guide the +exploration, precisely where `data-profiling` also focuses. Inline +access to the insights provided by `data-profiling` can help guide the exploratory work allowed by Dash. To integrate a Profiling Report inside a Dash app, two options exist: @@ -55,7 +55,7 @@ Assuming the HTML version of the report is in `report.html`, move it to a folder called `assets`. The snippet below shows a simple Dash app, `app.py`, embedding this report: -``` python linenums="1" title="Create a Dash dashboard with ydata-profiling integrated" +``` python linenums="1" title="Create a Dash dashboard with data-profiling integrated" import dash from dash import html @@ -92,7 +92,7 @@ And configure the Dash app as in the following snippet: ``` python linenums="1" title="Embed the raw html into Dash" import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport import dash from dash import html import dash_dangerously_set_inner_html @@ -122,9 +122,9 @@ if __name__ == "__main__": When running `python app.py`, a Dash app with the report embedded will be available on -`` `_. While this option is somewhat more direct, **the embedded report will not be fully interactive, with some buttons unclickable**. Panel ----- For more information on how to use ``ydata-profiling\`[ +`` `_. While this option is somewhat more direct, **the embedded report will not be fully interactive, with some buttons unclickable**. Panel ----- For more information on how to use ``data-profiling\`[ in Panel, see \`this GitHub issue -\]{.title-ref}\_ +\]{.title-ref}\_ and [this integration example](https://awesome-panel.org/pandas_profiling_app). 
diff --git a/docs/integrations/other_dataframe_libraries.md b/docs/integrations/other_dataframe_libraries.md index 5dcf5373f..8169c4466 100644 --- a/docs/integrations/other_dataframe_libraries.md +++ b/docs/integrations/other_dataframe_libraries.md @@ -1,12 +1,12 @@ # Other DataFrame libraries -`ydata-profiling` is built on `pandas` and `numpy`. Pandas supports a +`data-profiling` is built on `pandas` and `numpy`. Pandas supports a wide range of data formats including CSV, XLSX, SQL, JSON, HDF5, SAS, BigQuery and Stata. Read more on [supported formats by Pandas](https://pandas.pydata.org/docs/user_guide/io.html). If you have data in another framework of the Python Data ecosystem, you -can use `ydata-profiling` by converting to a pandas `DataFrame`, as +can use `data-profiling` by converting to a pandas `DataFrame`, as direct integrations are not yet supported. Large datasets might require sampling (as seen in our documentation on [how to profile large datasets](../features/big_data.md)). diff --git a/docs/integrations/pipelines.md b/docs/integrations/pipelines.md index 721a54444..46a78501e 100644 --- a/docs/integrations/pipelines.md +++ b/docs/integrations/pipelines.md @@ -1,6 +1,6 @@ # Pipelines -With Python, command-line and Jupyter interfaces, `ydata-profiling` +With Python, command-line and Jupyter interfaces, `data-profiling` integrates seamlessly with DAG execution tools like Airflow, Dagster, Kedro and Prefect, allowing it to easily becomes a building block of data ingestion and analysis pipelines. Integration with @@ -13,9 +13,9 @@ similar way as with Airflow. !!! tip "Fabric Community version" [YData Fabric](https://ydata.ai/products/fabric) has a community version that you can start using today to create data workflows with pipelines. - [Sign up here](http://ydata.ai/register?utm_source=ydata-profiling&utm_medium=documentation&utm_campaign=YData%20Fabric%20Community) and start building your pipelines. 
ydata-profiling is installed by default in all YData images. + [Sign up here](http://ydata.ai/register?utm_source=ydata-profiling&utm_medium=documentation&utm_campaign=YData%20Fabric%20Community) and start building your pipelines. data-profiling is installed by default in all YData images. -![ydata-profiling in a pipeline](../_static/img/profiling_pipelines.png) +![data-profiling in a pipeline](../_static/img/profiling_pipelines.png) YData Fabric's data pipelines are engineered to harness the capabilities of [Kubeflow](https://www.kubeflow.org/), providing a robust foundation for scalable and efficient data workflows. This technical integration ensures that data pipelines can seamlessly handle high data volumes and execute operations with optimal resource utilization. @@ -24,12 +24,12 @@ YData Fabric simplifies the process of data pipeline setup by abstracting comple The setup is done through a drag-and-drop experience while leveraging existing Jupyter Notebook environments. Check this video to see [how to create a pipeline in YData Fabric](https://www.youtube.com/watch?v=feNoXv34waM&t=8s). -```python linenums="1" title="Profile a csv with ydata-profiling in a pipeline" +```python linenums="1" title="Profile a csv with data-profiling in a pipeline" # Import required packages import json import pandas as pd -from ydata.profiling import ProfileReport +from data_profiling import ProfileReport # Read your dataset as a CSV dataset = pd.read_csv('data.csv') @@ -58,7 +58,7 @@ with open('mlpipeline-ui-metadata.json', 'w') as metadata_file: json.dump(metadata, metadata_file) ``` -You can find the notebook with this implementation in [ydata-profiling examples folder](https://github.com/ydataai/ydata-profiling/blob/develop/examples/integrations/ydata_fabric_pipelines/data_profiling.ipynb). 
+You can find the notebook with this implementation in [data-profiling examples folder](https://github.com/Data-Centric-AI-Community/data-profiling/blob/develop/examples/integrations/ydata_fabric_pipelines/data_profiling.ipynb). ## Airflow @@ -67,7 +67,7 @@ Integration with Airflow can be easily achieved through the or the [PythonOperator](https://airflow.apache.org/docs/stable/_api/airflow/operators/python_operator/index.html#airflow.operators.python_operator.PythonOperator). -``` python linenums="1" title="ydata-profiling with Airflow" +``` python linenums="1" title="data-profiling with Airflow" # Using the command line interface profiling_task = BashOperator( task_id="Profile Data", @@ -76,13 +76,13 @@ profiling_task = BashOperator( ) ``` -``` python linenums="1" title="ydata-profiling with Airflow" +``` python linenums="1" title="data-profiling with Airflow" # Using the Python interface -import ydata_profiling +import data_profiling def profile_data(file_name, report_file): df = pd.read_csv(file_name) - report = pandas_profiling.ProfileReport(df, title="Profiling Report in Airflow") + report = data_profiling.ProfileReport(df, title="Profiling Report in Airflow") report.to_file(report_file) return "Report generated at {}".format(report_file) diff --git a/docs/integrations/pyspark.md b/docs/integrations/pyspark.md index 7da7851ec..a988e04ee 100644 --- a/docs/integrations/pyspark.md +++ b/docs/integrations/pyspark.md @@ -2,7 +2,7 @@ !!! note ""Spark support" **Spark dataframes support** - Spark Dataframes profiling is available - from ydata-profiling version 4.0.0 onwards + from data-profiling version 4.0.0 onwards Data Profiling is a core step in the process of developing AI solutions. For small datasets, the data can be loaded into memory and easily @@ -11,7 +11,7 @@ what can be done? Big data engines, that distribute the workload through different machines, are the answer. 
Particularly, Spark rose as one of the most -used and adopted engines by the data community. `ydata-profiling` +used and adopted engines by the data community. `data-profiling` provides an ease-to-use interface to generate complete and comprehensive data profiling out of your Spark dataframes with a single line of code. @@ -71,12 +71,12 @@ command line to launch PySpark shell and confirm both python and pyspark versions. A more detailed tutorial for the installation can be found [here](https://sparkbyexamples.com/pyspark/how-to-install-pyspark-on-mac/) -### Install ydata-profiling +### Install data-profiling -Create a pip virtual environment or a conda environment and install `ydata-profiling` with pyspark as a dependency +Create a pip virtual environment or a conda environment and install `data-profiling` with pyspark as a dependency ```console -pip install ydata-profiling[pyspark] +pip install data-profiling[pyspark] ``` ## Profiling with Spark - Supported Features @@ -84,9 +84,9 @@ pip install ydata-profiling[pyspark] !!! note "Minimal mode" This mode was introduced in version v4.0.0 -`ydata-profiling` now supports Spark Dataframes profiling. You can find +`data-profiling` now supports Spark Dataframes profiling. You can find an example of the integration -[here](https://github.com/ydataai/ydata-profiling/blob/master/examples/features/spark_example.py). +[here](https://github.com/Data-Centric-AI-Community/data-profiling/blob/master/examples/features/spark_example.py). **Features supported:** @@ -103,7 +103,7 @@ an example of the integration ## Profiling with Spark DataFrames A quickstart example to profile data from a CSV leveraging Pyspark -engine and `ydata-profiling`. +engine and `data-profiling`. ``` python linenums="1" title="Profiling with Spark Dataframes" from pyspark.sql import SparkSession @@ -119,13 +119,13 @@ a = ProfileReport(df) a.to_file("spark_profile.html") ``` -### ydata-profiling in Databricks +### data-profiling in Databricks Yes! 
We have fantastic new coming with a full tutorial on how you can -use ydata-profiling in Databricks Notebooks. +use data-profiling in Databricks Notebooks. The notebook example can be found -[here](https://github.com/ydataai/ydata-profiling/tree/master/examples/integrations/databricks). +[here](https://github.com/Data-Centric-AI-Community/data-profiling/tree/master/examples/integrations/databricks). Stay tuned - we are going to update the documentation soon! diff --git a/docs/reference/history.md b/docs/reference/history.md index da58ab94b..866d7a4b3 100644 --- a/docs/reference/history.md +++ b/docs/reference/history.md @@ -1,10 +1,10 @@ # History & community -The `ydata-profiling` project became what it is today due to the work of +The `data-profiling` project became what it is today due to the work of the creators to make it successful. This page aims to highlight a bit of the development history. For the full picture, have a look at the [contributor -history](https://github.com/ydataai/pandas-profiling/graphs/contributors). +history](https://github.com/Data-Centric-AI-Community/data-profiling/graphs/contributors). **[YData](https://ydata.ai/)** is the company behind this successful package being responsible for releases such as the support for time-series, compare datasets and spark @@ -13,8 +13,8 @@ support. ## Thank you to our amazing contributors A big thank you to all our amazing contributors! - - + + Contributors wall made with [contrib.rocks](https://contrib.rocks). @@ -65,7 +65,7 @@ Chan](https://www.linkedin.com/in/edwin-chan/). ## Where are we now? At the time of writing, `pandas-profiling` is receiving a new face and -name `ydata-profiling`. Derived from the most recent and major feature, +name `data-profiling`. Derived from the most recent and major feature, Spark support, we have decided to move from [pandas]{.title-ref} to a name that opens the possibility of new integrations and developments. 
@@ -76,7 +76,7 @@ insurance companies, startups and universities. ## What's next? -`ydata-profiling` is committed to the mission of helping data-scientists +`data-profiling` is committed to the mission of helping data-scientists to adopt a Data-Centric approach towards the development of AI. Continuous development and support will to be part of the development of one of the most beloved open-sources by the data science community. diff --git a/docs/reference/resources.md b/docs/reference/resources.md index 54169461f..25befb472 100644 --- a/docs/reference/resources.md +++ b/docs/reference/resources.md @@ -68,7 +68,7 @@ Feel free to contribute it via a pull request on GitHub. ## Videos -- [How to install ydata-profiling with conda and Python 3.11](https://www.youtube.com/watch?v=fvXZcpTwbtA) +- [How to install data-profiling with conda and Python 3.11](https://www.youtube.com/watch?v=fvXZcpTwbtA) (Fabiana Clemente, June 2023, 2023) - [Installing Anaconda, Creating a virtual environment and installing pandas-profiling in it](https://www.youtube.com/watch?v=q2E8RLsznaA) diff --git a/docs/support-contribution/common_issues.md b/docs/support-contribution/common_issues.md index e0235aaae..1c581ec20 100644 --- a/docs/support-contribution/common_issues.md +++ b/docs/support-contribution/common_issues.md @@ -7,18 +7,18 @@ This error occurs when using outdated versions of the package. Ensure that you are using the latest version, and when in a notebook, ensure that you\'ve restarted the kernel when needed. Also make sure that you install in the right Python environment (please use -`!{sys.executable} -m pip install -U ydata-profiling`!). More +`!{sys.executable} -m pip install -U data-profiling`!). More information on installing Python packages directly from a notebook: [\'Installing Python Packages from a Jupyter Notebook\'](https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/). 
Related GitHub issues: -- [\[950\]](https://github.com/ydataai/ydata-profiling/issues/950) -- [\[939\]](https://github.com/ydataai/ydata-profiling/issues/939) -- [\[528\]](https://github.com/ydataai/ydata-profiling/issues/528) -- [\[485\]](https://github.com/ydataai/ydata-profiling/issues/485) -- [\[396\]](https://github.com/ydataai/ydata-profiling/issues/396) +- [\[950\]](https://github.com/Data-Centric-AI-Community/data-profiling/issues/950) +- [\[939\]](https://github.com/Data-Centric-AI-Community/data-profiling/issues/939) +- [\[528\]](https://github.com/Data-Centric-AI-Community/data-profiling/issues/528) +- [\[485\]](https://github.com/Data-Centric-AI-Community/data-profiling/issues/485) +- [\[396\]](https://github.com/Data-Centric-AI-Community/data-profiling/issues/396) ## Jupyter \"IntSlider(value=0)\" @@ -40,7 +40,7 @@ fixed. One workaround is to filter out large outliers prior to report computation. Related StackOverflow questions: -- [MemoryError when using ydata_profiling +- [MemoryError when using data_profiling profile_report](https://stackoverflow.com/questions/67342168/memoryerror-when-using-pandas-profiling-profile-report) \ No newline at end of file diff --git a/docs/support-contribution/contribution_guidelines.md b/docs/support-contribution/contribution_guidelines.md index cb9e4d72d..c4dd76975 100644 --- a/docs/support-contribution/contribution_guidelines.md +++ b/docs/support-contribution/contribution_guidelines.md @@ -84,6 +84,6 @@ community](https://discord.com/invite/mw7xjJ7b7s). ## More information Read more on getting involved in the [Contribution Guide available on -GitHub](https://github.com/ydataai/ydata-profiling/blob/master/CONTRIBUTING.md). +GitHub](https://github.com/Data-Centric-AI-Community/data-profiling/blob/master/CONTRIBUTING.md). 
\ No newline at end of file diff --git a/docs/support-contribution/help_troubleshoot.md b/docs/support-contribution/help_troubleshoot.md index 230927d93..897236791 100644 --- a/docs/support-contribution/help_troubleshoot.md +++ b/docs/support-contribution/help_troubleshoot.md @@ -16,9 +16,9 @@ previously identified common issue. ## Reporting a bug To ensure the bug was not already reported by searching on Github under -[Issues](https://github.com/ydataai/ydata-profiling/issues). If you\'re +[Issues](https://github.com/Data-Centric-AI-Community/data-profiling/issues). If you\'re unable to find an open issue addressing the problem, [open a new -one](https://github.com/ydataai/ydata-profiling/issues/new/choose). If +one](https://github.com/Data-Centric-AI-Community/data-profiling/issues/new/choose). If possible, use the relevant bug report templates to create the issue. You should provide the **minimal information to reproduce this bug**. @@ -68,20 +68,20 @@ recommended: ## Using Stack Overflow -Users with a request for help on how to use `ydata-profiling` should +Users with a request for help on how to use `data-profiling` should consider asking their question on Stack Overflow, under the dedicated -`ydata-profiling` tag: +`data-profiling` tag: -[![Questions: Stackoverflow \"ydata-profiling\"](https://img.shields.io/badge/stackoverflow%20tag-ydata%20profiling-yellow)](https://stackoverflow.com/questions/tagged/ydata-profiling) or, -[![Questions: Stackoverflow \"ydata-profiling\"](https://img.shields.io/badge/stackoverflow%20tag-pandas%20profiling-yellow)](https://stackoverflow.com/questions/tagged/pandas-profiling) +[![Questions: Stackoverflow \"data-profiling\"](https://img.shields.io/badge/stackoverflow%20tag-data%20profiling-yellow)](https://stackoverflow.com/questions/tagged/data-profiling) or, +[![Questions: Stackoverflow 
\"data-profiling\"](https://img.shields.io/badge/stackoverflow%20tag-pandas%20profiling-yellow)](https://stackoverflow.com/questions/tagged/pandas-profiling) -for questions about `ydata-profiling` older versions. +for questions about `data-profiling` older versions. ## :fontawesome-brands-discord: Discord community [Join the Discord community](https://discord.com/invite/mw7xjJ7b7s) to connect with both other users and developers that might be able to -answer your questions. The **#ydata-profiling** and **#need-help** +answer your questions. The **#data-profiling** and **#need-help** channels are recommended for questions and issues. \ No newline at end of file diff --git a/examples/bank_marketing_data/banking_data.py b/examples/bank_marketing_data/banking_data.py index dcd3da1b4..b325793e0 100644 --- a/examples/bank_marketing_data/banking_data.py +++ b/examples/bank_marketing_data/banking_data.py @@ -4,8 +4,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_zipped_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_zipped_file if __name__ == "__main__": file_name = cache_zipped_file( diff --git a/examples/census/census.py b/examples/census/census.py index e77ae6dd5..c5c775a85 100644 --- a/examples/census/census.py +++ b/examples/census/census.py @@ -4,9 +4,9 @@ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.config import Dataset -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.config import Dataset +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/chicago_employees/chicago_employees.py b/examples/chicago_employees/chicago_employees.py index 263a7658e..1aeddb94d 100644 --- a/examples/chicago_employees/chicago_employees.py +++ 
b/examples/chicago_employees/chicago_employees.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/colors/colors.py b/examples/colors/colors.py index 9dc57e626..4fca3d9e7 100644 --- a/examples/colors/colors.py +++ b/examples/colors/colors.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/features/correlation_auto_example.py b/examples/features/correlation_auto_example.py index c32e0e150..606ed313d 100644 --- a/examples/features/correlation_auto_example.py +++ b/examples/features/correlation_auto_example.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_zipped_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_zipped_file """ The "Auto" correlation is an interpretable pairwise column metric of the following mapping: diff --git a/examples/features/correlation_demo.py b/examples/features/correlation_demo.py index c2a6341d4..933d2e38c 100644 --- a/examples/features/correlation_demo.py +++ b/examples/features/correlation_demo.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_zipped_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_zipped_file """ The "Auto" correlation is an interpretable pairwise column metric of the following mapping: @@ -32,7 +32,7 @@ profile = ProfileReport( df, title="Profile Report of the UCI Bank Marketing Dataset", - 
config_file="src/ydata_profiling/config_default.yaml", + config_file="src/data_profiling/config_default.yaml", correlations={ "auto": {"n_bins": 8}, }, @@ -57,7 +57,7 @@ no_auto_profile = ProfileReport( df, title="Profile Report of the UCI Bank Marketing Dataset", - config_file="src/ydata_profiling/config_default.yaml", + config_file="src/data_profiling/config_default.yaml", correlations={ "auto": {"calculate": False}, "pearson": {"calculate": True}, diff --git a/examples/features/eda_dataset_compare.py b/examples/features/eda_dataset_compare.py index 64eb50e94..36c63e783 100644 --- a/examples/features/eda_dataset_compare.py +++ b/examples/features/eda_dataset_compare.py @@ -1,7 +1,7 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": # Read the Titanic Dataset diff --git a/examples/features/images_cats_and_dogs.py b/examples/features/images_cats_and_dogs.py index 0fe7392ea..91ac7edb5 100644 --- a/examples/features/images_cats_and_dogs.py +++ b/examples/features/images_cats_and_dogs.py @@ -1,8 +1,8 @@ import kaggle import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.paths import get_data_path +from data_profiling import ProfileReport +from data_profiling.utils.paths import get_data_path # The dataset in this example is obtained using the `kaggle` api. 
# If you haven't done so already, you should set up the api credentials: diff --git a/examples/features/images_exif.py b/examples/features/images_exif.py index 323225f5f..58435583f 100644 --- a/examples/features/images_exif.py +++ b/examples/features/images_exif.py @@ -1,8 +1,8 @@ import kaggle import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.paths import get_data_path +from data_profiling import ProfileReport +from data_profiling.utils.paths import get_data_path # The dataset in this example is obtained using the `kaggle` api. # If you haven't done so already, you should set up the api credentials: diff --git a/examples/features/mask_sensitive.py b/examples/features/mask_sensitive.py index 9e517e78d..123d3746d 100644 --- a/examples/features/mask_sensitive.py +++ b/examples/features/mask_sensitive.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file("auto2.dta", "http://www.stata-press.com/data/r15/auto2.dta") diff --git a/examples/features/russian_vocabulary.py b/examples/features/russian_vocabulary.py index 6095f3756..e64790ef0 100644 --- a/examples/features/russian_vocabulary.py +++ b/examples/features/russian_vocabulary.py @@ -2,7 +2,7 @@ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport if __name__ == "__main__": df = pd.read_csv( diff --git a/examples/features/spark_example.py b/examples/features/spark_example.py index 5c6fbf587..898336d00 100644 --- a/examples/features/spark_example.py +++ b/examples/features/spark_example.py @@ -7,8 +7,8 @@ from matplotlib import MatplotlibDeprecationWarning from pyspark.sql import SparkSession -from ydata_profiling import ProfileReport -from ydata_profiling.config import Settings +from data_profiling import 
ProfileReport +from data_profiling.config import Settings logging.basicConfig(level=logging.INFO) diff --git a/examples/features/theme_flatly_demo.py b/examples/features/theme_flatly_demo.py index 45f5ecefa..9864747ad 100644 --- a/examples/features/theme_flatly_demo.py +++ b/examples/features/theme_flatly_demo.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/features/theme_united_demo.py b/examples/features/theme_united_demo.py index 27d760941..fabff285e 100644 --- a/examples/features/theme_united_demo.py +++ b/examples/features/theme_united_demo.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/features/urls.py b/examples/features/urls.py index 452b86f8a..0da0fd623 100644 --- a/examples/features/urls.py +++ b/examples/features/urls.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/hcc/eda-with-feature-comparison.ipynb b/examples/hcc/eda-with-feature-comparison.ipynb index d61390050..41091349f 100644 --- a/examples/hcc/eda-with-feature-comparison.ipynb +++ b/examples/hcc/eda-with-feature-comparison.ipynb @@ -25,7 +25,7 @@ "source": [ "import pandas as pd\n", "\n", - "from ydata_profiling import ProfileReport" + "from data_profiling import ProfileReport" ] }, { diff --git a/examples/hcc/eda-with-feature-comparison.py 
b/examples/hcc/eda-with-feature-comparison.py index 666bca9af..c03a73262 100644 --- a/examples/hcc/eda-with-feature-comparison.py +++ b/examples/hcc/eda-with-feature-comparison.py @@ -4,7 +4,7 @@ import pandas as pd from sklearn.impute import SimpleImputer -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport if __name__ == "__main__": diff --git a/examples/integrations/databricks/ydata-profiling in Databricks.ipynb b/examples/integrations/databricks/ydata-profiling in Databricks.ipynb index 2e57f489a..2561a6d44 100644 --- a/examples/integrations/databricks/ydata-profiling in Databricks.ipynb +++ b/examples/integrations/databricks/ydata-profiling in Databricks.ipynb @@ -14,7 +14,7 @@ "source": [ "# Yellow Taxy NYC\n", "\n", - "### Data Profiling in Databricks with ydata-profiling" + "### Data Profiling in Databricks with data-profiling" ] }, { @@ -79,9 +79,9 @@ } }, "source": [ - "## Data profiling with YData Profiling\n", + "## Data profiling with data-profiling\n", "\n", - "pandas-profiling is now ydata-profiling and includes support for Spark dataframes." + "pandas-profiling is now data-profiling and includes support for Spark dataframes." 
] }, { @@ -98,7 +98,7 @@ }, "outputs": [], "source": [ - "from ydata_profiling import ProfileReport\n", + "from data_profiling import ProfileReport\n", "\n", "report = ProfileReport(\n", " df,\n", @@ -214,6 +214,9 @@ "notebookName": "YData-profiling in Databricks", "notebookOrigID": 329200988581789, "widgets": {} + }, + "language_info": { + "name": "python" } }, "nbformat": 4, diff --git a/examples/integrations/great_expectations/great_expectations_example.py b/examples/integrations/great_expectations/great_expectations_example.py index 946f8ab94..a007a5ab8 100644 --- a/examples/integrations/great_expectations/great_expectations_example.py +++ b/examples/integrations/great_expectations/great_expectations_example.py @@ -1,8 +1,8 @@ import great_expectations as ge import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file file_name = cache_file( "titanic.csv", diff --git a/examples/meteorites/meteorites.ipynb b/examples/meteorites/meteorites.ipynb index 70efe52b8..1d848fdd5 100644 --- a/examples/meteorites/meteorites.ipynb +++ b/examples/meteorites/meteorites.ipynb @@ -40,7 +40,7 @@ "source": [ "import sys\n", "\n", - "!{sys.executable} -m pip install -U ydata-profiling[notebook]\n", + "!{sys.executable} -m pip install -U fg-data-profiling[notebook]\n", "!pip install jupyter-contrib-nbextensions\n", "!jupyter nbextension enable --py widgetsnbextension" ] @@ -71,8 +71,8 @@ "import pandas as pd\n", "import requests\n", "\n", - "import ydata_profiling\n", - "from ydata_profiling.utils.cache import cache_file" + "import data_profiling\n", + "from data_profiling.utils.cache import cache_file" ] }, { diff --git a/examples/meteorites/meteorites.py b/examples/meteorites/meteorites.py index def3f09d0..05df5e57b 100644 --- a/examples/meteorites/meteorites.py +++ b/examples/meteorites/meteorites.py @@ -3,8 +3,8 @@ import numpy as np 
import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/meteorites/meteorites_cloud.ipynb b/examples/meteorites/meteorites_cloud.ipynb index d006cc926..2fcdf62f8 100644 --- a/examples/meteorites/meteorites_cloud.ipynb +++ b/examples/meteorites/meteorites_cloud.ipynb @@ -21,7 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -U ydata-profiling" + "!pip install -U fg-data-profiling" ] }, { @@ -40,8 +40,8 @@ "import numpy as np\n", "import pandas as pd\n", "\n", - "import ydata_profiling\n", - "from ydata_profiling.utils.cache import cache_file" + "import data_profiling\n", + "from data_profiling.utils.cache import cache_file" ] }, { diff --git a/examples/musical_instrument_reviews/review.py b/examples/musical_instrument_reviews/review.py index 27c481555..1f64ac5c2 100644 --- a/examples/musical_instrument_reviews/review.py +++ b/examples/musical_instrument_reviews/review.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/nza/nza.py b/examples/nza/nza.py index 333355170..206ddb741 100644 --- a/examples/nza/nza.py +++ b/examples/nza/nza.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/rdw/rdw.py b/examples/rdw/rdw.py index aefad76d4..c0fe3ada7 100644 --- a/examples/rdw/rdw.py +++ b/examples/rdw/rdw.py @@ -1,7 +1,7 @@ import pandas as pd -from 
ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/examples/stata_auto/stata_auto.py b/examples/stata_auto/stata_auto.py index 91b275f4c..e1b28240a 100644 --- a/examples/stata_auto/stata_auto.py +++ b/examples/stata_auto/stata_auto.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file("auto2.dta", "http://www.stata-press.com/data/r15/auto2.dta") diff --git a/examples/titanic/titanic.ipynb b/examples/titanic/titanic.ipynb index 31bee8192..abc2b6cf4 100644 --- a/examples/titanic/titanic.ipynb +++ b/examples/titanic/titanic.ipynb @@ -32,7 +32,7 @@ "source": [ "import sys\n", "\n", - "!\"{sys.executable}\" -m pip install -U ydata-profiling[notebook]\n", + "!\"{sys.executable}\" -m pip install -U fg-data-profiling[notebook]\n", "!pip install jupyter-contrib-nbextensions\n", "!jupyter nbextension enable --py widgetsnbextension" ] @@ -58,8 +58,8 @@ "from ipywidgets import widgets\n", "\n", "# Our package\n", - "from ydata_profiling import ProfileReport\n", - "from ydata_profiling.utils.cache import cache_file" + "from data_profiling import ProfileReport\n", + "from data_profiling.utils.cache import cache_file" ] }, { diff --git a/examples/titanic/titanic.py b/examples/titanic/titanic.py index 560d560d8..d5fa3e756 100644 --- a/examples/titanic/titanic.py +++ b/examples/titanic/titanic.py @@ -1,7 +1,7 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff 
--git a/examples/titanic/titanic_cloud.ipynb b/examples/titanic/titanic_cloud.ipynb index d06713382..0f773bd01 100644 --- a/examples/titanic/titanic_cloud.ipynb +++ b/examples/titanic/titanic_cloud.ipynb @@ -13,7 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -U ydata-profiling" + "!pip install -U fg-data-profiling" ] }, { @@ -29,8 +29,8 @@ "import pandas as pd\n", "\n", "# Our package\n", - "from ydata_profiling import ProfileReport\n", - "from ydata_profiling.utils.cache import cache_file" + "from data_profiling import ProfileReport\n", + "from data_profiling.utils.cache import cache_file" ] }, { diff --git a/examples/usaairquality/usaairquality.ipynb b/examples/usaairquality/usaairquality.ipynb index 783fa436c..55ede117e 100644 --- a/examples/usaairquality/usaairquality.ipynb +++ b/examples/usaairquality/usaairquality.ipynb @@ -33,7 +33,7 @@ "id": "fd3ef034-d2d1-4e12-9c8d-7cd685bfe001", "metadata": {}, "source": [ - "Make sure that we have the latest version of ydata-profiling." + "Make sure that we have the latest version of data-profiling." 
] }, { @@ -46,7 +46,7 @@ "%%capture\n", "import sys\n", "\n", - "!{sys.executable} -m pip install -U ydata-profiling[notebook]\n", + "!{sys.executable} -m pip install -U fg-data-profiling[notebook]\n", "!pip install jupyter-contrib-nbextensions\n", "!jupyter nbextension enable --py widgetsnbextension" ] @@ -76,8 +76,8 @@ "source": [ "import pandas as pd\n", "\n", - "from ydata_profiling import ProfileReport\n", - "from ydata_profiling.utils.cache import cache_file" + "from data_profiling import ProfileReport\n", + "from data_profiling.utils.cache import cache_file" ] }, { @@ -107,6 +107,14 @@ "df[\"Date Local\"] = pd.to_datetime(df[\"Date Local\"])" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "55c69e09", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "cf4fc84f-6a82-43c7-b17f-24f1a56cb3e1", @@ -120,7 +128,7 @@ "id": "01519464", "metadata": {}, "source": [ - "The support to time series can be enabled by passing the parameter tsmode=True to the ProfileReport when its enabled, ydata-profiling will try to identify time-dependent features using the feature's autocorrelation, which requires a sorted DataFrame or the definition of the `sortby` parameter.\n", + "The support to time series can be enabled by passing the parameter tsmode=True to the ProfileReport when its enabled, data-profiling will try to identify time-dependent features using the feature's autocorrelation, which requires a sorted DataFrame or the definition of the `sortby` parameter.\n", "\n", "When a feature is identified as time series will trigger the following changes:\n", " - the histogram will be replaced by a line plot\n", @@ -145,7 +153,7 @@ "metadata": {}, "outputs": [], "source": [ - "from ydata_profiling.visualisation.plot import timeseries_heatmap\n", + "from data_profiling.visualisation.plot import timeseries_heatmap\n", "\n", "timeseries_heatmap(dataframe=df, entity_column=\"Site Num\", sortby=\"Date Local\")" ] diff --git 
a/examples/usaairquality/usaairquality.py b/examples/usaairquality/usaairquality.py index be800b774..d4573a03b 100644 --- a/examples/usaairquality/usaairquality.py +++ b/examples/usaairquality/usaairquality.py @@ -3,9 +3,9 @@ """ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file -from ydata_profiling.visualisation.plot import timeseries_heatmap +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file +from data_profiling.visualisation.plot import timeseries_heatmap if __name__ == "__main__": diff --git a/examples/vektis/vektis.py b/examples/vektis/vektis.py index 7180cda4a..64b8a169a 100644 --- a/examples/vektis/vektis.py +++ b/examples/vektis/vektis.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_file if __name__ == "__main__": file_name = cache_file( diff --git a/mkdocs.yml b/mkdocs.yml index 174c7d3c2..cbb1fac55 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,6 +1,6 @@ site_name: "YData Profiling" -repo_url: https://github.com/ydataai/ydata-profiling -repo_name: ydataai/ydata-profiling +repo_url: https://github.com/Data-Centric-AI-Community/fg-data-profiling +repo_name: Data-Centric-AI-Community/fg-data-profiling dev_addr: 0.0.0.0:1235 site_dir: static/docs nav: diff --git a/pyproject.toml b/pyproject.toml index e46cb237c..b17954c96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,16 @@ [build-system] build-backend = "setuptools.build_meta" requires = [ - "setuptools>=72.0.0,<80.0.0", + "setuptools>=72.0.0,<81.0.0", "setuptools-scm>=8.0.0,<9.0.0", "wheel>=0.38.4,<1.0.0" ] [packaging] -package_name = "ydata-profiling" +package_name = "fg-data-profiling" [project] -name = "ydata-profiling" +name = "fg-data-profiling" requires-python = ">=3.10,<3.14" authors = [ {name = "YData Labs 
Inc", email = "opensource@ydata.ai"} @@ -133,11 +133,11 @@ unicode= [ [project.urls] Homepage = "https://ydata.ai" -Repository = "https://github.com/ydataai/ydata-profiling" +Repository = "https://github.com/Data-Centric-AI-Community/fg-data-profiling" [project.scripts] -ydata_profiling = "ydata_profiling.controller.console:main" -pandas_profiling = "ydata_profiling.controller.console:main" +data_profiling = "data_profiling.controller.console:main" +pandas_profiling = "data_profiling.controller.console:main" # setuptools relative @@ -145,7 +145,7 @@ pandas_profiling = "ydata_profiling.controller.console:main" include-package-data = true [tool.setuptools.package-data] -ydata_profiling = ["py.typed"] +data_profiling = ["py.typed"] [tool.distutils.bdist_wheel] universal = true diff --git a/setup.py b/setup.py index 4824966a1..45a1f51d9 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ except FileNotFoundError: version = "0.0.dev0" -with open(source_root / "src/ydata_profiling/version.py", "w") as version_file: +with open(source_root / "src/data_profiling/version.py", "w") as version_file: version_file.write(f"__version__ = '{version}'") setup( diff --git a/src/data_profiling/__init__.py b/src/data_profiling/__init__.py new file mode 100644 index 000000000..e5e2886d4 --- /dev/null +++ b/src/data_profiling/__init__.py @@ -0,0 +1,34 @@ +"""Main module of data-profiling. + +.. 
include:: ../../README.md +""" +# ignore numba warnings +import warnings # isort:skip # noqa + +import importlib.util # isort:skip # noqa +from warnings import warn + +from data_profiling.compare_reports import compare # isort:skip # noqa +from data_profiling.controller import pandas_decorator # isort:skip # noqa +from data_profiling.profile_report import ProfileReport # isort:skip # noqa +from data_profiling.version import __version__ # isort:skip # noqa + +# backend +import data_profiling.model.pandas # isort:skip # noqa + +spec = importlib.util.find_spec("pyspark") +if spec is not None: + import data_profiling.model.spark # isort:skip # noqa + +spec_numba = importlib.util.find_spec("numba") +if spec_numba is not None: + from numba.core.errors import NumbaDeprecationWarning # isort:skip # noqa + + warnings.simplefilter("ignore", category=NumbaDeprecationWarning) + +__all__ = [ + "pandas_decorator", + "ProfileReport", + "__version__", + "compare", +] diff --git a/src/ydata_profiling/compare_reports.py b/src/data_profiling/compare_reports.py similarity index 97% rename from src/ydata_profiling/compare_reports.py rename to src/data_profiling/compare_reports.py index 282312a9f..51ae906dd 100644 --- a/src/ydata_profiling/compare_reports.py +++ b/src/data_profiling/compare_reports.py @@ -6,10 +6,10 @@ import pandas as pd from dacite import from_dict -from ydata_profiling.config import Correlation, Settings -from ydata_profiling.model import BaseDescription -from ydata_profiling.model.alerts import Alert -from ydata_profiling.profile_report import ProfileReport +from data_profiling.config import Correlation, Settings +from data_profiling.model import BaseDescription +from data_profiling.model.alerts import Alert +from data_profiling.profile_report import ProfileReport def _should_wrap(v1: Any, v2: Any) -> bool: @@ -281,7 +281,7 @@ def compare( all_configs = [r.config for r in reports] # type: ignore else: configs_str = [ - json.loads(r.package["ydata_profiling_config"]) 
for r in reports # type: ignore + json.loads(r.package["data_profiling_config"]) for r in reports # type: ignore ] all_configs = [] for c_str in configs_str: diff --git a/src/ydata_profiling/config.py b/src/data_profiling/config.py similarity index 100% rename from src/ydata_profiling/config.py rename to src/data_profiling/config.py diff --git a/src/ydata_profiling/config_default.yaml b/src/data_profiling/config_default.yaml similarity index 100% rename from src/ydata_profiling/config_default.yaml rename to src/data_profiling/config_default.yaml diff --git a/src/ydata_profiling/config_minimal.yaml b/src/data_profiling/config_minimal.yaml similarity index 100% rename from src/ydata_profiling/config_minimal.yaml rename to src/data_profiling/config_minimal.yaml diff --git a/src/ydata_profiling/controller/__init__.py b/src/data_profiling/controller/__init__.py similarity index 100% rename from src/ydata_profiling/controller/__init__.py rename to src/data_profiling/controller/__init__.py diff --git a/src/ydata_profiling/controller/console.py b/src/data_profiling/controller/console.py similarity index 92% rename from src/ydata_profiling/controller/console.py rename to src/data_profiling/controller/console.py index 10344875e..373bf53e5 100644 --- a/src/ydata_profiling/controller/console.py +++ b/src/data_profiling/controller/console.py @@ -3,12 +3,12 @@ from pathlib import Path from typing import Any, List, Optional -from ydata_profiling.__init__ import ProfileReport, __version__ -from ydata_profiling.utils.dataframe import read_pandas +from data_profiling.__init__ import ProfileReport, __version__ +from data_profiling.utils.dataframe import read_pandas def parse_args(args: Optional[List[Any]] = None) -> argparse.Namespace: - """Parse the command line arguments for the `ydata_profiling` binary. + """Parse the command line arguments for the `data_profiling` binary. Args: args: List of input arguments. (Default value=None). 
@@ -97,7 +97,7 @@ def parse_args(args: Optional[List[Any]] = None) -> argparse.Namespace: def main(args: Optional[List[Any]] = None) -> None: - """Run the `ydata_profiling` package. + """Run the `data_profiling` package. Args: args: Arguments for the programme (Default value=None). diff --git a/src/ydata_profiling/controller/pandas_decorator.py b/src/data_profiling/controller/pandas_decorator.py similarity index 88% rename from src/ydata_profiling/controller/pandas_decorator.py rename to src/data_profiling/controller/pandas_decorator.py index 98c3b51dd..9d56ec1f2 100644 --- a/src/ydata_profiling/controller/pandas_decorator.py +++ b/src/data_profiling/controller/pandas_decorator.py @@ -1,7 +1,7 @@ """This file add the decorator on the DataFrame object.""" from pandas import DataFrame -from ydata_profiling.profile_report import ProfileReport +from data_profiling.profile_report import ProfileReport def profile_report(df: DataFrame, **kwargs) -> ProfileReport: diff --git a/src/ydata_profiling/expectations_report.py b/src/data_profiling/expectations_report.py similarity index 95% rename from src/ydata_profiling/expectations_report.py rename to src/data_profiling/expectations_report.py index a4f32e2a3..1ab455fce 100644 --- a/src/ydata_profiling/expectations_report.py +++ b/src/data_profiling/expectations_report.py @@ -3,10 +3,10 @@ import pandas as pd from visions import VisionsTypeset -from ydata_profiling.config import Settings -from ydata_profiling.model import BaseDescription, expectation_algorithms -from ydata_profiling.model.handler import Handler -from ydata_profiling.utils.dataframe import slugify +from data_profiling.config import Settings +from data_profiling.model import BaseDescription, expectation_algorithms +from data_profiling.model.handler import Handler +from data_profiling.utils.dataframe import slugify class ExpectationHandler(Handler): diff --git a/src/ydata_profiling/model/__init__.py b/src/data_profiling/model/__init__.py similarity index 67% rename 
from src/ydata_profiling/model/__init__.py rename to src/data_profiling/model/__init__.py index ece447210..2946e9644 100644 --- a/src/ydata_profiling/model/__init__.py +++ b/src/data_profiling/model/__init__.py @@ -1,4 +1,4 @@ """The model module handles all logic/calculations, e.g. calculate statistics, testing for special conditions.""" -from ydata_profiling.model.description import BaseAnalysis, BaseDescription +from data_profiling.model.description import BaseAnalysis, BaseDescription __all__ = ["BaseAnalysis", "BaseDescription"] diff --git a/src/ydata_profiling/model/alerts.py b/src/data_profiling/model/alerts.py similarity index 99% rename from src/ydata_profiling/model/alerts.py rename to src/data_profiling/model/alerts.py index 1b16d27a0..7a0d7b776 100644 --- a/src/ydata_profiling/model/alerts.py +++ b/src/data_profiling/model/alerts.py @@ -7,9 +7,9 @@ import numpy as np import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.correlations import perform_check_correlation -from ydata_profiling.utils.styles import get_alert_styles +from data_profiling.config import Settings +from data_profiling.model.correlations import perform_check_correlation +from data_profiling.utils.styles import get_alert_styles def fmt_percent(value: float, edge_cases: bool = True) -> str: diff --git a/src/ydata_profiling/model/correlations.py b/src/data_profiling/model/correlations.py similarity index 95% rename from src/ydata_profiling/model/correlations.py rename to src/data_profiling/model/correlations.py index 2bbaa1112..f6018ebec 100644 --- a/src/ydata_profiling/model/correlations.py +++ b/src/data_profiling/model/correlations.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from ydata_profiling.config import Settings +from data_profiling.config import Settings try: from pandas.core.base import DataError @@ -23,11 +23,11 @@ class CorrelationBackend: def __init__(self, df: Sized): """Determine backend once and store it for all 
correlation computations.""" if isinstance(df, pd.DataFrame): - from ydata_profiling.model.pandas import ( + from data_profiling.model.pandas import ( correlations_pandas as correlation_backend, # type: ignore ) else: - from ydata_profiling.model.spark import ( + from data_profiling.model.spark import ( correlations_spark as correlation_backend, # type: ignore ) @@ -89,7 +89,7 @@ def warn_correlation(correlation_name: str, error: str) -> None: To hide this warning, disable the calculation (using `df.profile_report(correlations={{\"{correlation_name}\": {{\"calculate\": False}}}})` If this is problematic for your use case, please report this as an issue: -https://github.com/ydataai/ydata-profiling/issues +https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues (include the error message: '{error}')""" ) diff --git a/src/ydata_profiling/model/dataframe.py b/src/data_profiling/model/dataframe.py similarity index 82% rename from src/ydata_profiling/model/dataframe.py rename to src/data_profiling/model/dataframe.py index dd01f8ffe..06ac76ce2 100644 --- a/src/ydata_profiling/model/dataframe.py +++ b/src/data_profiling/model/dataframe.py @@ -3,8 +3,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.pandas.dataframe_pandas import pandas_preprocess +from data_profiling.config import Settings +from data_profiling.model.pandas.dataframe_pandas import pandas_preprocess spec = importlib.util.find_spec("pyspark") if spec is None: @@ -14,7 +14,7 @@ else: from pyspark.sql import DataFrame as sparkDataFrame # type: ignore - from ydata_profiling.model.spark.dataframe_spark import spark_preprocess + from data_profiling.model.spark.dataframe_spark import spark_preprocess def preprocess(config: Settings, df: Any) -> Any: diff --git a/src/ydata_profiling/model/describe.py b/src/data_profiling/model/describe.py similarity index 85% rename from src/ydata_profiling/model/describe.py rename to src/data_profiling/model/describe.py 
index 74bdf924a..f730fc5d7 100644 --- a/src/ydata_profiling/model/describe.py +++ b/src/data_profiling/model/describe.py @@ -6,25 +6,25 @@ from tqdm.auto import tqdm from visions import VisionsTypeset -from ydata_profiling.config import Settings -from ydata_profiling.model import BaseAnalysis, BaseDescription -from ydata_profiling.model.alerts import get_alerts -from ydata_profiling.model.correlations import ( +from data_profiling.config import Settings +from data_profiling.model import BaseAnalysis, BaseDescription +from data_profiling.model.alerts import get_alerts +from data_profiling.model.correlations import ( calculate_correlation, get_active_correlations, ) -from ydata_profiling.model.dataframe import preprocess -from ydata_profiling.model.description import TimeIndexAnalysis -from ydata_profiling.model.duplicates import get_duplicates -from ydata_profiling.model.missing import get_missing_active, get_missing_diagram -from ydata_profiling.model.pairwise import get_scatter_plot, get_scatter_tasks -from ydata_profiling.model.sample import get_custom_sample, get_sample -from ydata_profiling.model.summarizer import BaseSummarizer -from ydata_profiling.model.summary import get_series_descriptions -from ydata_profiling.model.table import get_table_stats -from ydata_profiling.model.timeseries_index import get_time_index_description -from ydata_profiling.utils.progress_bar import progress -from ydata_profiling.version import __version__ +from data_profiling.model.dataframe import preprocess +from data_profiling.model.description import TimeIndexAnalysis +from data_profiling.model.duplicates import get_duplicates +from data_profiling.model.missing import get_missing_active, get_missing_diagram +from data_profiling.model.pairwise import get_scatter_plot, get_scatter_tasks +from data_profiling.model.sample import get_custom_sample, get_sample +from data_profiling.model.summarizer import BaseSummarizer +from data_profiling.model.summary import get_series_descriptions 
+from data_profiling.model.table import get_table_stats +from data_profiling.model.timeseries_index import get_time_index_description +from data_profiling.utils.progress_bar import progress +from data_profiling.version import __version__ def describe( @@ -180,8 +180,8 @@ def describe( pbar.set_postfix_str("Get reproduction details") package = { - "ydata_profiling_version": __version__, - "ydata_profiling_config": config.json(), + "data_profiling_version": __version__, + "data_profiling_config": config.json(), } pbar.update() diff --git a/src/ydata_profiling/model/description.py b/src/data_profiling/model/description.py similarity index 97% rename from src/ydata_profiling/model/description.py rename to src/data_profiling/model/description.py index fd1d22ae6..a028f1f16 100644 --- a/src/ydata_profiling/model/description.py +++ b/src/data_profiling/model/description.py @@ -90,7 +90,7 @@ class BaseDescription: correlations (Dict[str, Any]): Prepare correlation matrix for DataFrame missing (Dict[str, Any]): Describe missing values. alerts (Any): Take alerts from all modules (variables, scatter, correlations), and group them. - package (Dict[str, Any]): Contains version of ydata-profiling and config. + package (Dict[str, Any]): Contains version of data-profiling and config. sample (Any): Sample of data. duplicates (Any): Description of duplicates. 
""" diff --git a/src/ydata_profiling/model/duplicates.py b/src/data_profiling/model/duplicates.py similarity index 83% rename from src/ydata_profiling/model/duplicates.py rename to src/data_profiling/model/duplicates.py index 49b1176df..992b67f50 100644 --- a/src/ydata_profiling/model/duplicates.py +++ b/src/data_profiling/model/duplicates.py @@ -2,7 +2,7 @@ from multimethod import multimethod -from ydata_profiling.config import Settings +from data_profiling.config import Settings T = TypeVar("T") diff --git a/src/ydata_profiling/model/expectation_algorithms.py b/src/data_profiling/model/expectation_algorithms.py similarity index 100% rename from src/ydata_profiling/model/expectation_algorithms.py rename to src/data_profiling/model/expectation_algorithms.py diff --git a/src/ydata_profiling/model/handler.py b/src/data_profiling/model/handler.py similarity index 93% rename from src/ydata_profiling/model/handler.py rename to src/data_profiling/model/handler.py index 992c1840c..5b6e3cb01 100644 --- a/src/ydata_profiling/model/handler.py +++ b/src/data_profiling/model/handler.py @@ -61,7 +61,7 @@ def handle(self, dtype: str, *args, **kwargs) -> dict: def get_render_map() -> Dict[str, Callable]: - import ydata_profiling.report.structure.variables as render_algorithms + import data_profiling.report.structure.variables as render_algorithms render_map = { "Boolean": render_algorithms.render_boolean, diff --git a/src/ydata_profiling/model/missing.py b/src/data_profiling/model/missing.py similarity index 94% rename from src/ydata_profiling/model/missing.py rename to src/data_profiling/model/missing.py index 46ec2dee3..24921caf5 100644 --- a/src/ydata_profiling/model/missing.py +++ b/src/data_profiling/model/missing.py @@ -4,7 +4,7 @@ import pandas as pd -from ydata_profiling.config import Settings +from data_profiling.config import Settings class MissingDataBackend: @@ -13,9 +13,9 @@ class MissingDataBackend: def __init__(self, df: Sized): """Determine backend once and store 
it for all missing-data computations.""" if isinstance(df, pd.DataFrame): - self.backend_module = "ydata_profiling.model.pandas.missing_pandas" + self.backend_module = "data_profiling.model.pandas.missing_pandas" else: - self.backend_module = "ydata_profiling.model.spark.missing_spark" + self.backend_module = "data_profiling.model.spark.missing_spark" self.module = importlib.import_module(self.backend_module) @@ -134,7 +134,7 @@ def get_missing_diagram( To hide this warning, disable the calculation (using `df.profile_report(missing_diagrams={{"{settings['name']}": False}}`) If this is problematic for your use case, please report this as an issue: - https://github.com/ydataai/ydata-profiling/issues + https://github.com/Data-Centric-AI-Community/fg-data-profiling/issues (include the error message: '{e}')""" ) return None diff --git a/src/ydata_profiling/model/pairwise.py b/src/data_profiling/model/pairwise.py similarity index 87% rename from src/ydata_profiling/model/pairwise.py rename to src/data_profiling/model/pairwise.py index a224369b4..3dc0e8d1b 100644 --- a/src/ydata_profiling/model/pairwise.py +++ b/src/data_profiling/model/pairwise.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.visualisation.plot import scatter_pairwise +from data_profiling.config import Settings +from data_profiling.visualisation.plot import scatter_pairwise def get_scatter_tasks( diff --git a/src/ydata_profiling/model/pandas/__init__.py b/src/data_profiling/model/pandas/__init__.py similarity index 94% rename from src/ydata_profiling/model/pandas/__init__.py rename to src/data_profiling/model/pandas/__init__.py index 381df8e86..9268346ca 100644 --- a/src/ydata_profiling/model/pandas/__init__.py +++ b/src/data_profiling/model/pandas/__init__.py @@ -24,7 +24,7 @@ # Dynamically import and expose functions from modules for module_name in PANDAS_MODULES: - module = importlib.import_module(f"ydata_profiling.model.pandas.{module_name}") + 
module = importlib.import_module(f"data_profiling.model.pandas.{module_name}") globals().update( { name: getattr(module, name) diff --git a/src/ydata_profiling/model/pandas/correlations_pandas.py b/src/data_profiling/model/pandas/correlations_pandas.py similarity index 98% rename from src/ydata_profiling/model/pandas/correlations_pandas.py rename to src/data_profiling/model/pandas/correlations_pandas.py index 94eef2f95..890d70af2 100644 --- a/src/ydata_profiling/model/pandas/correlations_pandas.py +++ b/src/data_profiling/model/pandas/correlations_pandas.py @@ -7,8 +7,8 @@ import pandas as pd from scipy import stats -from ydata_profiling.config import Settings -from ydata_profiling.model.pandas.discretize_pandas import ( +from data_profiling.config import Settings +from data_profiling.model.pandas.discretize_pandas import ( DiscretizationType, Discretizer, ) diff --git a/src/ydata_profiling/model/pandas/dataframe_pandas.py b/src/data_profiling/model/pandas/dataframe_pandas.py similarity index 85% rename from src/ydata_profiling/model/pandas/dataframe_pandas.py rename to src/data_profiling/model/pandas/dataframe_pandas.py index e98dc7d24..87729d025 100644 --- a/src/ydata_profiling/model/pandas/dataframe_pandas.py +++ b/src/data_profiling/model/pandas/dataframe_pandas.py @@ -1,7 +1,7 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.utils.dataframe import rename_index +from data_profiling.config import Settings +from data_profiling.utils.dataframe import rename_index def pandas_preprocess(config: Settings, df: pd.DataFrame) -> pd.DataFrame: diff --git a/src/ydata_profiling/model/pandas/describe_boolean_pandas.py b/src/data_profiling/model/pandas/describe_boolean_pandas.py similarity index 85% rename from src/ydata_profiling/model/pandas/describe_boolean_pandas.py rename to src/data_profiling/model/pandas/describe_boolean_pandas.py index 9b2014db7..80cb422d4 100644 --- 
a/src/ydata_profiling/model/pandas/describe_boolean_pandas.py +++ b/src/data_profiling/model/pandas/describe_boolean_pandas.py @@ -3,9 +3,9 @@ import numpy as np import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.pandas.imbalance_pandas import column_imbalance_score -from ydata_profiling.model.summary_algorithms import ( +from data_profiling.config import Settings +from data_profiling.model.pandas.imbalance_pandas import column_imbalance_score +from data_profiling.model.summary_algorithms import ( describe_boolean_1d, series_hashable, ) diff --git a/src/ydata_profiling/model/pandas/describe_categorical_pandas.py b/src/data_profiling/model/pandas/describe_categorical_pandas.py similarity index 97% rename from src/ydata_profiling/model/pandas/describe_categorical_pandas.py rename to src/data_profiling/model/pandas/describe_categorical_pandas.py index a53f16d91..14337c21c 100644 --- a/src/ydata_profiling/model/pandas/describe_categorical_pandas.py +++ b/src/data_profiling/model/pandas/describe_categorical_pandas.py @@ -6,10 +6,10 @@ import numpy as np import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.pandas.imbalance_pandas import column_imbalance_score -from ydata_profiling.model.pandas.utils_pandas import weighted_median -from ydata_profiling.model.summary_algorithms import ( +from data_profiling.config import Settings +from data_profiling.model.pandas.imbalance_pandas import column_imbalance_score +from data_profiling.model.pandas.utils_pandas import weighted_median +from data_profiling.model.summary_algorithms import ( chi_square, describe_categorical_1d, histogram_compute, diff --git a/src/ydata_profiling/model/pandas/describe_counts_pandas.py b/src/data_profiling/model/pandas/describe_counts_pandas.py similarity index 93% rename from src/ydata_profiling/model/pandas/describe_counts_pandas.py rename to src/data_profiling/model/pandas/describe_counts_pandas.py index 
07cdad9d5..a1e1eb422 100644 --- a/src/ydata_profiling/model/pandas/describe_counts_pandas.py +++ b/src/data_profiling/model/pandas/describe_counts_pandas.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import describe_counts +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_counts @describe_counts.register diff --git a/src/ydata_profiling/model/pandas/describe_date_pandas.py b/src/data_profiling/model/pandas/describe_date_pandas.py similarity index 92% rename from src/ydata_profiling/model/pandas/describe_date_pandas.py rename to src/data_profiling/model/pandas/describe_date_pandas.py index 72b25a697..669e2e40b 100644 --- a/src/ydata_profiling/model/pandas/describe_date_pandas.py +++ b/src/data_profiling/model/pandas/describe_date_pandas.py @@ -3,15 +3,15 @@ import numpy as np import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import ( +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import ( chi_square, describe_date_1d, histogram_compute, series_handle_nulls, series_hashable, ) -from ydata_profiling.model.typeset_relations import is_pandas_1 +from data_profiling.model.typeset_relations import is_pandas_1 def to_datetime(series: pd.Series) -> pd.Series: diff --git a/src/ydata_profiling/model/pandas/describe_file_pandas.py b/src/data_profiling/model/pandas/describe_file_pandas.py similarity index 91% rename from src/ydata_profiling/model/pandas/describe_file_pandas.py rename to src/data_profiling/model/pandas/describe_file_pandas.py index 84ee3c4ab..0af73a4ce 100644 --- a/src/ydata_profiling/model/pandas/describe_file_pandas.py +++ b/src/data_profiling/model/pandas/describe_file_pandas.py @@ -4,8 +4,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import 
describe_file_1d, histogram_compute +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_file_1d, histogram_compute def file_summary(series: pd.Series) -> dict: diff --git a/src/ydata_profiling/model/pandas/describe_generic_pandas.py b/src/data_profiling/model/pandas/describe_generic_pandas.py similarity index 88% rename from src/ydata_profiling/model/pandas/describe_generic_pandas.py rename to src/data_profiling/model/pandas/describe_generic_pandas.py index 21b804e66..61ba503fa 100644 --- a/src/ydata_profiling/model/pandas/describe_generic_pandas.py +++ b/src/data_profiling/model/pandas/describe_generic_pandas.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import describe_generic +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_generic @describe_generic.register diff --git a/src/ydata_profiling/model/pandas/describe_image_pandas.py b/src/data_profiling/model/pandas/describe_image_pandas.py similarity index 98% rename from src/ydata_profiling/model/pandas/describe_image_pandas.py rename to src/data_profiling/model/pandas/describe_image_pandas.py index 24524d586..76654ef09 100644 --- a/src/ydata_profiling/model/pandas/describe_image_pandas.py +++ b/src/data_profiling/model/pandas/describe_image_pandas.py @@ -7,8 +7,8 @@ import pandas as pd from PIL import ExifTags, Image -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import ( +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import ( describe_image_1d, named_aggregate_summary, ) diff --git a/src/ydata_profiling/model/pandas/describe_numeric_pandas.py b/src/data_profiling/model/pandas/describe_numeric_pandas.py similarity index 97% rename from src/ydata_profiling/model/pandas/describe_numeric_pandas.py rename to 
src/data_profiling/model/pandas/describe_numeric_pandas.py index 21eee6c11..6d000dce1 100644 --- a/src/ydata_profiling/model/pandas/describe_numeric_pandas.py +++ b/src/data_profiling/model/pandas/describe_numeric_pandas.py @@ -3,15 +3,15 @@ import numpy as np import pandas as pd -from ydata_profiling.utils.compat import pandas_version_info +from data_profiling.utils.compat import pandas_version_info if pandas_version_info() >= (1, 5): from pandas.core.arrays.integer import IntegerDtype else: from pandas.core.arrays.integer import _IntegerDtype as IntegerDtype -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import ( +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import ( chi_square, describe_numeric_1d, histogram_compute, diff --git a/src/ydata_profiling/model/pandas/describe_path_pandas.py b/src/data_profiling/model/pandas/describe_path_pandas.py similarity index 94% rename from src/ydata_profiling/model/pandas/describe_path_pandas.py rename to src/data_profiling/model/pandas/describe_path_pandas.py index e3e536f99..ddcd8b7f7 100644 --- a/src/ydata_profiling/model/pandas/describe_path_pandas.py +++ b/src/data_profiling/model/pandas/describe_path_pandas.py @@ -3,8 +3,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import describe_path_1d +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_path_1d def path_summary(series: pd.Series) -> dict: diff --git a/src/ydata_profiling/model/pandas/describe_supported_pandas.py b/src/data_profiling/model/pandas/describe_supported_pandas.py similarity index 89% rename from src/ydata_profiling/model/pandas/describe_supported_pandas.py rename to src/data_profiling/model/pandas/describe_supported_pandas.py index 16bd9ab38..d23847973 100644 --- a/src/ydata_profiling/model/pandas/describe_supported_pandas.py +++ 
b/src/data_profiling/model/pandas/describe_supported_pandas.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import describe_supported, series_hashable +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_supported, series_hashable @describe_supported.register diff --git a/src/ydata_profiling/model/pandas/describe_text_pandas.py b/src/data_profiling/model/pandas/describe_text_pandas.py similarity index 89% rename from src/ydata_profiling/model/pandas/describe_text_pandas.py rename to src/data_profiling/model/pandas/describe_text_pandas.py index d1dc734c4..16146af3e 100644 --- a/src/ydata_profiling/model/pandas/describe_text_pandas.py +++ b/src/data_profiling/model/pandas/describe_text_pandas.py @@ -2,13 +2,13 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.pandas.describe_categorical_pandas import ( +from data_profiling.config import Settings +from data_profiling.model.pandas.describe_categorical_pandas import ( length_summary_vc, unicode_summary_vc, word_summary_vc, ) -from ydata_profiling.model.summary_algorithms import ( +from data_profiling.model.summary_algorithms import ( histogram_compute, series_handle_nulls, series_hashable, diff --git a/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py b/src/data_profiling/model/pandas/describe_timeseries_pandas.py similarity index 98% rename from src/ydata_profiling/model/pandas/describe_timeseries_pandas.py rename to src/data_profiling/model/pandas/describe_timeseries_pandas.py index 7db4d56f3..545ae6e03 100644 --- a/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py +++ b/src/data_profiling/model/pandas/describe_timeseries_pandas.py @@ -6,8 +6,8 @@ from scipy.signal import find_peaks from statsmodels.tsa.stattools import adfuller -from ydata_profiling.config import Settings -from 
ydata_profiling.model.summary_algorithms import ( +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import ( describe_numeric_1d, describe_timeseries_1d, series_handle_nulls, diff --git a/src/ydata_profiling/model/pandas/describe_url_pandas.py b/src/data_profiling/model/pandas/describe_url_pandas.py similarity index 92% rename from src/ydata_profiling/model/pandas/describe_url_pandas.py rename to src/data_profiling/model/pandas/describe_url_pandas.py index bfe5239bf..dedb3e600 100644 --- a/src/ydata_profiling/model/pandas/describe_url_pandas.py +++ b/src/data_profiling/model/pandas/describe_url_pandas.py @@ -3,8 +3,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import describe_url_1d +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_url_1d def url_summary(series: pd.Series) -> dict: diff --git a/src/ydata_profiling/model/pandas/discretize_pandas.py b/src/data_profiling/model/pandas/discretize_pandas.py similarity index 100% rename from src/ydata_profiling/model/pandas/discretize_pandas.py rename to src/data_profiling/model/pandas/discretize_pandas.py diff --git a/src/ydata_profiling/model/pandas/duplicates_pandas.py b/src/data_profiling/model/pandas/duplicates_pandas.py similarity index 94% rename from src/ydata_profiling/model/pandas/duplicates_pandas.py rename to src/data_profiling/model/pandas/duplicates_pandas.py index 2923a643f..97693969c 100644 --- a/src/ydata_profiling/model/pandas/duplicates_pandas.py +++ b/src/data_profiling/model/pandas/duplicates_pandas.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.duplicates import get_duplicates +from data_profiling.config import Settings +from data_profiling.model.duplicates import get_duplicates @get_duplicates.register(Settings, pd.DataFrame, Sequence) diff --git 
a/src/ydata_profiling/model/pandas/imbalance_pandas.py b/src/data_profiling/model/pandas/imbalance_pandas.py similarity index 100% rename from src/ydata_profiling/model/pandas/imbalance_pandas.py rename to src/data_profiling/model/pandas/imbalance_pandas.py diff --git a/src/ydata_profiling/model/pandas/missing_pandas.py b/src/data_profiling/model/pandas/missing_pandas.py similarity index 92% rename from src/ydata_profiling/model/pandas/missing_pandas.py rename to src/data_profiling/model/pandas/missing_pandas.py index 60c57c13d..bf8533aa0 100644 --- a/src/ydata_profiling/model/pandas/missing_pandas.py +++ b/src/data_profiling/model/pandas/missing_pandas.py @@ -1,8 +1,8 @@ import numpy as np import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.visualisation.missing import ( +from data_profiling.config import Settings +from data_profiling.visualisation.missing import ( plot_missing_bar, plot_missing_heatmap, plot_missing_matrix, diff --git a/src/ydata_profiling/model/pandas/sample_pandas.py b/src/data_profiling/model/pandas/sample_pandas.py similarity index 89% rename from src/ydata_profiling/model/pandas/sample_pandas.py rename to src/data_profiling/model/pandas/sample_pandas.py index 10feb8969..66fad2687 100644 --- a/src/ydata_profiling/model/pandas/sample_pandas.py +++ b/src/data_profiling/model/pandas/sample_pandas.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.sample import Sample, get_sample +from data_profiling.config import Settings +from data_profiling.model.sample import Sample, get_sample @get_sample.register(Settings, pd.DataFrame) diff --git a/src/ydata_profiling/model/pandas/summary_pandas.py b/src/data_profiling/model/pandas/summary_pandas.py similarity index 93% rename from src/ydata_profiling/model/pandas/summary_pandas.py rename to src/data_profiling/model/pandas/summary_pandas.py index 2103844f8..cce7264aa 100644 --- 
a/src/ydata_profiling/model/pandas/summary_pandas.py +++ b/src/data_profiling/model/pandas/summary_pandas.py @@ -8,10 +8,10 @@ from tqdm import tqdm from visions import VisionsTypeset -from ydata_profiling.config import Settings -from ydata_profiling.model.typeset import ProfilingTypeSet -from ydata_profiling.utils.compat import optional_option_context -from ydata_profiling.utils.dataframe import sort_column_names +from data_profiling.config import Settings +from data_profiling.model.typeset import ProfilingTypeSet +from data_profiling.utils.compat import optional_option_context +from data_profiling.utils.dataframe import sort_column_names BaseSummarizer: Any = "BaseSummarizer" # type: ignore diff --git a/src/ydata_profiling/model/pandas/table_pandas.py b/src/data_profiling/model/pandas/table_pandas.py similarity index 94% rename from src/ydata_profiling/model/pandas/table_pandas.py rename to src/data_profiling/model/pandas/table_pandas.py index a919ee33b..ce663cfe2 100644 --- a/src/ydata_profiling/model/pandas/table_pandas.py +++ b/src/data_profiling/model/pandas/table_pandas.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.model.table import get_table_stats +from data_profiling.config import Settings +from data_profiling.model.table import get_table_stats @get_table_stats.register diff --git a/src/ydata_profiling/model/pandas/timeseries_index_pandas.py b/src/data_profiling/model/pandas/timeseries_index_pandas.py similarity index 86% rename from src/ydata_profiling/model/pandas/timeseries_index_pandas.py rename to src/data_profiling/model/pandas/timeseries_index_pandas.py index 3dd8081b3..42295ecc7 100644 --- a/src/ydata_profiling/model/pandas/timeseries_index_pandas.py +++ b/src/data_profiling/model/pandas/timeseries_index_pandas.py @@ -3,8 +3,8 @@ import pandas as pd from pandas.api.types import is_numeric_dtype -from ydata_profiling.config import Settings -from ydata_profiling.model.timeseries_index 
import get_time_index_description +from data_profiling.config import Settings +from data_profiling.model.timeseries_index import get_time_index_description @get_time_index_description.register diff --git a/src/ydata_profiling/model/pandas/utils_pandas.py b/src/data_profiling/model/pandas/utils_pandas.py similarity index 100% rename from src/ydata_profiling/model/pandas/utils_pandas.py rename to src/data_profiling/model/pandas/utils_pandas.py diff --git a/src/ydata_profiling/model/sample.py b/src/data_profiling/model/sample.py similarity index 90% rename from src/ydata_profiling/model/sample.py rename to src/data_profiling/model/sample.py index c440bb26e..f99ced909 100644 --- a/src/ydata_profiling/model/sample.py +++ b/src/data_profiling/model/sample.py @@ -3,7 +3,7 @@ from multimethod import multimethod from pydantic.v1 import BaseModel -from ydata_profiling.config import Settings +from data_profiling.config import Settings T = TypeVar("T") # type: ignore diff --git a/src/ydata_profiling/model/spark/__init__.py b/src/data_profiling/model/spark/__init__.py similarity index 93% rename from src/ydata_profiling/model/spark/__init__.py rename to src/data_profiling/model/spark/__init__.py index b71241218..c50184559 100644 --- a/src/ydata_profiling/model/spark/__init__.py +++ b/src/data_profiling/model/spark/__init__.py @@ -22,7 +22,7 @@ # Load modules dynamically for module_name in SPARK_MODULES: - module = importlib.import_module(f"ydata_profiling.model.spark.{module_name}") + module = importlib.import_module(f"data_profiling.model.spark.{module_name}") globals().update( { name: getattr(module, name) diff --git a/src/ydata_profiling/model/spark/correlations_spark.py b/src/data_profiling/model/spark/correlations_spark.py similarity index 99% rename from src/ydata_profiling/model/spark/correlations_spark.py rename to src/data_profiling/model/spark/correlations_spark.py index 441dfbf0f..7cb4bc123 100644 --- a/src/ydata_profiling/model/spark/correlations_spark.py +++ 
b/src/data_profiling/model/spark/correlations_spark.py @@ -10,7 +10,7 @@ from pyspark.sql.functions import PandasUDFType, lit, pandas_udf from pyspark.sql.types import ArrayType, DoubleType, StructField, StructType -from ydata_profiling.config import Settings +from data_profiling.config import Settings CORRELATION_PEARSON = "pearson" CORRELATION_SPEARMAN = "spearman" diff --git a/src/ydata_profiling/model/spark/dataframe_spark.py b/src/data_profiling/model/spark/dataframe_spark.py similarity index 96% rename from src/ydata_profiling/model/spark/dataframe_spark.py rename to src/data_profiling/model/spark/dataframe_spark.py index 9ec1a5bd3..0b0c1c5c4 100644 --- a/src/ydata_profiling/model/spark/dataframe_spark.py +++ b/src/data_profiling/model/spark/dataframe_spark.py @@ -3,7 +3,7 @@ from pyspark.sql import DataFrame from pyspark.sql.types import MapType -from ydata_profiling.config import Settings +from data_profiling.config import Settings def spark_preprocess(config: Settings, df: DataFrame) -> DataFrame: diff --git a/src/ydata_profiling/model/spark/describe_boolean_spark.py b/src/data_profiling/model/spark/describe_boolean_spark.py similarity index 93% rename from src/ydata_profiling/model/spark/describe_boolean_spark.py rename to src/data_profiling/model/spark/describe_boolean_spark.py index 148dbce6c..fc935d434 100644 --- a/src/ydata_profiling/model/spark/describe_boolean_spark.py +++ b/src/data_profiling/model/spark/describe_boolean_spark.py @@ -2,7 +2,7 @@ from pyspark.sql import DataFrame -from ydata_profiling.config import Settings +from data_profiling.config import Settings def describe_boolean_1d_spark( diff --git a/src/ydata_profiling/model/spark/describe_categorical_spark.py b/src/data_profiling/model/spark/describe_categorical_spark.py similarity index 84% rename from src/ydata_profiling/model/spark/describe_categorical_spark.py rename to src/data_profiling/model/spark/describe_categorical_spark.py index 5afdb475c..5aa3941d2 100644 --- 
a/src/ydata_profiling/model/spark/describe_categorical_spark.py +++ b/src/data_profiling/model/spark/describe_categorical_spark.py @@ -2,8 +2,8 @@ from pyspark.sql import DataFrame -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import describe_categorical_1d +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_categorical_1d @describe_categorical_1d.register diff --git a/src/ydata_profiling/model/spark/describe_counts_spark.py b/src/data_profiling/model/spark/describe_counts_spark.py similarity index 97% rename from src/ydata_profiling/model/spark/describe_counts_spark.py rename to src/data_profiling/model/spark/describe_counts_spark.py index f02d1043c..af4840d93 100644 --- a/src/ydata_profiling/model/spark/describe_counts_spark.py +++ b/src/data_profiling/model/spark/describe_counts_spark.py @@ -8,8 +8,8 @@ from pyspark.sql import functions as F from pyspark.sql import types as T -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import describe_counts +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_counts @describe_counts.register diff --git a/src/ydata_profiling/model/spark/describe_date_spark.py b/src/data_profiling/model/spark/describe_date_spark.py similarity index 96% rename from src/ydata_profiling/model/spark/describe_date_spark.py rename to src/data_profiling/model/spark/describe_date_spark.py index c44d36650..6fe64f63a 100644 --- a/src/ydata_profiling/model/spark/describe_date_spark.py +++ b/src/data_profiling/model/spark/describe_date_spark.py @@ -4,7 +4,7 @@ from numpy import array from pyspark.sql import DataFrame -from ydata_profiling.config import Settings +from data_profiling.config import Settings def date_stats_spark(df: DataFrame, summary: dict) -> dict: diff --git a/src/ydata_profiling/model/spark/describe_generic_spark.py 
b/src/data_profiling/model/spark/describe_generic_spark.py similarity index 94% rename from src/ydata_profiling/model/spark/describe_generic_spark.py rename to src/data_profiling/model/spark/describe_generic_spark.py index 1171881cd..c67b9baf9 100644 --- a/src/ydata_profiling/model/spark/describe_generic_spark.py +++ b/src/data_profiling/model/spark/describe_generic_spark.py @@ -2,7 +2,7 @@ from pyspark.sql import DataFrame -from ydata_profiling.config import Settings +from data_profiling.config import Settings def describe_generic_spark( diff --git a/src/ydata_profiling/model/spark/describe_numeric_spark.py b/src/data_profiling/model/spark/describe_numeric_spark.py similarity index 97% rename from src/ydata_profiling/model/spark/describe_numeric_spark.py rename to src/data_profiling/model/spark/describe_numeric_spark.py index 8c299577e..a0d5b262b 100644 --- a/src/ydata_profiling/model/spark/describe_numeric_spark.py +++ b/src/data_profiling/model/spark/describe_numeric_spark.py @@ -4,8 +4,8 @@ import pyspark.sql.functions as F from pyspark.sql import DataFrame -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import histogram_compute +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import histogram_compute def numeric_stats_spark(df: DataFrame, summary: dict) -> dict: diff --git a/src/ydata_profiling/model/spark/describe_supported_spark.py b/src/data_profiling/model/spark/describe_supported_spark.py similarity index 89% rename from src/ydata_profiling/model/spark/describe_supported_spark.py rename to src/data_profiling/model/spark/describe_supported_spark.py index 7dcaff62c..469128514 100644 --- a/src/ydata_profiling/model/spark/describe_supported_spark.py +++ b/src/data_profiling/model/spark/describe_supported_spark.py @@ -2,8 +2,8 @@ from pyspark.sql import DataFrame -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import 
describe_supported +from data_profiling.config import Settings +from data_profiling.model.summary_algorithms import describe_supported @describe_supported.register diff --git a/src/ydata_profiling/model/spark/describe_text_spark.py b/src/data_profiling/model/spark/describe_text_spark.py similarity index 93% rename from src/ydata_profiling/model/spark/describe_text_spark.py rename to src/data_profiling/model/spark/describe_text_spark.py index 6d7804cf5..79c6010e4 100644 --- a/src/ydata_profiling/model/spark/describe_text_spark.py +++ b/src/data_profiling/model/spark/describe_text_spark.py @@ -2,7 +2,7 @@ from pyspark.sql import DataFrame -from ydata_profiling.config import Settings +from data_profiling.config import Settings def describe_text_1d_spark( diff --git a/src/ydata_profiling/model/spark/duplicates_spark.py b/src/data_profiling/model/spark/duplicates_spark.py similarity index 93% rename from src/ydata_profiling/model/spark/duplicates_spark.py rename to src/data_profiling/model/spark/duplicates_spark.py index 95e32cc30..9123b9edf 100644 --- a/src/ydata_profiling/model/spark/duplicates_spark.py +++ b/src/data_profiling/model/spark/duplicates_spark.py @@ -3,8 +3,8 @@ import pyspark.sql.functions as F from pyspark.sql import DataFrame -from ydata_profiling.config import Settings -from ydata_profiling.model.duplicates import get_duplicates +from data_profiling.config import Settings +from data_profiling.model.duplicates import get_duplicates @get_duplicates.register diff --git a/src/ydata_profiling/model/spark/missing_spark.py b/src/data_profiling/model/spark/missing_spark.py similarity index 97% rename from src/ydata_profiling/model/spark/missing_spark.py rename to src/data_profiling/model/spark/missing_spark.py index deacf1b89..093fa2358 100644 --- a/src/ydata_profiling/model/spark/missing_spark.py +++ b/src/data_profiling/model/spark/missing_spark.py @@ -3,8 +3,8 @@ import numpy as np from pyspark.sql import DataFrame -from ydata_profiling.config import 
Settings -from ydata_profiling.visualisation.missing import ( +from data_profiling.config import Settings +from data_profiling.visualisation.missing import ( plot_missing_bar, plot_missing_heatmap, plot_missing_matrix, diff --git a/src/ydata_profiling/model/spark/sample_spark.py b/src/data_profiling/model/spark/sample_spark.py similarity index 90% rename from src/ydata_profiling/model/spark/sample_spark.py rename to src/data_profiling/model/spark/sample_spark.py index 05636b354..00e41962e 100644 --- a/src/ydata_profiling/model/spark/sample_spark.py +++ b/src/data_profiling/model/spark/sample_spark.py @@ -3,8 +3,8 @@ from pyspark.sql.dataframe import DataFrame -from ydata_profiling.config import Settings -from ydata_profiling.model.sample import Sample, get_sample +from data_profiling.config import Settings +from data_profiling.model.sample import Sample, get_sample @get_sample.register diff --git a/src/ydata_profiling/model/spark/summary_spark.py b/src/data_profiling/model/spark/summary_spark.py similarity index 94% rename from src/ydata_profiling/model/spark/summary_spark.py rename to src/data_profiling/model/spark/summary_spark.py index 75e01e240..b1ebef988 100644 --- a/src/ydata_profiling/model/spark/summary_spark.py +++ b/src/data_profiling/model/spark/summary_spark.py @@ -6,9 +6,9 @@ from tqdm import tqdm from visions import VisionsTypeset -from ydata_profiling.config import Settings -from ydata_profiling.model.summarizer import BaseSummarizer -from ydata_profiling.utils.dataframe import sort_column_names +from data_profiling.config import Settings +from data_profiling.model.summarizer import BaseSummarizer +from data_profiling.utils.dataframe import sort_column_names def spark_describe_1d( diff --git a/src/ydata_profiling/model/spark/table_spark.py b/src/data_profiling/model/spark/table_spark.py similarity index 94% rename from src/ydata_profiling/model/spark/table_spark.py rename to src/data_profiling/model/spark/table_spark.py index 33e862e61..cbe34aa13 
100644 --- a/src/ydata_profiling/model/spark/table_spark.py +++ b/src/data_profiling/model/spark/table_spark.py @@ -2,8 +2,8 @@ from pyspark.sql import DataFrame -from ydata_profiling.config import Settings -from ydata_profiling.model.table import get_table_stats +from data_profiling.config import Settings +from data_profiling.model.table import get_table_stats @get_table_stats.register diff --git a/src/ydata_profiling/model/spark/timeseries_index_spark.py b/src/data_profiling/model/spark/timeseries_index_spark.py similarity index 83% rename from src/ydata_profiling/model/spark/timeseries_index_spark.py rename to src/data_profiling/model/spark/timeseries_index_spark.py index e8145d76c..1cb34e0a6 100644 --- a/src/ydata_profiling/model/spark/timeseries_index_spark.py +++ b/src/data_profiling/model/spark/timeseries_index_spark.py @@ -1,7 +1,7 @@ """Compute statistical description of datasets.""" from pyspark.sql import DataFrame -from ydata_profiling.config import Settings +from data_profiling.config import Settings def spark_get_time_index_description_spark( diff --git a/src/ydata_profiling/model/summarizer.py b/src/data_profiling/model/summarizer.py similarity index 90% rename from src/ydata_profiling/model/summarizer.py rename to src/data_profiling/model/summarizer.py index d733a7d36..40d902bbb 100644 --- a/src/ydata_profiling/model/summarizer.py +++ b/src/data_profiling/model/summarizer.py @@ -7,10 +7,10 @@ import pandas as pd from visions import VisionsBaseType, VisionsTypeset -from ydata_profiling.config import Settings -from ydata_profiling.model import BaseDescription -from ydata_profiling.model.handler import Handler -from ydata_profiling.model.pandas import ( +from data_profiling.config import Settings +from data_profiling.model import BaseDescription +from data_profiling.model.handler import Handler +from data_profiling.model.pandas import ( pandas_describe_boolean_1d, pandas_describe_categorical_1d, pandas_describe_counts, @@ -24,17 +24,17 @@ 
pandas_describe_timeseries_1d, pandas_describe_url_1d, ) -from ydata_profiling.model.pandas.describe_supported_pandas import ( +from data_profiling.model.pandas.describe_supported_pandas import ( pandas_describe_supported, ) -from ydata_profiling.model.summary_algorithms import ( # Check what is this method used for +from data_profiling.model.summary_algorithms import ( # Check what is this method used for describe_file_1d, describe_image_1d, describe_path_1d, describe_timeseries_1d, describe_url_1d, ) -from ydata_profiling.utils.backend import is_pyspark_installed +from data_profiling.utils.backend import is_pyspark_installed class BaseSummarizer(Handler): @@ -67,7 +67,7 @@ def summary_map(self) -> Dict[str, List[Callable]]: def _create_summary_map(self) -> Dict[str, List[Callable]]: """Creates the summary map for Pandas summarization.""" if self.use_spark: - from ydata_profiling.model.spark import ( + from data_profiling.model.spark import ( describe_boolean_1d_spark, describe_categorical_1d_spark, describe_counts_spark, diff --git a/src/ydata_profiling/model/summary.py b/src/data_profiling/model/summary.py similarity index 87% rename from src/ydata_profiling/model/summary.py rename to src/data_profiling/model/summary.py index 4fa9831a7..c7155ea87 100644 --- a/src/ydata_profiling/model/summary.py +++ b/src/data_profiling/model/summary.py @@ -6,12 +6,12 @@ from tqdm import tqdm from visions import VisionsTypeset -from ydata_profiling.config import Settings -from ydata_profiling.model.pandas.summary_pandas import ( +from data_profiling.config import Settings +from data_profiling.model.pandas.summary_pandas import ( pandas_describe_1d, pandas_get_series_descriptions, ) -from ydata_profiling.model.summarizer import BaseSummarizer +from data_profiling.model.summarizer import BaseSummarizer spec = importlib.util.find_spec("pyspark") if spec is None: @@ -22,7 +22,7 @@ else: from pyspark.sql import DataFrame as sparkDataFrame # type: ignore - from 
ydata_profiling.model.spark.summary_spark import ( # noqa: E402 + from data_profiling.model.spark.summary_spark import ( # noqa: E402 get_series_descriptions_spark, spark_describe_1d, ) diff --git a/src/ydata_profiling/model/summary_algorithms.py b/src/data_profiling/model/summary_algorithms.py similarity index 95% rename from src/ydata_profiling/model/summary_algorithms.py rename to src/data_profiling/model/summary_algorithms.py index 9c3e5ef38..dccd4178c 100644 --- a/src/ydata_profiling/model/summary_algorithms.py +++ b/src/data_profiling/model/summary_algorithms.py @@ -6,7 +6,7 @@ from multimethod import multimethod from scipy.stats import chisquare -from ydata_profiling.config import Settings +from data_profiling.config import Settings T = TypeVar("T") diff --git a/src/ydata_profiling/model/table.py b/src/data_profiling/model/table.py similarity index 80% rename from src/ydata_profiling/model/table.py rename to src/data_profiling/model/table.py index e5eb6fdc2..2373a85ac 100644 --- a/src/ydata_profiling/model/table.py +++ b/src/data_profiling/model/table.py @@ -2,7 +2,7 @@ from multimethod import multimethod -from ydata_profiling.config import Settings +from data_profiling.config import Settings @multimethod diff --git a/src/ydata_profiling/model/timeseries_index.py b/src/data_profiling/model/timeseries_index.py similarity index 85% rename from src/ydata_profiling/model/timeseries_index.py rename to src/data_profiling/model/timeseries_index.py index 261569496..d67d94956 100644 --- a/src/ydata_profiling/model/timeseries_index.py +++ b/src/data_profiling/model/timeseries_index.py @@ -4,7 +4,7 @@ from multimethod import multimethod -from ydata_profiling.config import Settings +from data_profiling.config import Settings @multimethod diff --git a/src/ydata_profiling/model/typeset.py b/src/data_profiling/model/typeset.py similarity index 96% rename from src/ydata_profiling/model/typeset.py rename to src/data_profiling/model/typeset.py index e4e8eeecc..33b8620b4 
100644 --- a/src/ydata_profiling/model/typeset.py +++ b/src/data_profiling/model/typeset.py @@ -13,8 +13,8 @@ from visions.backends.pandas.series_utils import series_not_empty from visions.relations import IdentityRelation, InferenceRelation, TypeRelation -from ydata_profiling.config import Settings -from ydata_profiling.model.typeset_relations import ( +from data_profiling.config import Settings +from data_profiling.model.typeset_relations import ( numeric_is_category, series_is_string, string_is_bool, diff --git a/src/ydata_profiling/model/typeset_relations.py b/src/data_profiling/model/typeset_relations.py similarity index 94% rename from src/ydata_profiling/model/typeset_relations.py rename to src/data_profiling/model/typeset_relations.py index beac15b51..ba28e26e3 100644 --- a/src/ydata_profiling/model/typeset_relations.py +++ b/src/data_profiling/model/typeset_relations.py @@ -6,8 +6,8 @@ from pandas.api import types as pdt from visions.backends.pandas.series_utils import series_handle_nulls -from ydata_profiling.config import Settings -from ydata_profiling.utils.versions import is_pandas_1 +from data_profiling.config import Settings +from data_profiling.utils.versions import is_pandas_1 def is_nullable(series: pd.Series, state: dict) -> bool: diff --git a/src/ydata_profiling/profile_report.py b/src/data_profiling/profile_report.py similarity index 92% rename from src/ydata_profiling/profile_report.py rename to src/data_profiling/profile_report.py index a7d6d9134..8d68b8775 100644 --- a/src/ydata_profiling/profile_report.py +++ b/src/data_profiling/profile_report.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Any, Optional, Union -from ydata_profiling.utils.backend import is_pyspark_installed +from data_profiling.utils.backend import is_pyspark_installed with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -25,28 +25,28 @@ from typeguard import typechecked from visions import VisionsTypeset -from ydata_profiling.config import 
Config, Settings, SparkSettings -from ydata_profiling.expectations_report import ExpectationsReport -from ydata_profiling.model import BaseDescription -from ydata_profiling.model.alerts import AlertType -from ydata_profiling.model.describe import describe as describe_df -from ydata_profiling.model.sample import Sample -from ydata_profiling.model.summarizer import ( +from data_profiling.config import Config, Settings, SparkSettings +from data_profiling.expectations_report import ExpectationsReport +from data_profiling.model import BaseDescription +from data_profiling.model.alerts import AlertType +from data_profiling.model.describe import describe as describe_df +from data_profiling.model.sample import Sample +from data_profiling.model.summarizer import ( BaseSummarizer, ProfilingSummarizer, format_summary, redact_summary, ) -from ydata_profiling.model.typeset import ProfilingTypeSet -from ydata_profiling.report import get_report_structure -from ydata_profiling.report.presentation.core import Root -from ydata_profiling.report.presentation.flavours.html.templates import ( +from data_profiling.model.typeset import ProfilingTypeSet +from data_profiling.report import get_report_structure +from data_profiling.report.presentation.core import Root +from data_profiling.report.presentation.flavours.html.templates import ( create_html_assets, ) -from ydata_profiling.serialize_report import SerializeReport -from ydata_profiling.utils.dataframe import hash_dataframe -from ydata_profiling.utils.logger import ProfilingLogger -from ydata_profiling.utils.paths import get_config +from data_profiling.serialize_report import SerializeReport +from data_profiling.utils.dataframe import hash_dataframe +from data_profiling.utils.logger import ProfilingLogger +from data_profiling.utils.paths import get_config logger = ProfilingLogger(name="ReportLogger") @@ -406,7 +406,7 @@ def to_file(self, output_file: Union[str, Path], silent: bool = True) -> None: 
webbrowser.open_new_tab(output_file.absolute().as_uri()) def _render_html(self) -> str: - from ydata_profiling.report.presentation.flavours import HTMLReport + from data_profiling.report.presentation.flavours import HTMLReport report = self.report @@ -423,7 +423,7 @@ def _render_html(self) -> str: theme=self.config.html.style.theme, title=self.description_set.analysis.title, date=self.description_set.analysis.date_start, - version=self.description_set.package["ydata_profiling_version"], + version=self.description_set.package["data_profiling_version"], ) if self.config.html.minify_html: @@ -434,7 +434,7 @@ def _render_html(self) -> str: return html def _render_widgets(self) -> Any: - from ydata_profiling.report.presentation.flavours import WidgetReport + from data_profiling.report.presentation.flavours import WidgetReport report = self.report @@ -518,7 +518,7 @@ def to_notebook_iframe(self) -> None: """ from IPython.display import display - from ydata_profiling.report.presentation.flavours.widget.notebook import ( + from data_profiling.report.presentation.flavours.widget.notebook import ( get_notebook_iframe, ) @@ -557,9 +557,9 @@ def compare( """Compare this report with another ProfileReport Alias for: ``` - ydata_profiling.compare([report1, report2], config=config) + data_profiling.compare([report1, report2], config=config) ``` - See `ydata_profiling.compare` for details. + See `data_profiling.compare` for details. 
Args: other: the ProfileReport to compare to @@ -568,6 +568,6 @@ def compare( Returns: Comparison ProfileReport """ - from ydata_profiling.compare_reports import compare + from data_profiling.compare_reports import compare return compare([self, other], config if config is not None else self.config) diff --git a/src/ydata_profiling/report/__init__.py b/src/data_profiling/report/__init__.py similarity index 58% rename from src/ydata_profiling/report/__init__.py rename to src/data_profiling/report/__init__.py index 3009b083f..c45c76aa5 100644 --- a/src/ydata_profiling/report/__init__.py +++ b/src/data_profiling/report/__init__.py @@ -1,4 +1,4 @@ """All functionality concerned with presentation to the user.""" -from ydata_profiling.report.structure.report import get_report_structure +from data_profiling.report.structure.report import get_report_structure __all__ = ["get_report_structure"] diff --git a/src/ydata_profiling/report/formatters.py b/src/data_profiling/report/formatters.py similarity index 100% rename from src/ydata_profiling/report/formatters.py rename to src/data_profiling/report/formatters.py diff --git a/src/ydata_profiling/report/presentation/__init__.py b/src/data_profiling/report/presentation/__init__.py similarity index 100% rename from src/ydata_profiling/report/presentation/__init__.py rename to src/data_profiling/report/presentation/__init__.py diff --git a/src/data_profiling/report/presentation/core/__init__.py b/src/data_profiling/report/presentation/core/__init__.py new file mode 100644 index 000000000..9e8949954 --- /dev/null +++ b/src/data_profiling/report/presentation/core/__init__.py @@ -0,0 +1,39 @@ +from data_profiling.report.presentation.core.alerts import Alerts +from data_profiling.report.presentation.core.collapse import Collapse +from data_profiling.report.presentation.core.container import Container +from data_profiling.report.presentation.core.correlation_table import CorrelationTable +from 
data_profiling.report.presentation.core.dropdown import Dropdown +from data_profiling.report.presentation.core.duplicate import Duplicate +from data_profiling.report.presentation.core.frequency_table import FrequencyTable +from data_profiling.report.presentation.core.frequency_table_small import ( + FrequencyTableSmall, +) +from data_profiling.report.presentation.core.html import HTML +from data_profiling.report.presentation.core.image import Image +from data_profiling.report.presentation.core.root import Root +from data_profiling.report.presentation.core.sample import Sample +from data_profiling.report.presentation.core.scores import Scores +from data_profiling.report.presentation.core.table import Table +from data_profiling.report.presentation.core.toggle_button import ToggleButton +from data_profiling.report.presentation.core.variable import Variable +from data_profiling.report.presentation.core.variable_info import VariableInfo + +__all__ = [ + "Collapse", + "Container", + "Duplicate", + "Dropdown", + "FrequencyTable", + "FrequencyTableSmall", + "HTML", + "Image", + "Root", + "Sample", + "Table", + "ToggleButton", + "Variable", + "VariableInfo", + "Alerts", + "CorrelationTable", + "Scores", +] diff --git a/src/ydata_profiling/report/presentation/core/alerts.py b/src/data_profiling/report/presentation/core/alerts.py similarity index 69% rename from src/ydata_profiling/report/presentation/core/alerts.py rename to src/data_profiling/report/presentation/core/alerts.py index 79de56287..6d1ae4cb9 100644 --- a/src/ydata_profiling/report/presentation/core/alerts.py +++ b/src/data_profiling/report/presentation/core/alerts.py @@ -1,8 +1,8 @@ from typing import Any, Dict, List, Union -from ydata_profiling.config import Style -from ydata_profiling.model.alerts import Alert -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.config import Style +from data_profiling.model.alerts import Alert +from 
data_profiling.report.presentation.core.item_renderer import ItemRenderer class Alerts(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/collapse.py b/src/data_profiling/report/presentation/core/collapse.py similarity index 72% rename from src/ydata_profiling/report/presentation/core/collapse.py rename to src/data_profiling/report/presentation/core/collapse.py index a7dba34f1..c8ebd8246 100644 --- a/src/ydata_profiling/report/presentation/core/collapse.py +++ b/src/data_profiling/report/presentation/core/collapse.py @@ -1,8 +1,8 @@ from typing import Any, Callable -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer -from ydata_profiling.report.presentation.core.renderable import Renderable -from ydata_profiling.report.presentation.core.toggle_button import ToggleButton +from data_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.core.toggle_button import ToggleButton class Collapse(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/container.py b/src/data_profiling/report/presentation/core/container.py similarity index 94% rename from src/ydata_profiling/report/presentation/core/container.py rename to src/data_profiling/report/presentation/core/container.py index c82f06266..01b1cc34c 100644 --- a/src/ydata_profiling/report/presentation/core/container.py +++ b/src/data_profiling/report/presentation/core/container.py @@ -1,6 +1,6 @@ from typing import Any, Callable, Optional, Sequence -from ydata_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.core.renderable import Renderable class Container(Renderable): diff --git a/src/ydata_profiling/report/presentation/core/correlation_table.py b/src/data_profiling/report/presentation/core/correlation_table.py similarity index 85% rename from 
src/ydata_profiling/report/presentation/core/correlation_table.py rename to src/data_profiling/report/presentation/core/correlation_table.py index 174d0e708..c6ad62511 100644 --- a/src/ydata_profiling/report/presentation/core/correlation_table.py +++ b/src/data_profiling/report/presentation/core/correlation_table.py @@ -2,7 +2,7 @@ import pandas as pd -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class CorrelationTable(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/dropdown.py b/src/data_profiling/report/presentation/core/dropdown.py similarity index 80% rename from src/ydata_profiling/report/presentation/core/dropdown.py rename to src/data_profiling/report/presentation/core/dropdown.py index c1c2f274e..7405d6ca2 100644 --- a/src/ydata_profiling/report/presentation/core/dropdown.py +++ b/src/data_profiling/report/presentation/core/dropdown.py @@ -1,8 +1,8 @@ from typing import Any, Callable -from ydata_profiling.report.presentation.core.container import Container -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer -from ydata_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.core.container import Container +from data_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.renderable import Renderable class Dropdown(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/duplicate.py b/src/data_profiling/report/presentation/core/duplicate.py similarity index 81% rename from src/ydata_profiling/report/presentation/core/duplicate.py rename to src/data_profiling/report/presentation/core/duplicate.py index 907d19376..d85cb372b 100644 --- a/src/ydata_profiling/report/presentation/core/duplicate.py +++ b/src/data_profiling/report/presentation/core/duplicate.py @@ -2,7 +2,7 @@ 
import pandas as pd -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class Duplicate(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/frequency_table.py b/src/data_profiling/report/presentation/core/frequency_table.py similarity index 80% rename from src/ydata_profiling/report/presentation/core/frequency_table.py rename to src/data_profiling/report/presentation/core/frequency_table.py index 060e82da3..71c70df8f 100644 --- a/src/ydata_profiling/report/presentation/core/frequency_table.py +++ b/src/data_profiling/report/presentation/core/frequency_table.py @@ -1,6 +1,6 @@ from typing import Any -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class FrequencyTable(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/frequency_table_small.py b/src/data_profiling/report/presentation/core/frequency_table_small.py similarity index 82% rename from src/ydata_profiling/report/presentation/core/frequency_table_small.py rename to src/data_profiling/report/presentation/core/frequency_table_small.py index c7036b3b7..966788f35 100644 --- a/src/ydata_profiling/report/presentation/core/frequency_table_small.py +++ b/src/data_profiling/report/presentation/core/frequency_table_small.py @@ -1,6 +1,6 @@ from typing import Any, List -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class FrequencyTableSmall(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/html.py b/src/data_profiling/report/presentation/core/html.py similarity index 77% rename from src/ydata_profiling/report/presentation/core/html.py rename to src/data_profiling/report/presentation/core/html.py index a93e53656..1206ed079 
100644 --- a/src/ydata_profiling/report/presentation/core/html.py +++ b/src/data_profiling/report/presentation/core/html.py @@ -1,6 +1,6 @@ from typing import Any -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class HTML(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/image.py b/src/data_profiling/report/presentation/core/image.py similarity index 85% rename from src/ydata_profiling/report/presentation/core/image.py rename to src/data_profiling/report/presentation/core/image.py index 4d991922d..a8acefb18 100644 --- a/src/ydata_profiling/report/presentation/core/image.py +++ b/src/data_profiling/report/presentation/core/image.py @@ -1,7 +1,7 @@ from typing import Any, Optional -from ydata_profiling.config import ImageType -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.config import ImageType +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class Image(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/item_renderer.py b/src/data_profiling/report/presentation/core/item_renderer.py similarity index 83% rename from src/ydata_profiling/report/presentation/core/item_renderer.py rename to src/data_profiling/report/presentation/core/item_renderer.py index 6b599a83c..41ae42a62 100644 --- a/src/ydata_profiling/report/presentation/core/item_renderer.py +++ b/src/data_profiling/report/presentation/core/item_renderer.py @@ -1,7 +1,7 @@ from abc import ABC from typing import Optional -from ydata_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.core.renderable import Renderable class ItemRenderer(Renderable, ABC): diff --git a/src/ydata_profiling/report/presentation/core/renderable.py b/src/data_profiling/report/presentation/core/renderable.py similarity index 100% rename from 
src/ydata_profiling/report/presentation/core/renderable.py rename to src/data_profiling/report/presentation/core/renderable.py diff --git a/src/ydata_profiling/report/presentation/core/root.py b/src/data_profiling/report/presentation/core/root.py similarity index 79% rename from src/ydata_profiling/report/presentation/core/root.py rename to src/data_profiling/report/presentation/core/root.py index 0c3f1e3c9..211bcc65f 100644 --- a/src/ydata_profiling/report/presentation/core/root.py +++ b/src/data_profiling/report/presentation/core/root.py @@ -1,8 +1,8 @@ from typing import Any, Callable -from ydata_profiling.config import Style -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer -from ydata_profiling.report.presentation.core.renderable import Renderable +from data_profiling.config import Style +from data_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.renderable import Renderable class Root(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/sample.py b/src/data_profiling/report/presentation/core/sample.py similarity index 84% rename from src/ydata_profiling/report/presentation/core/sample.py rename to src/data_profiling/report/presentation/core/sample.py index 6ce6194fd..4eafb4c64 100644 --- a/src/ydata_profiling/report/presentation/core/sample.py +++ b/src/data_profiling/report/presentation/core/sample.py @@ -2,7 +2,7 @@ import pandas as pd -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class Sample(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/scores.py b/src/data_profiling/report/presentation/core/scores.py similarity index 84% rename from src/ydata_profiling/report/presentation/core/scores.py rename to src/data_profiling/report/presentation/core/scores.py index 7ff70570a..2a33bb391 100644 --- 
a/src/ydata_profiling/report/presentation/core/scores.py +++ b/src/data_profiling/report/presentation/core/scores.py @@ -3,8 +3,8 @@ """ from typing import Any, Dict, List, Optional -from ydata_profiling.config import Style -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.config import Style +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class Scores(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/table.py b/src/data_profiling/report/presentation/core/table.py similarity index 81% rename from src/ydata_profiling/report/presentation/core/table.py rename to src/data_profiling/report/presentation/core/table.py index 46fa9e1ad..9d2adcb6f 100644 --- a/src/ydata_profiling/report/presentation/core/table.py +++ b/src/data_profiling/report/presentation/core/table.py @@ -1,7 +1,7 @@ from typing import Any, Optional, Sequence -from ydata_profiling.config import Style -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.config import Style +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class Table(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/toggle_button.py b/src/data_profiling/report/presentation/core/toggle_button.py similarity index 79% rename from src/ydata_profiling/report/presentation/core/toggle_button.py rename to src/data_profiling/report/presentation/core/toggle_button.py index c6ce1b861..b4427a850 100644 --- a/src/ydata_profiling/report/presentation/core/toggle_button.py +++ b/src/data_profiling/report/presentation/core/toggle_button.py @@ -1,6 +1,6 @@ from typing import Any -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class ToggleButton(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/variable.py 
b/src/data_profiling/report/presentation/core/variable.py similarity index 87% rename from src/ydata_profiling/report/presentation/core/variable.py rename to src/data_profiling/report/presentation/core/variable.py index cdf063202..4fab266d0 100644 --- a/src/ydata_profiling/report/presentation/core/variable.py +++ b/src/data_profiling/report/presentation/core/variable.py @@ -1,7 +1,7 @@ from typing import Any, Callable, Optional -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer -from ydata_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.report.presentation.core.renderable import Renderable class Variable(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/core/variable_info.py b/src/data_profiling/report/presentation/core/variable_info.py similarity index 81% rename from src/ydata_profiling/report/presentation/core/variable_info.py rename to src/data_profiling/report/presentation/core/variable_info.py index 9eaa54a39..83bcf5930 100644 --- a/src/ydata_profiling/report/presentation/core/variable_info.py +++ b/src/data_profiling/report/presentation/core/variable_info.py @@ -1,8 +1,8 @@ from typing import Any, List -from ydata_profiling.config import Style -from ydata_profiling.model.alerts import Alert -from ydata_profiling.report.presentation.core.item_renderer import ItemRenderer +from data_profiling.config import Style +from data_profiling.model.alerts import Alert +from data_profiling.report.presentation.core.item_renderer import ItemRenderer class VariableInfo(ItemRenderer): diff --git a/src/ydata_profiling/report/presentation/flavours/__init__.py b/src/data_profiling/report/presentation/flavours/__init__.py similarity index 56% rename from src/ydata_profiling/report/presentation/flavours/__init__.py rename to src/data_profiling/report/presentation/flavours/__init__.py index 123f22a74..829b7f674 100644 
--- a/src/ydata_profiling/report/presentation/flavours/__init__.py +++ b/src/data_profiling/report/presentation/flavours/__init__.py @@ -1,4 +1,4 @@ -from ydata_profiling.report.presentation.flavours.flavours import ( +from data_profiling.report.presentation.flavours.flavours import ( HTMLReport, WidgetReport, ) diff --git a/src/ydata_profiling/report/presentation/flavours/flavour_html.py b/src/data_profiling/report/presentation/flavours/flavour_html.py similarity index 85% rename from src/ydata_profiling/report/presentation/flavours/flavour_html.py rename to src/data_profiling/report/presentation/flavours/flavour_html.py index b342ff32f..0a63da8ac 100644 --- a/src/ydata_profiling/report/presentation/flavours/flavour_html.py +++ b/src/data_profiling/report/presentation/flavours/flavour_html.py @@ -1,7 +1,7 @@ """ HTML flavour mapping """ -from ydata_profiling.report.presentation.core import ( +from data_profiling.report.presentation.core import ( HTML, Alerts, Collapse, @@ -20,8 +20,8 @@ Variable, VariableInfo, ) -from ydata_profiling.report.presentation.flavours.flavours import register_flavour -from ydata_profiling.report.presentation.flavours.html import ( +from data_profiling.report.presentation.flavours.flavours import register_flavour +from data_profiling.report.presentation.flavours.html import ( HTMLHTML, HTMLAlerts, HTMLCollapse, diff --git a/src/ydata_profiling/report/presentation/flavours/flavour_widget.py b/src/data_profiling/report/presentation/flavours/flavour_widget.py similarity index 85% rename from src/ydata_profiling/report/presentation/flavours/flavour_widget.py rename to src/data_profiling/report/presentation/flavours/flavour_widget.py index b95d724f1..d63afb4c7 100644 --- a/src/ydata_profiling/report/presentation/flavours/flavour_widget.py +++ b/src/data_profiling/report/presentation/flavours/flavour_widget.py @@ -1,7 +1,7 @@ """ Flavour widget """ -from ydata_profiling.report.presentation.core import ( +from 
data_profiling.report.presentation.core import ( HTML, Alerts, Collapse, @@ -19,8 +19,8 @@ Variable, VariableInfo, ) -from ydata_profiling.report.presentation.flavours.flavours import register_flavour -from ydata_profiling.report.presentation.flavours.widget import ( +from data_profiling.report.presentation.flavours.flavours import register_flavour +from data_profiling.report.presentation.flavours.widget import ( WidgetAlerts, WidgetCollapse, WidgetContainer, diff --git a/src/ydata_profiling/report/presentation/flavours/flavours.py b/src/data_profiling/report/presentation/flavours/flavours.py similarity index 76% rename from src/ydata_profiling/report/presentation/flavours/flavours.py rename to src/data_profiling/report/presentation/flavours/flavours.py index 10a5fa522..6594d0a25 100644 --- a/src/ydata_profiling/report/presentation/flavours/flavours.py +++ b/src/data_profiling/report/presentation/flavours/flavours.py @@ -1,8 +1,8 @@ """ Flavours registry information """ -from ydata_profiling.report.presentation.core import Root -from ydata_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.core import Root +from data_profiling.report.presentation.core.renderable import Renderable _FLAVOUR_REGISTRY: dict = {} @@ -26,7 +26,7 @@ def apply_renderable_mapping( def HTMLReport(structure: Root) -> Root: - from ydata_profiling.report.presentation.flavours import flavour_html # noqa: F401 + from data_profiling.report.presentation.flavours import flavour_html # noqa: F401 mapping = get_flavour_mapping("html") apply_renderable_mapping(mapping, structure, flavour_func=HTMLReport) @@ -34,7 +34,7 @@ def HTMLReport(structure: Root) -> Root: def WidgetReport(structure: Root) -> Root: - from ydata_profiling.report.presentation.flavours import ( # noqa: F401 + from data_profiling.report.presentation.flavours import ( # noqa: F401 flavour_widget, ) diff --git a/src/data_profiling/report/presentation/flavours/html/__init__.py 
b/src/data_profiling/report/presentation/flavours/html/__init__.py new file mode 100644 index 000000000..44459dc40 --- /dev/null +++ b/src/data_profiling/report/presentation/flavours/html/__init__.py @@ -0,0 +1,47 @@ +from data_profiling.report.presentation.flavours.html.alerts import HTMLAlerts +from data_profiling.report.presentation.flavours.html.collapse import HTMLCollapse +from data_profiling.report.presentation.flavours.html.container import HTMLContainer +from data_profiling.report.presentation.flavours.html.correlation_table import ( + HTMLCorrelationTable, +) +from data_profiling.report.presentation.flavours.html.dropdown import HTMLDropdown +from data_profiling.report.presentation.flavours.html.duplicate import HTMLDuplicate +from data_profiling.report.presentation.flavours.html.frequency_table import ( + HTMLFrequencyTable, +) +from data_profiling.report.presentation.flavours.html.frequency_table_small import ( + HTMLFrequencyTableSmall, +) +from data_profiling.report.presentation.flavours.html.html import HTMLHTML +from data_profiling.report.presentation.flavours.html.image import HTMLImage +from data_profiling.report.presentation.flavours.html.root import HTMLRoot +from data_profiling.report.presentation.flavours.html.sample import HTMLSample +from data_profiling.report.presentation.flavours.html.scores import HTMLScores +from data_profiling.report.presentation.flavours.html.table import HTMLTable +from data_profiling.report.presentation.flavours.html.toggle_button import ( + HTMLToggleButton, +) +from data_profiling.report.presentation.flavours.html.variable import HTMLVariable +from data_profiling.report.presentation.flavours.html.variable_info import ( + HTMLVariableInfo, +) + +__all__ = [ + "HTMLCollapse", + "HTMLContainer", + "HTMLDuplicate", + "HTMLDropdown", + "HTMLFrequencyTable", + "HTMLFrequencyTableSmall", + "HTMLHTML", + "HTMLImage", + "HTMLRoot", + "HTMLSample", + "HTMLTable", + "HTMLToggleButton", + "HTMLVariable", + "HTMLVariableInfo", 
+ "HTMLAlerts", + "HTMLCorrelationTable", + "HTMLScores", +] diff --git a/src/data_profiling/report/presentation/flavours/html/alerts.py b/src/data_profiling/report/presentation/flavours/html/alerts.py new file mode 100644 index 000000000..101fcb5d0 --- /dev/null +++ b/src/data_profiling/report/presentation/flavours/html/alerts.py @@ -0,0 +1,10 @@ +from data_profiling.report.presentation.core.alerts import Alerts +from data_profiling.report.presentation.flavours.html import templates +from data_profiling.utils.styles import get_alert_styles + + +class HTMLAlerts(Alerts): + def render(self) -> str: + styles = get_alert_styles() + + return templates.template("alerts.html").render(**self.content, styles=styles) diff --git a/src/ydata_profiling/report/presentation/flavours/html/collapse.py b/src/data_profiling/report/presentation/flavours/html/collapse.py similarity index 50% rename from src/ydata_profiling/report/presentation/flavours/html/collapse.py rename to src/data_profiling/report/presentation/flavours/html/collapse.py index 0ea5b770b..c52992d4c 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/collapse.py +++ b/src/data_profiling/report/presentation/flavours/html/collapse.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core import Collapse -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core import Collapse +from data_profiling.report.presentation.flavours.html import templates class HTMLCollapse(Collapse): diff --git a/src/ydata_profiling/report/presentation/flavours/html/container.py b/src/data_profiling/report/presentation/flavours/html/container.py similarity index 94% rename from src/ydata_profiling/report/presentation/flavours/html/container.py rename to src/data_profiling/report/presentation/flavours/html/container.py index 7cfa60896..3c30374be 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/container.py +++ 
b/src/data_profiling/report/presentation/flavours/html/container.py @@ -1,8 +1,8 @@ """ Container class definition """ -from ydata_profiling.report.presentation.core.container import Container -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core.container import Container +from data_profiling.report.presentation.flavours.html import templates class HTMLContainer(Container): diff --git a/src/ydata_profiling/report/presentation/flavours/html/correlation_table.py b/src/data_profiling/report/presentation/flavours/html/correlation_table.py similarity index 72% rename from src/ydata_profiling/report/presentation/flavours/html/correlation_table.py rename to src/data_profiling/report/presentation/flavours/html/correlation_table.py index a08601aaf..cf0834f48 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/correlation_table.py +++ b/src/data_profiling/report/presentation/flavours/html/correlation_table.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core.correlation_table import CorrelationTable -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core.correlation_table import CorrelationTable +from data_profiling.report.presentation.flavours.html import templates class HTMLCorrelationTable(CorrelationTable): diff --git a/src/ydata_profiling/report/presentation/flavours/html/dropdown.py b/src/data_profiling/report/presentation/flavours/html/dropdown.py similarity index 50% rename from src/ydata_profiling/report/presentation/flavours/html/dropdown.py rename to src/data_profiling/report/presentation/flavours/html/dropdown.py index 7d2fb0ecb..1536a74fd 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/dropdown.py +++ b/src/data_profiling/report/presentation/flavours/html/dropdown.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core import Dropdown -from 
ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core import Dropdown +from data_profiling.report.presentation.flavours.html import templates class HTMLDropdown(Dropdown): diff --git a/src/ydata_profiling/report/presentation/flavours/html/duplicate.py b/src/data_profiling/report/presentation/flavours/html/duplicate.py similarity index 80% rename from src/ydata_profiling/report/presentation/flavours/html/duplicate.py rename to src/data_profiling/report/presentation/flavours/html/duplicate.py index 721060056..849bb70f3 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/duplicate.py +++ b/src/data_profiling/report/presentation/flavours/html/duplicate.py @@ -1,7 +1,7 @@ import pandas as pd -from ydata_profiling.report.presentation.core.duplicate import Duplicate -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core.duplicate import Duplicate +from data_profiling.report.presentation.flavours.html import templates def to_html(df: pd.DataFrame) -> str: diff --git a/src/ydata_profiling/report/presentation/flavours/html/frequency_table.py b/src/data_profiling/report/presentation/flavours/html/frequency_table.py similarity index 80% rename from src/ydata_profiling/report/presentation/flavours/html/frequency_table.py rename to src/data_profiling/report/presentation/flavours/html/frequency_table.py index 63b1db0b9..c8682b67b 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/frequency_table.py +++ b/src/data_profiling/report/presentation/flavours/html/frequency_table.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core import FrequencyTable -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core import FrequencyTable +from data_profiling.report.presentation.flavours.html import templates class HTMLFrequencyTable(FrequencyTable): diff --git 
a/src/ydata_profiling/report/presentation/flavours/html/frequency_table_small.py b/src/data_profiling/report/presentation/flavours/html/frequency_table_small.py similarity index 72% rename from src/ydata_profiling/report/presentation/flavours/html/frequency_table_small.py rename to src/data_profiling/report/presentation/flavours/html/frequency_table_small.py index a846cf350..44cd66b53 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/frequency_table_small.py +++ b/src/data_profiling/report/presentation/flavours/html/frequency_table_small.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core import FrequencyTableSmall -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core import FrequencyTableSmall +from data_profiling.report.presentation.flavours.html import templates class HTMLFrequencyTableSmall(FrequencyTableSmall): diff --git a/src/ydata_profiling/report/presentation/flavours/html/html.py b/src/data_profiling/report/presentation/flavours/html/html.py similarity index 60% rename from src/ydata_profiling/report/presentation/flavours/html/html.py rename to src/data_profiling/report/presentation/flavours/html/html.py index eefea5d1a..e9bf54ccc 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/html.py +++ b/src/data_profiling/report/presentation/flavours/html/html.py @@ -1,4 +1,4 @@ -from ydata_profiling.report.presentation.core import HTML +from data_profiling.report.presentation.core import HTML class HTMLHTML(HTML): diff --git a/src/data_profiling/report/presentation/flavours/html/image.py b/src/data_profiling/report/presentation/flavours/html/image.py new file mode 100644 index 000000000..863cfd9e6 --- /dev/null +++ b/src/data_profiling/report/presentation/flavours/html/image.py @@ -0,0 +1,7 @@ +from data_profiling.report.presentation.core import Image +from data_profiling.report.presentation.flavours.html import templates + + +class HTMLImage(Image): + 
def render(self) -> str: + return templates.template("diagram.html").render(**self.content) diff --git a/src/ydata_profiling/report/presentation/flavours/html/root.py b/src/data_profiling/report/presentation/flavours/html/root.py similarity index 71% rename from src/ydata_profiling/report/presentation/flavours/html/root.py rename to src/data_profiling/report/presentation/flavours/html/root.py index 72ef0dbac..47e05ece9 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/root.py +++ b/src/data_profiling/report/presentation/flavours/html/root.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core.root import Root -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core.root import Root +from data_profiling.report.presentation.flavours.html import templates class HTMLRoot(Root): diff --git a/src/ydata_profiling/report/presentation/flavours/html/sample.py b/src/data_profiling/report/presentation/flavours/html/sample.py similarity index 67% rename from src/ydata_profiling/report/presentation/flavours/html/sample.py rename to src/data_profiling/report/presentation/flavours/html/sample.py index 73fd1eca6..7abfba157 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/sample.py +++ b/src/data_profiling/report/presentation/flavours/html/sample.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core.sample import Sample -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core.sample import Sample +from data_profiling.report.presentation.flavours.html import templates class HTMLSample(Sample): diff --git a/src/ydata_profiling/report/presentation/flavours/html/scores.py b/src/data_profiling/report/presentation/flavours/html/scores.py similarity index 58% rename from src/ydata_profiling/report/presentation/flavours/html/scores.py rename to src/data_profiling/report/presentation/flavours/html/scores.py index 
cc8e6f5c4..142c08c45 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/scores.py +++ b/src/data_profiling/report/presentation/flavours/html/scores.py @@ -1,8 +1,8 @@ """ Scores HTML renderer class """ -from ydata_profiling.report.presentation.core.scores import Scores -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core.scores import Scores +from data_profiling.report.presentation.flavours.html import templates # create the logic for this one diff --git a/src/data_profiling/report/presentation/flavours/html/table.py b/src/data_profiling/report/presentation/flavours/html/table.py new file mode 100644 index 000000000..6a5b144f4 --- /dev/null +++ b/src/data_profiling/report/presentation/flavours/html/table.py @@ -0,0 +1,7 @@ +from data_profiling.report.presentation.core.table import Table +from data_profiling.report.presentation.flavours.html import templates + + +class HTMLTable(Table): + def render(self) -> str: + return templates.template("table.html").render(**self.content) diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates.py b/src/data_profiling/report/presentation/flavours/html/templates.py similarity index 91% rename from src/ydata_profiling/report/presentation/flavours/html/templates.py rename to src/data_profiling/report/presentation/flavours/html/templates.py index 85e24a46a..b1ba21cf8 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/templates.py +++ b/src/data_profiling/report/presentation/flavours/html/templates.py @@ -4,12 +4,12 @@ import jinja2 -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import fmt, fmt_badge, fmt_numeric, fmt_percent +from data_profiling.config import Settings +from data_profiling.report.formatters import fmt, fmt_badge, fmt_numeric, fmt_percent # Initializing Jinja package_loader = jinja2.PackageLoader( - "ydata_profiling", "report/presentation/flavours/html/templates" 
+ "data_profiling", "report/presentation/flavours/html/templates" ) jinja2_env = jinja2.Environment( lstrip_blocks=True, trim_blocks=True, loader=package_loader diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_constant.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_constant.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_constant.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_constant.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_constant_length.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_constant_length.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_constant_length.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_constant_length.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_dirty_category.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_dirty_category.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_dirty_category.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_dirty_category.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_duplicates.html 
b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_duplicates.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_duplicates.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_duplicates.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_empty.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_empty.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_empty.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_empty.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_high_cardinality.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_high_cardinality.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_high_cardinality.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_high_cardinality.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_high_correlation.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_high_correlation.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_high_correlation.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_high_correlation.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_imbalance.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_imbalance.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_imbalance.html rename to 
src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_imbalance.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_infinite.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_infinite.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_infinite.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_infinite.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_missing.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_missing.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_missing.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_missing.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_near_duplicates.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_near_duplicates.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_near_duplicates.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_near_duplicates.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_non_stationary.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_non_stationary.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_non_stationary.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_non_stationary.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_seasonal.html 
b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_seasonal.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_seasonal.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_seasonal.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_skewed.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_skewed.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_skewed.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_skewed.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_truncated.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_truncated.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_truncated.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_truncated.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_type_date.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_type_date.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_type_date.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_type_date.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_uniform.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_uniform.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_uniform.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_uniform.html 
diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_unique.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_unique.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_unique.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_unique.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_unsupported.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_unsupported.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_unsupported.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_unsupported.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_zeros.html b/src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_zeros.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_zeros.html rename to src/data_profiling/report/presentation/flavours/html/templates/alerts/alert_zeros.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/collapse.html b/src/data_profiling/report/presentation/flavours/html/templates/collapse.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/collapse.html rename to src/data_profiling/report/presentation/flavours/html/templates/collapse.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/correlation_table.html b/src/data_profiling/report/presentation/flavours/html/templates/correlation_table.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/correlation_table.html rename to 
src/data_profiling/report/presentation/flavours/html/templates/correlation_table.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/diagram.html b/src/data_profiling/report/presentation/flavours/html/templates/diagram.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/diagram.html rename to src/data_profiling/report/presentation/flavours/html/templates/diagram.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/dropdown.html b/src/data_profiling/report/presentation/flavours/html/templates/dropdown.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/dropdown.html rename to src/data_profiling/report/presentation/flavours/html/templates/dropdown.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/duplicate.html b/src/data_profiling/report/presentation/flavours/html/templates/duplicate.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/duplicate.html rename to src/data_profiling/report/presentation/flavours/html/templates/duplicate.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/frequency_table.html b/src/data_profiling/report/presentation/flavours/html/templates/frequency_table.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/frequency_table.html rename to src/data_profiling/report/presentation/flavours/html/templates/frequency_table.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/frequency_table_small.html b/src/data_profiling/report/presentation/flavours/html/templates/frequency_table_small.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/frequency_table_small.html rename to 
src/data_profiling/report/presentation/flavours/html/templates/frequency_table_small.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/report.html b/src/data_profiling/report/presentation/flavours/html/templates/report.html similarity index 90% rename from src/ydata_profiling/report/presentation/flavours/html/templates/report.html rename to src/data_profiling/report/presentation/flavours/html/templates/report.html index e3e44604a..039497ede 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/templates/report.html +++ b/src/data_profiling/report/presentation/flavours/html/templates/report.html @@ -6,7 +6,7 @@ - + {{ title | striptags }} diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sample.html b/src/data_profiling/report/presentation/flavours/html/templates/sample.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sample.html rename to src/data_profiling/report/presentation/flavours/html/templates/sample.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/scores.html b/src/data_profiling/report/presentation/flavours/html/templates/scores.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/scores.html rename to src/data_profiling/report/presentation/flavours/html/templates/scores.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/batch_grid.html b/src/data_profiling/report/presentation/flavours/html/templates/sequence/batch_grid.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/batch_grid.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/batch_grid.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/grid.html 
b/src/data_profiling/report/presentation/flavours/html/templates/sequence/grid.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/grid.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/grid.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/list.html b/src/data_profiling/report/presentation/flavours/html/templates/sequence/list.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/list.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/list.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/named_list.html b/src/data_profiling/report/presentation/flavours/html/templates/sequence/named_list.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/named_list.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/named_list.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/overview_tabs.html b/src/data_profiling/report/presentation/flavours/html/templates/sequence/overview_tabs.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/overview_tabs.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/overview_tabs.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/scores.html b/src/data_profiling/report/presentation/flavours/html/templates/sequence/scores.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/scores.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/scores.html diff --git 
a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/sections.html b/src/data_profiling/report/presentation/flavours/html/templates/sequence/sections.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/sections.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/sections.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/select.html b/src/data_profiling/report/presentation/flavours/html/templates/sequence/select.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/select.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/select.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/sequence/tabs.html b/src/data_profiling/report/presentation/flavours/html/templates/sequence/tabs.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/sequence/tabs.html rename to src/data_profiling/report/presentation/flavours/html/templates/sequence/tabs.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/table.html b/src/data_profiling/report/presentation/flavours/html/templates/table.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/table.html rename to src/data_profiling/report/presentation/flavours/html/templates/table.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/toggle_button.html b/src/data_profiling/report/presentation/flavours/html/templates/toggle_button.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/toggle_button.html rename to src/data_profiling/report/presentation/flavours/html/templates/toggle_button.html diff --git 
a/src/ydata_profiling/report/presentation/flavours/html/templates/variable.html b/src/data_profiling/report/presentation/flavours/html/templates/variable.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/variable.html rename to src/data_profiling/report/presentation/flavours/html/templates/variable.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/variable_info.html b/src/data_profiling/report/presentation/flavours/html/templates/variable_info.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/variable_info.html rename to src/data_profiling/report/presentation/flavours/html/templates/variable_info.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/bootstrap.bundle.min.js b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/bootstrap.bundle.min.js similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/bootstrap.bundle.min.js rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/bootstrap.bundle.min.js diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/bootstrap.min.css b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/bootstrap.min.css similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/bootstrap.min.css rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/bootstrap.min.css diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/cosmo.bootstrap.min.css b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/cosmo.bootstrap.min.css similarity index 100% rename from 
src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/cosmo.bootstrap.min.css rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/cosmo.bootstrap.min.css diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/flatly.bootstrap.min.css b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/flatly.bootstrap.min.css similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/flatly.bootstrap.min.css rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/flatly.bootstrap.min.css diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/script.js b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/script.js similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/script.js rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/script.js diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/simplex.bootstrap.min.css b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/simplex.bootstrap.min.css similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/simplex.bootstrap.min.css rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/simplex.bootstrap.min.css diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/style.css diff 
--git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/united.bootstrap.min.css b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/united.bootstrap.min.css similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/assets/united.bootstrap.min.css rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/assets/united.bootstrap.min.css diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/footer.html b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/footer.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/footer.html rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/footer.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/javascript.html b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/javascript.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/javascript.html rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/javascript.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/navigation.html b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/navigation.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/navigation.html rename to src/data_profiling/report/presentation/flavours/html/templates/wrapper/navigation.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/style.html b/src/data_profiling/report/presentation/flavours/html/templates/wrapper/style.html similarity index 100% rename from src/ydata_profiling/report/presentation/flavours/html/templates/wrapper/style.html rename to 
src/data_profiling/report/presentation/flavours/html/templates/wrapper/style.html diff --git a/src/ydata_profiling/report/presentation/flavours/html/toggle_button.py b/src/data_profiling/report/presentation/flavours/html/toggle_button.py similarity index 51% rename from src/ydata_profiling/report/presentation/flavours/html/toggle_button.py rename to src/data_profiling/report/presentation/flavours/html/toggle_button.py index f14d17f0e..b7e829368 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/toggle_button.py +++ b/src/data_profiling/report/presentation/flavours/html/toggle_button.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core import ToggleButton -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core import ToggleButton +from data_profiling.report.presentation.flavours.html import templates class HTMLToggleButton(ToggleButton): diff --git a/src/ydata_profiling/report/presentation/flavours/html/variable.py b/src/data_profiling/report/presentation/flavours/html/variable.py similarity index 50% rename from src/ydata_profiling/report/presentation/flavours/html/variable.py rename to src/data_profiling/report/presentation/flavours/html/variable.py index d2ba51b63..3fd85081e 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/variable.py +++ b/src/data_profiling/report/presentation/flavours/html/variable.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core import Variable -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core import Variable +from data_profiling.report.presentation.flavours.html import templates class HTMLVariable(Variable): diff --git a/src/ydata_profiling/report/presentation/flavours/html/variable_info.py b/src/data_profiling/report/presentation/flavours/html/variable_info.py similarity index 51% rename from 
src/ydata_profiling/report/presentation/flavours/html/variable_info.py rename to src/data_profiling/report/presentation/flavours/html/variable_info.py index 3885243ed..f44217251 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/variable_info.py +++ b/src/data_profiling/report/presentation/flavours/html/variable_info.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.core import VariableInfo -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core import VariableInfo +from data_profiling.report.presentation.flavours.html import templates class HTMLVariableInfo(VariableInfo): diff --git a/src/data_profiling/report/presentation/flavours/widget/__init__.py b/src/data_profiling/report/presentation/flavours/widget/__init__.py new file mode 100644 index 000000000..91d47bef6 --- /dev/null +++ b/src/data_profiling/report/presentation/flavours/widget/__init__.py @@ -0,0 +1,49 @@ +from data_profiling.report.presentation.flavours.widget.alerts import WidgetAlerts +from data_profiling.report.presentation.flavours.widget.collapse import WidgetCollapse +from data_profiling.report.presentation.flavours.widget.container import ( + WidgetContainer, +) +from data_profiling.report.presentation.flavours.widget.correlation_table import ( + WidgetCorrelationTable, +) +from data_profiling.report.presentation.flavours.widget.dropdown import WidgetDropdown +from data_profiling.report.presentation.flavours.widget.duplicate import ( + WidgetDuplicate, +) +from data_profiling.report.presentation.flavours.widget.frequency_table import ( + WidgetFrequencyTable, +) +from data_profiling.report.presentation.flavours.widget.frequency_table_small import ( + WidgetFrequencyTableSmall, +) +from data_profiling.report.presentation.flavours.widget.html import WidgetHTML +from data_profiling.report.presentation.flavours.widget.image import WidgetImage +from data_profiling.report.presentation.flavours.widget.root import 
WidgetRoot +from data_profiling.report.presentation.flavours.widget.sample import WidgetSample +from data_profiling.report.presentation.flavours.widget.table import WidgetTable +from data_profiling.report.presentation.flavours.widget.toggle_button import ( + WidgetToggleButton, +) +from data_profiling.report.presentation.flavours.widget.variable import WidgetVariable +from data_profiling.report.presentation.flavours.widget.variable_info import ( + WidgetVariableInfo, +) + +__all__ = [ + "WidgetCollapse", + "WidgetContainer", + "WidgetDuplicate", + "WidgetDropdown", + "WidgetFrequencyTable", + "WidgetFrequencyTableSmall", + "WidgetHTML", + "WidgetImage", + "WidgetRoot", + "WidgetSample", + "WidgetTable", + "WidgetToggleButton", + "WidgetVariable", + "WidgetVariableInfo", + "WidgetAlerts", + "WidgetCorrelationTable", +] diff --git a/src/ydata_profiling/report/presentation/flavours/widget/alerts.py b/src/data_profiling/report/presentation/flavours/widget/alerts.py similarity index 86% rename from src/ydata_profiling/report/presentation/flavours/widget/alerts.py rename to src/data_profiling/report/presentation/flavours/widget/alerts.py index 6032ea8a5..40e95c422 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/alerts.py +++ b/src/data_profiling/report/presentation/flavours/widget/alerts.py @@ -2,9 +2,9 @@ from ipywidgets import HTML, Button, widgets -from ydata_profiling.report.presentation.core import Alerts -from ydata_profiling.report.presentation.flavours.html import templates -from ydata_profiling.utils.styles import get_alert_styles +from data_profiling.report.presentation.core import Alerts +from data_profiling.report.presentation.flavours.html import templates +from data_profiling.utils.styles import get_alert_styles def get_row(items: List[widgets.Widget]) -> widgets.GridBox: diff --git a/src/ydata_profiling/report/presentation/flavours/widget/collapse.py b/src/data_profiling/report/presentation/flavours/widget/collapse.py similarity index 
95% rename from src/ydata_profiling/report/presentation/flavours/widget/collapse.py rename to src/data_profiling/report/presentation/flavours/widget/collapse.py index 2c580a27d..f7fb6683d 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/collapse.py +++ b/src/data_profiling/report/presentation/flavours/widget/collapse.py @@ -1,6 +1,6 @@ from ipywidgets import Box, widgets -from ydata_profiling.report.presentation.core import Collapse +from data_profiling.report.presentation.core import Collapse class WidgetCollapse(Collapse): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/container.py b/src/data_profiling/report/presentation/flavours/widget/container.py similarity index 96% rename from src/ydata_profiling/report/presentation/flavours/widget/container.py rename to src/data_profiling/report/presentation/flavours/widget/container.py index 854f467a5..9f4b1ecc4 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/container.py +++ b/src/data_profiling/report/presentation/flavours/widget/container.py @@ -2,8 +2,8 @@ from ipywidgets import widgets -from ydata_profiling.report.presentation.core.container import Container -from ydata_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.core.container import Container +from data_profiling.report.presentation.core.renderable import Renderable def get_name(item: Renderable) -> str: diff --git a/src/ydata_profiling/report/presentation/flavours/widget/correlation_table.py b/src/data_profiling/report/presentation/flavours/widget/correlation_table.py similarity index 80% rename from src/ydata_profiling/report/presentation/flavours/widget/correlation_table.py rename to src/data_profiling/report/presentation/flavours/widget/correlation_table.py index 86a1ab68a..804a274ae 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/correlation_table.py +++ 
b/src/data_profiling/report/presentation/flavours/widget/correlation_table.py @@ -1,7 +1,7 @@ from IPython.display import display from ipywidgets import Output, widgets -from ydata_profiling.report.presentation.core.correlation_table import CorrelationTable +from data_profiling.report.presentation.core.correlation_table import CorrelationTable class WidgetCorrelationTable(CorrelationTable): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/dropdown.py b/src/data_profiling/report/presentation/flavours/widget/dropdown.py similarity index 93% rename from src/ydata_profiling/report/presentation/flavours/widget/dropdown.py rename to src/data_profiling/report/presentation/flavours/widget/dropdown.py index e17653674..651a56079 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/dropdown.py +++ b/src/data_profiling/report/presentation/flavours/widget/dropdown.py @@ -1,6 +1,6 @@ from ipywidgets import widgets -from ydata_profiling.report.presentation.core import Dropdown +from data_profiling.report.presentation.core import Dropdown class WidgetDropdown(Dropdown): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/duplicate.py b/src/data_profiling/report/presentation/flavours/widget/duplicate.py similarity index 82% rename from src/ydata_profiling/report/presentation/flavours/widget/duplicate.py rename to src/data_profiling/report/presentation/flavours/widget/duplicate.py index a12281faf..75754500c 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/duplicate.py +++ b/src/data_profiling/report/presentation/flavours/widget/duplicate.py @@ -1,7 +1,7 @@ from IPython.display import display from ipywidgets import Output, widgets -from ydata_profiling.report.presentation.core.duplicate import Duplicate +from data_profiling.report.presentation.core.duplicate import Duplicate class WidgetDuplicate(Duplicate): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/frequency_table.py 
b/src/data_profiling/report/presentation/flavours/widget/frequency_table.py similarity index 95% rename from src/ydata_profiling/report/presentation/flavours/widget/frequency_table.py rename to src/data_profiling/report/presentation/flavours/widget/frequency_table.py index 2e60c05a0..c0db0aa36 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/frequency_table.py +++ b/src/data_profiling/report/presentation/flavours/widget/frequency_table.py @@ -2,7 +2,7 @@ from ipywidgets import GridspecLayout, VBox, widgets -from ydata_profiling.report.presentation.core.frequency_table import FrequencyTable +from data_profiling.report.presentation.core.frequency_table import FrequencyTable def get_table( diff --git a/src/ydata_profiling/report/presentation/flavours/widget/frequency_table_small.py b/src/data_profiling/report/presentation/flavours/widget/frequency_table_small.py similarity index 96% rename from src/ydata_profiling/report/presentation/flavours/widget/frequency_table_small.py rename to src/data_profiling/report/presentation/flavours/widget/frequency_table_small.py index 2cd202055..1dabb4c29 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/frequency_table_small.py +++ b/src/data_profiling/report/presentation/flavours/widget/frequency_table_small.py @@ -2,7 +2,7 @@ from ipywidgets import widgets -from ydata_profiling.report.presentation.core.frequency_table_small import ( +from data_profiling.report.presentation.core.frequency_table_small import ( FrequencyTableSmall, ) diff --git a/src/ydata_profiling/report/presentation/flavours/widget/html.py b/src/data_profiling/report/presentation/flavours/widget/html.py similarity index 79% rename from src/ydata_profiling/report/presentation/flavours/widget/html.py rename to src/data_profiling/report/presentation/flavours/widget/html.py index 3b6d69c91..12e34a0f9 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/html.py +++ 
b/src/data_profiling/report/presentation/flavours/widget/html.py @@ -1,6 +1,6 @@ from ipywidgets import widgets -from ydata_profiling.report.presentation.core.html import HTML +from data_profiling.report.presentation.core.html import HTML class WidgetHTML(HTML): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/image.py b/src/data_profiling/report/presentation/flavours/widget/image.py similarity index 87% rename from src/ydata_profiling/report/presentation/flavours/widget/image.py rename to src/data_profiling/report/presentation/flavours/widget/image.py index 282500a9a..71cf07251 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/image.py +++ b/src/data_profiling/report/presentation/flavours/widget/image.py @@ -2,8 +2,8 @@ from ipywidgets import widgets -from ydata_profiling.config import ImageType -from ydata_profiling.report.presentation.core.image import Image +from data_profiling.config import ImageType +from data_profiling.report.presentation.core.image import Image class WidgetImage(Image): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/notebook.py b/src/data_profiling/report/presentation/flavours/widget/notebook.py similarity index 95% rename from src/ydata_profiling/report/presentation/flavours/widget/notebook.py rename to src/data_profiling/report/presentation/flavours/widget/notebook.py index 90e7fe6bf..1ee97936e 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/notebook.py +++ b/src/data_profiling/report/presentation/flavours/widget/notebook.py @@ -8,8 +8,8 @@ from IPython.display import HTML from IPython.lib.display import IFrame -from ydata_profiling import ProfileReport -from ydata_profiling.config import IframeAttribute, Settings +from data_profiling import ProfileReport +from data_profiling.config import IframeAttribute, Settings def get_notebook_iframe_srcdoc(config: Settings, profile: ProfileReport) -> "HTML": diff --git 
a/src/ydata_profiling/report/presentation/flavours/widget/root.py b/src/data_profiling/report/presentation/flavours/widget/root.py similarity index 77% rename from src/ydata_profiling/report/presentation/flavours/widget/root.py rename to src/data_profiling/report/presentation/flavours/widget/root.py index 46656ac5d..3c3df4327 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/root.py +++ b/src/data_profiling/report/presentation/flavours/widget/root.py @@ -1,6 +1,6 @@ from ipywidgets import widgets -from ydata_profiling.report.presentation.core.root import Root +from data_profiling.report.presentation.core.root import Root class WidgetRoot(Root): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/sample.py b/src/data_profiling/report/presentation/flavours/widget/sample.py similarity index 83% rename from src/ydata_profiling/report/presentation/flavours/widget/sample.py rename to src/data_profiling/report/presentation/flavours/widget/sample.py index 24f9a3647..1238e604f 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/sample.py +++ b/src/data_profiling/report/presentation/flavours/widget/sample.py @@ -1,7 +1,7 @@ from IPython.display import display from ipywidgets import Output, widgets -from ydata_profiling.report.presentation.core.sample import Sample +from data_profiling.report.presentation.core.sample import Sample class WidgetSample(Sample): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/table.py b/src/data_profiling/report/presentation/flavours/widget/table.py similarity index 87% rename from src/ydata_profiling/report/presentation/flavours/widget/table.py rename to src/data_profiling/report/presentation/flavours/widget/table.py index 07a97e3eb..c933856a6 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/table.py +++ b/src/data_profiling/report/presentation/flavours/widget/table.py @@ -2,8 +2,8 @@ from ipywidgets import GridspecLayout, VBox, widgets -from 
ydata_profiling.report.formatters import fmt_color -from ydata_profiling.report.presentation.core.table import Table +from data_profiling.report.formatters import fmt_color +from data_profiling.report.presentation.core.table import Table def get_table(items: List[Dict[str, Any]]) -> GridspecLayout: diff --git a/src/ydata_profiling/report/presentation/flavours/widget/toggle_button.py b/src/data_profiling/report/presentation/flavours/widget/toggle_button.py similarity index 87% rename from src/ydata_profiling/report/presentation/flavours/widget/toggle_button.py rename to src/data_profiling/report/presentation/flavours/widget/toggle_button.py index 498e417d2..2c924ccad 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/toggle_button.py +++ b/src/data_profiling/report/presentation/flavours/widget/toggle_button.py @@ -1,6 +1,6 @@ from ipywidgets import widgets -from ydata_profiling.report.presentation.core import ToggleButton +from data_profiling.report.presentation.core import ToggleButton class WidgetToggleButton(ToggleButton): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/variable.py b/src/data_profiling/report/presentation/flavours/widget/variable.py similarity index 82% rename from src/ydata_profiling/report/presentation/flavours/widget/variable.py rename to src/data_profiling/report/presentation/flavours/widget/variable.py index c309f17b3..6edd05f30 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/variable.py +++ b/src/data_profiling/report/presentation/flavours/widget/variable.py @@ -1,6 +1,6 @@ from ipywidgets import widgets -from ydata_profiling.report.presentation.core import Variable +from data_profiling.report.presentation.core import Variable class WidgetVariable(Variable): diff --git a/src/ydata_profiling/report/presentation/flavours/widget/variable_info.py b/src/data_profiling/report/presentation/flavours/widget/variable_info.py similarity index 62% rename from 
src/ydata_profiling/report/presentation/flavours/widget/variable_info.py rename to src/data_profiling/report/presentation/flavours/widget/variable_info.py index c186a9972..553de8717 100644 --- a/src/ydata_profiling/report/presentation/flavours/widget/variable_info.py +++ b/src/data_profiling/report/presentation/flavours/widget/variable_info.py @@ -1,7 +1,7 @@ from ipywidgets import widgets -from ydata_profiling.report.presentation.core import VariableInfo -from ydata_profiling.report.presentation.flavours.html import templates +from data_profiling.report.presentation.core import VariableInfo +from data_profiling.report.presentation.flavours.html import templates class WidgetVariableInfo(VariableInfo): diff --git a/src/ydata_profiling/report/presentation/frequency_table_utils.py b/src/data_profiling/report/presentation/frequency_table_utils.py similarity index 100% rename from src/ydata_profiling/report/presentation/frequency_table_utils.py rename to src/data_profiling/report/presentation/frequency_table_utils.py diff --git a/src/ydata_profiling/report/structure/__init__.py b/src/data_profiling/report/structure/__init__.py similarity index 100% rename from src/ydata_profiling/report/structure/__init__.py rename to src/data_profiling/report/structure/__init__.py diff --git a/src/ydata_profiling/report/structure/correlations.py b/src/data_profiling/report/structure/correlations.py similarity index 92% rename from src/ydata_profiling/report/structure/correlations.py rename to src/data_profiling/report/structure/correlations.py index b56e5a245..e46444c51 100644 --- a/src/ydata_profiling/report/structure/correlations.py +++ b/src/data_profiling/report/structure/correlations.py @@ -1,10 +1,10 @@ from typing import List, Optional -from ydata_profiling.config import Settings -from ydata_profiling.model import BaseDescription -from ydata_profiling.report.presentation.core import Container, CorrelationTable, Image -from ydata_profiling.report.presentation.core.renderable 
import Renderable -from ydata_profiling.visualisation import plot +from data_profiling.config import Settings +from data_profiling.model import BaseDescription +from data_profiling.report.presentation.core import Container, CorrelationTable, Image +from data_profiling.report.presentation.core.renderable import Renderable +from data_profiling.visualisation import plot def get_correlation_items( diff --git a/src/ydata_profiling/report/structure/overview.py b/src/data_profiling/report/structure/overview.py similarity index 92% rename from src/ydata_profiling/report/structure/overview.py rename to src/data_profiling/report/structure/overview.py index 38b129934..561c03967 100644 --- a/src/ydata_profiling/report/structure/overview.py +++ b/src/data_profiling/report/structure/overview.py @@ -2,11 +2,11 @@ from typing import Any, List from urllib.parse import quote -from ydata_profiling.config import Settings -from ydata_profiling.model import BaseDescription -from ydata_profiling.model.alerts import AlertType -from ydata_profiling.model.description import TimeIndexAnalysis -from ydata_profiling.report.formatters import ( +from data_profiling.config import Settings +from data_profiling.model import BaseDescription +from data_profiling.model.alerts import AlertType +from data_profiling.model.description import TimeIndexAnalysis +from data_profiling.report.formatters import ( fmt, fmt_bytesize, fmt_number, @@ -16,11 +16,11 @@ fmt_timespan_timedelta, list_args, ) -from ydata_profiling.report.presentation.core import Alerts, Container -from ydata_profiling.report.presentation.core import Image as ImageWidget -from ydata_profiling.report.presentation.core import Table -from ydata_profiling.report.presentation.core.renderable import Renderable -from ydata_profiling.visualisation.plot import plot_overview_timeseries +from data_profiling.report.presentation.core import Alerts, Container +from data_profiling.report.presentation.core import Image as ImageWidget +from 
data_profiling.report.presentation.core import Table +from data_profiling.report.presentation.core.renderable import Renderable +from data_profiling.visualisation.plot import plot_overview_timeseries def get_dataset_overview(config: Settings, summary: BaseDescription) -> Renderable: @@ -153,15 +153,15 @@ def get_dataset_reproduction(config: Settings, summary: BaseDescription) -> Rend A renderable object """ - version = summary.package["ydata_profiling_version"] - config_file = summary.package["ydata_profiling_config"] + version = summary.package["data_profiling_version"] + config_file = summary.package["data_profiling_config"] date_start = summary.analysis.date_start date_end = summary.analysis.date_end duration = summary.analysis.duration @list_args def fmt_version(version: str) -> str: - return f'ydata-profiling v{version}' + return f'data-profiling v{version}' @list_args def fmt_config(config: str) -> str: diff --git a/src/ydata_profiling/report/structure/report.py b/src/data_profiling/report/structure/report.py similarity index 94% rename from src/ydata_profiling/report/structure/report.py rename to src/data_profiling/report/structure/report.py index 482b410b2..47b97394b 100644 --- a/src/ydata_profiling/report/structure/report.py +++ b/src/data_profiling/report/structure/report.py @@ -4,24 +4,24 @@ import pandas as pd from tqdm.auto import tqdm -from ydata_profiling.config import Settings -from ydata_profiling.model import BaseDescription -from ydata_profiling.model.alerts import AlertType -from ydata_profiling.model.handler import get_render_map -from ydata_profiling.report.presentation.core import ( +from data_profiling.config import Settings +from data_profiling.model import BaseDescription +from data_profiling.model.alerts import AlertType +from data_profiling.model.handler import get_render_map +from data_profiling.report.presentation.core import ( HTML, Collapse, Container, Dropdown, Duplicate, ) -from ydata_profiling.report.presentation.core import Image 
as ImageWidget -from ydata_profiling.report.presentation.core import Sample, ToggleButton, Variable -from ydata_profiling.report.presentation.core.renderable import Renderable -from ydata_profiling.report.presentation.core.root import Root -from ydata_profiling.report.structure.correlations import get_correlation_items -from ydata_profiling.report.structure.overview import get_dataset_items -from ydata_profiling.utils.dataframe import slugify +from data_profiling.report.presentation.core import Image as ImageWidget +from data_profiling.report.presentation.core import Sample, ToggleButton, Variable +from data_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.core.root import Root +from data_profiling.report.structure.correlations import get_correlation_items +from data_profiling.report.structure.overview import get_dataset_items +from data_profiling.utils.dataframe import slugify def get_missing_items(config: Settings, summary: BaseDescription) -> list: diff --git a/src/data_profiling/report/structure/variables/__init__.py b/src/data_profiling/report/structure/variables/__init__.py new file mode 100644 index 000000000..9b15b6e96 --- /dev/null +++ b/src/data_profiling/report/structure/variables/__init__.py @@ -0,0 +1,35 @@ +from data_profiling.report.structure.variables.render_boolean import render_boolean +from data_profiling.report.structure.variables.render_categorical import ( + render_categorical, +) +from data_profiling.report.structure.variables.render_common import render_common +from data_profiling.report.structure.variables.render_complex import render_complex +from data_profiling.report.structure.variables.render_count import render_count +from data_profiling.report.structure.variables.render_date import render_date +from data_profiling.report.structure.variables.render_file import render_file +from data_profiling.report.structure.variables.render_generic import render_generic +from 
data_profiling.report.structure.variables.render_image import render_image +from data_profiling.report.structure.variables.render_path import render_path +from data_profiling.report.structure.variables.render_real import render_real +from data_profiling.report.structure.variables.render_text import render_text +from data_profiling.report.structure.variables.render_timeseries import ( + render_timeseries, +) +from data_profiling.report.structure.variables.render_url import render_url + +__all__ = [ + "render_boolean", + "render_categorical", + "render_common", + "render_complex", + "render_count", + "render_date", + "render_file", + "render_generic", + "render_image", + "render_path", + "render_real", + "render_text", + "render_timeseries", + "render_url", +] diff --git a/src/ydata_profiling/report/structure/variables/render_boolean.py b/src/data_profiling/report/structure/variables/render_boolean.py similarity index 89% rename from src/ydata_profiling/report/structure/variables/render_boolean.py rename to src/data_profiling/report/structure/variables/render_boolean.py index e6bdbe4d0..8b646f3d2 100644 --- a/src/ydata_profiling/report/structure/variables/render_boolean.py +++ b/src/data_profiling/report/structure/variables/render_boolean.py @@ -1,8 +1,8 @@ from typing import List -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent -from ydata_profiling.report.presentation.core import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent +from data_profiling.report.presentation.core import ( Container, FrequencyTable, FrequencyTableSmall, @@ -10,10 +10,10 @@ Table, VariableInfo, ) -from ydata_profiling.report.presentation.core.renderable import Renderable -from ydata_profiling.report.presentation.frequency_table_utils import freq_table -from ydata_profiling.report.structure.variables.render_common import render_common -from 
ydata_profiling.visualisation.plot import cat_frequency_plot +from data_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.frequency_table_utils import freq_table +from data_profiling.report.structure.variables.render_common import render_common +from data_profiling.visualisation.plot import cat_frequency_plot def render_boolean(config: Settings, summary: dict) -> dict: diff --git a/src/ydata_profiling/report/structure/variables/render_categorical.py b/src/data_profiling/report/structure/variables/render_categorical.py similarity index 97% rename from src/ydata_profiling/report/structure/variables/render_categorical.py rename to src/data_profiling/report/structure/variables/render_categorical.py index 8fa6b6ae0..6123678c2 100644 --- a/src/ydata_profiling/report/structure/variables/render_categorical.py +++ b/src/data_profiling/report/structure/variables/render_categorical.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import ( fmt, fmt_bytesize, fmt_number, @@ -11,7 +11,7 @@ fmt_percent, help, ) -from ydata_profiling.report.presentation.core import ( +from data_profiling.report.presentation.core import ( HTML, Container, FrequencyTable, @@ -20,11 +20,11 @@ Table, VariableInfo, ) -from ydata_profiling.report.presentation.core.renderable import Renderable -from ydata_profiling.report.presentation.frequency_table_utils import freq_table -from ydata_profiling.report.structure.variables.render_common import render_common -from ydata_profiling.report.utils import image_or_empty -from ydata_profiling.visualisation.plot import cat_frequency_plot, histogram +from data_profiling.report.presentation.core.renderable import Renderable +from data_profiling.report.presentation.frequency_table_utils import freq_table +from 
data_profiling.report.structure.variables.render_common import render_common +from data_profiling.report.utils import image_or_empty +from data_profiling.visualisation.plot import cat_frequency_plot, histogram def render_categorical_frequency( diff --git a/src/ydata_profiling/report/structure/variables/render_common.py b/src/data_profiling/report/structure/variables/render_common.py similarity index 87% rename from src/ydata_profiling/report/structure/variables/render_common.py rename to src/data_profiling/report/structure/variables/render_common.py index aef8de357..02233b965 100644 --- a/src/ydata_profiling/report/structure/variables/render_common.py +++ b/src/data_profiling/report/structure/variables/render_common.py @@ -1,5 +1,5 @@ -from ydata_profiling.config import Settings -from ydata_profiling.report.presentation.frequency_table_utils import ( +from data_profiling.config import Settings +from data_profiling.report.presentation.frequency_table_utils import ( extreme_obs_table, freq_table, ) diff --git a/src/ydata_profiling/report/structure/variables/render_complex.py b/src/data_profiling/report/structure/variables/render_complex.py similarity index 92% rename from src/ydata_profiling/report/structure/variables/render_complex.py rename to src/data_profiling/report/structure/variables/render_complex.py index 5995285e5..72d94a014 100644 --- a/src/ydata_profiling/report/structure/variables/render_complex.py +++ b/src/data_profiling/report/structure/variables/render_complex.py @@ -1,18 +1,18 @@ -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import ( fmt, fmt_bytesize, fmt_numeric, fmt_percent, ) -from ydata_profiling.report.presentation.core import ( +from data_profiling.report.presentation.core import ( HTML, Container, Image, Table, VariableInfo, ) -from ydata_profiling.visualisation.plot import scatter_complex +from 
data_profiling.visualisation.plot import scatter_complex def render_complex(config: Settings, summary: dict) -> dict: diff --git a/src/ydata_profiling/report/structure/variables/render_count.py b/src/data_profiling/report/structure/variables/render_count.py similarity index 92% rename from src/ydata_profiling/report/structure/variables/render_count.py rename to src/data_profiling/report/structure/variables/render_count.py index 776bb6763..667458b19 100644 --- a/src/ydata_profiling/report/structure/variables/render_count.py +++ b/src/data_profiling/report/structure/variables/render_count.py @@ -1,19 +1,19 @@ -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import ( fmt, fmt_bytesize, fmt_numeric, fmt_percent, ) -from ydata_profiling.report.presentation.core import ( +from data_profiling.report.presentation.core import ( Container, FrequencyTable, Table, VariableInfo, ) -from ydata_profiling.report.structure.variables.render_common import render_common -from ydata_profiling.report.utils import image_or_empty -from ydata_profiling.visualisation.plot import histogram, mini_histogram +from data_profiling.report.structure.variables.render_common import render_common +from data_profiling.report.utils import image_or_empty +from data_profiling.visualisation.plot import histogram, mini_histogram def render_count(config: Settings, summary: dict) -> dict: diff --git a/src/ydata_profiling/report/structure/variables/render_date.py b/src/data_profiling/report/structure/variables/render_date.py similarity index 91% rename from src/ydata_profiling/report/structure/variables/render_date.py rename to src/data_profiling/report/structure/variables/render_date.py index 8d002d885..47f71c92f 100644 --- a/src/ydata_profiling/report/structure/variables/render_date.py +++ b/src/data_profiling/report/structure/variables/render_date.py @@ -1,10 +1,10 @@ from typing 
import Any, Dict -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent -from ydata_profiling.report.presentation.core import Container, Table, VariableInfo -from ydata_profiling.report.utils import image_or_empty -from ydata_profiling.visualisation.plot import histogram, mini_histogram +from data_profiling.config import Settings +from data_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent +from data_profiling.report.presentation.core import Container, Table, VariableInfo +from data_profiling.report.utils import image_or_empty +from data_profiling.visualisation.plot import histogram, mini_histogram def render_date(config: Settings, summary: Dict[str, Any]) -> Dict[str, Any]: diff --git a/src/ydata_profiling/report/structure/variables/render_file.py b/src/data_profiling/report/structure/variables/render_file.py similarity index 77% rename from src/ydata_profiling/report/structure/variables/render_file.py rename to src/data_profiling/report/structure/variables/render_file.py index e014434ab..c1e18ee0a 100644 --- a/src/ydata_profiling/report/structure/variables/render_file.py +++ b/src/data_profiling/report/structure/variables/render_file.py @@ -1,12 +1,12 @@ from typing import List -from ydata_profiling.config import Settings -from ydata_profiling.report.presentation.core import Container, FrequencyTable -from ydata_profiling.report.presentation.core.renderable import Renderable -from ydata_profiling.report.presentation.frequency_table_utils import freq_table -from ydata_profiling.report.structure.variables.render_path import render_path -from ydata_profiling.report.utils import image_or_empty -from ydata_profiling.visualisation.plot import histogram +from data_profiling.config import Settings +from data_profiling.report.presentation.core import Container, FrequencyTable +from data_profiling.report.presentation.core.renderable import Renderable +from 
data_profiling.report.presentation.frequency_table_utils import freq_table +from data_profiling.report.structure.variables.render_path import render_path +from data_profiling.report.utils import image_or_empty +from data_profiling.visualisation.plot import histogram def render_file(config: Settings, summary: dict) -> dict: diff --git a/src/ydata_profiling/report/structure/variables/render_generic.py b/src/data_profiling/report/structure/variables/render_generic.py similarity index 86% rename from src/ydata_profiling/report/structure/variables/render_generic.py rename to src/data_profiling/report/structure/variables/render_generic.py index 0a8ce1e55..55262c2a1 100644 --- a/src/ydata_profiling/report/structure/variables/render_generic.py +++ b/src/data_profiling/report/structure/variables/render_generic.py @@ -1,6 +1,6 @@ -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent -from ydata_profiling.report.presentation.core import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent +from data_profiling.report.presentation.core import ( HTML, Container, Table, diff --git a/src/ydata_profiling/report/structure/variables/render_image.py b/src/data_profiling/report/structure/variables/render_image.py similarity index 94% rename from src/ydata_profiling/report/structure/variables/render_image.py rename to src/data_profiling/report/structure/variables/render_image.py index ea1336208..9188d61b2 100644 --- a/src/ydata_profiling/report/structure/variables/render_image.py +++ b/src/data_profiling/report/structure/variables/render_image.py @@ -1,16 +1,16 @@ import pandas as pd -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import fmt_numeric -from ydata_profiling.report.presentation.core import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import fmt_numeric +from 
data_profiling.report.presentation.core import ( Container, FrequencyTable, Image, Table, ) -from ydata_profiling.report.presentation.frequency_table_utils import freq_table -from ydata_profiling.report.structure.variables.render_file import render_file -from ydata_profiling.visualisation.plot import scatter_series +from data_profiling.report.presentation.frequency_table_utils import freq_table +from data_profiling.report.structure.variables.render_file import render_file +from data_profiling.visualisation.plot import scatter_series def render_image(config: Settings, summary: dict) -> dict: diff --git a/src/ydata_profiling/report/structure/variables/render_path.py b/src/data_profiling/report/structure/variables/render_path.py similarity index 92% rename from src/ydata_profiling/report/structure/variables/render_path.py rename to src/data_profiling/report/structure/variables/render_path.py index d7cde6f06..c242aecb8 100644 --- a/src/ydata_profiling/report/structure/variables/render_path.py +++ b/src/data_profiling/report/structure/variables/render_path.py @@ -1,8 +1,8 @@ -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import fmt, fmt_numeric -from ydata_profiling.report.presentation.core import Container, FrequencyTable, Table -from ydata_profiling.report.presentation.frequency_table_utils import freq_table -from ydata_profiling.report.structure.variables.render_categorical import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import fmt, fmt_numeric +from data_profiling.report.presentation.core import Container, FrequencyTable, Table +from data_profiling.report.presentation.frequency_table_utils import freq_table +from data_profiling.report.structure.variables.render_categorical import ( render_categorical, ) diff --git a/src/ydata_profiling/report/structure/variables/render_real.py b/src/data_profiling/report/structure/variables/render_real.py similarity index 96% rename from 
src/ydata_profiling/report/structure/variables/render_real.py rename to src/data_profiling/report/structure/variables/render_real.py index 92c73fcf3..59764c419 100644 --- a/src/ydata_profiling/report/structure/variables/render_real.py +++ b/src/data_profiling/report/structure/variables/render_real.py @@ -1,20 +1,20 @@ -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import ( fmt, fmt_bytesize, fmt_monotonic, fmt_numeric, fmt_percent, ) -from ydata_profiling.report.presentation.core import ( +from data_profiling.report.presentation.core import ( Container, FrequencyTable, Table, VariableInfo, ) -from ydata_profiling.report.structure.variables.render_common import render_common -from ydata_profiling.report.utils import image_or_empty -from ydata_profiling.visualisation.plot import histogram, mini_histogram +from data_profiling.report.structure.variables.render_common import render_common +from data_profiling.report.utils import image_or_empty +from data_profiling.visualisation.plot import histogram, mini_histogram def render_real(config: Settings, summary: dict) -> dict: diff --git a/src/ydata_profiling/report/structure/variables/render_text.py b/src/data_profiling/report/structure/variables/render_text.py similarity index 92% rename from src/ydata_profiling/report/structure/variables/render_text.py rename to src/data_profiling/report/structure/variables/render_text.py index 5eadf3799..d50dbb5fe 100644 --- a/src/ydata_profiling/report/structure/variables/render_text.py +++ b/src/data_profiling/report/structure/variables/render_text.py @@ -1,15 +1,15 @@ from typing import Any, Dict, List -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent -from ydata_profiling.report.presentation.core import ( +from data_profiling.config import Settings +from 
data_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent +from data_profiling.report.presentation.core import ( Container, FrequencyTable, Image, Table, ) -from ydata_profiling.report.presentation.core.variable_info import VariableInfo -from ydata_profiling.report.structure.variables.render_categorical import ( +from data_profiling.report.presentation.core.variable_info import VariableInfo +from data_profiling.report.structure.variables.render_categorical import ( _get_n, freq_table, render_categorical, @@ -17,8 +17,8 @@ render_categorical_length, render_categorical_unicode, ) -from ydata_profiling.report.structure.variables.render_common import render_common -from ydata_profiling.visualisation.plot import plot_word_cloud +from data_profiling.report.structure.variables.render_common import render_common +from data_profiling.visualisation.plot import plot_word_cloud def render_text(config: Settings, summary: Dict[str, Any]) -> Dict[str, Any]: diff --git a/src/ydata_profiling/report/structure/variables/render_timeseries.py b/src/data_profiling/report/structure/variables/render_timeseries.py similarity index 97% rename from src/ydata_profiling/report/structure/variables/render_timeseries.py rename to src/data_profiling/report/structure/variables/render_timeseries.py index 17f396ac0..e9b191c89 100644 --- a/src/ydata_profiling/report/structure/variables/render_timeseries.py +++ b/src/data_profiling/report/structure/variables/render_timeseries.py @@ -1,5 +1,5 @@ -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import ( fmt, fmt_bytesize, fmt_monotonic, @@ -7,16 +7,16 @@ fmt_percent, fmt_timespan_timedelta, ) -from ydata_profiling.report.presentation.core import ( +from data_profiling.report.presentation.core import ( Container, FrequencyTable, Image, Table, VariableInfo, ) -from 
ydata_profiling.report.structure.variables.render_common import render_common -from ydata_profiling.report.utils import image_or_empty -from ydata_profiling.visualisation.plot import ( +from data_profiling.report.structure.variables.render_common import render_common +from data_profiling.report.utils import image_or_empty +from data_profiling.visualisation.plot import ( histogram, mini_ts_plot, plot_acf_pacf, diff --git a/src/ydata_profiling/report/structure/variables/render_url.py b/src/data_profiling/report/structure/variables/render_url.py similarity index 91% rename from src/ydata_profiling/report/structure/variables/render_url.py rename to src/data_profiling/report/structure/variables/render_url.py index f35d6dcb6..061afdcea 100644 --- a/src/ydata_profiling/report/structure/variables/render_url.py +++ b/src/data_profiling/report/structure/variables/render_url.py @@ -1,14 +1,14 @@ -from ydata_profiling.config import Settings -from ydata_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent -from ydata_profiling.report.presentation.core import ( +from data_profiling.config import Settings +from data_profiling.report.formatters import fmt, fmt_bytesize, fmt_percent +from data_profiling.report.presentation.core import ( Container, FrequencyTable, FrequencyTableSmall, Table, VariableInfo, ) -from ydata_profiling.report.presentation.frequency_table_utils import freq_table -from ydata_profiling.report.structure.variables.render_common import render_common +from data_profiling.report.presentation.frequency_table_utils import freq_table +from data_profiling.report.structure.variables.render_common import render_common def render_url(config: Settings, summary: dict) -> dict: diff --git a/src/ydata_profiling/report/utils.py b/src/data_profiling/report/utils.py similarity index 85% rename from src/ydata_profiling/report/utils.py rename to src/data_profiling/report/utils.py index 9d8a09b04..9d0119130 100644 --- a/src/ydata_profiling/report/utils.py +++ 
b/src/data_profiling/report/utils.py @@ -1,7 +1,7 @@ from typing import Optional -from ydata_profiling.config import ImageType -from ydata_profiling.report.presentation.core import Container, Image +from data_profiling.config import ImageType +from data_profiling.report.presentation.core import Container, Image def image_or_empty( diff --git a/src/ydata_profiling/serialize_report.py b/src/data_profiling/serialize_report.py similarity index 88% rename from src/ydata_profiling/serialize_report.py rename to src/data_profiling/serialize_report.py index 708d29a47..bab5ad5e9 100644 --- a/src/ydata_profiling/serialize_report.py +++ b/src/data_profiling/serialize_report.py @@ -3,12 +3,12 @@ from typing import TYPE_CHECKING, Optional, Union if TYPE_CHECKING: - from ydata_profiling.profile_report import ProfileReport + from data_profiling.profile_report import ProfileReport -from ydata_profiling.config import Settings -from ydata_profiling.model import BaseDescription -from ydata_profiling.report.presentation.core import Root -from ydata_profiling.version import __version__ +from data_profiling.config import Settings +from data_profiling.model import BaseDescription +from data_profiling.report.presentation.core import Root +from data_profiling.version import __version__ class SerializeReport: @@ -102,12 +102,12 @@ def loads(self, data: bytes) -> Union["ProfileReport", "SerializeReport"]: # warn if version not equal if ( loaded_description_set is not None - and loaded_description_set.package["ydata_profiling_version"] + and loaded_description_set.package["data_profiling_version"] != __version__ ): warnings.warn( f"The package version specified in the loaded data is not equal to the version installed. " - f"Currently running on ydata-profiling {__version__} , while loaded data is generated by ydata_profiling, {loaded_description_set.package['ydata_profiling_version']}." 
+ f"Currently running on data-profiling {__version__} , while loaded data is generated by data_profiling, {loaded_description_set.package['data_profiling_version']}." ) # set df_hash diff --git a/src/ydata_profiling/utils/__init__.py b/src/data_profiling/utils/__init__.py similarity index 100% rename from src/ydata_profiling/utils/__init__.py rename to src/data_profiling/utils/__init__.py diff --git a/src/ydata_profiling/utils/backend.py b/src/data_profiling/utils/backend.py similarity index 100% rename from src/ydata_profiling/utils/backend.py rename to src/data_profiling/utils/backend.py diff --git a/src/ydata_profiling/utils/cache.py b/src/data_profiling/utils/cache.py similarity index 96% rename from src/ydata_profiling/utils/cache.py rename to src/data_profiling/utils/cache.py index 8640d86a6..6945d5916 100644 --- a/src/ydata_profiling/utils/cache.py +++ b/src/data_profiling/utils/cache.py @@ -4,7 +4,7 @@ from requests import get as get_file -from ydata_profiling.utils.paths import get_data_path +from data_profiling.utils.paths import get_data_path def cache_file(file_name: str, url: str) -> Path: diff --git a/src/ydata_profiling/utils/common.py b/src/data_profiling/utils/common.py similarity index 96% rename from src/ydata_profiling/utils/common.py rename to src/data_profiling/utils/common.py index 6828ef262..158ae5d20 100644 --- a/src/ydata_profiling/utils/common.py +++ b/src/data_profiling/utils/common.py @@ -15,7 +15,7 @@ import pandas as pd import requests -from ydata_profiling.version import __version__ +from data_profiling.version import __version__ def update(d: dict, u: Mapping) -> dict: @@ -73,7 +73,7 @@ def convert_timestamp_to_datetime(timestamp: int) -> datetime: def analytics_features( dataframe: str, datatype: str, report_type: str, ncols: int, nrows: int, dbx: str ) -> None: - endpoint = "https://packages.ydata.ai/ydata-profiling?" + endpoint = "https://packages.ydata.ai/data-profiling?" 
package_version = __version__ if ( diff --git a/src/ydata_profiling/utils/compat.py b/src/data_profiling/utils/compat.py similarity index 100% rename from src/ydata_profiling/utils/compat.py rename to src/data_profiling/utils/compat.py diff --git a/src/ydata_profiling/utils/dataframe.py b/src/data_profiling/utils/dataframe.py similarity index 99% rename from src/ydata_profiling/utils/dataframe.py rename to src/data_profiling/utils/dataframe.py index fa27a3746..19fe608cc 100644 --- a/src/ydata_profiling/utils/dataframe.py +++ b/src/data_profiling/utils/dataframe.py @@ -21,7 +21,7 @@ def warn_read(extension: str) -> None: To prevent this warning from showing up, please rename the file to any of the extensions supported by pandas (docs: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html) If you think this extension should be supported, please report this as an issue: -https://github.com/ydataai/ydata-profiling/issues""" +https://github.com/Data-Centric-AI-Community/data-profiling/issues""" ) diff --git a/src/ydata_profiling/utils/logger.py b/src/data_profiling/utils/logger.py similarity index 93% rename from src/ydata_profiling/utils/logger.py rename to src/data_profiling/utils/logger.py index b5873726b..0f903ab9b 100644 --- a/src/ydata_profiling/utils/logger.py +++ b/src/data_profiling/utils/logger.py @@ -1,12 +1,12 @@ """ - Logger function for ydata-profiling reports + Logger function for data-profiling reports """ import logging import pandas as pd -from ydata_profiling.utils.common import ( +from data_profiling.utils.common import ( analytics_features, calculate_nrows, is_running_in_databricks, diff --git a/src/ydata_profiling/utils/notebook.py b/src/data_profiling/utils/notebook.py similarity index 100% rename from src/ydata_profiling/utils/notebook.py rename to src/data_profiling/utils/notebook.py diff --git a/src/ydata_profiling/utils/paths.py b/src/data_profiling/utils/paths.py similarity index 100% rename from src/ydata_profiling/utils/paths.py 
rename to src/data_profiling/utils/paths.py diff --git a/src/ydata_profiling/utils/progress_bar.py b/src/data_profiling/utils/progress_bar.py similarity index 100% rename from src/ydata_profiling/utils/progress_bar.py rename to src/data_profiling/utils/progress_bar.py diff --git a/src/ydata_profiling/utils/styles.py b/src/data_profiling/utils/styles.py similarity index 100% rename from src/ydata_profiling/utils/styles.py rename to src/data_profiling/utils/styles.py diff --git a/src/ydata_profiling/utils/versions.py b/src/data_profiling/utils/versions.py similarity index 100% rename from src/ydata_profiling/utils/versions.py rename to src/data_profiling/utils/versions.py diff --git a/src/ydata_profiling/visualisation/__init__.py b/src/data_profiling/visualisation/__init__.py similarity index 100% rename from src/ydata_profiling/visualisation/__init__.py rename to src/data_profiling/visualisation/__init__.py diff --git a/src/ydata_profiling/visualisation/context.py b/src/data_profiling/visualisation/context.py similarity index 100% rename from src/ydata_profiling/visualisation/context.py rename to src/data_profiling/visualisation/context.py diff --git a/src/ydata_profiling/visualisation/missing.py b/src/data_profiling/visualisation/missing.py similarity index 93% rename from src/ydata_profiling/visualisation/missing.py rename to src/data_profiling/visualisation/missing.py index 9dd1068d0..1d72977c0 100644 --- a/src/ydata_profiling/visualisation/missing.py +++ b/src/data_profiling/visualisation/missing.py @@ -3,14 +3,14 @@ from matplotlib import pyplot as plt -from ydata_profiling.config import Settings -from ydata_profiling.visualisation.context import manage_matplotlib_context -from ydata_profiling.visualisation.plot import ( +from data_profiling.config import Settings +from data_profiling.visualisation.context import manage_matplotlib_context +from data_profiling.visualisation.plot import ( missing_bar, missing_heatmap, missing_matrix, ) -from 
ydata_profiling.visualisation.utils import hex_to_rgb, plot_360_n0sc0pe +from data_profiling.visualisation.utils import hex_to_rgb, plot_360_n0sc0pe def get_font_size(columns: List[str]) -> float: diff --git a/src/ydata_profiling/visualisation/plot.py b/src/data_profiling/visualisation/plot.py similarity index 99% rename from src/ydata_profiling/visualisation/plot.py rename to src/data_profiling/visualisation/plot.py index b3dc5338e..6f90977e9 100644 --- a/src/ydata_profiling/visualisation/plot.py +++ b/src/data_profiling/visualisation/plot.py @@ -16,10 +16,10 @@ from typeguard import typechecked from wordcloud import WordCloud -from ydata_profiling.config import Settings -from ydata_profiling.utils.common import convert_timestamp_to_datetime -from ydata_profiling.visualisation.context import manage_matplotlib_context -from ydata_profiling.visualisation.utils import plot_360_n0sc0pe +from data_profiling.config import Settings +from data_profiling.utils.common import convert_timestamp_to_datetime +from data_profiling.visualisation.context import manage_matplotlib_context +from data_profiling.visualisation.utils import plot_360_n0sc0pe def format_fn(tick_val: int, tick_pos: Any) -> str: diff --git a/src/ydata_profiling/visualisation/utils.py b/src/data_profiling/visualisation/utils.py similarity index 98% rename from src/ydata_profiling/visualisation/utils.py rename to src/data_profiling/visualisation/utils.py index 696e0d7dc..60d1ae403 100644 --- a/src/ydata_profiling/visualisation/utils.py +++ b/src/data_profiling/visualisation/utils.py @@ -9,7 +9,7 @@ import matplotlib.pyplot as plt from matplotlib.artist import Artist -from ydata_profiling.config import Settings +from data_profiling.config import Settings def hex_to_rgb(hex: str) -> Tuple[float, ...]: diff --git a/src/pandas_profiling/__init__.py b/src/pandas_profiling/__init__.py deleted file mode 100644 index b9894d547..000000000 --- a/src/pandas_profiling/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Main 
module of pandas-profiling. - -.. include:: ../../README.md -""" -import importlib.util -from warnings import warn - -from ydata_profiling.compare_reports import compare -from ydata_profiling.controller import pandas_decorator -from ydata_profiling.profile_report import ProfileReport -from ydata_profiling.version import __version__ - -# backend -import ydata_profiling.model.pandas # isort:skip # noqa - -spec = importlib.util.find_spec("pyspark") -if spec is not None: - import ydata_profiling.model.spark # isort:skip # noqa - -warn( - "`import pandas_profiling` is going to be deprecated by April 1st. Please use `import ydata_profiling` instead.", - DeprecationWarning, - stacklevel=2, -) - -__all__ = [ - "pandas_decorator", - "ProfileReport", - "__version__", - "compare", -] diff --git a/src/ydata_profiling/__init__.py b/src/ydata_profiling/__init__.py index 84dc4b020..50dc1d78f 100644 --- a/src/ydata_profiling/__init__.py +++ b/src/ydata_profiling/__init__.py @@ -8,17 +8,17 @@ import importlib.util # isort:skip # noqa from warnings import warn -from ydata_profiling.compare_reports import compare # isort:skip # noqa -from ydata_profiling.controller import pandas_decorator # isort:skip # noqa -from ydata_profiling.profile_report import ProfileReport # isort:skip # noqa -from ydata_profiling.version import __version__ # isort:skip # noqa +from data_profiling.compare_reports import compare # isort:skip # noqa +from data_profiling.controller import pandas_decorator # isort:skip # noqa +from data_profiling.profile_report import ProfileReport # isort:skip # noqa +from data_profiling.version import __version__ # isort:skip # noqa # backend -import ydata_profiling.model.pandas # isort:skip # noqa +import data_profiling.model.pandas # isort:skip # noqa spec = importlib.util.find_spec("pyspark") if spec is not None: - import ydata_profiling.model.spark # isort:skip # noqa + import data_profiling.model.spark # isort:skip # noqa spec_numba = importlib.util.find_spec("numba") if 
spec_numba is not None: @@ -29,7 +29,7 @@ warn( """ `import ydata_profiling` is deprecated and will not receive more updates. - Please install fg-data-profiling via `pip install fg-data-profiling` and use `import data_profiling` instead. + Please use `import data_profiling` instead. """, DeprecationWarning, stacklevel=2, diff --git a/src/ydata_profiling/report/presentation/core/__init__.py b/src/ydata_profiling/report/presentation/core/__init__.py deleted file mode 100644 index 4662cab12..000000000 --- a/src/ydata_profiling/report/presentation/core/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -from ydata_profiling.report.presentation.core.alerts import Alerts -from ydata_profiling.report.presentation.core.collapse import Collapse -from ydata_profiling.report.presentation.core.container import Container -from ydata_profiling.report.presentation.core.correlation_table import CorrelationTable -from ydata_profiling.report.presentation.core.dropdown import Dropdown -from ydata_profiling.report.presentation.core.duplicate import Duplicate -from ydata_profiling.report.presentation.core.frequency_table import FrequencyTable -from ydata_profiling.report.presentation.core.frequency_table_small import ( - FrequencyTableSmall, -) -from ydata_profiling.report.presentation.core.html import HTML -from ydata_profiling.report.presentation.core.image import Image -from ydata_profiling.report.presentation.core.root import Root -from ydata_profiling.report.presentation.core.sample import Sample -from ydata_profiling.report.presentation.core.scores import Scores -from ydata_profiling.report.presentation.core.table import Table -from ydata_profiling.report.presentation.core.toggle_button import ToggleButton -from ydata_profiling.report.presentation.core.variable import Variable -from ydata_profiling.report.presentation.core.variable_info import VariableInfo - -__all__ = [ - "Collapse", - "Container", - "Duplicate", - "Dropdown", - "FrequencyTable", - "FrequencyTableSmall", - "HTML", - 
"Image", - "Root", - "Sample", - "Table", - "ToggleButton", - "Variable", - "VariableInfo", - "Alerts", - "CorrelationTable", - "Scores", -] diff --git a/src/ydata_profiling/report/presentation/flavours/html/__init__.py b/src/ydata_profiling/report/presentation/flavours/html/__init__.py deleted file mode 100644 index 50e234f77..000000000 --- a/src/ydata_profiling/report/presentation/flavours/html/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -from ydata_profiling.report.presentation.flavours.html.alerts import HTMLAlerts -from ydata_profiling.report.presentation.flavours.html.collapse import HTMLCollapse -from ydata_profiling.report.presentation.flavours.html.container import HTMLContainer -from ydata_profiling.report.presentation.flavours.html.correlation_table import ( - HTMLCorrelationTable, -) -from ydata_profiling.report.presentation.flavours.html.dropdown import HTMLDropdown -from ydata_profiling.report.presentation.flavours.html.duplicate import HTMLDuplicate -from ydata_profiling.report.presentation.flavours.html.frequency_table import ( - HTMLFrequencyTable, -) -from ydata_profiling.report.presentation.flavours.html.frequency_table_small import ( - HTMLFrequencyTableSmall, -) -from ydata_profiling.report.presentation.flavours.html.html import HTMLHTML -from ydata_profiling.report.presentation.flavours.html.image import HTMLImage -from ydata_profiling.report.presentation.flavours.html.root import HTMLRoot -from ydata_profiling.report.presentation.flavours.html.sample import HTMLSample -from ydata_profiling.report.presentation.flavours.html.scores import HTMLScores -from ydata_profiling.report.presentation.flavours.html.table import HTMLTable -from ydata_profiling.report.presentation.flavours.html.toggle_button import ( - HTMLToggleButton, -) -from ydata_profiling.report.presentation.flavours.html.variable import HTMLVariable -from ydata_profiling.report.presentation.flavours.html.variable_info import ( - HTMLVariableInfo, -) - -__all__ = [ - "HTMLCollapse", - 
"HTMLContainer", - "HTMLDuplicate", - "HTMLDropdown", - "HTMLFrequencyTable", - "HTMLFrequencyTableSmall", - "HTMLHTML", - "HTMLImage", - "HTMLRoot", - "HTMLSample", - "HTMLTable", - "HTMLToggleButton", - "HTMLVariable", - "HTMLVariableInfo", - "HTMLAlerts", - "HTMLCorrelationTable", - "HTMLScores", -] diff --git a/src/ydata_profiling/report/presentation/flavours/html/alerts.py b/src/ydata_profiling/report/presentation/flavours/html/alerts.py deleted file mode 100644 index d07c1a9a4..000000000 --- a/src/ydata_profiling/report/presentation/flavours/html/alerts.py +++ /dev/null @@ -1,10 +0,0 @@ -from ydata_profiling.report.presentation.core.alerts import Alerts -from ydata_profiling.report.presentation.flavours.html import templates -from ydata_profiling.utils.styles import get_alert_styles - - -class HTMLAlerts(Alerts): - def render(self) -> str: - styles = get_alert_styles() - - return templates.template("alerts.html").render(**self.content, styles=styles) diff --git a/src/ydata_profiling/report/presentation/flavours/html/image.py b/src/ydata_profiling/report/presentation/flavours/html/image.py deleted file mode 100644 index a635bd65a..000000000 --- a/src/ydata_profiling/report/presentation/flavours/html/image.py +++ /dev/null @@ -1,7 +0,0 @@ -from ydata_profiling.report.presentation.core import Image -from ydata_profiling.report.presentation.flavours.html import templates - - -class HTMLImage(Image): - def render(self) -> str: - return templates.template("diagram.html").render(**self.content) diff --git a/src/ydata_profiling/report/presentation/flavours/html/table.py b/src/ydata_profiling/report/presentation/flavours/html/table.py deleted file mode 100644 index c5d71412b..000000000 --- a/src/ydata_profiling/report/presentation/flavours/html/table.py +++ /dev/null @@ -1,7 +0,0 @@ -from ydata_profiling.report.presentation.core.table import Table -from ydata_profiling.report.presentation.flavours.html import templates - - -class HTMLTable(Table): - def render(self) 
-> str: - return templates.template("table.html").render(**self.content) diff --git a/src/ydata_profiling/report/presentation/flavours/widget/__init__.py b/src/ydata_profiling/report/presentation/flavours/widget/__init__.py deleted file mode 100644 index 22c51d0ca..000000000 --- a/src/ydata_profiling/report/presentation/flavours/widget/__init__.py +++ /dev/null @@ -1,49 +0,0 @@ -from ydata_profiling.report.presentation.flavours.widget.alerts import WidgetAlerts -from ydata_profiling.report.presentation.flavours.widget.collapse import WidgetCollapse -from ydata_profiling.report.presentation.flavours.widget.container import ( - WidgetContainer, -) -from ydata_profiling.report.presentation.flavours.widget.correlation_table import ( - WidgetCorrelationTable, -) -from ydata_profiling.report.presentation.flavours.widget.dropdown import WidgetDropdown -from ydata_profiling.report.presentation.flavours.widget.duplicate import ( - WidgetDuplicate, -) -from ydata_profiling.report.presentation.flavours.widget.frequency_table import ( - WidgetFrequencyTable, -) -from ydata_profiling.report.presentation.flavours.widget.frequency_table_small import ( - WidgetFrequencyTableSmall, -) -from ydata_profiling.report.presentation.flavours.widget.html import WidgetHTML -from ydata_profiling.report.presentation.flavours.widget.image import WidgetImage -from ydata_profiling.report.presentation.flavours.widget.root import WidgetRoot -from ydata_profiling.report.presentation.flavours.widget.sample import WidgetSample -from ydata_profiling.report.presentation.flavours.widget.table import WidgetTable -from ydata_profiling.report.presentation.flavours.widget.toggle_button import ( - WidgetToggleButton, -) -from ydata_profiling.report.presentation.flavours.widget.variable import WidgetVariable -from ydata_profiling.report.presentation.flavours.widget.variable_info import ( - WidgetVariableInfo, -) - -__all__ = [ - "WidgetCollapse", - "WidgetContainer", - "WidgetDuplicate", - "WidgetDropdown", - 
"WidgetFrequencyTable", - "WidgetFrequencyTableSmall", - "WidgetHTML", - "WidgetImage", - "WidgetRoot", - "WidgetSample", - "WidgetTable", - "WidgetToggleButton", - "WidgetVariable", - "WidgetVariableInfo", - "WidgetAlerts", - "WidgetCorrelationTable", -] diff --git a/src/ydata_profiling/report/structure/variables/__init__.py b/src/ydata_profiling/report/structure/variables/__init__.py deleted file mode 100644 index 64f1d6d54..000000000 --- a/src/ydata_profiling/report/structure/variables/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -from ydata_profiling.report.structure.variables.render_boolean import render_boolean -from ydata_profiling.report.structure.variables.render_categorical import ( - render_categorical, -) -from ydata_profiling.report.structure.variables.render_common import render_common -from ydata_profiling.report.structure.variables.render_complex import render_complex -from ydata_profiling.report.structure.variables.render_count import render_count -from ydata_profiling.report.structure.variables.render_date import render_date -from ydata_profiling.report.structure.variables.render_file import render_file -from ydata_profiling.report.structure.variables.render_generic import render_generic -from ydata_profiling.report.structure.variables.render_image import render_image -from ydata_profiling.report.structure.variables.render_path import render_path -from ydata_profiling.report.structure.variables.render_real import render_real -from ydata_profiling.report.structure.variables.render_text import render_text -from ydata_profiling.report.structure.variables.render_timeseries import ( - render_timeseries, -) -from ydata_profiling.report.structure.variables.render_url import render_url - -__all__ = [ - "render_boolean", - "render_categorical", - "render_common", - "render_complex", - "render_count", - "render_date", - "render_file", - "render_generic", - "render_image", - "render_path", - "render_real", - "render_text", - "render_timeseries", - 
"render_url", -] diff --git a/tests/backends/spark_backend/test_correlations_spark.py b/tests/backends/spark_backend/test_correlations_spark.py index 02ab6886c..4134e5a47 100644 --- a/tests/backends/spark_backend/test_correlations_spark.py +++ b/tests/backends/spark_backend/test_correlations_spark.py @@ -1,17 +1,17 @@ import pandas as pd import pytest -from ydata_profiling.config import Settings -from ydata_profiling.model.pandas.correlations_pandas import ( +from data_profiling.config import Settings +from data_profiling.model.pandas.correlations_pandas import ( pearson_compute as pandas_pearson_compute, ) -from ydata_profiling.model.pandas.correlations_pandas import ( +from data_profiling.model.pandas.correlations_pandas import ( spearman_compute as pandas_spearman_compute, ) -from ydata_profiling.model.spark.correlations_spark import ( +from data_profiling.model.spark.correlations_spark import ( pearson_compute as spark_pearson_compute, ) -from ydata_profiling.model.spark.correlations_spark import ( +from data_profiling.model.spark.correlations_spark import ( spearman_compute as spark_spearman_compute, ) @@ -78,7 +78,7 @@ def test_pearson_spark(correlation_data_num, correlation_var_types): def test_kendall_spark(correlation_data_cat): - from ydata_profiling.model.spark.correlations_spark import kendall_compute + from data_profiling.model.spark.correlations_spark import kendall_compute cfg = Settings() diff --git a/tests/backends/spark_backend/test_descriptions_spark.py b/tests/backends/spark_backend/test_descriptions_spark.py index 63aabbe4e..c4c7cb816 100644 --- a/tests/backends/spark_backend/test_descriptions_spark.py +++ b/tests/backends/spark_backend/test_descriptions_spark.py @@ -5,10 +5,10 @@ import pandas as pd import pytest -from ydata_profiling.config import SparkSettings -from ydata_profiling.model.describe import describe +from data_profiling.config import SparkSettings +from data_profiling.model.describe import describe -check_is_NaN = 
"ydata_profiling.check_is_NaN" +check_is_NaN = "data_profiling.check_is_NaN" @pytest.fixture diff --git a/tests/backends/spark_backend/test_duplicates.py b/tests/backends/spark_backend/test_duplicates.py index ad1ab93bc..d757e3a09 100644 --- a/tests/backends/spark_backend/test_duplicates.py +++ b/tests/backends/spark_backend/test_duplicates.py @@ -1,8 +1,8 @@ import pandas as pd import pytest -from ydata_profiling.config import Settings -from ydata_profiling.model.spark.duplicates_spark import get_duplicates_spark +from data_profiling.config import Settings +from data_profiling.model.spark.duplicates_spark import get_duplicates_spark @pytest.fixture diff --git a/tests/backends/spark_backend/test_issue1429.py b/tests/backends/spark_backend/test_issue1429.py index 3d17b8cef..01c2afe73 100644 --- a/tests/backends/spark_backend/test_issue1429.py +++ b/tests/backends/spark_backend/test_issue1429.py @@ -1,6 +1,6 @@ """ Test for issue 1429: -https://github.com/ydataai/ydata-profiling/issues/1429 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/1429 """ from typing import List, Optional, Tuple @@ -8,15 +8,15 @@ from pyspark.sql import DataFrame, SparkSession from pyspark.sql import types as T -from ydata_profiling import ProfileReport -from ydata_profiling.config import SparkSettings -from ydata_profiling.model.spark.describe_counts_spark import describe_counts_spark -from ydata_profiling.model.spark.describe_generic_spark import describe_generic_spark -from ydata_profiling.model.spark.describe_numeric_spark import ( +from data_profiling import ProfileReport +from data_profiling.config import SparkSettings +from data_profiling.model.spark.describe_counts_spark import describe_counts_spark +from data_profiling.model.spark.describe_generic_spark import describe_generic_spark +from data_profiling.model.spark.describe_numeric_spark import ( describe_numeric_1d_spark, numeric_stats_spark, ) -from ydata_profiling.model.spark.describe_supported_spark import ( 
+from data_profiling.model.spark.describe_supported_spark import ( describe_supported_spark, ) diff --git a/tests/backends/spark_backend/test_issue1602.py b/tests/backends/spark_backend/test_issue1602.py index b25ae1873..6dc8c9126 100644 --- a/tests/backends/spark_backend/test_issue1602.py +++ b/tests/backends/spark_backend/test_issue1602.py @@ -5,7 +5,7 @@ from pyspark.sql import types as T -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_spark_handles_decimal_type(test_output_dir, spark_session): diff --git a/tests/backends/spark_backend/test_issue1722.py b/tests/backends/spark_backend/test_issue1722.py index 6cb2d5596..37824aaa9 100644 --- a/tests/backends/spark_backend/test_issue1722.py +++ b/tests/backends/spark_backend/test_issue1722.py @@ -1,6 +1,6 @@ """ Test for issue 1722: -https://github.com/ydataai/ydata-profiling/issues/1722 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/1722 """ from datetime import date, datetime @@ -8,7 +8,7 @@ from pyspark.sql import SparkSession from pyspark.sql import types as T -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def make_non_numeric_df(spark: SparkSession): diff --git a/tests/backends/spark_backend/test_missing_spark.py b/tests/backends/spark_backend/test_missing_spark.py index ea8a442b9..bf58c5859 100644 --- a/tests/backends/spark_backend/test_missing_spark.py +++ b/tests/backends/spark_backend/test_missing_spark.py @@ -4,8 +4,8 @@ import pandas as pd import pytest -from ydata_profiling.config import Settings -from ydata_profiling.model.spark.missing_spark import missing_bar +from data_profiling.config import Settings +from data_profiling.model.spark.missing_spark import missing_bar @pytest.fixture diff --git a/tests/backends/spark_backend/test_report_spark.py b/tests/backends/spark_backend/test_report_spark.py index 7e2302232..8e77fd871 100644 --- a/tests/backends/spark_backend/test_report_spark.py +++ 
b/tests/backends/spark_backend/test_report_spark.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture diff --git a/tests/backends/spark_backend/test_sample_spark.py b/tests/backends/spark_backend/test_sample_spark.py index 7d7c0652e..a6b3e63fd 100644 --- a/tests/backends/spark_backend/test_sample_spark.py +++ b/tests/backends/spark_backend/test_sample_spark.py @@ -2,8 +2,8 @@ import pytest from pyspark.sql.types import IntegerType, StringType, StructField, StructType -from ydata_profiling.config import Settings -from ydata_profiling.model.spark.sample_spark import get_sample_spark +from data_profiling.config import Settings +from data_profiling.model.spark.sample_spark import get_sample_spark # FIXME: Move to data diff --git a/tests/conftest.py b/tests/conftest.py index 7db073a3c..5ee86f955 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,10 +12,10 @@ except ImportError: has_spark = False -from ydata_profiling.config import Settings -from ydata_profiling.model.summarizer import ProfilingSummarizer -from ydata_profiling.model.typeset import ProfilingTypeSet -from ydata_profiling.utils.cache import cache_file +from data_profiling.config import Settings +from data_profiling.model.summarizer import ProfilingSummarizer +from data_profiling.model.typeset import ProfilingTypeSet +from data_profiling.utils.cache import cache_file def pytest_configure(config): diff --git a/tests/issues/test_issue100.py b/tests/issues/test_issue100.py index 3b31ff58c..db8976c0f 100644 --- a/tests/issues/test_issue100.py +++ b/tests/issues/test_issue100.py @@ -1,11 +1,11 @@ """ Test for issue 100: -https://github.com/ydataai/ydata-profiling/issues/100 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/100 """ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue100(): diff --git 
a/tests/issues/test_issue120.py b/tests/issues/test_issue120.py index f077e9b22..38aada00c 100644 --- a/tests/issues/test_issue120.py +++ b/tests/issues/test_issue120.py @@ -1,15 +1,15 @@ """ Test for issue 120: -https://github.com/ydataai/ydata-profiling/issues/120 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/120 """ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue_120(get_data_file): file_name = get_data_file( - "ydata_profiling_bug.txt", + "data_profiling_bug.txt", "https://github.com/pandas-profiling/pandas-profiling/files/2386812/pandas_profiling_bug.txt", ) df = pd.read_csv(file_name) diff --git a/tests/issues/test_issue147.py b/tests/issues/test_issue147.py index a0b3af818..f3cb2a85e 100644 --- a/tests/issues/test_issue147.py +++ b/tests/issues/test_issue147.py @@ -1,10 +1,10 @@ """ Test for issue 147: -https://github.com/ydataai/ydata-profiling/issues/147 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/147 """ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue147(get_data_file): diff --git a/tests/issues/test_issue1529.py b/tests/issues/test_issue1529.py index e43199430..ab97f7c0c 100644 --- a/tests/issues/test_issue1529.py +++ b/tests/issues/test_issue1529.py @@ -1,12 +1,12 @@ """ Test for issue 1529: -https://github.com/ydataai/ydata-profiling/issues/1529 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/1529 """ import json import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue1529(): diff --git a/tests/issues/test_issue1631.py b/tests/issues/test_issue1631.py index 88ed22002..099140856 100644 --- a/tests/issues/test_issue1631.py +++ b/tests/issues/test_issue1631.py @@ -1,10 +1,10 @@ """ Test for issue 1631: -https://github.com/ydataai/ydata-profiling/issues/1631 
+https://github.com/Data-Centric-AI-Community/data-profiling/issues/1631 """ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue1631(test_output_dir): diff --git a/tests/issues/test_issue169.py b/tests/issues/test_issue169.py index 136707b67..4dd30afa4 100644 --- a/tests/issues/test_issue169.py +++ b/tests/issues/test_issue169.py @@ -1,13 +1,13 @@ """ Test for issue 169: -https://github.com/ydataai/ydata-profiling/issues/169 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/169 """ from io import StringIO import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture diff --git a/tests/issues/test_issue200.py b/tests/issues/test_issue200.py index 7b7c3384e..daa941669 100644 --- a/tests/issues/test_issue200.py +++ b/tests/issues/test_issue200.py @@ -1,10 +1,10 @@ """ Test for issue 200: -https://github.com/ydataai/ydata-profiling/issues/200 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/200 """ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue200(): diff --git a/tests/issues/test_issue215.py b/tests/issues/test_issue215.py index ea9452412..9a28040a9 100644 --- a/tests/issues/test_issue215.py +++ b/tests/issues/test_issue215.py @@ -1,10 +1,10 @@ """ Test for issue 215: -https://github.com/ydataai/ydata-profiling/issues/215 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/215 """ import pytest -from ydata_profiling.report.formatters import fmt_percent +from data_profiling.report.formatters import fmt_percent @pytest.mark.parametrize( diff --git a/tests/issues/test_issue243.py b/tests/issues/test_issue243.py index 7bb92a5c5..9c47ddf02 100644 --- a/tests/issues/test_issue243.py +++ b/tests/issues/test_issue243.py @@ -1,10 +1,10 @@ """ Test for issue 243: 
-https://github.com/ydataai/ydata-profiling/issues/243 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/243 """ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue243(): diff --git a/tests/issues/test_issue249.py b/tests/issues/test_issue249.py index d58b6fae7..66ca5df84 100644 --- a/tests/issues/test_issue249.py +++ b/tests/issues/test_issue249.py @@ -1,10 +1,10 @@ """ Test for issue 249: -https://github.com/ydataai/ydata-profiling/issues/249 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/249 """ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue249(): diff --git a/tests/issues/test_issue282.py b/tests/issues/test_issue282.py index afea95abc..a62de7d5d 100644 --- a/tests/issues/test_issue282.py +++ b/tests/issues/test_issue282.py @@ -1,11 +1,11 @@ """ Test for issue 282: -https://github.com/ydataai/ydata-profiling/issues/282 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/282 """ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.model import BaseDescription +from data_profiling import ProfileReport +from data_profiling.model import BaseDescription def test_issue282(): diff --git a/tests/issues/test_issue353.py b/tests/issues/test_issue353.py index 9fef11608..436aaa3b0 100644 --- a/tests/issues/test_issue353.py +++ b/tests/issues/test_issue353.py @@ -1,11 +1,11 @@ """ Test for issue 353: -https://github.com/ydataai/ydata-profiling/issues/353 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/353 """ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue353(): diff --git a/tests/issues/test_issue377.py b/tests/issues/test_issue377.py index 257bcf496..fcef8e226 100644 --- a/tests/issues/test_issue377.py +++ b/tests/issues/test_issue377.py 
@@ -1,6 +1,6 @@ """ Test for issue 377: -https://github.com/ydataai/ydata-profiling/issues/377 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/377 """ import sys import zipfile @@ -9,8 +9,8 @@ import pytest import requests -from ydata_profiling import ProfileReport -from ydata_profiling.utils.cache import cache_zipped_file +from data_profiling import ProfileReport +from data_profiling.utils.cache import cache_zipped_file @pytest.fixture() diff --git a/tests/issues/test_issue388.py b/tests/issues/test_issue388.py index 08d6c39f1..9e50487b3 100644 --- a/tests/issues/test_issue388.py +++ b/tests/issues/test_issue388.py @@ -1,10 +1,10 @@ """ Test for issue 388: -https://github.com/ydataai/ydata-profiling/issues/388 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/388 """ import pytest -from ydata_profiling.controller import console +from data_profiling.controller import console def test_issue388(get_data_file, test_output_dir): diff --git a/tests/issues/test_issue397.py b/tests/issues/test_issue397.py index 2b93c393a..0c4db068b 100644 --- a/tests/issues/test_issue397.py +++ b/tests/issues/test_issue397.py @@ -1,11 +1,11 @@ """ Test for issue 397 (actually a PR, but ok): -https://github.com/ydataai/ydata-profiling/pull/397 +https://github.com/Data-Centric-AI-Community/data-profiling/pull/397 """ import numpy as np import pandas as pd -import ydata_profiling +import data_profiling def test_issue397(): @@ -20,7 +20,7 @@ def test_issue397(): } ) - report = ydata_profiling.ProfileReport( + report = data_profiling.ProfileReport( df, vars={"num": {"low_categorical_threshold": 0}} ) assert report.config.vars.num.low_categorical_threshold == 0 diff --git a/tests/issues/test_issue416.py b/tests/issues/test_issue416.py index 36651102f..3695a1afb 100644 --- a/tests/issues/test_issue416.py +++ b/tests/issues/test_issue416.py @@ -1,11 +1,11 @@ """ Test for issue 416: -https://github.com/ydataai/ydata-profiling/issues/416 
+https://github.com/Data-Centric-AI-Community/data-profiling/issues/416 """ import pandas as pd -import ydata_profiling -from ydata_profiling.utils.cache import cache_file +import data_profiling +from data_profiling.utils.cache import cache_file def test_issue416(): @@ -17,7 +17,7 @@ def test_issue416(): df = pd.read_csv(file_name, sep="\t") df["path"] = df["url"].str.replace("http://www.acme.com", "", regex=False) - profile = ydata_profiling.ProfileReport( + profile = data_profiling.ProfileReport( df, title="YData Profiling Report", html={"style": {"full_width": True}}, diff --git a/tests/issues/test_issue437.py b/tests/issues/test_issue437.py index c4fc994cf..25b2a8634 100644 --- a/tests/issues/test_issue437.py +++ b/tests/issues/test_issue437.py @@ -1,11 +1,11 @@ """ Test for issue 437: -https://github.com/ydataai/ydata-profiling/issues/437 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/437 """ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue437(): diff --git a/tests/issues/test_issue502.py b/tests/issues/test_issue502.py index 09555305b..9eb4b1dc4 100644 --- a/tests/issues/test_issue502.py +++ b/tests/issues/test_issue502.py @@ -1,10 +1,10 @@ """ Test for issue 502: -https://github.com/ydataai/ydata-profiling/issues/502 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/502 """ import pandas as pd -from ydata_profiling.model.summary import describe_1d +from data_profiling.model.summary import describe_1d def test_issue502(config, summarizer, typeset): diff --git a/tests/issues/test_issue51.py b/tests/issues/test_issue51.py index 7e6fdcab3..a147b7f0d 100644 --- a/tests/issues/test_issue51.py +++ b/tests/issues/test_issue51.py @@ -1,11 +1,11 @@ """ Test for issue 51: -https://github.com/ydataai/ydata-profiling/issues/51 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/51 """ import numpy as np import pandas as pd -from 
ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue51(get_data_file): diff --git a/tests/issues/test_issue523.py b/tests/issues/test_issue523.py index a97b277a7..e5126aaf8 100644 --- a/tests/issues/test_issue523.py +++ b/tests/issues/test_issue523.py @@ -1,12 +1,12 @@ """ Test for issue 523: -https://github.com/ydataai/ydata-profiling/issues/XXX +https://github.com/Data-Centric-AI-Community/data-profiling/issues/523 """ from dataclasses import asdict import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue523(): diff --git a/tests/issues/test_issue537.py b/tests/issues/test_issue537.py index a62b4a513..affc7d8bd 100644 --- a/tests/issues/test_issue537.py +++ b/tests/issues/test_issue537.py @@ -8,7 +8,7 @@ import pytest import requests -from ydata_profiling.model.summary import describe_1d +from data_profiling.model.summary import describe_1d def mock_multiprocess_1d(args, config, summarizer, typeset) -> Tuple[str, dict]: diff --git a/tests/issues/test_issue545.py b/tests/issues/test_issue545.py index f63db309b..04825bc82 100644 --- a/tests/issues/test_issue545.py +++ b/tests/issues/test_issue545.py @@ -1,6 +1,6 @@ """ Test for issue 545: -https://github.com/ydataai/ydata-profiling/issues/545 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/545 """ from pathlib import Path @@ -8,8 +8,8 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport -from ydata_profiling.utils.compat import pandas_version_info +from data_profiling import ProfileReport +from data_profiling.utils.compat import pandas_version_info @pytest.mark.skipif( diff --git a/tests/issues/test_issue613.py b/tests/issues/test_issue613.py index a1e590c09..4f26fd2ea 100644 --- a/tests/issues/test_issue613.py +++ b/tests/issues/test_issue613.py @@ -1,11 +1,11 @@ """ Test for issue 613: -https://github.com/ydataai/ydata-profiling/issues/613 
+https://github.com/Data-Centric-AI-Community/data-profiling/issues/613 """ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue613(): diff --git a/tests/issues/test_issue664.py b/tests/issues/test_issue664.py index 6719086da..c2eddd6d9 100644 --- a/tests/issues/test_issue664.py +++ b/tests/issues/test_issue664.py @@ -1,11 +1,11 @@ """ Test for issue 664: -https://github.com/ydataai/ydata-profiling/issues/664 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/664 """ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue664(): diff --git a/tests/issues/test_issue671.py b/tests/issues/test_issue671.py index a8e03bce1..1bbbf453d 100644 --- a/tests/issues/test_issue671.py +++ b/tests/issues/test_issue671.py @@ -1,11 +1,11 @@ """ Test for issue 671: -https://github.com/ydataai/ydata-profiling/issues/671 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/671 """ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue671(): diff --git a/tests/issues/test_issue72.py b/tests/issues/test_issue72.py index 5c8bd19d3..89efc4ec4 100644 --- a/tests/issues/test_issue72.py +++ b/tests/issues/test_issue72.py @@ -1,18 +1,18 @@ """ Test for issue 72: -https://github.com/ydataai/ydata-profiling/issues/72 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/72 """ import numpy as np import pandas as pd -import ydata_profiling +import data_profiling def test_issue72_higher(): # Showcase (and test) different ways of interfacing with config/profiling report df = pd.DataFrame({"A": [1, 2, 3, 3]}) df["B"] = df["A"].apply(str) - report = ydata_profiling.ProfileReport(df, correlations=None) + report = data_profiling.ProfileReport(df, correlations=None) 
report.config.vars.num.low_categorical_threshold = 2 # 3 > 2, so numerical assert report.get_description().variables["A"]["type"] == "Numeric" @@ -23,7 +23,7 @@ def test_issue72_higher(): def test_issue72_equal(): df = pd.DataFrame({"A": [1, 2, 3, 3]}) df["B"] = df["A"].apply(str) - report = ydata_profiling.ProfileReport( + report = data_profiling.ProfileReport( df, vars={"num": {"low_categorical_threshold": 3}}, correlations=None, diff --git a/tests/issues/test_issue824.py b/tests/issues/test_issue824.py index 7bbb78833..f945834f8 100644 --- a/tests/issues/test_issue824.py +++ b/tests/issues/test_issue824.py @@ -1,6 +1,6 @@ """ Test for issue 824: -https://github.com/ydataai/ydata-profiling/issues/824 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/824 High correlation warning printed multiple times @@ -13,8 +13,8 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport -from ydata_profiling.model.alerts import AlertType +from data_profiling import ProfileReport +from data_profiling.model.alerts import AlertType @pytest.mark.skip() diff --git a/tests/issues/test_issue864.py b/tests/issues/test_issue864.py index 6c0acac0f..bc74bb906 100644 --- a/tests/issues/test_issue864.py +++ b/tests/issues/test_issue864.py @@ -1,6 +1,6 @@ """ Test for issue 864: -https://github.com/ydataai/ydata-profiling/issues/ +https://github.com/Data-Centric-AI-Community/data-profiling/issues/864 Validate Extreme Values sub-tabs state the correct number of extreme values shown. 
""" @@ -8,7 +8,7 @@ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue864(): diff --git a/tests/issues/test_issue915.py b/tests/issues/test_issue915.py index 310d488e0..400408f01 100644 --- a/tests/issues/test_issue915.py +++ b/tests/issues/test_issue915.py @@ -5,7 +5,7 @@ """ import pandas as pd -from pandas_profiling import ProfileReport +from data_profiling import ProfileReport def test_issue915(): diff --git a/tests/issues/test_issue94.py b/tests/issues/test_issue94.py index 38cd6a53b..ebb8f76ce 100644 --- a/tests/issues/test_issue94.py +++ b/tests/issues/test_issue94.py @@ -1,6 +1,6 @@ """ Test for issue 94: -https://github.com/ydataai/ydata-profiling/issues/94 +https://github.com/Data-Centric-AI-Community/data-profiling/issues/94 Test based on: https://stackoverflow.com/questions/52926527/pandas-profiling-1-4-1-throws-zerodivisionerror-for-valid-data-set-which-pandas @@ -9,7 +9,7 @@ import pandas as pd -import ydata_profiling +import data_profiling def test_issue94(tmpdir): @@ -28,5 +28,5 @@ def test_issue94(tmpdir): CHEM""" ) df = pd.read_csv(str(file_path), parse_dates=True) - profile = ydata_profiling.ProfileReport(df, title="YData Profiling Report") + profile = data_profiling.ProfileReport(df, title="YData Profiling Report") assert "YData Profiling Report" in profile.to_html() diff --git a/tests/issues/test_issueXXX.py b/tests/issues/test_issueXXX.py index 6bd6b5678..054db8bea 100644 --- a/tests/issues/test_issueXXX.py +++ b/tests/issues/test_issueXXX.py @@ -1,11 +1,11 @@ """ Test for issue XXX: -https://github.com/ydataai/ydata-profiling/issues/XXX +https://github.com/Data-Centric-AI-Community/data-profiling/issues/XXX """ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.mark.skip() diff --git a/tests/notebooks/lazy_pipeline.ipynb b/tests/notebooks/lazy_pipeline.ipynb index 381173414..d087437da 100644 --- 
a/tests/notebooks/lazy_pipeline.ipynb +++ b/tests/notebooks/lazy_pipeline.ipynb @@ -20,8 +20,8 @@ "from IPython.utils.capture import capture_output\n", "\n", "# Our package\n", - "from ydata_profiling import ProfileReport\n", - "from ydata_profiling.utils.cache import cache_file" + "from data_profiling import ProfileReport\n", + "from data_profiling.utils.cache import cache_file" ] }, { diff --git a/tests/notebooks/meteorites.ipynb b/tests/notebooks/meteorites.ipynb index a2e4c8ce9..0e4b57562 100644 --- a/tests/notebooks/meteorites.ipynb +++ b/tests/notebooks/meteorites.ipynb @@ -21,8 +21,8 @@ "from IPython.display import display\n", "from IPython.utils.capture import capture_output\n", "\n", - "import ydata_profiling\n", - "from ydata_profiling.utils.cache import cache_file" + "import data_profiling\n", + "from data_profiling.utils.cache import cache_file" ] }, { diff --git a/tests/notebooks/titanic.ipynb b/tests/notebooks/titanic.ipynb index 768759214..cba22dca0 100644 --- a/tests/notebooks/titanic.ipynb +++ b/tests/notebooks/titanic.ipynb @@ -20,8 +20,8 @@ "from IPython.utils.capture import capture_output\n", "from ipywidgets import widgets\n", "\n", - "from ydata_profiling import ProfileReport\n", - "from ydata_profiling.utils.cache import cache_file" + "from data_profiling import ProfileReport\n", + "from data_profiling.utils.cache import cache_file" ] }, { @@ -94,7 +94,7 @@ "# assert len(out.outputs) == 2\n", "# assert out.outputs[0].data['text/plain'].startswith('Tab(children=(HTML(value=')\n", "# assert out.outputs[1].data['text/plain'] == ''\n", - "# assert 'ydata-profiling' in out.outputs[1].data['text/html']" + "# assert 'data-profiling' in out.outputs[1].data['text/html']" ] }, { diff --git a/tests/unit/test_comparison.py b/tests/unit/test_comparison.py index 748c5af12..e95235ce6 100644 --- a/tests/unit/test_comparison.py +++ b/tests/unit/test_comparison.py @@ -2,8 +2,8 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport, 
compare -from ydata_profiling.compare_reports import _compare_title +from data_profiling import ProfileReport, compare +from data_profiling.compare_reports import _compare_title @pytest.fixture() diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index ce007f57f..058cccf9d 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,4 +1,4 @@ -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_set_variable(): diff --git a/tests/unit/test_console.py b/tests/unit/test_console.py index 89b14fde6..34616b9f4 100644 --- a/tests/unit/test_console.py +++ b/tests/unit/test_console.py @@ -3,8 +3,8 @@ import pytest import requests -from ydata_profiling.controller import console -from ydata_profiling.utils.paths import get_config +from data_profiling.controller import console +from data_profiling.utils.paths import get_config NASA_URL = "https://data.nasa.gov/docs/legacy/meteorite_landings/Meteorite_Landings.csv" diff --git a/tests/unit/test_correlations.py b/tests/unit/test_correlations.py index a9ba6174c..a432cd6f9 100644 --- a/tests/unit/test_correlations.py +++ b/tests/unit/test_correlations.py @@ -2,9 +2,9 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport -from ydata_profiling.report.presentation.core import CorrelationTable, Image -from ydata_profiling.report.structure.correlations import get_correlation_items +from data_profiling import ProfileReport +from data_profiling.report.presentation.core import CorrelationTable, Image +from data_profiling.report.structure.correlations import get_correlation_items @pytest.mark.skip diff --git a/tests/unit/test_custom_sample.py b/tests/unit/test_custom_sample.py index b517ab3df..c58355dfa 100644 --- a/tests/unit/test_custom_sample.py +++ b/tests/unit/test_custom_sample.py @@ -1,7 +1,7 @@ import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.utils.dataframe import hash_dataframe +from data_profiling 
import ProfileReport +from data_profiling.utils.dataframe import hash_dataframe def test_custom_sample(): diff --git a/tests/unit/test_dataset_schema.py b/tests/unit/test_dataset_schema.py index 57ba29de9..a5798eee3 100644 --- a/tests/unit/test_dataset_schema.py +++ b/tests/unit/test_dataset_schema.py @@ -1,6 +1,6 @@ import pandas as pd -from ydata_profiling.utils.cache import cache_file +from data_profiling.utils.cache import cache_file def test_dataset_schema(): diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index d17ac63c1..ba63e2955 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -1,6 +1,6 @@ import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_decorator(get_data_file): diff --git a/tests/unit/test_describe.py b/tests/unit/test_describe.py index cbae1bda6..feea58975 100644 --- a/tests/unit/test_describe.py +++ b/tests/unit/test_describe.py @@ -5,12 +5,12 @@ import pandas as pd import pytest -from ydata_profiling.config import Settings -from ydata_profiling.model.describe import describe -from ydata_profiling.model.summary import describe_1d -from ydata_profiling.model.typeset import ProfilingTypeSet +from data_profiling.config import Settings +from data_profiling.model.describe import describe +from data_profiling.model.summary import describe_1d +from data_profiling.model.typeset import ProfilingTypeSet -check_is_NaN = "ydata_profiling.check_is_NaN" +check_is_NaN = "data_profiling.check_is_NaN" testdata = [ diff --git a/tests/unit/test_duplicates.py b/tests/unit/test_duplicates.py index dce8e587e..164f1efb4 100644 --- a/tests/unit/test_duplicates.py +++ b/tests/unit/test_duplicates.py @@ -3,7 +3,7 @@ import pandas as pd import pytest -from ydata_profiling.model.duplicates import get_duplicates +from data_profiling.model.duplicates import get_duplicates @pytest.fixture(scope="module") diff --git a/tests/unit/test_example.py 
b/tests/unit/test_example.py index cf5cd8d2a..1f57e370a 100644 --- a/tests/unit/test_example.py +++ b/tests/unit/test_example.py @@ -1,8 +1,8 @@ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport -from ydata_profiling.model import BaseDescription +from data_profiling import ProfileReport +from data_profiling.model import BaseDescription def test_example(get_data_file, test_output_dir): diff --git a/tests/unit/test_formatters.py b/tests/unit/test_formatters.py index f49e693d8..f1b3026a9 100644 --- a/tests/unit/test_formatters.py +++ b/tests/unit/test_formatters.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from ydata_profiling.report.formatters import ( +from data_profiling.report.formatters import ( fmt_array, fmt_bytesize, fmt_class, diff --git a/tests/unit/test_ge_integration.py b/tests/unit/test_ge_integration.py index 80d7329f9..bc484deae 100644 --- a/tests/unit/test_ge_integration.py +++ b/tests/unit/test_ge_integration.py @@ -4,7 +4,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture diff --git a/tests/unit/test_ge_integration_expectations.py b/tests/unit/test_ge_integration_expectations.py index 4ef0b1465..72fd0543a 100644 --- a/tests/unit/test_ge_integration_expectations.py +++ b/tests/unit/test_ge_integration_expectations.py @@ -2,7 +2,7 @@ import pytest -from ydata_profiling.model.expectation_algorithms import ( +from data_profiling.model.expectation_algorithms import ( categorical_expectations, datetime_expectations, file_expectations, diff --git a/tests/unit/test_html_export.py b/tests/unit/test_html_export.py index a5d7fd4a3..24af17cf5 100644 --- a/tests/unit/test_html_export.py +++ b/tests/unit/test_html_export.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def check_assets(assets_dir: Path, file_type, n_css, n_js): diff --git 
a/tests/unit/test_index_column_name_clash.py b/tests/unit/test_index_column_name_clash.py index 0b149a3f6..cdc4818d5 100644 --- a/tests/unit/test_index_column_name_clash.py +++ b/tests/unit/test_index_column_name_clash.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture() diff --git a/tests/unit/test_interactions.py b/tests/unit/test_interactions.py index c774e8c6f..c1c71294f 100644 --- a/tests/unit/test_interactions.py +++ b/tests/unit/test_interactions.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_interactions_target(): diff --git a/tests/unit/test_modular.py b/tests/unit/test_modular.py index 3d5f9e4fd..1ec45e283 100644 --- a/tests/unit/test_modular.py +++ b/tests/unit/test_modular.py @@ -4,7 +4,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture diff --git a/tests/unit/test_multiindex_columns.py b/tests/unit/test_multiindex_columns.py index 3d9558968..ab06776fc 100644 --- a/tests/unit/test_multiindex_columns.py +++ b/tests/unit/test_multiindex_columns.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture() diff --git a/tests/unit/test_output.py b/tests/unit/test_output.py index 5af538e60..155785d21 100644 --- a/tests/unit/test_output.py +++ b/tests/unit/test_output.py @@ -3,7 +3,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture diff --git a/tests/unit/test_pandas/test_compat.py b/tests/unit/test_pandas/test_compat.py index b0dd94d39..37c47c403 100644 --- a/tests/unit/test_pandas/test_compat.py +++ b/tests/unit/test_pandas/test_compat.py @@ -2,7 +2,7 @@ import pandas as pd -from 
ydata_profiling.utils.compat import optional_option_context, pandas_version_info +from data_profiling.utils.compat import optional_option_context, pandas_version_info class TestCompatUtils(unittest.TestCase): diff --git a/tests/unit/test_pandas/test_correlations.py b/tests/unit/test_pandas/test_correlations.py index ba396d0c7..a5813edf2 100644 --- a/tests/unit/test_pandas/test_correlations.py +++ b/tests/unit/test_pandas/test_correlations.py @@ -2,14 +2,14 @@ import pandas as pd import pytest -from ydata_profiling.config import Settings -from ydata_profiling.model.pandas.correlations_pandas import ( +from data_profiling.config import Settings +from data_profiling.model.pandas.correlations_pandas import ( auto_compute as pandas_auto_compute, ) -from ydata_profiling.model.pandas.correlations_pandas import ( +from data_profiling.model.pandas.correlations_pandas import ( cramers_compute as pandas_cramers_compute, ) -from ydata_profiling.model.pandas.correlations_pandas import ( +from data_profiling.model.pandas.correlations_pandas import ( spearman_compute as pandas_spearman_compute, ) diff --git a/tests/unit/test_pandas/test_dataframe_empty.py b/tests/unit/test_pandas/test_dataframe_empty.py index d5e9cebb5..397b5a9e8 100644 --- a/tests/unit/test_pandas/test_dataframe_empty.py +++ b/tests/unit/test_pandas/test_dataframe_empty.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.mark.parametrize( diff --git a/tests/unit/test_pandas/test_describe_categorical_pandas.py b/tests/unit/test_pandas/test_describe_categorical_pandas.py index 4cb7b12f6..d3456356c 100644 --- a/tests/unit/test_pandas/test_describe_categorical_pandas.py +++ b/tests/unit/test_pandas/test_describe_categorical_pandas.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from ydata_profiling.model.pandas.describe_categorical_pandas import word_summary_vc +from 
data_profiling.model.pandas.describe_categorical_pandas import word_summary_vc value_counts_w_words = pd.Series(index=["The dog", "is hungry"], data=[2, 1]) diff --git a/tests/unit/test_pandas/test_discretize.py b/tests/unit/test_pandas/test_discretize.py index 18515ec64..0965edbbe 100644 --- a/tests/unit/test_pandas/test_discretize.py +++ b/tests/unit/test_pandas/test_discretize.py @@ -1,6 +1,6 @@ import pandas as pd -from ydata_profiling.model.pandas.discretize_pandas import ( +from data_profiling.model.pandas.discretize_pandas import ( DiscretizationType, Discretizer, ) diff --git a/tests/unit/test_pandas/test_imbalance.py b/tests/unit/test_pandas/test_imbalance.py index d00500ac6..c3586970c 100644 --- a/tests/unit/test_pandas/test_imbalance.py +++ b/tests/unit/test_pandas/test_imbalance.py @@ -1,6 +1,6 @@ import pandas as pd -from ydata_profiling.model.pandas.imbalance_pandas import column_imbalance_score +from data_profiling.model.pandas.imbalance_pandas import column_imbalance_score def test_column_imbalance_score_many_classes(): diff --git a/tests/unit/test_pd_future_infer_string.py b/tests/unit/test_pd_future_infer_string.py index aa37d121b..5eb120fa9 100644 --- a/tests/unit/test_pd_future_infer_string.py +++ b/tests/unit/test_pd_future_infer_string.py @@ -1,8 +1,8 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport -from ydata_profiling.utils.compat import pandas_version_info +from data_profiling import ProfileReport +from data_profiling.utils.compat import pandas_version_info @pytest.fixture() diff --git a/tests/unit/test_plot.py b/tests/unit/test_plot.py index 07b74cbd7..8748a4ca7 100644 --- a/tests/unit/test_plot.py +++ b/tests/unit/test_plot.py @@ -4,7 +4,7 @@ from matplotlib.legend import Legend from matplotlib.pyplot import Axes, close, rcParams -from ydata_profiling.visualisation.plot import ( +from data_profiling.visualisation.plot import ( _create_timeseries_heatmap, _plot_pie_chart, _plot_stacked_barh, diff --git 
a/tests/unit/test_renderable.py b/tests/unit/test_renderable.py index 50326020b..0d6b40c19 100644 --- a/tests/unit/test_renderable.py +++ b/tests/unit/test_renderable.py @@ -1,8 +1,8 @@ -from ydata_profiling.report.presentation.flavours.html.frequency_table import ( +from data_profiling.report.presentation.flavours.html.frequency_table import ( HTMLFrequencyTable, ) -from ydata_profiling.report.presentation.flavours.html.html import HTMLHTML -from ydata_profiling.report.presentation.flavours.html.image import HTMLImage +from data_profiling.report.presentation.flavours.html.html import HTMLHTML +from data_profiling.report.presentation.flavours.html.image import HTMLImage def test_html_frequency_table(): diff --git a/tests/unit/test_report_options.py b/tests/unit/test_report_options.py index 23b05e236..5090d3287 100644 --- a/tests/unit/test_report_options.py +++ b/tests/unit/test_report_options.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport # Enhanced fixture with more diverse data types diff --git a/tests/unit/test_sensitive.py b/tests/unit/test_sensitive.py index 8587a7217..dcc4d64a3 100644 --- a/tests/unit/test_sensitive.py +++ b/tests/unit/test_sensitive.py @@ -3,7 +3,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture() diff --git a/tests/unit/test_serialize.py b/tests/unit/test_serialize.py index d8a569eaf..d507badc4 100644 --- a/tests/unit/test_serialize.py +++ b/tests/unit/test_serialize.py @@ -2,8 +2,8 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport -from ydata_profiling.model import BaseDescription +from data_profiling import ProfileReport +from data_profiling.model import BaseDescription def test_load(get_data_file, test_output_dir): diff --git a/tests/unit/test_sort.py b/tests/unit/test_sort.py index 7a17c2b31..68edd4f69 100644 --- 
a/tests/unit/test_sort.py +++ b/tests/unit/test_sort.py @@ -1,6 +1,6 @@ import pytest -from ydata_profiling.utils.dataframe import sort_column_names +from data_profiling.utils.dataframe import sort_column_names @pytest.fixture() diff --git a/tests/unit/test_summarizer.py b/tests/unit/test_summarizer.py index f8d374216..5ecb560ed 100644 --- a/tests/unit/test_summarizer.py +++ b/tests/unit/test_summarizer.py @@ -2,8 +2,8 @@ import pandas as pd -from ydata_profiling.model.summarizer import ProfilingSummarizer, format_summary -from ydata_profiling.model.typeset import ProfilingTypeSet +from data_profiling.model.summarizer import ProfilingSummarizer, format_summary +from data_profiling.model.typeset import ProfilingTypeSet base_path = os.path.abspath(os.path.dirname(__file__)) @@ -33,7 +33,7 @@ def test_summarizer(config): [ os.path.abspath( base_path - + r"../../../src/ydata_profiling/model/typeset_does_not_exist.py" + + r"../../../src/data_profiling/model/typeset_does_not_exist.py" ) ] ), @@ -46,7 +46,7 @@ def test_summarizer(config): pd.Series( [ os.path.abspath( - base_path + r"../../../src/ydata_profiling/model/typeset.py" + base_path + r"../../../src/data_profiling/model/typeset.py" ) ] ), diff --git a/tests/unit/test_summary.py b/tests/unit/test_summary.py index 874600064..80d7e66cc 100644 --- a/tests/unit/test_summary.py +++ b/tests/unit/test_summary.py @@ -1,6 +1,6 @@ import pandas as pd -from ydata_profiling.model.table import get_table_stats +from data_profiling.model.table import get_table_stats def test_get_table_stats_empty_df(config): diff --git a/tests/unit/test_summary_algos.py b/tests/unit/test_summary_algos.py index 523ce5fcd..fafbf5a8a 100644 --- a/tests/unit/test_summary_algos.py +++ b/tests/unit/test_summary_algos.py @@ -2,8 +2,8 @@ import pandas as pd import pytest -from ydata_profiling.config import Settings -from ydata_profiling.model.summary_algorithms import ( +from data_profiling.config import Settings +from 
data_profiling.model.summary_algorithms import ( describe_counts, describe_generic, describe_supported, diff --git a/tests/unit/test_templates.py b/tests/unit/test_templates.py index cf8a20f3a..d9a3c6737 100644 --- a/tests/unit/test_templates.py +++ b/tests/unit/test_templates.py @@ -1,5 +1,5 @@ -from ydata_profiling.report.presentation.flavours.html import HTMLHTML -from ydata_profiling.report.presentation.flavours.html.templates import template +from data_profiling.report.presentation.flavours.html import HTMLHTML +from data_profiling.report.presentation.flavours.html.templates import template def test_report_title_strip_tags(): diff --git a/tests/unit/test_time_series.py b/tests/unit/test_time_series.py index 25549bf1d..b28a04703 100644 --- a/tests/unit/test_time_series.py +++ b/tests/unit/test_time_series.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport @pytest.fixture diff --git a/tests/unit/test_typeset_custom.py b/tests/unit/test_typeset_custom.py index 2685d3237..2da324338 100644 --- a/tests/unit/test_typeset_custom.py +++ b/tests/unit/test_typeset_custom.py @@ -13,8 +13,8 @@ ) from tests.unit.test_utils import patch_arg -from ydata_profiling.config import Settings -from ydata_profiling.model.typeset import ProfilingTypeSet +from data_profiling.config import Settings +from data_profiling.model.typeset import ProfilingTypeSet def get_profiling_series(): diff --git a/tests/unit/test_typeset_default.py b/tests/unit/test_typeset_default.py index d93d61cb0..db958fcff 100644 --- a/tests/unit/test_typeset_default.py +++ b/tests/unit/test_typeset_default.py @@ -15,9 +15,9 @@ ) from tests.unit.test_utils import patch_arg -from ydata_profiling.config import Settings -from ydata_profiling.model.typeset import ProfilingTypeSet -from ydata_profiling.profile_report import ProfileReport +from data_profiling.config import Settings +from data_profiling.model.typeset import 
ProfilingTypeSet +from data_profiling.profile_report import ProfileReport def get_sequences() -> Dict[str, Sequence]: diff --git a/tests/unit/test_url.py b/tests/unit/test_url.py index 9caa66cfa..8710b5912 100644 --- a/tests/unit/test_url.py +++ b/tests/unit/test_url.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -from ydata_profiling import ProfileReport +from data_profiling import ProfileReport def test_urls(get_data_file): diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index ca7b2b80c..a37cad8b7 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -3,8 +3,8 @@ import pandas as pd import pytest -from ydata_profiling.utils.compat import pandas_version_info -from ydata_profiling.utils.dataframe import ( +from data_profiling.utils.compat import pandas_version_info +from data_profiling.utils.dataframe import ( expand_mixed, read_pandas, uncompressed_extension,