diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..51968fc0 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +max-line-length = 88 +extend-ignore = E203, W503 +exclude = + docs \ No newline at end of file diff --git a/.github/workflows/test-bench.yml b/.github/workflows/test-bench.yml index 37382d48..3db0f64f 100644 --- a/.github/workflows/test-bench.yml +++ b/.github/workflows/test-bench.yml @@ -3,22 +3,61 @@ name: test-bench on: push: branches: - - develop - - main + - main + pull_request: + branches: + - main jobs: - test-bench-job: + test-bench-matrix-job: runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + include: + - python-version: "3.8" + allow-failure: false + - python-version: "3.9" + allow-failure: false + - python-version: "3.10" + allow-failure: false + - python-version: "3.11" + allow-failure: false + - python-version: "3.12" + allow-failure: false + - python-version: "3.13" + allow-failure: false + continue-on-error: ${{ matrix.allow-failure }} + name: Python ${{ matrix.python-version }} bench tests + steps: - name: Check out repository code uses: actions/checkout@v4 + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | python -m pip install -e .[dev] + - name: Run benchmarks run: | python -m unittest discover -s tests -p 'test_bench_*.py' + + test-bench-job: + needs: test-bench-matrix-job + runs-on: ubuntu-latest + steps: + - name: test-bench-job + run: echo "All bench tests passed." 
diff --git a/.github/workflows/test-unit.yml b/.github/workflows/test-unit.yml index 802c4c89..c3f6bb17 100644 --- a/.github/workflows/test-unit.yml +++ b/.github/workflows/test-unit.yml @@ -2,28 +2,68 @@ name: test-unit on: push: - branches: - - develop - - main + branches: + - '**' + pull_request: + branches: + - '**' jobs: - test-unit-job: - runs-on: ubuntu-latest + test-unit-matrix-job: + runs-on: ubuntu-22.04 + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + include: + - python-version: "3.8" + allow-failure: false + - python-version: "3.9" + allow-failure: false + - python-version: "3.10" + allow-failure: false + - python-version: "3.11" + allow-failure: false + - python-version: "3.12" + allow-failure: false + - python-version: "3.13" + allow-failure: false + continue-on-error: ${{ matrix.allow-failure }} + name: Python ${{ matrix.python-version }} unit tests + steps: - name: Check out repository code uses: actions/checkout@v4 + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | python -m pip install -e .[dev] + - name: Run tests and code coverage run: | coverage run --source=src -m unittest discover -s tests -p 'test_unit_*.py' coverage report -m + - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + + test-unit-job: + needs: test-unit-matrix-job + runs-on: ubuntu-latest + steps: + - name: test-unit-job + run: echo "All unit tests passed." 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e56d8e3a..15651582 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -30,6 +30,9 @@ structures are welcome but must first be discussed in an issue, especially when it comes to performance critical parts. Please include supporting theoretical or experimental evidence for such contributions. +NB: Contributions from bots or fully automated AI agents are discouraged and +will be discarded. + Follow these steps to contribute: 1. **Fork the Repository**. @@ -108,12 +111,21 @@ removed whenever required by a new jump in major version. ### Code Style We follow [PEP 8](https://peps.python.org/pep-0008/) for Python code style. -You can run a linter to check your code: +You can run a linter to check your code. The dev dependencies of **tda-mapper** +include `flake8`, `black` and `isort` to help you manage code style. ```bash flake8 ``` +```bash +black . +``` + +```bash +isort . +``` + ### Documentation Ensure that new features and APIs are documented in the code and that @@ -123,6 +135,7 @@ To build the documentation locally: ```bash cd docs +make clean make html ``` diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index 1947245f..eb8320c7 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -74,14 +74,14 @@ def run_gm(X, n, p): cover=gm.CubicalCover(n_intervals=n, overlap_frac=p), clusterer=TrivialEstimator(), ) - mapper_graph = pipe.fit_transform(X) + pipe.fit_transform(X) t1 = time.time() return t1 - t0 def run_tm(X, n, p): t0 = time.time() - mapper_graph = tm.core.MapperAlgorithm( + tm.core.MapperAlgorithm( cover=tm.cover.CubicalCover( n_intervals=n, overlap_frac=p, @@ -99,7 +99,7 @@ def run_tm(X, n, p): def run_km(X, n, p): t0 = time.time() mapper = km.KeplerMapper(verbose=0) - graph = mapper.map( + mapper.map( lens=X, X=X, cover=km.Cover(n_cubes=n, perc_overlap=p), diff --git a/docs/source/notebooks/digits.py b/docs/source/notebooks/digits.py index 70b3d75e..54191ec1 100644 --- 
a/docs/source/notebooks/digits.py +++ b/docs/source/notebooks/digits.py @@ -44,7 +44,6 @@ from sklearn.datasets import load_digits from sklearn.decomposition import PCA -from tdamapper.clustering import FailSafeClustering from tdamapper.cover import CubicalCover from tdamapper.learn import MapperAlgorithm from tdamapper.plot import MapperPlot diff --git a/pyproject.toml b/pyproject.toml index 2d6fd0f7..386a7c2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,12 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", ] keywords = ["tda", "mapper", "topology", "topological data analysis"] @@ -28,21 +34,21 @@ dependencies = [ "matplotlib>=3.3.4,<4.0.0", "networkx>=2.5,<3.0.0", "igraph>=0.11.8,<0.12.0", - "numba>=0.54,<0.60", + "numba>=0.54,<0.62", "pyvis>=0.3.2,<0.4.0", "plotly>=6.0.1,<7.0.0", "joblib>=1.4.2,<2.0.0", ] -requires-python = ">=3.6" +requires-python = ">=3.8" [project.optional-dependencies] dev = [ "coverage[toml]", "pandas", "scikit-learn<1.6.0", - "flake8", "black[jupyter]", - "isort" + "isort", + "flake8", ] [project.urls] @@ -80,4 +86,3 @@ include_trailing_comma = true force_grid_wrap = 0 combine_as_imports = true skip = ["venv", ".venv", "build", "dist"] - diff --git a/tests/test_bench_cover.py b/tests/test_bench_cover.py index e6c720ff..67bd8549 100644 --- a/tests/test_bench_cover.py +++ b/tests/test_bench_cover.py @@ -38,9 +38,8 @@ def cover(self, vpt, X, r): def run_bench(self, X, r, dist, vp, **kwargs): XX = np.array([[i] + [xi for xi in x] for i, x in enumerate(X)]) - d = lambda x, y: dist(x[1:], y[1:]) t0 = time.time() - vpt = vp(XX, metric=d, 
**kwargs) + vpt = vp(XX, metric=lambda x, y: dist(x[1:], y[1:]), **kwargs) list(self.cover(vpt, XX, r)) t1 = time.time() self.logger.info(f"time: {t1 - t0}") @@ -48,7 +47,7 @@ def run_bench(self, X, r, dist, vp, **kwargs): def test_cover_random(self): for r in [1.0, 10.0, 100.0]: for n in [100, 1000, 10000]: - self.logger.info(f"============ Cover Bench Random ==========") + self.logger.info("============ Cover Bench Random ==========") self.logger.info(f"[n: {n}, r: {r}]") X = dataset(num=n) self.logger.info(">>>>>>> HVPT >>>>>>") @@ -66,7 +65,7 @@ def test_cover_digits(self): X, _ = load_digits(return_X_y=True) # X = PCA(n_components=3).fit_transform(X) for r in [1.0, 10.0, 100.0]: - self.logger.info(f"======= Cover Bench Digits =======") + self.logger.info("======= Cover Bench Digits =======") self.logger.info(f"[r: {r}]") self.logger.info(">>>>>>> HVPT >>>>>>") self.run_bench(X, r, dist, HVPT, leaf_radius=r, pivoting="random") diff --git a/tests/test_bench_vptree.py b/tests/test_bench_vptree.py index e04e47ca..9f7f9530 100644 --- a/tests/test_bench_vptree.py +++ b/tests/test_bench_vptree.py @@ -77,14 +77,14 @@ def _test_ball_search_naive(self, data, name): d(np.array([0.0]), np.array([0.0])) # jit-compile numba t0 = time() for val in data: - neigh = [x for x in data if d(val, x) <= self.eps] + [x for x in data if d(val, x) <= self.eps] t1 = time() self.logger.info(f"{name}: {t1 - t0}") def _test_ball_search(self, data, name, vpt): t0 = time() for val in data: - neigh = vpt.ball_search(val, self.eps) + vpt.ball_search(val, self.eps) t1 = time() self.logger.info(f"{name}: {t1 - t0}") @@ -94,13 +94,13 @@ def _test_knn_search_naive(self, data, name): t0 = time() for val in data: data.sort(key=lambda x: d(x, val)) - neigh = [x for x in data[: self.k]] + [x for x in data[: self.k]] t1 = time() self.logger.info(f"{name}: {t1 - t0}") def _test_knn_search(self, data, name, vpt): t0 = time() for val in data: - neigh = vpt.knn_search(val, self.k) + vpt.knn_search(val, 
self.k) t1 = time() self.logger.info(f"{name}: {t1 - t0}") diff --git a/tests/test_unit_learn.py b/tests/test_unit_learn.py index efa24ce0..de9fc93e 100644 --- a/tests/test_unit_learn.py +++ b/tests/test_unit_learn.py @@ -2,15 +2,11 @@ import networkx as nx import numpy as np -from sklearn.cluster import DBSCAN from sklearn.utils.estimator_checks import check_estimator from tdamapper.core import TrivialClustering, TrivialCover from tdamapper.cover import BallCover -from tdamapper.learn import ( - MapperAlgorithm, - MapperClustering, -) +from tdamapper.learn import MapperAlgorithm, MapperClustering def euclidean(x, y): diff --git a/tests/test_unit_params.py b/tests/test_unit_params.py index f79437da..b89841d4 100644 --- a/tests/test_unit_params.py +++ b/tests/test_unit_params.py @@ -5,7 +5,7 @@ from tdamapper._common import clone from tdamapper.clustering import MapperClustering from tdamapper.core import MapperAlgorithm -from tdamapper.cover import BallCover, CubicalCover, KNNCover +from tdamapper.cover import BallCover, CubicalCover class TestParams(unittest.TestCase): diff --git a/tests/test_unit_plot.py b/tests/test_unit_plot.py index 79f81033..c9749eea 100644 --- a/tests/test_unit_plot.py +++ b/tests/test_unit_plot.py @@ -23,7 +23,7 @@ def test_two_connected_clusters(self): ) g = mp.fit_transform(data, data) mp_plot1 = MapperPlot(g, dim=2, seed=123, iterations=10) - fig1 = mp_plot1.plot_plotly( + mp_plot1.plot_plotly( colors=data, agg=np.nanmax, width=200, @@ -40,7 +40,7 @@ def test_two_connected_clusters(self): title="example", cmap="jet", ) - fig3 = mp_plot2.plot_plotly_update( + mp_plot2.plot_plotly_update( fig2, colors=data, agg=np.nanmin, @@ -50,7 +50,7 @@ def test_two_connected_clusters(self): cmap="viridis", ) mp_plot3 = MapperPlot(g, dim=2) - fig4 = mp_plot3.plot_matplotlib(width=300, height=300, colors=data) + mp_plot3.plot_matplotlib(width=300, height=300, colors=data) mp_plot3.plot_pyvis( width=512, height=512,