Skip to content

Commit f3c3513

Browse files
authored
Merge pull request #145 from lucasimi/develop
Develop
2 parents 49c0da8 + 6a2d9a3 commit f3c3513

File tree

19 files changed

+959
-443
lines changed

19 files changed

+959
-443
lines changed

.github/workflows/bench.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: Check out repository code
14-
uses: actions/checkout@v2
14+
uses: actions/checkout@v4
1515
- name: Install Python
16-
uses: actions/setup-python@v4
16+
uses: actions/setup-python@v5
1717
with:
1818
python-version: '3.10'
1919
- name: Install dependencies

.github/workflows/publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
uses: actions/checkout@v4
1818

1919
- name: Install Python
20-
uses: actions/setup-python@v4
20+
uses: actions/setup-python@v5
2121
with:
2222
python-version: '3.10'
2323

.github/workflows/test.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: Check out repository code
14-
uses: actions/checkout@v2
14+
uses: actions/checkout@v4
1515
- name: Install Python
16-
uses: actions/setup-python@v4
16+
uses: actions/setup-python@v5
1717
with:
1818
python-version: '3.10'
1919
- name: Install dependencies
@@ -24,6 +24,6 @@ jobs:
2424
coverage run --source=src -m unittest discover -s tests -p 'test_unit_*.py'
2525
coverage report -m
2626
- name: Upload coverage reports to Codecov
27-
uses: codecov/codecov-action@v3
27+
uses: codecov/codecov-action@v4
2828
env:
2929
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@
186186
same "printed page" as the copyright notice for easier
187187
identification within third-party archives.
188188

189-
Copyright [yyyy] [name of copyright owner]
189+
Copyright 2020 Luca Simi
190190

191191
Licensed under the Apache License, Version 2.0 (the "License");
192192
you may not use this file except in compliance with the License.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ read
3232
This library contains an implementation of Mapper, where the construction
3333
of open covers is based on *vp-trees* for improved performance and scalability.
3434
The details about this methodology are contained in
35-
[our preprint](https://doi.org/10.5281/zenodo.10659652).
35+
[our preprint](https://doi.org/10.5281/zenodo.10659651).
3636

3737
| Step 1 | Step 2 | Step 3 | Step 4 |
3838
| ------ | ------ | ------ | ------ |

benchmarks/benchmark.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import time
2+
3+
import pandas as pd
4+
import numpy as np
5+
6+
from sklearn.decomposition import PCA
7+
from sklearn.datasets import fetch_openml, load_digits
8+
from sklearn.base import ClusterMixin
9+
10+
from tdamapper.clustering import TrivialClustering
11+
12+
import tdamapper as tm
13+
import gtda.mapper as gm
14+
import kmapper as km
15+
16+
17+
def _segment(cardinality, dimension, noise=0.1, start=None, end=None):
18+
if start is None:
19+
start = np.zeros(dimension)
20+
if end is None:
21+
end = np.ones(dimension)
22+
coefficients = np.random.rand(cardinality, 1)
23+
points = start + coefficients * (end - start)
24+
noise = np.random.normal(0, noise, size=(cardinality, dimension))
25+
return points + noise
26+
27+
28+
def _load_openml(name):
    """Fetch the named OpenML dataset and return its features as a NumPy array."""
    features, _ = fetch_openml(name=name, return_X_y=True)
    return features.to_numpy()
31+
32+
33+
def line(k):
    """Return 100000 points sampled from a slightly noisy k-dimensional segment."""
    return _segment(100000, k, noise=0.01)
35+
36+
37+
def digits(k):
    """Load the scikit-learn digits dataset projected onto k PCA components."""
    data, _ = load_digits(return_X_y=True)
    return PCA(k).fit_transform(data)
40+
41+
42+
def mnist(k):
    """Fetch MNIST from OpenML and project it onto k PCA components."""
    features = _load_openml('mnist_784')
    return PCA(k).fit_transform(features)
45+
46+
47+
def cifar10(k):
    """Fetch CIFAR-10 from OpenML and project it onto k PCA components."""
    features = _load_openml('CIFAR_10')
    return PCA(k).fit_transform(features)
50+
51+
52+
def fashion_mnist(k):
    """Fetch Fashion-MNIST from OpenML and project it onto k PCA components."""
    features = _load_openml('Fashion-MNIST')
    return PCA(k).fit_transform(features)
55+
56+
57+
# wrapper class to supply trivial clustering to giotto-tda
class TrivialEstimator(ClusterMixin):
    """Scikit-learn-style adapter around :class:`TrivialClustering`.

    giotto-tda's Mapper pipeline expects a clusterer following the
    sklearn estimator protocol (``get_params``/``set_params``/``fit``);
    this wrapper forwards fitting to ``TrivialClustering`` and exposes
    the resulting labels via the conventional ``labels_`` attribute.
    """

    def get_params(self, deep=True):
        # No hyperparameters to expose.
        return {}

    def set_params(self, **parameters):
        # Fixed typo: parameter pack was misspelled ``parmeters``.
        # Nothing to set; return self for sklearn's fluent convention.
        return self

    def fit(self, X, y=None):
        """Fit a trivial clustering on X and store its labels on self."""
        clust = TrivialClustering()
        self.labels_ = clust.fit(X, y).labels_
        return self
70+
71+
72+
def run_gm(X, n, p):
    """Time a single giotto-tda Mapper run on X (n intervals, overlap p)."""
    started = time.time()
    pipeline = gm.make_mapper_pipeline(
        filter_func=lambda x: x,
        cover=gm.CubicalCover(
            n_intervals=n,
            overlap_frac=p),
        clusterer=TrivialEstimator(),
    )
    pipeline.fit_transform(X)
    return time.time() - started
84+
85+
86+
def run_tm(X, n, p):
    """Time a single tda-mapper run on X (n intervals, overlap p)."""
    started = time.time()
    cover = tm.cover.CubicalCover(
        n_intervals=n,
        overlap_frac=p,
        # Tuning knobs intentionally left at their defaults; uncomment
        # to experiment with the vp-tree search parameters:
        #leaf_capacity=1000,
        #leaf_radius=1.0 / (2.0 - 2.0 * p),
        #kind='hierarchical',
        #pivoting='random',
    )
    algorithm = tm.core.MapperAlgorithm(
        cover=cover,
        clustering=TrivialEstimator(),
    )
    algorithm.fit_transform(X, X)
    return time.time() - started
101+
102+
103+
def run_km(X, n, p):
    """Time a single KeplerMapper run on X (n cubes, overlap p)."""
    started = time.time()
    mapper = km.KeplerMapper(verbose=0)
    mapper.map(
        lens=X,
        X=X,
        cover=km.Cover(
            n_cubes=n,
            perc_overlap=p
        ),
        clusterer=TrivialEstimator(),
    )
    return time.time() - started
117+
118+
119+
def run_bench(benches, datasets, dimensions, overlaps, intervals):
    """Run every benchmark over the full parameter grid and log timings.

    :param benches: iterable of ``(name, fn)`` where ``fn(X, n, p)``
        returns the elapsed time of one Mapper run.
    :param datasets: iterable of ``(name, loader)`` where ``loader(k)``
        returns a dataset of dimension ``k``.
    :param dimensions: values of ``k`` passed to each dataset loader.
    :param overlaps: overlap fractions ``p`` to test.
    :param intervals: interval counts ``n`` to test.

    Results are checkpointed after every run to
    ``./benchmark_<launch_time>.csv`` so partial results survive a crash.
    """
    launch_time = int(time.time())
    csv_path = f'./benchmark_{launch_time}.csv'
    # Accumulate plain dicts and rebuild the DataFrame when saving:
    # repeatedly pd.concat-ing one-row frames is quadratic in the number
    # of runs, which matters for large parameter grids.
    records = []
    for bench_name, bench in benches:
        for dataset_name, dataset in datasets:
            for k in dimensions:
                X = dataset(k)
                for p in overlaps:
                    for n in intervals:
                        t = bench(X, n, p)
                        record = {
                            'bench': bench_name,
                            'dataset': dataset_name,
                            'p': p,
                            'n': n,
                            'k': k,
                            'time': t,
                        }
                        # Echo each measurement as it completes.
                        print(pd.DataFrame(record, index=[0]))
                        records.append(record)
                        # Rewrite the checkpoint after every run.
                        pd.DataFrame(records).to_csv(csv_path, index=False)
147+
148+
149+
if __name__ == '__main__':
    # Warm-up run: the first call JIT-compiles the numba-decorated
    # functions so compilation time is not charged to a measured run.
    run_tm(line(1), 1, 0.5)

    run_bench(
        benches=[
            ('tda-mapper', run_tm),
            ('kepler-mapper', run_km),
            ('giotto-tda', run_gm),
        ],
        datasets=[
            ('line', line),
            ('digits', digits),
            ('mnist', mnist),
            ('cifar10', cifar10),
            ('fashion_mnist', fashion_mnist),
        ],
        dimensions=[1, 2, 3, 4, 5],
        overlaps=[0.125, 0.25, 0.5],
        intervals=[10],
    )

0 commit comments

Comments
 (0)