Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions app/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
streamlit>=1.40.0
numpy>=1.25.2
scikit-learn>=1.5.0
umap-learn>=0.5.7
pandas>=2.1.0
tda-mapper==0.9.0
plotly < 6.0.0
streamlit>=1.40.0,<2.0.0
numpy>=1.25.2,<2.0.0
scikit-learn>=1.5.0,<1.6.0
umap-learn>=0.5.7,<0.6.0
pandas>=2.1.0,<3.0.0
tda-mapper>=0.9.0,<0.10.0
plotly>=6.0.0,<7.0.0
106 changes: 50 additions & 56 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
import time

import pandas as pd
import gtda.mapper as gm
import kmapper as km
import numpy as np

from sklearn.decomposition import PCA
from sklearn.datasets import fetch_openml, load_digits
import pandas as pd
from sklearn.base import ClusterMixin

from tdamapper.clustering import TrivialClustering
from sklearn.datasets import fetch_openml, load_digits
from sklearn.decomposition import PCA

import tdamapper as tm
import gtda.mapper as gm
import kmapper as km
from tdamapper.clustering import TrivialClustering


def _segment(cardinality, dimension, noise=0.1, start=None, end=None):
Expand Down Expand Up @@ -40,17 +38,17 @@ def digits(k):


def mnist(k):
X = _load_openml('mnist_784')
X = _load_openml("mnist_784")
return PCA(k).fit_transform(X)


def cifar10(k):
X = _load_openml('CIFAR_10')
X = _load_openml("CIFAR_10")
return PCA(k).fit_transform(X)


def fashion_mnist(k):
X = _load_openml('Fashion-MNIST')
X = _load_openml("Fashion-MNIST")
return PCA(k).fit_transform(X)


Expand All @@ -73,9 +71,7 @@ def run_gm(X, n, p):
t0 = time.time()
pipe = gm.make_mapper_pipeline(
filter_func=lambda x: x,
cover=gm.CubicalCover(
n_intervals=n,
overlap_frac=p),
cover=gm.CubicalCover(n_intervals=n, overlap_frac=p),
clusterer=TrivialEstimator(),
)
mapper_graph = pipe.fit_transform(X)
Expand All @@ -89,10 +85,10 @@ def run_tm(X, n, p):
cover=tm.cover.CubicalCover(
n_intervals=n,
overlap_frac=p,
#leaf_capacity=1000,
#leaf_radius=1.0 / (2.0 - 2.0 * p),
#kind='hierarchical',
#pivoting='random',
# leaf_capacity=1000,
# leaf_radius=1.0 / (2.0 - 2.0 * p),
# kind='hierarchical',
# pivoting='random',
),
clustering=TrivialEstimator(),
).fit_transform(X, X)
Expand All @@ -106,25 +102,24 @@ def run_km(X, n, p):
graph = mapper.map(
lens=X,
X=X,
cover=km.Cover(
n_cubes=n,
perc_overlap=p
),
cover=km.Cover(n_cubes=n, perc_overlap=p),
clusterer=TrivialEstimator(),
)
t1 = time.time()
return t1 - t0


def run_bench(benches, datasets, dimensions, overlaps, intervals):
df_bench = pd.DataFrame({
'bench': [],
'dataset': [],
'p': [],
'n': [],
'k': [],
'time': [],
})
df_bench = pd.DataFrame(
{
"bench": [],
"dataset": [],
"p": [],
"n": [],
"k": [],
"time": [],
}
)
launch_time = int(time.time())
for bench_name, bench in benches:
for dataset_name, dataset in datasets:
Expand All @@ -133,34 +128,33 @@ def run_bench(benches, datasets, dimensions, overlaps, intervals):
for p in overlaps:
for n in intervals:
t = bench(X, n, p)
df_delta = pd.DataFrame({
'bench': bench_name,
'dataset': dataset_name,
'p': p,
'n': n,
'k': k,
'time': t,
}, index=[0])
df_delta = pd.DataFrame(
{
"bench": bench_name,
"dataset": dataset_name,
"p": p,
"n": n,
"k": k,
"time": t,
},
index=[0],
)
print(df_delta)
df_bench = pd.concat([df_bench, df_delta], ignore_index=True)
df_bench.to_csv(f'./benchmark_{launch_time}.csv', index=False)
df_bench.to_csv(f"./benchmark_{launch_time}.csv", index=False)


if __name__ == '__main__':
run_tm(line(1), 1, 0.5) # fist run to jit-compile numba decorated functions
if __name__ == "__main__":
run_tm(line(1), 1, 0.5) # fist run to jit-compile numba decorated functions

run_bench(
overlaps=[
0.125,
0.25,
0.5
],
overlaps=[0.125, 0.25, 0.5],
datasets=[
('line', line),
('digits', digits),
('mnist', mnist),
('cifar10', cifar10),
('fashion_mnist', fashion_mnist),
("line", line),
("digits", digits),
("mnist", mnist),
("cifar10", cifar10),
("fashion_mnist", fashion_mnist),
],
intervals=[
10,
Expand All @@ -172,9 +166,9 @@ def run_bench(benches, datasets, dimensions, overlaps, intervals):
4,
5,
],
benches = [
('tda-mapper', run_tm),
('kepler-mapper', run_km),
('giotto-tda', run_gm),
benches=[
("tda-mapper", run_tm),
("kepler-mapper", run_km),
("giotto-tda", run_gm),
],
)
)
Loading