Skip to content
22 changes: 15 additions & 7 deletions app/streamlit_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,11 @@ def mode(arr):


def quantile(q):
    """
    Build an aggregation function computing the q-th quantile, ignoring NaNs.

    :param q: Quantile to compute, forwarded unchanged to ``np.nanquantile``.
    :return: A one-argument function mapping ``agg`` to
        ``np.nanquantile(agg, q=q)``.
    """

    # A named inner function is used instead of a lambda; the earlier
    # ``return lambda ...`` line is dropped because it made this unreachable.
    def _quantile_q(agg):
        return np.nanquantile(agg, q=q)

    return _quantile_q


@st.cache_data
Expand Down Expand Up @@ -565,12 +569,12 @@ def plot_agg_input_section():
return agg, agg_name


def _hash_networkx_graph(graph):
    """
    Hash helper for networkx graphs passed to cached functions.

    Feeds ``graph`` through ``_get_graph_no_attribs`` and returns the
    ``_encode_graph`` encoding of the result.
    """
    stripped = _get_graph_no_attribs(graph)
    return _encode_graph(stripped)


@st.cache_data(
hash_funcs={
"networkx.classes.graph.Graph": lambda g: _encode_graph(
_get_graph_no_attribs(g)
)
},
hash_funcs={"networkx.classes.graph.Graph": _hash_networkx_graph},
show_spinner="Generating Mapper Layout",
)
def compute_mapper_plot(mapper_graph, dim, seed, iterations):
Expand Down Expand Up @@ -610,8 +614,12 @@ def mapper_plot_section(mapper_graph):
return mapper_plot


def _hash_mapper_plot(mapper_plot):
    """
    Hash helper for mapper plots passed to cached functions.

    The plot is identified by its ``positions`` attribute, which is returned
    as is.
    """
    positions = mapper_plot.positions
    return positions


@st.cache_data(
hash_funcs={"tdamapper.plot.MapperPlot": lambda mp: mp.positions},
hash_funcs={"tdamapper.plot.MapperPlot": _hash_mapper_plot},
show_spinner="Rendering Mapper",
)
def compute_mapper_fig(mapper_plot, colors, node_size, cmap, _agg, agg_name):
Expand Down
6 changes: 5 additions & 1 deletion benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
from tdamapper.core import TrivialClustering


def _identity(x):
    """Return the argument unchanged (identity filter function)."""
    return x


def _segment(cardinality, dimension, noise=0.1, start=None, end=None):
if start is None:
start = np.zeros(dimension)
Expand Down Expand Up @@ -70,7 +74,7 @@ def fit(self, X, y=None):
def run_gm(X, n, p):
t0 = time.time()
pipe = gm.make_mapper_pipeline(
filter_func=lambda x: x,
filter_func=_identity,
cover=gm.CubicalCover(n_intervals=n, overlap_frac=p),
clusterer=TrivialEstimator(),
)
Expand Down
22 changes: 22 additions & 0 deletions src/tdamapper/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
This module provides common functionalities for internal use.
"""

import cProfile
import io
import pstats
import warnings

import numpy as np
Expand Down Expand Up @@ -147,3 +150,22 @@ def clone(obj):
obj_noargs = type(obj)()
obj_noargs.set_params(**params)
return obj_noargs


def profile(n_lines=10):
    """
    Build a decorator that profiles every call of the decorated function.

    Each call runs under a fresh ``cProfile.Profile``. When the call returns,
    the statistics are sorted by cumulative time and the first ``n_lines``
    entries are printed to standard output.

    :param n_lines: Restriction passed to ``pstats.Stats.print_stats``.
    :return: A decorator whose wrapper returns the wrapped function's result.
    """
    # Local import keeps this block self-contained within the module.
    import functools

    def decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            profiler = cProfile.Profile()
            profiler.enable()
            try:
                result = func(*args, **kwargs)
            finally:
                # Stop profiling even when func raises, so the profiler is
                # never left active after the call.
                profiler.disable()

            s = io.StringIO()
            ps = pstats.Stats(profiler, stream=s).sort_stats("cumulative")
            ps.print_stats(n_lines)
            print(s.getvalue())
            return result

        return wrapper

    return decorator
10 changes: 5 additions & 5 deletions src/tdamapper/utils/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,27 @@
from numba import njit


# Decorated once only: numba refuses to jit an already-jitted function.
@njit(fastmath=True)  # pragma: no cover
def euclidean(x, y):
    """Return ``np.linalg.norm(x - y)`` (Euclidean distance for 1-D inputs)."""
    return np.linalg.norm(x - y)


# Decorated once only: numba refuses to jit an already-jitted function.
@njit(fastmath=True)  # pragma: no cover
def manhattan(x, y):
    """Return the order-1 norm of ``x - y`` (Manhattan distance for 1-D inputs)."""
    return np.linalg.norm(x - y, ord=1)


# Decorated once only: numba refuses to jit an already-jitted function.
@njit(fastmath=True)  # pragma: no cover
def chebyshev(x, y):
    """Return the infinity norm of ``x - y`` (Chebyshev distance for 1-D inputs)."""
    return np.linalg.norm(x - y, ord=np.inf)


# Decorated once only: numba refuses to jit an already-jitted function.
@njit(fastmath=True)  # pragma: no cover
def minkowski(p, x, y):
    """Return the order-``p`` norm of ``x - y`` (Minkowski distance for 1-D inputs)."""
    return np.linalg.norm(x - y, ord=p)


@njit(fastmath=True)
@njit(fastmath=True) # pragma: no cover
def cosine(x, y):
xy = np.dot(x, y)
xx = np.linalg.norm(x)
Expand Down
6 changes: 5 additions & 1 deletion src/tdamapper/utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,11 @@
return euclidean()
elif np.isinf(p):
return chebyshev()
return lambda x, y: _metrics.minkowski(p, x, y)

def dist(x, y):
return _metrics.minkowski(p, x, y)

Check warning on line 119 in src/tdamapper/utils/metrics.py

View check run for this annotation

Codecov / codecov/patch

src/tdamapper/utils/metrics.py#L118-L119

Added lines #L118 - L119 were not covered by tests

return dist

Check warning on line 121 in src/tdamapper/utils/metrics.py

View check run for this annotation

Codecov / codecov/patch

src/tdamapper/utils/metrics.py#L121

Added line #L121 was not covered by tests


def cosine():
Expand Down
116 changes: 82 additions & 34 deletions src/tdamapper/utils/quickselect.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,103 @@
def __swap(arr, i, j):
import numpy as np
from numba import njit

_ARR = np.zeros(1)


@njit  # pragma: no cover
def swap(arr, i, j):
    """Exchange, in place, the items stored at positions ``i`` and ``j`` of ``arr``."""
    arr[i], arr[j] = arr[j], arr[i]


def partition(data, start, end, p_ord):
@njit  # pragma: no cover
def _swap_all(arr, i, j, extra1, use_extra1, extra2, use_extra2):
    """
    Swap positions ``i`` and ``j`` in ``arr`` and in the enabled extra arrays.

    ``extra1`` / ``extra2`` are only touched when the matching ``use_extra*``
    flag is true, so they follow the permutation applied to ``arr``.
    """
    # The primary array is always permuted.
    swap(arr, i, j)
    # Companion arrays mirror the swap only when switched on.
    if use_extra1:
        swap(extra1, i, j)
    if use_extra2:
        swap(extra2, i, j)

@njit  # pragma: no cover
def _partition(data, start, end, p_ord, extra1, use_extra1, extra2, use_extra2):
    """
    Move every item of ``data[start:end]`` lower than ``p_ord`` to the front.

    Items are compared directly (``data[j] < p_ord``). Each swap is mirrored
    on ``extra1`` / ``extra2`` when the matching flag is set.

    :return: Index one past the last item found lower than ``p_ord``; equals
        ``start`` when no item is lower.
    """
    higher = start
    for j in range(start, end):
        # Leftover tuple unpacking and ``__swap`` call from the previous
        # version are removed: items are plain values and swaps go through
        # ``_swap_all`` exactly once.
        j_ord = data[j]
        if j_ord < p_ord:
            _swap_all(data, higher, j, extra1, use_extra1, extra2, use_extra2)
            higher += 1
    return higher


def quickselect(data, start, end, k):
@njit  # pragma: no cover
def _quickselect(data, start, end, k, extra1, use_extra1, extra2, use_extra2):
    """
    Reorder ``data[start:end]`` in place around the item at position ``k``.

    Repeatedly takes ``data[k]`` as pivot and partitions the current window
    until the pivot lands on position ``k``. Lower items of the window then
    sit before it. Every swap is mirrored on ``extra1`` / ``extra2`` when the
    matching flag is set. Does nothing when ``k`` is outside ``[start, end)``.
    """
    if (k < start) or (k >= end):
        return
    # -1 rather than None keeps ``higher`` an integer throughout the loop.
    start_, end_, higher = start, end, -1
    while higher != k + 1:
        p = data[k]
        # Park the pivot at the window start while the rest is partitioned.
        _swap_all(data, start_, k, extra1, use_extra1, extra2, use_extra2)
        higher = _partition(
            data, start_ + 1, end_, p, extra1, use_extra1, extra2, use_extra2
        )
        # Put the pivot right after the last lower item.
        _swap_all(data, start_, higher - 1, extra1, use_extra1, extra2, use_extra2)
        if k <= higher - 1:
            end_ = higher
        else:
            start_ = higher


def partition_tuple(data_ord, data_arr, start, end, p_ord):
    """
    Move positions of ``[start, end)`` whose ``data_ord`` value is lower than
    ``p_ord`` to the front, applying each swap to ``data_arr`` as well.

    :return: Index one past the last position found lower than ``p_ord``.
    """
    higher = start
    for j in range(start, end):
        j_ord = data_ord[j]
        if j_ord < p_ord:
            # Both sequences are swapped so they stay aligned index by index.
            __swap(data_arr, higher, j)
            __swap(data_ord, higher, j)
            higher += 1
    return higher
def _to_array(extra1=None, extra2=None):
    """
    Replace missing extra arrays with the module placeholder ``_ARR``.

    :return: The pair ``(extra1, extra2)`` with each ``None`` swapped for
        ``_ARR``; arguments that are given are returned as is.
    """
    first = extra1 if extra1 is not None else _ARR
    second = extra2 if extra2 is not None else _ARR
    return first, second


def quickselect_tuple(data_ord, data_arr, start, end, k):
    """
    Reorder ``data_ord`` and ``data_arr`` in lockstep over ``[start, end)``
    around the item at position ``k`` of ``data_ord``.

    Repeatedly uses ``data_ord[k]`` as pivot and partitions the current window
    until the pivot lands on position ``k``. Does nothing when ``k`` is outside
    ``[start, end)``.
    """
    if (k < start) or (k >= end):
        return
    start_, end_, higher = start, end, None
    while higher != k + 1:
        p_ord = data_ord[k]
        # Park the pivot at the window start in both sequences.
        __swap(data_arr, start_, k)
        __swap(data_ord, start_, k)
        higher = partition_tuple(data_ord, data_arr, start_ + 1, end_, p_ord)
        # Put the pivot right after the last lower item.
        __swap(data_arr, start_, higher - 1)
        __swap(data_ord, start_, higher - 1)
        if k <= higher - 1:
            end_ = higher
        else:
            start_ = higher
def _use_array(extra1=None, extra2=None):
    """
    Report which extra arrays were actually supplied.

    :return: The pair ``(use_extra1, use_extra2)``; each flag is true when the
        matching argument is not ``None``.
    """
    return extra1 is not None, extra2 is not None


def swap_all(arr, i, j, extra1=None, extra2=None):
    """
    Swap positions ``i`` and ``j`` of ``arr`` and of any supplied extra array.

    Thin wrapper that turns the optional ``extra1`` / ``extra2`` arguments
    into the array-plus-flag form expected by ``_swap_all``.
    """
    first, second = _to_array(extra1, extra2)
    has_first, has_second = _use_array(extra1, extra2)
    _swap_all(arr, i, j, first, has_first, second, has_second)


def partition(data, start, end, p_ord, extra1=None, extra2=None):
    """
    Partition ``data[start:end]`` around ``p_ord`` via ``_partition``.

    Thin wrapper that turns the optional ``extra1`` / ``extra2`` arguments
    into the array-plus-flag form expected by ``_partition``.

    :return: The value returned by ``_partition``.
    """
    first, second = _to_array(extra1, extra2)
    has_first, has_second = _use_array(extra1, extra2)
    return _partition(
        data, start, end, p_ord, first, has_first, second, has_second
    )


def quickselect(data, start, end, k, extra1=None, extra2=None):
    """
    Run ``_quickselect`` on ``data[start:end]`` for position ``k``.

    Thin wrapper that turns the optional ``extra1`` / ``extra2`` arguments
    into the array-plus-flag form expected by ``_quickselect``. Works in
    place and returns nothing.
    """
    first, second = _to_array(extra1, extra2)
    has_first, has_second = _use_array(extra1, extra2)
    _quickselect(data, start, end, k, first, has_first, second, has_second)
4 changes: 2 additions & 2 deletions src/tdamapper/utils/vptree.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
A module for fast knn and range searches, depending only on a given metric
"""

from tdamapper.utils.vptree_flat import VPTree as FVPT
from tdamapper.utils.vptree_hier import VPTree as HVPT
from tdamapper.utils.vptree_flat.vptree import VPTree as FVPT
from tdamapper.utils.vptree_hier.vptree import VPTree as HVPT


class VPTree:
Expand Down
Loading