Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@
.idea
dist/
build/
coverage.xml
23 changes: 23 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Developer task runner: install, test, benchmark, and clean the project.
# PYTHON and PIP can be overridden on the command line, e.g. `make PYTHON=python3`.
PYTHON = python
PIP = pip

# Default target: set up an editable development install.
.PHONY: all
all: install

# Install the package in editable mode together with its "dev" extras.
.PHONY: install
install:
	$(PIP) install -e .[dev]

# Run the unit tests under coverage (measuring src/), then emit the XML report.
.PHONY: test
test:
	coverage run --source=src -m pytest tests/test_unit_*.py
	coverage xml

# Run the benchmark tests with output capture disabled and INFO logs on the console.
.PHONY: bench
bench:
	$(PYTHON) -m pytest tests/test_bench_*.py -s -o log_cli=true --log-level=INFO

# Remove Python bytecode caches (__pycache__ directories and stray *.pyc files).
.PHONY: clean
clean:
	find . -type d -name "__pycache__" -exec rm -r {} +
	find . -type f -name "*.pyc" -delete
110 changes: 101 additions & 9 deletions app/streamlit_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
from umap import UMAP

from tdamapper.core import aggregate_graph
from tdamapper.cover import BallCover, CubicalCover
from tdamapper.learn import MapperAlgorithm
from tdamapper.cover import BallCover, CubicalCover, KNNCover
from tdamapper.learn import MapperAlgorithm, MapperClustering
from tdamapper.plot import MapperPlot

LIMITS_ENABLED = bool(os.environ.get("LIMITS_ENABLED", False))
Expand Down Expand Up @@ -63,8 +63,12 @@

V_COVER_CUBICAL = "Cubical"

V_COVER_KNN = "KNN"

V_CLUSTERING_TRIVIAL = "Trivial"

V_CLUSTERING_COVER = "Cover"

V_CLUSTERING_AGGLOMERATIVE = "Agglomerative"

V_CLUSTERING_DBSCAN = "DBSCAN"
Expand Down Expand Up @@ -198,7 +202,10 @@ def _get_data_summary(df_X, df_y):
}
).T
df_summary = pd.DataFrame(
{V_DATA_SUMMARY_FEAT: df.columns, V_DATA_SUMMARY_HIST: df_hist.values.tolist()}
{
V_DATA_SUMMARY_FEAT: df.columns,
V_DATA_SUMMARY_HIST: df_hist.values.tolist(),
}
)
return df_summary

Expand Down Expand Up @@ -316,9 +323,10 @@ def mapper_lens_input_section(X):
if pca_n > n_feats:
lens = X
else:
lens = PCA(n_components=pca_n, random_state=pca_random_state).fit_transform(
X
)
lens = PCA(
n_components=pca_n,
random_state=pca_random_state,
).fit_transform(X)
elif lens_type == V_LENS_UMAP:
umap_n = st.number_input(
"UMAP Components",
Expand All @@ -343,7 +351,12 @@ def mapper_cover_input_section():
st.header("🌐 Cover")
cover_type = st.selectbox(
"Type",
options=[V_COVER_TRIVIAL, V_COVER_BALL, V_COVER_CUBICAL],
options=[
V_COVER_TRIVIAL,
V_COVER_BALL,
V_COVER_CUBICAL,
V_COVER_KNN,
],
index=2,
)
cover = None
Expand Down Expand Up @@ -379,9 +392,79 @@ def mapper_cover_input_section():
"Overlap", value=0.25, min_value=0.0, max_value=1.0
)
cover = CubicalCover(n_intervals=cubical_n, overlap_frac=cubical_p)
elif cover_type == V_COVER_KNN:
knn_k = st.number_input("Neighbors", value=10, min_value=1)
cover = KNNCover(neighbors=knn_k)
return cover


def mapper_clustering_cover():
    """Render the cover widgets for cover-based clustering and build the estimator.

    A select box offers the Trivial, Ball, Cubical and KNN covers (the third
    entry, Cubical, is preselected). Depending on the choice, the matching
    parameter widgets are shown and the corresponding cover object is created.

    Every widget is given an explicit ``key`` -- presumably so that these
    widgets do not collide with the identically labelled ones of the main
    cover section; confirm against the rest of the app.

    Returns
    -------
    MapperClustering
        A clustering object wrapping the selected cover (``None`` for the
        trivial choice), constructed with ``n_jobs=-2``.
    """
    selected = st.selectbox(
        "Type",
        options=[
            V_COVER_TRIVIAL,
            V_COVER_BALL,
            V_COVER_CUBICAL,
            V_COVER_KNN,
        ],
        index=2,
        key="mapper_clustering_cover_type",
    )

    # Trivial cover needs no parameters: hand MapperClustering a null cover.
    if selected == V_COVER_TRIVIAL:
        return MapperClustering(cover=None, n_jobs=-2)

    chosen_cover = None
    if selected == V_COVER_BALL:
        radius = st.number_input(
            "Radius",
            value=100.0,
            min_value=0.0,
            key="mapper_clustering_radius",
        )
        distance = st.selectbox(
            "Metric",
            options=[
                "euclidean",
                "chebyshev",
                "manhattan",
                "cosine",
            ],
            key="mapper_clustering_cover_metric",
        )
        chosen_cover = BallCover(radius=radius, metric=distance)
    elif selected == V_COVER_CUBICAL:
        intervals = st.number_input(
            "Intervals",
            value=10,
            min_value=0,
            key="mapper_clustering_cover_intervals",
        )
        custom_overlap = st.checkbox(
            "Set overlap",
            value=False,
            help="Uses a dimension-dependant default overlap when unchecked",
            key="mapper_clustering_cover_set_overlap",
        )
        # The overlap input is only rendered when the user opts in; otherwise
        # None is forwarded to CubicalCover as the overlap fraction.
        overlap = (
            st.number_input(
                "Overlap",
                value=0.25,
                min_value=0.0,
                max_value=1.0,
                key="mapper_clustering_cover_overlap",
            )
            if custom_overlap
            else None
        )
        chosen_cover = CubicalCover(n_intervals=intervals, overlap_frac=overlap)
    elif selected == V_COVER_KNN:
        neighbors = st.number_input(
            "Neighbors",
            value=10,
            min_value=1,
            key="mapper_clustering_knn_k",
        )
        chosen_cover = KNNCover(neighbors=neighbors)

    return MapperClustering(cover=chosen_cover, n_jobs=-2)


def mapper_clustering_kmeans():
clust_num = st.number_input(
"Clusters",
Expand Down Expand Up @@ -485,17 +568,20 @@ def mapper_clustering_input_section():
"Type",
options=[
V_CLUSTERING_TRIVIAL,
V_CLUSTERING_COVER,
V_CLUSTERING_KMEANS,
V_CLUSTERING_AGGLOMERATIVE,
V_CLUSTERING_DBSCAN,
V_CLUSTERING_HDBSCAN,
V_CLUSTERING_AFFINITY_PROPAGATION,
],
index=1,
index=0,
)
clustering = None
if clustering_type == V_CLUSTERING_TRIVIAL:
clustering = None
elif clustering_type == V_CLUSTERING_COVER:
clustering = mapper_clustering_cover()
elif clustering_type == V_CLUSTERING_AGGLOMERATIVE:
clustering = mapper_clustering_agglomerative()
elif clustering_type == V_CLUSTERING_KMEANS:
Expand Down Expand Up @@ -625,7 +711,13 @@ def compute_mapper_fig(mapper_plot, colors, node_size, cmap, _agg, agg_name):
logger.info("Generating Mapper figure")
mapper_fig = mapper_plot.plot_plotly(
colors,
node_size=node_size,
node_size=[
0.0,
node_size / 2.0,
node_size,
node_size * 1.5,
node_size * 2.0,
],
agg=_agg,
title=[f"{c}" for c in colors.columns],
cmap=cmap,
Expand Down
2 changes: 1 addition & 1 deletion src/tdamapper/_plot_plotly.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def plot_plotly(
titles = [title for _ in range(colors_num)]
elif isinstance(title, list) and len(title) == colors_num:
titles = title
node_sizes = [node_size] if isinstance(node_size, int) else node_size
node_sizes = [node_size] if isinstance(node_size, (int, float)) else node_size
fig = _figure(mapper_plot, width, height, node_sizes, colors, titles, agg, cmaps)
_add_ui_to_layout(mapper_plot, fig, colors, titles, node_sizes, agg, cmaps)
return fig
Expand Down
2 changes: 1 addition & 1 deletion src/tdamapper/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def __init__(self, cover=None, clustering=None, n_jobs=1):
self.n_jobs = n_jobs

def fit(self, X, y=None):
y = X if y is None else y
X, y = self._validate_X_y(X, y)
cover = TrivialCover() if self.cover is None else self.cover
cover = clone(cover)
Expand All @@ -53,7 +54,6 @@ def fit(self, X, y=None):
)
clustering = clone(clustering)
n_jobs = self.n_jobs
y = X if y is None else y
itm_lbls = mapper_connected_components(
X,
y,
Expand Down
58 changes: 29 additions & 29 deletions src/tdamapper/utils/heap.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,91 +13,91 @@ def _parent(i):
class _HeapNode:

def __init__(self, key, value):
self.__key = key
self.__value = value
self._key = key
self._value = value

def get(self):
return self.__key, self.__value
return self._key, self._value

def __lt__(self, other):
return self.__key < other
return self._key < other._key

def __le__(self, other):
return self.__key <= other
return self._key <= other._key

def __gt__(self, other):
return self.__key > other
return self._key > other._key

def __ge__(self, other):
return self.__key >= other
return self._key >= other._key


class MaxHeap:

def __init__(self):
self.__heap = []
self.__iter = None
self._heap = []
self._iter = None

def __iter__(self):
self.__iter = iter(self.__heap)
self._iter = iter(self._heap)
return self

def __next__(self):
node = next(self.__iter)
node = next(self._iter)
return node.get()

def __len__(self):
return len(self.__heap)
return len(self._heap)

def top(self):
if not self.__heap:
if not self._heap:
return (None, None)
return self.__heap[0].get()
return self._heap[0].get()

def pop(self):
if not self.__heap:
if not self._heap:
return
max_val = self.__heap[0]
self.__heap[0] = self.__heap[-1]
self.__heap.pop()
max_val = self._heap[0]
self._heap[0] = self._heap[-1]
self._heap.pop()
self._bubble_down()
return max_val.get()

def add(self, key, val):
self.__heap.append(_HeapNode(key, val))
self._heap.append(_HeapNode(key, val))
self._bubble_up()

def _get_local_max(self, i):
heap_len = len(self.__heap)
heap_len = len(self._heap)
left = _left(i)
right = _right(i)
if left >= heap_len:
return i
if right >= heap_len:
if self.__heap[i] < self.__heap[left]:
if self._heap[i] < self._heap[left]:
return left
return i
max_child = left
if self.__heap[left] < self.__heap[right]:
if self._heap[left] < self._heap[right]:
max_child = right
if self.__heap[i] < self.__heap[max_child]:
if self._heap[i] < self._heap[max_child]:
return max_child
return i

def _fix_down(self, i):
local_max = self._get_local_max(i)
if i < local_max:
self.__heap[i], self.__heap[local_max] = (
self.__heap[local_max],
self.__heap[i],
self._heap[i], self._heap[local_max] = (
self._heap[local_max],
self._heap[i],
)
return local_max
return i

def _fix_up(self, i):
parent = _parent(i)
if self.__heap[parent] < self.__heap[i]:
self.__heap[i], self.__heap[parent] = self.__heap[parent], self.__heap[i]
if self._heap[parent] < self._heap[i]:
self._heap[i], self._heap[parent] = self._heap[parent], self._heap[i]
return parent
return i

Expand All @@ -110,7 +110,7 @@ def _bubble_down(self):
current = local_max

def _bubble_up(self):
current = len(self.__heap) - 1
current = len(self._heap) - 1
done = False
while not done:
local_max = self._fix_up(current)
Expand Down
Loading