Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:

- name: Run benchmarks
run: |
python -m unittest discover -s tests -p 'test_bench_*.py'
python -m pytest tests/test_bench_*.py -s

test-bench-job:
needs: test-bench-matrix-job
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-unit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:

- name: Run tests and code coverage
run: |
coverage run --source=src -m unittest discover -s tests -p 'test_unit_*.py'
coverage run --source=src -m pytest tests/test_unit_*.py
coverage report -m

- name: Upload coverage reports to Codecov
Expand Down
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,17 @@ Follow these steps to contribute:
the naming convention `test_bench_*.py`.

4. **Run Tests**.
Ensure your changes pass all tests before committing. We use `unittest` as
Ensure your changes pass all tests before committing. We use `pytest` as
the test framework:

```bash
python -m unittest discover -s tests -p 'test_*.py'
python -m pytest tests/test_*.py
```

Before each commit, make sure to check code coverage:

```bash
coverage run --source=src -m unittest discover -s tests -p 'test_*.py'
coverage run --source=src -m pytest tests/test_*.py
```

5. **Commit and Push Your Changes**.
Expand Down
16 changes: 8 additions & 8 deletions app/streamlit_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from sklearn.decomposition import PCA
from umap import UMAP

from tdamapper._plot_plotly import _marker_size
from tdamapper.core import aggregate_graph
from tdamapper.cover import BallCover, CubicalCover
from tdamapper.learn import MapperAlgorithm
Expand Down Expand Up @@ -137,14 +136,14 @@ def _check_limits_mapper_graph(mapper_graph):
if LIMITS_ENABLED:
num_nodes = mapper_graph.number_of_nodes()
if num_nodes > LIMITS_NUM_NODES:
logging.warn("Too many nodes.")
logging.warning("Too many nodes.")
raise ValueError(
"Too many nodes: select different parameters or run the app "
"locally on your machine."
)
num_edges = mapper_graph.number_of_edges()
if num_edges > LIMITS_NUM_EDGES:
logging.warn("Too many edges.")
logging.warning("Too many edges.")
raise ValueError(
"Too many edges: select different parameters or run the app "
"locally on your machine."
Expand All @@ -155,14 +154,14 @@ def _check_limits_dataset(df_X, df_y):
if LIMITS_ENABLED:
num_samples = len(df_X)
if num_samples > LIMITS_NUM_SAMPLES:
logging.warn("Dataset too big.")
logging.warning("Dataset too big.")
raise ValueError(
"Dataset too big: select a different dataset or run the app "
"locally on your machine."
)
num_features = len(df_X.columns) + len(df_y.columns)
if num_features > LIMITS_NUM_FEATURES:
logging.warn("Too many features.")
logging.warning("Too many features.")
raise ValueError(
"Too many features: select a different dataset or run the app "
"locally on your machine."
Expand Down Expand Up @@ -529,8 +528,8 @@ def mapper_input_section(X):
mapper_algo = MapperAlgorithm(
cover=cover,
clustering=clustering,
verbose=True,
n_jobs=1,
verbose=False,
n_jobs=-2,
)
mapper_graph = compute_mapper(mapper_algo, X, lens)
return mapper_graph
Expand Down Expand Up @@ -628,11 +627,12 @@ def compute_mapper_fig(mapper_plot, colors, node_size, cmap, _agg, agg_name):
colors,
node_size=node_size,
agg=_agg,
title=[f"{agg_name} of {c}" for c in colors.columns],
title=[f"{c}" for c in colors.columns],
cmap=cmap,
width=600,
height=600,
)
logger.info("Done")
return mapper_fig


Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ dev = [
"coverage[toml]",
"pandas",
"scikit-learn<1.6.0",
"pytest",
"black[jupyter]",
"isort",
"flake8",
Expand Down
19 changes: 10 additions & 9 deletions src/tdamapper/_plot_plotly.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def plot_plotly(
mapper_plot,
width: int,
height: int,
node_size: int = DEFAULT_NODE_SIZE,
node_size: Optional[Union[int, List[int]]] = DEFAULT_NODE_SIZE,
colors=None,
title: Optional[Union[str, List[str]]] = None,
agg=np.nanmean,
Expand All @@ -73,8 +73,9 @@ def plot_plotly(
titles = [title for _ in range(colors_num)]
elif isinstance(title, list) and len(title) == colors_num:
titles = title
fig = _figure(mapper_plot, width, height, node_size, colors, titles, agg, cmaps)
_add_ui_to_layout(mapper_plot, fig, colors, titles, node_size, agg, cmaps)
node_sizes = [node_size] if isinstance(node_size, int) else node_size
fig = _figure(mapper_plot, width, height, node_sizes, colors, titles, agg, cmaps)
_add_ui_to_layout(mapper_plot, fig, colors, titles, node_sizes, agg, cmaps)
return fig


Expand Down Expand Up @@ -220,7 +221,7 @@ def _update_layout(fig, width, height):
)


def _figure(mapper_plot, width, height, node_size, colors, titles, agg, cmaps):
def _figure(mapper_plot, width, height, node_sizes, colors, titles, agg, cmaps):
node_pos = mapper_plot.positions
node_pos_arr = _node_pos_array(
mapper_plot.graph,
Expand All @@ -239,7 +240,7 @@ def _figure(mapper_plot, width, height, node_size, colors, titles, agg, cmaps):

_set_cmap(mapper_plot, fig, cmaps[0])
_set_colors(mapper_plot, fig, colors[:, 0], agg)
_set_node_size(mapper_plot, fig, node_size)
_set_node_size(mapper_plot, fig, node_sizes[len(node_sizes) // 2])
_set_title(mapper_plot, fig, titles[0])

return fig
Expand Down Expand Up @@ -387,7 +388,7 @@ def _layout(width, height):
)


def _add_ui_to_layout(mapper_plot, mapper_fig, colors, titles, node_size, agg, cmaps):
def _add_ui_to_layout(mapper_plot, mapper_fig, colors, titles, node_sizes, agg, cmaps):
cmaps_plotly = [PLOTLY_CMAPS.get(c.lower()) for c in cmaps]
menu_color = _ui_color(mapper_plot, colors, titles, agg)
if menu_color["buttons"]:
Expand All @@ -396,7 +397,7 @@ def _add_ui_to_layout(mapper_plot, mapper_fig, colors, titles, node_size, agg, c
menu_color["x"] = -0.25
menu_cmap = _ui_cmap(mapper_plot, cmaps_plotly)
menu_cmap["x"] = menu_color["x"] + 0.25
slider_size = _ui_node_size(mapper_plot, node_size)
slider_size = _ui_node_size(mapper_plot, node_sizes)
mapper_fig.update_layout(
updatemenus=[menu_cmap, menu_color],
sliders=[slider_size],
Expand Down Expand Up @@ -441,7 +442,7 @@ def _update_cmap(cmap):
)


def _ui_node_size(mapper_plot, node_size):
def _ui_node_size(mapper_plot, node_sizes):
steps = [
dict(
method="restyle",
Expand All @@ -451,7 +452,7 @@ def _ui_node_size(mapper_plot, node_size):
[1],
],
)
for size in [node_size * x / 10.0 for x in range(1, 20)]
for size in node_sizes
]

return dict(
Expand Down
20 changes: 10 additions & 10 deletions src/tdamapper/utils/vptree_flat/ball_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@ class BallSearch:

def __init__(self, vpt, point, eps, inclusive=True):
self._arr = vpt._get_arr()
self.__distance = vpt._get_distance()
self.__point = point
self.__eps = eps
self.__inclusive = inclusive
self._distance = vpt._get_distance()
self._point = point
self._eps = eps
self._inclusive = inclusive

def search(self):
return self._search_iter()

def _inside(self, dist):
if self.__inclusive:
return dist <= self.__eps
return dist < self.__eps
if self._inclusive:
return dist <= self._eps
return dist < self._eps

def _search_iter(self):
stack = [(0, self._arr.size())]
Expand All @@ -28,11 +28,11 @@ def _search_iter(self):
is_terminal = self._arr.is_terminal(start)
if is_terminal:
for x in self._arr.get_points(start, end):
dist = self.__distance(self.__point, x)
dist = self._distance(self._point, x)
if self._inside(dist):
result.append(x)
else:
dist = self.__distance(self.__point, v_point)
dist = self._distance(self._point, v_point)
mid = _mid(start, end)
if self._inside(dist):
result.append(v_point)
Expand All @@ -42,7 +42,7 @@ def _search_iter(self):
else:
fst = (mid, end)
snd = (start + 1, mid)
if abs(dist - v_radius) <= self.__eps:
if abs(dist - v_radius) <= self._eps:
stack.append(snd)
stack.append(fst)
return result
22 changes: 11 additions & 11 deletions src/tdamapper/utils/vptree_flat/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ def _mid(start, end):
class Builder:

def __init__(self, vpt, X):
self.__distance = vpt._get_distance()
self._distance = vpt._get_distance()

dataset = [x for x in X]
indices = np.array([i for i in range(len(dataset))])
distances = np.array([0.0 for _ in X])
is_terminal = np.array([False for _ in X])
self._arr = VPArray(dataset, distances, indices, is_terminal)

self.__leaf_capacity = vpt.get_leaf_capacity()
self.__leaf_radius = vpt.get_leaf_radius()
self._leaf_capacity = vpt.get_leaf_capacity()
self._leaf_radius = vpt.get_leaf_radius()
pivoting = vpt.get_pivoting()
self.__pivoting = self._pivoting_disabled
self._pivoting = self._pivoting_disabled
if pivoting == "random":
self.__pivoting = self._pivoting_random
self._pivoting = self._pivoting_random
elif pivoting == "furthest":
self.__pivoting = self._pivoting_furthest
self._pivoting = self._pivoting_furthest

def _pivoting_disabled(self, start, end):
pass
Expand All @@ -45,7 +45,7 @@ def _furthest(self, start, end, i):
i_point = self._arr.get_point(i)
for j in range(start, end):
j_point = self._arr.get_point(j)
j_dist = self.__distance(i_point, j_point)
j_dist = self._distance(i_point, j_point)
if j_dist > furthest_dist:
furthest = j
furthest_dist = j_dist
Expand All @@ -61,12 +61,12 @@ def _pivoting_furthest(self, start, end):
self._arr.swap(start, furthest)

def _update(self, start, end):
self.__pivoting(start, end)
self._pivoting(start, end)
v_point = self._arr.get_point(start)
is_terminal = self._arr.is_terminal(start)
for i in range(start + 1, end):
point = self._arr.get_point(i)
self._arr.set_distance(i, self.__distance(v_point, point))
self._arr.set_distance(i, self._distance(v_point, point))
self._arr.set_terminal(i, is_terminal)

def build(self):
Expand All @@ -81,8 +81,8 @@ def _build_iter(self):
self._update(start, end)
self._arr.partition(start + 1, end, mid)
v_radius = self._arr.get_distance(mid)
if (end - start > 2 * self.__leaf_capacity) and (
v_radius > self.__leaf_radius
if (end - start > 2 * self._leaf_capacity) and (
v_radius > self._leaf_radius
):
self._arr.set_distance(start, v_radius)
self._arr.set_terminal(start, False)
Expand Down
50 changes: 26 additions & 24 deletions src/tdamapper/utils/vptree_flat/knn_search.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,43 @@
from tdamapper.utils.heap import MaxHeap
from tdamapper.utils.vptree_flat.common import _mid

_PRE = 0
_POST = 1


class KnnSearch:

def __init__(self, vpt, point, neighbors):
self._arr = vpt._get_arr()
self.__distance = vpt._get_distance()
self.__point = point
self.__neighbors = neighbors
self.__radius = float("inf")
self.__result = MaxHeap()
self._distance = vpt._get_distance()
self._point = point
self._neighbors = neighbors
self._radius = float("inf")
self._result = MaxHeap()

def _get_items(self):
while len(self.__result) > self.__neighbors:
self.__result.pop()
return [x for (_, x) in self.__result]
while len(self._result) > self._neighbors:
self._result.pop()
return [x for (_, x) in self._result]

def search(self):
self._search_iter()
return self._get_items()

def _process(self, x):
dist = self.__distance(self.__point, x)
if dist >= self.__radius:
dist = self._distance(self._point, x)
if dist >= self._radius:
return dist
self.__result.add(dist, x)
while len(self.__result) > self.__neighbors:
self.__result.pop()
if len(self.__result) == self.__neighbors:
self.__radius, _ = self.__result.top()
self._result.add(dist, x)
while len(self._result) > self._neighbors:
self._result.pop()
if len(self._result) == self._neighbors:
self._radius, _ = self._result.top()
return dist

def _search_iter(self):
PRE, POST = 0, 1
self.__result = MaxHeap()
stack = [(0, self._arr.size(), 0.0, PRE)]
self._result = MaxHeap()
stack = [(0, self._arr.size(), 0.0, _PRE)]
while stack:
start, end, thr, action = stack.pop()

Expand All @@ -47,7 +49,7 @@ def _search_iter(self):
for x in self._arr.get_points(start, end):
self._process(x)
else:
if action == PRE:
if action == _PRE:
mid = _mid(start, end)
dist = self._process(v_point)
if dist <= v_radius:
Expand All @@ -56,9 +58,9 @@ def _search_iter(self):
else:
fst_start, fst_end = mid, end
snd_start, snd_end = start + 1, mid
stack.append((snd_start, snd_end, abs(v_radius - dist), POST))
stack.append((fst_start, fst_end, 0.0, PRE))
elif action == POST:
if self.__radius > thr:
stack.append((start, end, 0.0, PRE))
stack.append((snd_start, snd_end, abs(v_radius - dist), _POST))
stack.append((fst_start, fst_end, 0.0, _PRE))
elif action == _POST:
if self._radius > thr:
stack.append((start, end, 0.0, _PRE))
return self._get_items()
Loading