Skip to content

Commit 2d1234d

Browse files
committed
python/: Add is_nondominated_within_sets().
1 parent 4209a25 commit 2d1234d

5 files changed

Lines changed: 169 additions & 26 deletions

File tree

python/doc/source/reference/functions.dominance.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Dominance relations
1010
:toctree: generated/
1111

1212
is_nondominated
13+
is_nondominated_within_sets
1314
filter_dominated
1415
filter_dominated_within_sets
1516
pareto_rank

python/doc/source/whatsnew/index.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ Version 0.1.3
99

1010
- :class:`Hypervolume`: Object-oriented API for hypervolume indicator.
1111
- :func:`apply_within_sets()`: Utility function to apply operations to individual datasets.
12-
- Fix bug in :func:`normalise()` when the input is a Pandas ``DataFrame`` or some other non-contiguous array.
13-
- New example using Pandas ``DataFrame`` and :func:`apply_within_sets()`.
12+
- :func:`is_nondominated_within_sets()`: Utility function to identify nondominated points within sets.
13+
- Fix bug in :func:`normalise()` when the input is :class:`pandas.DataFrame` or some other non-contiguous array.
14+
- New example using :class:`pandas.DataFrame`.
1415

1516

1617
Version 0.1.2 (18/09/2024)

python/examples/plot_pandas.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
r"""Using moocore with Pandas
2-
=============================
1+
"""Using moocore with Pandas
2+
==========================
33
44
This example shows how to use ``moocore`` functions with ``pandas`` (https://pandas.pydata.org/).
5+
56
"""
67

78
import pandas as pd
@@ -31,7 +32,6 @@
3132
# Now we calculate the hypervolume for each algo using :meth:`pandas.core.groupby.DataFrameGroupBy.apply`.
3233

3334
ref = 2.1
34-
3535
hv = (
3636
df.groupby("algo")
3737
.apply(moocore.hypervolume, ref=ref, include_groups=False)
@@ -53,3 +53,43 @@
5353
# Note that :func:`moocore.apply_within_sets()` processes each group in order, even if the elements of the same group are not contiguous. That is, it processes the groups like :meth:`pandas.Series.unique` and not like :class:`set` or :func:`numpy.unique()`.
5454

5555
df["algo"].unique()
56+
57+
# %%
58+
# If we have multiple columns that we want to use to define the sets, such as ``algo`` and ``run``:
59+
60+
df = pd.DataFrame(
61+
dict(
62+
obj1=[1, 2, 3, 4, 5, 6, 5, 4, 3, 1],
63+
obj2=[6, 5, 4, 3, 2, 1, 5, 4, 5, 6],
64+
obj3=[1, 2, 3, 4, 5, 6, 6, 7, 5, 2],
65+
algo=["a"] * 3 + ["b"] * 3 + ["a", "b"] * 2,
66+
run=[1, 1, 2, 1, 1, 2, 2, 2, 1, 1],
67+
)
68+
)
69+
df
70+
71+
# %%
72+
# We can still use :meth:`pandas.DataFrame.groupby` but we may need to reset and clean up the index.
73+
74+
df.groupby(["algo", "run"]).apply(
75+
moocore.filter_dominated, include_groups=False
76+
).reset_index().drop(columns="level_2")
77+
78+
# %%
79+
# Or we can combine the multiple columns as one to define the sets.
80+
#
81+
sets = df["algo"].astype(str) + "-" + df["run"].astype(str)
82+
sets
83+
84+
# %%
85+
# Identify nondominated rows within each set.
86+
#
87+
is_nondom = moocore.is_nondominated_within_sets(
88+
df[["obj1", "obj2", "obj3"]], sets=sets
89+
)
90+
is_nondom
91+
92+
# %%
93+
# And use the boolean vector above to filter rows.
94+
#
95+
df[is_nondom]

python/src/moocore/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
igd,
1818
igd_plus,
1919
is_nondominated,
20+
is_nondominated_within_sets,
2021
normalise,
2122
pareto_rank,
2223
read_datasets,
@@ -52,6 +53,7 @@
5253
"igd",
5354
"igd_plus",
5455
"is_nondominated",
56+
"is_nondominated_within_sets",
5557
"normalise",
5658
"pareto_rank",
5759
"read_datasets",

python/src/moocore/_moocore.py

Lines changed: 120 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,90 @@ def is_nondominated(
701701
return np.frombuffer(nondom, dtype=bool)
702702

703703

704+
def is_nondominated_within_sets(
705+
data: ArrayLike,
706+
/,
707+
sets: ArrayLike,
708+
*,
709+
maximise: bool | list[bool] = False,
710+
keep_weakly: bool = False,
711+
) -> np.ndarray:
712+
r"""Identify nondominated points according to Pareto optimality within each set.
713+
714+
Executes the :func:`is_nondominated` function within each set in a dataset
715+
and returns back a 1D array of booleans.
716+
717+
Parameters
718+
----------
719+
data :
720+
Array of numerical values, where each row gives the coordinates of a point in objective space.
721+
If the array is created by the :func:`read_datasets()` function, remove the last column.
722+
sets :
723+
1D vector or list of values that define the sets to which each row of ``data`` belongs.
724+
maximise :
725+
Whether the objectives must be maximised instead of minimised.
726+
Either a single boolean value that applies to all objectives or a list of boolean values, with one value per objective.
727+
Also accepts a 1D numpy array with value 0/1 for each objective.
728+
keep_weakly :
729+
If ``False``, return ``False`` for any duplicates of nondominated points.
730+
731+
Returns
732+
-------
733+
Returns a boolean array of the same length as the number of rows of data,
734+
where ``True`` means that the point is not dominated by any other point in the same set.
735+
736+
See Also
737+
--------
738+
filter_dominated_within_sets : to filter out dominated points.
739+
740+
Examples
741+
--------
742+
>>> x = moocore.get_dataset("input1.dat")
743+
>>> nondom_per_set = moocore.is_nondominated_within_sets(x[:, :-1], x[:, -1])
744+
>>> len(nondom_per_set)
745+
100
746+
>>> nondom_per_set # doctest: +ELLIPSIS
747+
array([False, False, True, False, True, False, False, False, False,
748+
True, False, True, True, True, False, True, True, True,
749+
False, True, False, False, False, False, True, False, True,
750+
...
751+
True, True, True, False, True, False, True, True, False,
752+
True, False, False, True, True, False, False, False, False,
753+
False])
754+
>>> x[nondom_per_set, :] # doctest: +ELLIPSIS
755+
array([[ 0.20816431, 4.62275469, 1. ],
756+
[ 0.22997367, 1.11772205, 1. ],
757+
[ 0.58799475, 0.73891181, 1. ],
758+
[ 1.5964888 , 5.98825094, 2. ],
759+
[ 5.2812367 , 3.47800969, 2. ],
760+
[ 2.16315952, 4.7394435 , 2. ],
761+
...
762+
[ 0.6510164 , 9.42381213, 9. ],
763+
[ 1.30291449, 4.50417698, 9. ],
764+
[ 0.62230271, 3.56945324, 10. ],
765+
[ 0.86723965, 1.58599089, 10. ],
766+
[ 6.43135537, 1.00153569, 10. ]])
767+
768+
"""
769+
data = np.asarray(data, dtype=float)
770+
ncols = data.shape[1]
771+
if ncols < 2:
772+
raise ValueError("'data' must have at least 2 columns (2 objectives)")
773+
774+
is_nondom = np.concatenate(
775+
apply_within_sets(
776+
data,
777+
sets,
778+
is_nondominated,
779+
maximise=maximise,
780+
keep_weakly=keep_weakly,
781+
),
782+
dtype=bool,
783+
casting="no",
784+
)
785+
return is_nondom
786+
787+
704788
def filter_dominated(
705789
data, /, *, maximise: bool | list[bool] = False, keep_weakly: bool = False
706790
) -> np.ndarray:
@@ -717,20 +801,23 @@ def filter_dominated(
717801

718802

719803
def filter_dominated_within_sets(
720-
data, /, *, maximise: bool | list[bool] = False, keep_weakly: bool = False
721-
):
804+
data: ArrayLike,
805+
/,
806+
*,
807+
maximise: bool | list[bool] = False,
808+
keep_weakly: bool = False,
809+
) -> np.ndarray:
722810
"""Given a dataset with multiple sets (last column gives the set index), filter dominated points within each set.
723811
724812
Executes the :func:`filter_dominated` function within each set in a dataset \
725-
and returns back a dataset. This is roughly equivalent to partitioning 'data' according to the last column,
813+
and returns back a dataset. This is roughly equivalent to partitioning ``data`` according to the last column,
726814
filtering dominated solutions within each partition, and joining back the result.
727815
728816
Parameters
729817
----------
730-
data : numpy array
731-
Numpy array of numerical values and set numbers, containing multiple sets. For example the output \
732-
of the :func:`read_datasets` function
733-
maximise : single bool, or list of booleans
818+
data :
819+
Numpy array of numerical values and set numbers, containing multiple datasets. For example the output of the :func:`read_datasets` function.
820+
maximise :
734821
Whether the objectives must be maximised instead of minimised. \
735822
Either a single boolean value that applies to all objectives or a list of booleans, with one value per objective. \
736823
Also accepts a 1D numpy array with values 0 or 1 for each objective
@@ -739,8 +826,7 @@ def filter_dominated_within_sets(
739826
740827
Returns
741828
-------
742-
numpy array
743-
A numpy array where each set only contains nondominated points with respect to the set (last column is the set index).
829+
A numpy array where each set only contains nondominated points with respect to the set (last column is the set index).
744830
Points from one set can still dominate points from another set.
745831
746832
Examples
@@ -749,6 +835,19 @@ def filter_dominated_within_sets(
749835
>>> pf_per_set = moocore.filter_dominated_within_sets(x)
750836
>>> len(pf_per_set)
751837
42
838+
>>> pf_per_set # doctest: +ELLIPSIS
839+
array([[ 0.20816431, 4.62275469, 1. ],
840+
[ 0.22997367, 1.11772205, 1. ],
841+
[ 0.58799475, 0.73891181, 1. ],
842+
[ 1.5964888 , 5.98825094, 2. ],
843+
[ 5.2812367 , 3.47800969, 2. ],
844+
[ 2.16315952, 4.7394435 , 2. ],
845+
...
846+
[ 0.6510164 , 9.42381213, 9. ],
847+
[ 1.30291449, 4.50417698, 9. ],
848+
[ 0.62230271, 3.56945324, 10. ],
849+
[ 0.86723965, 1.58599089, 10. ],
850+
[ 6.43135537, 1.00153569, 10. ]])
752851
>>> pf = moocore.filter_dominated(x[:, :-1])
753852
>>> len(pf)
754853
6
@@ -762,26 +861,21 @@ def filter_dominated_within_sets(
762861
763862
See Also
764863
--------
864+
read_datasets : read datasets from a file.
765865
filter_dominated : to be used with a single dataset.
766866
767867
"""
768868
data = np.asarray(data, dtype=float)
769-
ncols = data.shape[1]
770-
if ncols < 3:
869+
if data.shape[1] < 3:
771870
raise ValueError(
772871
"'data' must have at least 3 columns (2 objectives + set column)"
773872
)
774873

775-
is_nondom = np.concatenate(
776-
apply_within_sets(
777-
data[:, :-1],
778-
data[:, -1],
779-
is_nondominated,
780-
maximise=maximise,
781-
keep_weakly=keep_weakly,
782-
),
783-
dtype=bool,
784-
casting="no",
874+
is_nondom = is_nondominated_within_sets(
875+
data[:, :-1],
876+
sets=data[:, -1],
877+
maximise=maximise,
878+
keep_weakly=keep_weakly,
785879
)
786880
return data[is_nondom, :]
787881

@@ -1822,4 +1916,9 @@ def apply_within_sets(x: ArrayLike, sets: ArrayLike, func, **kwargs):
18221916
"""
18231917
x = np.asarray(x)
18241918
sets = np.asarray(sets)
1919+
if x.shape[0] != sets.shape[0]:
1920+
raise ValueError(
1921+
f"'x' and 'sets' must have the same length ({x.shape[0]} != {sets.shape[0]})"
1922+
)
1923+
18251924
return [func(g, **kwargs) for g in groupby(x, sets)]

0 commit comments

Comments
 (0)