@@ -701,6 +701,90 @@ def is_nondominated(
701701 return np .frombuffer (nondom , dtype = bool )
702702
703703
704+ def is_nondominated_within_sets (
705+ data : ArrayLike ,
706+ / ,
707+ sets : ArrayLike ,
708+ * ,
709+ maximise : bool | list [bool ] = False ,
710+ keep_weakly : bool = False ,
711+ ) -> np .ndarray :
712+ r"""Identify dominated points according to Pareto optimality within each set.
713+
714+ Executes the :func:`is_nondominated` function within each set in a dataset \
715+ and returns back a 1D array of booleans.
716+
717+ Parameters
718+ ----------
719+ data :
720+ Array of numerical values, where each row gives the coordinates of a point in objective space.
721+ If the array is created by the :func:`read_datasets()` function, remove the last column.
722+ sets :
723+ 1D vector or list of values that define the sets to which each row of ``data`` belongs.
724+ maximise :
725+ Whether the objectives must be maximised instead of minimised.
726+ Either a single boolean value that applies to all objectives or a list of boolean values, with one value per objective.
727+ Also accepts a 1D numpy array with value 0/1 for each objective.
728+ keep_weakly:
729+ If ``False``, return ``False`` for any duplicates of nondominated points.
730+
731+ Returns
732+ -------
733+ Returns a boolean array of the same length as the number of rows of data,
734+ where ``True`` means that the point is not dominated by any other point.
735+
736+ See Also
737+ --------
738+ filter_dominated_within_sets : to filter out dominated points.
739+
740+ Examples
741+ --------
742+ >>> x = moocore.get_dataset("input1.dat")
743+ >>> nondom_per_set = moocore.is_nondominated_within_sets(x[:, :-1], x[:, -1])
744+ >>> len(nondom_per_set)
745+ 100
746+ >>> nondom_per_set # doctest: +ELLIPSIS
747+ array([False, False, True, False, True, False, False, False, False,
748+ True, False, True, True, True, False, True, True, True,
749+ False, True, False, False, False, False, True, False, True,
750+ ...
751+ True, True, True, False, True, False, True, True, False,
752+ True, False, False, True, True, False, False, False, False,
753+ False])
754+ >>> x[nondom_per_set, :] # doctest: +ELLIPSIS
755+ array([[ 0.20816431, 4.62275469, 1. ],
756+ [ 0.22997367, 1.11772205, 1. ],
757+ [ 0.58799475, 0.73891181, 1. ],
758+ [ 1.5964888 , 5.98825094, 2. ],
759+ [ 5.2812367 , 3.47800969, 2. ],
760+ [ 2.16315952, 4.7394435 , 2. ],
761+ ...
762+ [ 0.6510164 , 9.42381213, 9. ],
763+ [ 1.30291449, 4.50417698, 9. ],
764+ [ 0.62230271, 3.56945324, 10. ],
765+ [ 0.86723965, 1.58599089, 10. ],
766+ [ 6.43135537, 1.00153569, 10. ]])
767+
768+ """
769+ data = np .asarray (data , dtype = float )
770+ ncols = data .shape [1 ]
771+ if ncols < 2 :
772+ raise ValueError ("'data' must have at least 2 columns (2 objectives)" )
773+
774+ is_nondom = np .concatenate (
775+ apply_within_sets (
776+ data ,
777+ sets ,
778+ is_nondominated ,
779+ maximise = maximise ,
780+ keep_weakly = keep_weakly ,
781+ ),
782+ dtype = bool ,
783+ casting = "no" ,
784+ )
785+ return is_nondom
786+
787+
704788def filter_dominated (
705789 data , / , * , maximise : bool | list [bool ] = False , keep_weakly : bool = False
706790) -> np .ndarray :
@@ -717,20 +801,23 @@ def filter_dominated(
717801
718802
719803def filter_dominated_within_sets (
720- data , / , * , maximise : bool | list [bool ] = False , keep_weakly : bool = False
721- ):
804+ data : ArrayLike ,
805+ / ,
806+ * ,
807+ maximise : bool | list [bool ] = False ,
808+ keep_weakly : bool = False ,
809+ ) -> np .ndarray :
722810 """Given a dataset with multiple sets (last column gives the set index), filter dominated points within each set.
723811
724812 Executes the :func:`filter_dominated` function within each set in a dataset \
725- and returns back a dataset. This is roughly equivalent to partitioning ' data' according to the last column,
813+ and returns back a dataset. This is roughly equivalent to partitioning `` data`` according to the last column,
726814 filtering dominated solutions within each partition, and joining back the result.
727815
728816 Parameters
729817 ----------
730- data : numpy array
731- Numpy array of numerical values and set numbers, containing multiple sets. For example the output \
732- of the :func:`read_datasets` function
733- maximise : single bool, or list of booleans
818+ data :
819+ Numpy array of numerical values and set numbers, containing multiple datasets. For example the output of the :func:`read_datasets` function.
820+ maximise :
734821 Whether the objectives must be maximised instead of minimised. \
735822 Either a single boolean value that applies to all objectives or a list of booleans, with one value per objective. \
736823 Also accepts a 1D numpy array with values 0 or 1 for each objective
@@ -739,8 +826,7 @@ def filter_dominated_within_sets(
739826
740827 Returns
741828 -------
742- numpy array
743- A numpy array where each set only contains nondominated points with respect to the set (last column is the set index).
829+ A numpy array where each set only contains nondominated points with respect to the set (last column is the set index).
745831 Points from one set can still dominate points from another set.
745831
746832 Examples
@@ -749,6 +835,19 @@ def filter_dominated_within_sets(
749835 >>> pf_per_set = moocore.filter_dominated_within_sets(x)
750836 >>> len(pf_per_set)
751837 42
838+ >>> pf_per_set # doctest: +ELLIPSIS
839+ array([[ 0.20816431, 4.62275469, 1. ],
840+ [ 0.22997367, 1.11772205, 1. ],
841+ [ 0.58799475, 0.73891181, 1. ],
842+ [ 1.5964888 , 5.98825094, 2. ],
843+ [ 5.2812367 , 3.47800969, 2. ],
844+ [ 2.16315952, 4.7394435 , 2. ],
845+ ...
846+ [ 0.6510164 , 9.42381213, 9. ],
847+ [ 1.30291449, 4.50417698, 9. ],
848+ [ 0.62230271, 3.56945324, 10. ],
849+ [ 0.86723965, 1.58599089, 10. ],
850+ [ 6.43135537, 1.00153569, 10. ]])
752851 >>> pf = moocore.filter_dominated(x[:, :-1])
753852 >>> len(pf)
754853 6
@@ -762,26 +861,21 @@ def filter_dominated_within_sets(
762861
763862 See Also
764863 --------
864+ read_datasets : read datasets from a file.
765865 filter_dominated : to be used with a single dataset.
766866
767867 """
768868 data = np .asarray (data , dtype = float )
769- ncols = data .shape [1 ]
770- if ncols < 3 :
869+ if data .shape [1 ] < 3 :
771870 raise ValueError (
772871 "'data' must have at least 3 columns (2 objectives + set column)"
773872 )
774873
775- is_nondom = np .concatenate (
776- apply_within_sets (
777- data [:, :- 1 ],
778- data [:, - 1 ],
779- is_nondominated ,
780- maximise = maximise ,
781- keep_weakly = keep_weakly ,
782- ),
783- dtype = bool ,
784- casting = "no" ,
874+ is_nondom = is_nondominated_within_sets (
875+ data [:, :- 1 ],
876+ sets = data [:, - 1 ],
877+ maximise = maximise ,
878+ keep_weakly = keep_weakly ,
785879 )
786880 return data [is_nondom , :]
787881
@@ -1822,4 +1916,9 @@ def apply_within_sets(x: ArrayLike, sets: ArrayLike, func, **kwargs):
18221916 """
18231917 x = np .asarray (x )
18241918 sets = np .asarray (sets )
1919+ if x .shape [0 ] != sets .shape [0 ]:
1920+ raise ValueError (
1921+ f"'x' and 'sets' must have the same length ({ x .shape [0 ]} != { sets .shape [0 ]} )"
1922+ )
1923+
18251924 return [func (g , ** kwargs ) for g in groupby (x , sets )]
0 commit comments