3535#include "common.h"
3636#include "hv.h"
3737#define HV_RECURSIVE
38- #include "hv4d_priv .h"
38+ #include "hvc4d_priv .h"
3939
4040#define STOP_DIMENSION 3 // stop on dimension 4.
4141#define MAX_ROWS_HV_INEX 15
@@ -64,7 +64,11 @@ fpli_setup_cdllist(const double * restrict data, dimension_t d,
6464 dimension_t d_stop = d - STOP_DIMENSION ;
6565 size_t n = * size ;
6666
67- dlnode_t * head = malloc ((n + 1 ) * sizeof (* head ));
67+ // Reserve space for main CDLL used by hv_recursive() and for the auxiliary
68+ // list used by onec4dplusU(). The main CDLL will store all points + 1
69+ // sentinel. The auxiliary list will store at most n - 1 points + 1
70+ // sentinel.
71+ dlnode_t * head = malloc ((n + 1 + n ) * sizeof (* head ));
6872 size_t i = 1 ;
6973 for (size_t j = 0 ; j < n ; j ++ ) {
7074 /* Filters those points that do not strictly dominate the reference
@@ -85,9 +89,10 @@ fpli_setup_cdllist(const double * restrict data, dimension_t d,
8589 // We need space in r_next and r_prev for dimension 5 and above (d_stop - 1).
8690 head -> r_next = malloc (2 * (d_stop - 1 ) * (n + 1 ) * sizeof (head ));
8791 head -> r_prev = head -> r_next + (d_stop - 1 ) * (n + 1 );
88- // We only need space in area and vol for dimension 4 and above.
89- head -> area = malloc (2 * d_stop * (n + 1 ) * sizeof (* data ));
90- head -> vol = head -> area + d_stop * (n + 1 );
92+ // We only need space in area and vol for dimension 4 and above (d-stop).
93+ // Also reserve space for n-1 auxiliary 3D points used by onec4dplusU().
94+ head -> area = malloc ((2 * d_stop * n + 3 * (n - 1 )) * sizeof (* data ));
95+ head -> vol = head -> area + d_stop * n ;
9196 head -> x = NULL ; // head contains no data
9297 head -> ignore = 0 ; // should never get used
9398
@@ -96,41 +101,38 @@ fpli_setup_cdllist(const double * restrict data, dimension_t d,
96101 // Link head and list4d; head is not used by HV4D, so next[0] and prev[0]
97102 // should remain untouched.
98103 head -> next [0 ] = list4d ;
99- head -> prev [0 ] = list4d ; // Save it twice so we can use assert() later.
104+
105+ // Setup the auxiliary list used in onec4dplusU().
106+ dlnode_t * list_aux = head + n + 1 ;
107+ // Setup the auxiliary 3D points used in onec4dplusU().
108+ list_aux -> vol = head -> vol + d_stop * n ;
109+ head -> prev [0 ] = list_aux ;
110+ list_aux -> next [0 ] = list4d ;
100111
101112 for (i = 1 ; i <= n ; i ++ ) {
102113 // Shift x because qsort() cannot take the dimension to sort as an argument.
103114 head [i ].x += d - 1 ;
104115 head [i ].ignore = 0 ;
105116 head [i ].r_next = head -> r_next + i * (d_stop - 1 );
106117 head [i ].r_prev = head -> r_prev + i * (d_stop - 1 );
107- head [i ].area = head -> area + i * d_stop ;
108- head [i ].vol = head -> vol + i * d_stop ;
118+ head [i ].area = head -> area + ( i - 1 ) * d_stop ;
119+ head [i ].vol = head -> vol + ( i - 1 ) * d_stop ;
109120 }
121+ // Make sure they are not used.
122+ head -> area = NULL ;
123+ head -> vol = NULL ;
110124
111125 dlnode_t * * scratch = malloc (n * sizeof (* scratch ));
112126 for (i = 0 ; i < n ; i ++ )
113127 scratch [i ] = head + 1 + i ;
114128
115- int j = d_stop - 2 ;
116- while (true) {
129+ for (int j = d_stop - 2 ; j >= 0 ; j -- ) {
117130 /* FIXME: replace qsort() by something better:
118131 https://github.com/numpy/x86-simd-sort
119132 https://github.com/google/highway/tree/52a2d98d07852c5d69284e175666e5f8cc7d8285/hwy/contrib/sort
120133 */
121134 // Sort each dimension independently.
122135 qsort (scratch , n , sizeof (* scratch ), compare_node );
123- if (j == -1 ) {
124- (list4d + 1 )-> next [1 ] = scratch [0 ];
125- scratch [0 ]-> prev [1 ] = list4d + 1 ;
126- for (i = 1 ; i < n ; i ++ ) {
127- scratch [i - 1 ]-> next [1 ] = scratch [i ];
128- scratch [i ]-> prev [1 ] = scratch [i - 1 ];
129- }
130- scratch [n - 1 ]-> next [1 ] = list4d + 2 ;
131- (list4d + 2 )-> prev [1 ] = scratch [n - 1 ];
132- break ;
133- }
134136 head -> r_next [j ] = scratch [0 ];
135137 scratch [0 ]-> r_prev [j ] = head ;
136138 for (i = 1 ; i < n ; i ++ ) {
@@ -139,33 +141,46 @@ fpli_setup_cdllist(const double * restrict data, dimension_t d,
139141 }
140142 scratch [n - 1 ]-> r_next [j ] = head ;
141143 head -> r_prev [j ] = scratch [n - 1 ];
142- j -- ;
143144 // Consider next objective (in reverse order).
144145 for (i = 1 ; i <= n ; i ++ )
145146 head [i ].x -- ;
146147 }
147- // Reset x to point to the first objective.
148- for (i = 1 ; i <= n ; i ++ )
149- head [i ].x -= STOP_DIMENSION ;
150148
149+ for (int j = 1 ; j >= 0 ; j -- ) {
150+ // Sort each dimension independently.
151+ qsort (scratch , n , sizeof (* scratch ), compare_node );
152+ (list4d + 1 )-> next [j ] = scratch [0 ];
153+ scratch [0 ]-> prev [j ] = list4d + 1 ;
154+ for (i = 1 ; i < n ; i ++ ) {
155+ scratch [i - 1 ]-> next [j ] = scratch [i ];
156+ scratch [i ]-> prev [j ] = scratch [i - 1 ];
157+ }
158+ scratch [n - 1 ]-> next [j ] = list4d + 2 ;
159+ (list4d + 2 )-> prev [j ] = scratch [n - 1 ];
160+ if (j > 0 ) {
161+ // Consider next objective (in reverse order).
162+ for (i = 1 ; i <= n ; i ++ )
163+ head [i ].x -- ;
164+ } else {
165+ // Reset x to point to the first objective.
166+ for (i = 1 ; i <= n ; i ++ )
167+ head [i ].x -= STOP_DIMENSION - 1 ;
168+ }
169+ }
151170 free (scratch );
152171
153- // Make sure it is not used.
154- ASAN_POISON_MEMORY_REGION (head -> area , sizeof (* data ) * d_stop );
155- ASAN_POISON_MEMORY_REGION (head -> vol , sizeof (* data ) * d_stop );
156-
157172finish :
158173 * size = n ;
159174 return head ;
160175}
161176
162177static void fpli_free_cdllist (dlnode_t * head )
163178{
164- assert (head -> next [0 ] == head -> prev [0 ]);
179+ assert (head -> next [0 ] == head -> prev [0 ]-> next [ 0 ] );
165180 free_cdllist (head -> next [0 ]); // free 4D sentinels
166181 free (head -> r_next );
167- free (head -> area );
168- free (head );
182+ free (head [ 1 ]. area ); // Free ->area, ->vol and list_aux->vol (x_aux used by 4D basecase).
183+ free (head ); // Free main CDLL and list_aux (4D basecase).
169184}
170185
171186static inline void
@@ -180,6 +195,34 @@ update_bound(double * restrict bound, const double * restrict x, dimension_t dim
180195 }
181196}
182197
198+ static void
199+ delete_4d (dlnode_t * restrict nodep )
200+ {
201+ nodep -> prev [1 ]-> next [1 ] = nodep -> next [1 ];
202+ nodep -> next [1 ]-> prev [1 ] = nodep -> prev [1 ];
203+ }
204+
205+ static void
206+ delete_3d (dlnode_t * restrict nodep )
207+ {
208+ nodep -> prev [0 ]-> next [0 ] = nodep -> next [0 ];
209+ nodep -> next [0 ]-> prev [0 ] = nodep -> prev [0 ];
210+ }
211+
212+ static void
213+ reinsert_4d (dlnode_t * restrict nodep )
214+ {
215+ nodep -> prev [1 ]-> next [1 ] = nodep ;
216+ nodep -> next [1 ]-> prev [1 ] = nodep ;
217+ }
218+
219+ static void
220+ reinsert_3d (dlnode_t * restrict nodep )
221+ {
222+ nodep -> prev [0 ]-> next [0 ] = nodep ;
223+ nodep -> next [0 ]-> prev [0 ] = nodep ;
224+ }
225+
183226static void
184227delete_dom (dlnode_t * restrict nodep , dimension_t dim )
185228{
@@ -190,9 +233,8 @@ delete_dom(dlnode_t * restrict nodep, dimension_t dim)
190233 nodep -> r_prev [d ]-> r_next [d ] = nodep -> r_next [d ];
191234 nodep -> r_next [d ]-> r_prev [d ] = nodep -> r_prev [d ];
192235 }
193- // Dimension 4.
194- nodep -> prev [1 ]-> next [1 ] = nodep -> next [1 ];
195- nodep -> next [1 ]-> prev [1 ] = nodep -> prev [1 ];
236+ delete_4d (nodep );
237+ delete_3d (nodep );
196238}
197239
198240static void
@@ -213,9 +255,8 @@ reinsert_nobound(dlnode_t * restrict nodep, dimension_t dim)
213255 nodep -> r_prev [d ]-> r_next [d ] = nodep ;
214256 nodep -> r_next [d ]-> r_prev [d ] = nodep ;
215257 }
216- // Dimension 4.
217- nodep -> prev [1 ]-> next [1 ] = nodep ;
218- nodep -> next [1 ]-> prev [1 ] = nodep ;
258+ reinsert_4d (nodep );
259+ reinsert_3d (nodep );
219260}
220261
221262static void
@@ -226,15 +267,14 @@ reinsert(dlnode_t * restrict nodep, dimension_t dim, double * restrict bound)
226267}
227268
228269static double
229- fpli_hv4d (dlnode_t * restrict list , size_t c _attr_maybe_unused )
270+ fpli_onec4d (dlnode_t * restrict list , size_t c _attr_maybe_unused , dlnode_t * restrict the_point )
230271{
231272 ASSUME (c > 1 );
232- assert (list -> next [0 ] == list -> prev [0 ]);
273+ assert (list -> next [0 ] == list -> prev [0 ]-> next [ 0 ] );
233274 dlnode_t * restrict list4d = list -> next [0 ];
234- // hv4dplusU() will change the sentinels for 3D, so we need to reset them.
235- reset_sentinels_3d (list4d );
236- double hv = hv4dplusU (list4d );
237- return hv ;
275+ dlnode_t * restrict list_aux = list -> prev [0 ];
276+ double contrib = onec4dplusU (list4d , list_aux , the_point );
277+ return contrib ;
238278}
239279
240280_attr_optimize_finite_and_associative_math // Required for auto-vectorization: https://gcc.gnu.org/PR122687
@@ -248,14 +288,6 @@ one_point_hv(const double * restrict x, const double * restrict ref, dimension_t
248288 return hv ;
249289}
250290
251- _attr_optimize_finite_and_associative_math
252- static inline void
253- upper_bound (double * restrict dest , const double * restrict a , const double * restrict b , dimension_t dim )
254- {
255- for (dimension_t i = 0 ; i < dim ; i ++ )
256- dest [i ] = MAX (a [i ], b [i ]);
257- }
258-
259291_attr_optimize_finite_and_associative_math
260292static double
261293hv_two_points (const double * restrict x1 , const double * restrict x2 ,
@@ -429,7 +461,9 @@ hv_recursive(dlnode_t * restrict list, dimension_t dim, size_t c,
429461 ASSUME (dim - 1 >= STOP_DIMENSION );
430462 if (dim - 1 == STOP_DIMENSION ) {
431463 // base case of dimension 4.
432- hypera = fpli_hv4d (list , c );
464+ hypera = fpli_onec4d (list , c , p1 );
465+ // hypera only has the contribution of p1.
466+ hypera += p1_prev -> area [d_stop ];
433467 } else {
434468 hypera = hv_recursive (list , dim - 1 , c , ref , bound );
435469 }
@@ -505,7 +539,9 @@ fpli_hv_ge5d(dlnode_t * restrict list, dimension_t dim, size_t c,
505539 ASSUME (dim - 1 >= STOP_DIMENSION );
506540 if (dim - 1 == STOP_DIMENSION ) {
507541 // base case of dimension 4.
508- hypera = fpli_hv4d (list , c );
542+ hypera = fpli_onec4d (list , c , p1 );
543+ // hypera only has the contribution of p1.
544+ hypera += p1_prev -> area [d_stop ];
509545 } else {
510546 hypera = hv_recursive (list , dim - 1 , c , ref , bound );
511547 }
0 commit comments