2929#include " pbbs/parallel.h"
3030#include " pbbs/utils.h"
3131
32+ // *r holds the smallest squared distance found so far; comparing it against distSqr and nodeDistanceSqr avoids sqrt in the hot path
3233template <class nodeT , class objT >
3334inline void compBcpCoreHSerial (nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, objT* P) {
34- if (n1->nodeDistance (n2) > *r) return ;
35+ if (n1->nodeDistanceSqr (n2) > *r) return ;
3536
3637 if (n1->isLeaf () && n2->isLeaf ()) {// basecase
3738 for (intT i=0 ; i<n1->size (); ++i) {
3839 for (intT j=0 ; j<n2->size (); ++j) {
3940 auto pi = n1->getItem (i);
4041 auto pj = n2->getItem (j);
4142 if (coreFlag[pi - P] && coreFlag[pj - P]) {
42- floatT dist = pi->dist (*pj);
43+ floatT dist = pi->distSqr (*pj);
4344 r[0 ] = min (r[0 ], dist);
4445 }
4546 }
@@ -78,30 +79,31 @@ inline void compBcpCoreHSerial(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag,
7879
7980template <class nodeT , class objT >
8081inline void compBcpCoreHBase (nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, objT* P) {
81- if (n1->nodeDistance (n2) > *r) return ;
82+ if (n1->nodeDistanceSqr (n2) > *r) return ;
8283
8384 if (n1->isLeaf () && n2->isLeaf ()) {// basecase
8485 for (intT i=0 ; i<n1->size (); ++i) {
8586 for (intT j=0 ; j<n2->size (); ++j) {
8687 auto pi = n1->getItem (i);
8788 auto pj = n2->getItem (j);
8889 if (coreFlag[pi - P] && coreFlag[pj - P]) {
89- floatT dist = pi->dist (*pj);
90+ floatT dist = pi->distSqr (*pj);
9091 utils::writeMin (r, dist);
9192 }
9293 }
9394 }
94- } else {// recursive, todo consider call order, might help
95+ } else {// recursive
9596 if (n1->isLeaf ()) {
96- if (n1->nodeDistance (n2->L ()) < n1->nodeDistance (n2->R ())) {
97+ // nodeDistanceSqr avoids sqrt; monotonicity preserves ordering
98+ if (n1->nodeDistanceSqr (n2->L ()) < n1->nodeDistanceSqr (n2->R ())) {
9799 compBcpCoreH (n1, n2->L (), r, coreFlag, P);
98100 compBcpCoreH (n1, n2->R (), r, coreFlag, P);
99101 } else {
100102 compBcpCoreH (n1, n2->R (), r, coreFlag, P);
101103 compBcpCoreH (n1, n2->L (), r, coreFlag, P);
102104 }
103105 } else if (n2->isLeaf ()) {
104- if (n2->nodeDistance (n1->L ()) < n2->nodeDistance (n1->R ())) {
106+ if (n2->nodeDistanceSqr (n1->L ()) < n2->nodeDistanceSqr (n1->R ())) {
105107 compBcpCoreH (n2, n1->L (), r, coreFlag, P);
106108 compBcpCoreH (n2, n1->R (), r, coreFlag, P);
107109 } else {
@@ -115,7 +117,7 @@ inline void compBcpCoreHBase(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, ob
115117 ordering[2 ] = make_pair (n2->L (), n1->R ());
116118 ordering[3 ] = make_pair (n2->R (), n1->R ());
117119 auto bbd = [&](pair<nodeT*,nodeT*> p1, pair<nodeT*,nodeT*> p2) {
118- return p1.first ->nodeDistance (p1.second ) < p2.first ->nodeDistance (p2.second );};
120+ return p1.first ->nodeDistanceSqr (p1.second ) < p2.first ->nodeDistanceSqr (p2.second );};
119121 quickSortSerial (ordering, 4 , bbd);
120122 for (intT o=0 ; o<4 ; ++o) {
121123 compBcpCoreH (ordering[o].first , ordering[o].second , r, coreFlag, P);}
@@ -125,21 +127,21 @@ inline void compBcpCoreHBase(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, ob
125127
126128template <class nodeT , class objT >
127129inline void compBcpCoreH (nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, objT* P) {
128- if (n1->nodeDistance (n2) > *r) return ;
130+ if (n1->nodeDistanceSqr (n2) > *r) return ;
129131
130132 if ((n1->isLeaf () && n2->isLeaf ()) || (n1->size ()+n2->size () < 2000 )) {
131133 return compBcpCoreHBase (n1, n2, r, coreFlag, P);
132- } else {// recursive, todo consider call order, might help
134+ } else {// recursive
133135 if (n1->isLeaf ()) {
134- if (n1->nodeDistance (n2->L ()) < n1->nodeDistance (n2->R ())) {
136+ if (n1->nodeDistanceSqr (n2->L ()) < n1->nodeDistanceSqr (n2->R ())) {
135137 par_do ([&](){compBcpCoreH (n1, n2->L (), r, coreFlag, P);},
136138 [&](){compBcpCoreH (n1, n2->R (), r, coreFlag, P);});
137139 } else {
138140 par_do ([&](){compBcpCoreH (n1, n2->R (), r, coreFlag, P);},
139141 [&](){compBcpCoreH (n1, n2->L (), r, coreFlag, P);});
140142 }
141143 } else if (n2->isLeaf ()) {
142- if (n2->nodeDistance (n1->L ()) < n2->nodeDistance (n1->R ())) {
144+ if (n2->nodeDistanceSqr (n1->L ()) < n2->nodeDistanceSqr (n1->R ())) {
143145 par_do ([&](){compBcpCoreH (n2, n1->L (), r, coreFlag, P);},
144146 [&](){compBcpCoreH (n2, n1->R (), r, coreFlag, P);});
145147 } else {
@@ -153,7 +155,7 @@ inline void compBcpCoreH(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, objT*
153155 ordering[2 ] = make_pair (n2->L (), n1->R ());
154156 ordering[3 ] = make_pair (n2->R (), n1->R ());
155157 auto bbd = [&](pair<nodeT*,nodeT*> p1, pair<nodeT*,nodeT*> p2) {
156- return p1.first ->nodeDistance (p1.second ) < p2.first ->nodeDistance (p2.second );};
158+ return p1.first ->nodeDistanceSqr (p1.second ) < p2.first ->nodeDistanceSqr (p2.second );};
157159 quickSortSerial (ordering, 4 , bbd);
158160 parallel_for (0 , 4 , [&](intT o) {
159161 compBcpCoreH (ordering[o].first , ordering[o].second , r, coreFlag, P);}, 1 );
@@ -179,11 +181,11 @@ inline bool hasEdge(intT n1, intT n2, intT* coreFlag, objT* P, floatT epsilon, c
179181
180182 if (!trees[n1])
181183 trees[n1] = new treeT (cells[n1].getItem (), cells[n1].size (), false );// todo allocation, parallel
182- if (!trees[n2])
184+ if (!trees[n2])
183185 trees[n2] = new treeT (cells[n2].getItem (), cells[n2].size (), false );// todo allocation, parallel
184186 floatT r = floatMax ();
185187 compBcpCoreH (trees[n1]->rootNode (), trees[n2]->rootNode (), &r, coreFlag, P);
186- return r <= epsilon;
188+ return r <= epsilon * epsilon; // r holds squared distance now
187189}
188190
189191#endif
0 commit comments