Skip to content

Commit 607417e

Browse files
committed
O(n) cluster renumbering via prefix sum, replace O(n log n) sort + hash table
1 parent 4a4025f commit 607417e

1 file changed

Lines changed: 18 additions & 55 deletions

File tree

include/dbscan/algo.h

Lines changed: 18 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -165,67 +165,30 @@ int DBSCAN(intT n, floatT* PF, double epsilon, intT minPts, bool* coreFlagOut, i
165165
delete G;
166166

167167
//improving cluster representation
168-
auto cluster2 = newA(intT, n);
169-
auto flag = newA(intT, n+1);
170-
parallel_for(0, n, [&](intT i){cluster2[i] = cluster[i];});
171-
sampleSort(cluster, n, std::less<intT>());
172-
173-
flag[0] = 1;
174-
parallel_for(1, n, [&](intT i){
175-
if (cluster[i] != cluster[i-1])
176-
flag[i] = 1;
177-
else
178-
flag[i] = 0;
179-
});
180-
flag[n] = sequence::prefixSum(flag, 0, n);
181-
182-
// typedef pair<intT,intT> eType;
183-
struct myPair {
184-
intT first;
185-
intT second;
186-
myPair(intT _first, intT _second): first(_first), second(_second) {}
187-
myPair(): first(-1), second(-1) {}
188-
inline bool operator==(myPair a) {
189-
if(a.first==first && a.second== second)
190-
return true;
191-
else
192-
return false;
193-
}
194-
};
195-
196-
typedef Table<hashSimplePair<myPair>,intT> tableT;
197-
auto T = new tableT(n, hashSimplePair<myPair>());
198-
parallel_for(0, n, [&] (intT i) {
199-
if (flag[i] != flag[i+1]) {
200-
// T->insert(make_pair(cluster[i], flag[i]));
201-
T->insert(myPair(cluster[i], flag[i]));
202-
}
203-
});
168+
// O(n) renumbering: cluster IDs are point indices ∈ [0,n), use prefix sum
169+
// instead of O(n log n) sort + hash table.
170+
auto idMap = newA(intT, n);
171+
parallel_for(0, n, [&](intT i) { idMap[i] = 0; });
172+
parallel_for(0, n, [&](intT i) {
173+
if (cluster[i] >= 0) idMap[cluster[i]] = 1;
174+
});
175+
sequence::prefixSum(idMap, 0, n); // exclusive: idMap[cid] = sequential ID for cid
204176

205-
if(T->find(-1).second < 0) {
206-
parallel_for(0, n, [&](intT i){
207-
cluster2[i] = T->find(cluster2[i]).second;
208-
});
209-
} else {
210-
parallel_for(0, n, [&](intT i){
211-
if (cluster2[i] > 0)
212-
cluster2[i] = T->find(cluster2[i]).second-1;
213-
});
214-
}
177+
// Remap to sequential IDs (separate buffer to avoid read/write conflict)
178+
auto cluster2 = newA(intT, n);
179+
parallel_for(0, n, [&](intT i) {
180+
cluster2[i] = (cluster[i] >= 0) ? idMap[cluster[i]] : cluster[i];
181+
});
215182

216183
//restoring order
217-
parallel_for(0, n, [&](intT i){
218-
cluster[I[i]] = cluster2[i];
219-
});
220-
parallel_for(0, n, [&](intT i){
221-
coreFlagOut[I[i]] = coreFlag[i];
222-
});
184+
parallel_for(0, n, [&](intT i) {
185+
cluster[I[i]] = cluster2[i];
186+
coreFlagOut[I[i]] = coreFlag[i];
187+
});
223188

224189
free(I);
225190
free(cluster2);
226-
free(flag);
227-
T->del(); // Required to clean-up T's internals
228-
delete T;
191+
free(idMap);
229192
free(P);
230193
#ifdef VERBOSE
231194
cout << "output-time = " << tt.stop() << endl;

0 commit comments

Comments
 (0)