-
Notifications
You must be signed in to change notification settings - Fork 20
Fix memory leaks and performance improvements #22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
|
|
||
| #pragma once | ||
|
|
||
| #include <mutex> | ||
| #include "cell.h" | ||
| #include "point.h" | ||
| #include "shared.h" | ||
|
|
@@ -77,6 +78,7 @@ struct grid { | |
| treeT* tree=NULL; | ||
| intT totalPoints; | ||
| cellBuf **nbrCache; | ||
| std::mutex* cacheLocks; | ||
|
|
||
| /** | ||
| * Grid constructor. | ||
|
|
@@ -89,10 +91,12 @@ struct grid { | |
|
|
||
| cells = newA(cellT, cellCapacity); | ||
| nbrCache = newA(cellBuf*, cellCapacity); | ||
| cacheLocks = (std::mutex*) malloc(cellCapacity * sizeof(std::mutex)); | ||
| parallel_for(0, cellCapacity, [&](intT i) { | ||
| nbrCache[i] = NULL; | ||
| cells[i].init(); | ||
| }); | ||
| new (&cacheLocks[i]) std::mutex(); | ||
| nbrCache[i] = NULL; | ||
| cells[i].init(); | ||
| }); | ||
| numCells = 0; | ||
|
|
||
| myHash = new cellHashT(pMinn, r); | ||
|
|
@@ -101,9 +105,10 @@ struct grid { | |
|
|
||
| ~grid() { | ||
| free(cells); | ||
| parallel_for(0, numCells, [&](intT i) { | ||
| if(nbrCache[i]) delete nbrCache[i]; | ||
| }); | ||
| free(cacheLocks); | ||
| parallel_for(0, cellCapacity, [&](intT i) { | ||
| if(nbrCache[i]) delete nbrCache[i]; | ||
| }); | ||
| free(nbrCache); | ||
| if(myHash) delete myHash; | ||
| if(table) { | ||
|
|
@@ -141,14 +146,24 @@ struct grid { | |
| } | ||
| } | ||
| return false;};//todo, optimize | ||
| if (nbrCache[bait-cells]) { | ||
| auto accum = nbrCache[bait-cells]; | ||
| int idx = bait - cells; | ||
| if (nbrCache[idx]) { | ||
| auto accum = nbrCache[idx]; | ||
| for (auto accum_i : *accum) { | ||
| if(fWrap(accum_i)) break; | ||
| } | ||
| } else { | ||
| floatT hop = sqrt(dim + 3) * 1.0000001; | ||
| nbrCache[bait-cells] = tree->rangeNeighbor(bait, r * hop, fStop, fWrap, true, nbrCache[bait-cells]); | ||
| // wait for other threads to do their thing then try again | ||
| std::lock_guard<std::mutex> lock(cacheLocks[idx]); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Thank you. Did you notice any performance degradation as a result of using mutex?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I did not notice a change in the performance. Threads should now be doing less unnecessary work, which should overcome the performance cost of the mutex. |
||
| if (nbrCache[idx]) { | ||
| auto accum = nbrCache[idx]; | ||
| for (auto accum_i : *accum) { | ||
| if (fWrap(accum_i)) break; | ||
| } | ||
| } else { | ||
| floatT hop = sqrt(dim + 3) * 1.0000001; | ||
| nbrCache[idx] = tree->rangeNeighbor(bait, r * hop, fStop, fWrap, true, nbrCache[idx]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -160,14 +175,24 @@ struct grid { | |
| return f(cell); | ||
| return false; | ||
| }; | ||
| if (nbrCache[bait-cells]) { | ||
| auto accum = nbrCache[bait-cells]; | ||
| int idx = bait - cells; | ||
| if (nbrCache[idx]) { | ||
| auto accum = nbrCache[idx]; | ||
| for (auto accum_i : *accum) { | ||
| if(fWrap(accum_i)) break; | ||
| if (fWrap(accum_i)) break; | ||
| } | ||
| } else { | ||
| floatT hop = sqrt(dim + 3) * 1.0000001; | ||
| nbrCache[bait-cells] = tree->rangeNeighbor(bait, r * hop, fStop, fWrap, true, nbrCache[bait-cells]); | ||
| // wait for other threads to do their thing then try again | ||
| std::lock_guard<std::mutex> lock(cacheLocks[idx]); | ||
| if (nbrCache[idx]) { | ||
| auto accum = nbrCache[idx]; | ||
| for (auto accum_i : *accum) { | ||
| if (fWrap(accum_i)) break; | ||
| } | ||
| } else { | ||
| floatT hop = sqrt(dim + 3) * 1.0000001; | ||
| nbrCache[bait-cells] = tree->rangeNeighbor(bait, r * hop, fStop, fWrap, true, nbrCache[idx]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,27 @@ | |
| #include "dbscan/pbbs/parallel.h" | ||
|
|
||
|
|
||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you for fixing this!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No problem! I also have a fix for Arm CPU crashing, which I will upload later. There is also a very rare segmentation fault (~1 in a million chance) that I am looking into. |
||
| static bool scheduler_initialized = false; | ||
| static PyObject* scheduler_cleanup_weakref = nullptr; | ||
|
|
||
| static void cleanup_scheduler(PyObject *capsule) | ||
| { | ||
| if (scheduler_initialized) | ||
| { | ||
| parlay::internal::stop_scheduler(); | ||
| scheduler_initialized = false; | ||
| } | ||
| } | ||
|
|
||
| static void ensure_scheduler_initialized() | ||
| { | ||
| if (!scheduler_initialized) | ||
| { | ||
| parlay::internal::start_scheduler(); | ||
| scheduler_initialized = true; | ||
| } | ||
| } | ||
|
|
||
| static PyObject* DBSCAN_py(PyObject* self, PyObject* args, PyObject *kwargs) | ||
| { | ||
| PyObject *Xobj; | ||
|
|
@@ -58,7 +79,7 @@ static PyObject* DBSCAN_py(PyObject* self, PyObject* args, PyObject *kwargs) | |
| PyArrayObject* core_samples = (PyArrayObject*)PyArray_SimpleNew(1, &n, NPY_BOOL); | ||
| PyArrayObject* labels = (PyArrayObject*)PyArray_SimpleNew(1, &n, NPY_INT); | ||
|
|
||
| parlay::internal::start_scheduler(); | ||
| ensure_scheduler_initialized(); | ||
|
|
||
| DBSCAN( | ||
| dim, | ||
|
|
@@ -70,9 +91,11 @@ static PyObject* DBSCAN_py(PyObject* self, PyObject* args, PyObject *kwargs) | |
| (int*)PyArray_DATA(labels) | ||
| ); | ||
|
|
||
| parlay::internal::stop_scheduler(); | ||
|
|
||
| return PyTuple_Pack(2, labels, core_samples); | ||
| PyObject* result_tuple = PyTuple_Pack(2, labels, core_samples); | ||
| Py_DECREF(X); | ||
| Py_DECREF(core_samples); | ||
| Py_DECREF(labels); | ||
| return result_tuple; | ||
| } | ||
|
|
||
| PyDoc_STRVAR(doc_DBSCAN, | ||
|
|
@@ -126,6 +149,11 @@ PyInit__dbscan(void) | |
| #endif | ||
| PyModule_AddIntMacro(module, DBSCAN_MIN_DIMS); | ||
| PyModule_AddIntMacro(module, DBSCAN_MAX_DIMS); | ||
| PyObject *capsule = PyCapsule_New((void *)module, "dbscan.scheduler", cleanup_scheduler); | ||
| if (capsule != NULL) | ||
| { | ||
| PyModule_AddObject(module, "_scheduler_capsule", capsule); | ||
| } | ||
|
|
||
| return module; | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you. In
hasEdge, the trees were allocated on demand to save memory in case they are not required. How are the trees getting duplicated without being removed? From concurrent calls to hasEdge? Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think what was happening was that in hasEdge
while one thread is doing
new treeT, another thread sees trees[x] is empty, so it tries to do the same. Both threads assign their new treeT to the same index, and one of them leaks. This fix can be improved to work as intended, but currently it works, and speed is improved due to threads not doing duplicate work, so I moved on to fixing other areas.