-
Notifications
You must be signed in to change notification settings - Fork 20
Fix memory leaks and performance improvements #22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
|
|
||
| #pragma once | ||
|
|
||
| #include <mutex> | ||
| #include "cell.h" | ||
| #include "point.h" | ||
| #include "shared.h" | ||
|
|
@@ -77,6 +78,7 @@ struct grid { | |
| treeT* tree=NULL; | ||
| intT totalPoints; | ||
| cellBuf **nbrCache; | ||
| std::mutex* cacheLocks; | ||
|
|
||
| /** | ||
| * Grid constructor. | ||
|
|
@@ -89,10 +91,12 @@ struct grid { | |
|
|
||
| cells = newA(cellT, cellCapacity); | ||
| nbrCache = newA(cellBuf*, cellCapacity); | ||
| cacheLocks = (std::mutex*) malloc(cellCapacity * sizeof(std::mutex)); | ||
| parallel_for(0, cellCapacity, [&](intT i) { | ||
| nbrCache[i] = NULL; | ||
| cells[i].init(); | ||
| }); | ||
| new (&cacheLocks[i]) std::mutex(); | ||
| nbrCache[i] = NULL; | ||
| cells[i].init(); | ||
| }); | ||
| numCells = 0; | ||
|
|
||
| myHash = new cellHashT(pMinn, r); | ||
|
|
@@ -101,9 +105,10 @@ struct grid { | |
|
|
||
| ~grid() { | ||
| free(cells); | ||
| parallel_for(0, numCells, [&](intT i) { | ||
| if(nbrCache[i]) delete nbrCache[i]; | ||
| }); | ||
| free(cacheLocks); | ||
| parallel_for(0, cellCapacity, [&](intT i) { | ||
| if(nbrCache[i]) delete nbrCache[i]; | ||
| }); | ||
| free(nbrCache); | ||
| if(myHash) delete myHash; | ||
| if(table) { | ||
|
|
@@ -141,14 +146,24 @@ struct grid { | |
| } | ||
| } | ||
| return false;};//todo, optimize | ||
| if (nbrCache[bait-cells]) { | ||
| auto accum = nbrCache[bait-cells]; | ||
| int idx = bait - cells; | ||
| if (nbrCache[idx]) { | ||
| auto accum = nbrCache[idx]; | ||
| for (auto accum_i : *accum) { | ||
| if(fWrap(accum_i)) break; | ||
| } | ||
| } else { | ||
| floatT hop = sqrt(dim + 3) * 1.0000001; | ||
| nbrCache[bait-cells] = tree->rangeNeighbor(bait, r * hop, fStop, fWrap, true, nbrCache[bait-cells]); | ||
| // wait for other threads to do their thing then try again | ||
| std::lock_guard<std::mutex> lock(cacheLocks[idx]); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Thank you. Did you notice any performance degradation as a result of using mutex?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I did not notice a change in the performance. Threads should now be doing less unnecessary work, which should overcome the performance cost of the mutex. |
||
| if (nbrCache[idx]) { | ||
| auto accum = nbrCache[idx]; | ||
| for (auto accum_i : *accum) { | ||
| if (fWrap(accum_i)) break; | ||
| } | ||
| } else { | ||
| floatT hop = sqrt(dim + 3) * 1.0000001; | ||
| nbrCache[idx] = tree->rangeNeighbor(bait, r * hop, fStop, fWrap, true, nbrCache[idx]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -160,14 +175,24 @@ struct grid { | |
| return f(cell); | ||
| return false; | ||
| }; | ||
| if (nbrCache[bait-cells]) { | ||
| auto accum = nbrCache[bait-cells]; | ||
| int idx = bait - cells; | ||
| if (nbrCache[idx]) { | ||
| auto accum = nbrCache[idx]; | ||
| for (auto accum_i : *accum) { | ||
| if(fWrap(accum_i)) break; | ||
| if (fWrap(accum_i)) break; | ||
| } | ||
| } else { | ||
| floatT hop = sqrt(dim + 3) * 1.0000001; | ||
| nbrCache[bait-cells] = tree->rangeNeighbor(bait, r * hop, fStop, fWrap, true, nbrCache[bait-cells]); | ||
| // wait for other threads to do their thing then try again | ||
| std::lock_guard<std::mutex> lock(cacheLocks[idx]); | ||
| if (nbrCache[idx]) { | ||
| auto accum = nbrCache[idx]; | ||
| for (auto accum_i : *accum) { | ||
| if (fWrap(accum_i)) break; | ||
| } | ||
| } else { | ||
| floatT hop = sqrt(dim + 3) * 1.0000001; | ||
| nbrCache[bait-cells] = tree->rangeNeighbor(bait, r * hop, fStop, fWrap, true, nbrCache[idx]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,27 @@ | |
| #include "dbscan/pbbs/parallel.h" | ||
|
|
||
|
|
||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you for fixing this!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No problem! I also have a fix for Arm CPU crashing, which I will upload later. There is also a very rare segmentation fault (~1 in a million chance) that I am looking into. |
||
| static bool scheduler_initialized = false; | ||
| static PyObject* scheduler_cleanup_weakref = nullptr; | ||
|
|
||
| static void cleanup_scheduler(PyObject *capsule) | ||
| { | ||
| if (scheduler_initialized) | ||
| { | ||
| parlay::internal::stop_scheduler(); | ||
| scheduler_initialized = false; | ||
| } | ||
| } | ||
|
|
||
| static void ensure_scheduler_initialized() | ||
| { | ||
| if (!scheduler_initialized) | ||
| { | ||
| parlay::internal::start_scheduler(); | ||
| scheduler_initialized = true; | ||
| } | ||
| } | ||
|
|
||
| static PyObject* DBSCAN_py(PyObject* self, PyObject* args, PyObject *kwargs) | ||
| { | ||
| PyObject *Xobj; | ||
|
|
@@ -58,7 +79,7 @@ static PyObject* DBSCAN_py(PyObject* self, PyObject* args, PyObject *kwargs) | |
| PyArrayObject* core_samples = (PyArrayObject*)PyArray_SimpleNew(1, &n, NPY_BOOL); | ||
| PyArrayObject* labels = (PyArrayObject*)PyArray_SimpleNew(1, &n, NPY_INT); | ||
|
|
||
| parlay::internal::start_scheduler(); | ||
| ensure_scheduler_initialized(); | ||
|
|
||
| DBSCAN( | ||
| dim, | ||
|
|
@@ -70,9 +91,11 @@ static PyObject* DBSCAN_py(PyObject* self, PyObject* args, PyObject *kwargs) | |
| (int*)PyArray_DATA(labels) | ||
| ); | ||
|
|
||
| parlay::internal::stop_scheduler(); | ||
|
|
||
| return PyTuple_Pack(2, labels, core_samples); | ||
| PyObject* result_tuple = PyTuple_Pack(2, labels, core_samples); | ||
| Py_DECREF(X); | ||
| Py_DECREF(core_samples); | ||
| Py_DECREF(labels); | ||
| return result_tuple; | ||
| } | ||
|
|
||
| PyDoc_STRVAR(doc_DBSCAN, | ||
|
|
@@ -126,6 +149,11 @@ PyInit__dbscan(void) | |
| #endif | ||
| PyModule_AddIntMacro(module, DBSCAN_MIN_DIMS); | ||
| PyModule_AddIntMacro(module, DBSCAN_MAX_DIMS); | ||
| PyObject *capsule = PyCapsule_New((void *)module, "dbscan.scheduler", cleanup_scheduler); | ||
| if (capsule != NULL) | ||
| { | ||
| PyModule_AddObject(module, "_scheduler_capsule", capsule); | ||
| } | ||
|
|
||
| return module; | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you. In
hasEdge, the trees were allocated on demand to save memory in case they are not required. How are the trees getting duplicated without being removed? From concurrent calls to hasEdge? Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think what was happening was that in hasEdge
while one thread is doing
new treeT, another thread sees trees[x] is empty, so it tries to do the same. Both threads assign their new treeT to the same index, and one of them leaks. This fix can be improved to work as intended, but currently it works, and speed is improved due to threads not doing duplicate work, so I moved on to fixing other areas.