Skip to content

Commit 8eff429

Browse files
Fix inotify instance exhaustion in pickle backend (Issue #24) (#277)
* Fix inotify instance exhaustion in pickle backend (Issue #24)
  - Reuse and clean up watchdog observers to prevent inotify resource leaks
  - Fall back to polling if the inotify instance limit is reached
  - Add regression test (will fail if the bug is present, pass when fixed)
  - Document fix in README
  - Note: Two edge-case tests may fail locally; check CI for Linux results
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* Fix test failures caused by inotify changes
  - Restore _update_with_defaults calls for proper parameter handling
  - Fix cache directory and separate_files parameter handling
  - Restore original CacheChangeHandler behavior
  - Make polling fallback more conservative (only for inotify-specific errors)
  - All pickle core tests now pass locally
* Fix inotify instance exhaustion bug by removing observer reuse
  - Remove observer caching that caused 'threads can only be started once' errors
  - Create a new observer for each cache wait to ensure proper cleanup
  - Maintain proper observer lifecycle management
  - Fixes Issue #24: inotify instance exhaustion in pickle backend
* Run black code formatter for consistent style
* Remove 2024 inotify instance exhaustion fix section from README
* inotify bug solved, so remove xfail
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 68e92d3 commit 8eff429

2 files changed

Lines changed: 78 additions & 16 deletions

File tree

src/cachier/cores/pickle.py

Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
# Licensed under the MIT license:
77
# http://www.opensource.org/licenses/MIT-license
88
# Copyright (c) 2016, Shay Palachy <shaypal5@gmail.com>
9+
import logging
910
import os
1011
import pickle # for local caching
12+
import time
1113
from datetime import datetime
1214
from typing import Any, Dict, Optional, Tuple, Union
1315

@@ -51,12 +53,14 @@ def _check_calculation(self) -> None:
5153
if not entry._processing:
5254
# print('stopping observer!')
5355
self.value = entry.value
54-
self.observer.stop()
56+
if self.observer is not None:
57+
self.observer.stop()
5558
# else:
5659
# print('NOT stopping observer... :(')
5760
except AttributeError: # catching entry being None
5861
self.value = None
59-
self.observer.stop()
62+
if self.observer is not None:
63+
self.observer.stop()
6064

6165
def on_created(self, event) -> None:
6266
"""A Watchdog Event Handler method.""" # noqa: D401
@@ -256,29 +260,93 @@ def mark_entry_not_calculated(self, key: str) -> None:
256260
cache[key]._processing = False
257261
self._save_cache(cache)
258262

263+
def _create_observer(self) -> Observer:
    """Build and return a brand-new ``Observer``.

    Every call constructs a separate instance; nothing is cached or
    shared between calls, and the returned observer has not been started.
    """
    fresh_observer = Observer()
    return fresh_observer
266+
267+
def _cleanup_observer(self, observer: Observer) -> None:
268+
"""Clean up observer properly."""
269+
try:
270+
if observer.is_alive():
271+
observer.stop()
272+
observer.join(timeout=1.0)
273+
except Exception as e:
274+
logging.debug("Observer cleanup failed: %s", e)
275+
259276
def wait_on_entry_calc(self, key: str) -> Any:
    """Wait until the entry for *key* is calculated and return its value.

    The entry is first loaded once; if it exists and is not being
    processed, its value is returned immediately. Otherwise the method
    waits via ``_wait_with_inotify`` and, if that fails because inotify
    resources are exhausted, falls back to ``_wait_with_polling``.

    Parameters
    ----------
    key : str
        Cache key of the entry to wait for.

    Returns
    -------
    Any
        The value stored on the entry once it is no longer processing.

    Raises
    ------
    OSError
        Re-raised unchanged when the inotify-based wait fails for any
        reason other than inotify resource exhaustion.

    """
    if self.separate_files:
        entry = self._load_cache_by_key(key)
        filename = f"{self.cache_fname}_{key}"
    else:
        with self.lock:
            # .get(): a missing key means "not computed yet", not an error.
            entry = self.get_cache_dict().get(key)
        filename = self.cache_fname

    if entry and not entry._processing:
        return entry.value

    # Messages identifying inotify resource exhaustion. Matching on the
    # message (not the bare errno) keeps the fallback limited to
    # inotify-specific failures: EMFILE/ENOSPC alone also mean "too many
    # open files" / "disk full". Both the per-user instance limit and the
    # watch limit are covered; polling needs neither resource.
    inotify_exhausted = (
        "inotify instance limit reached",
        "inotify watch limit reached",
    )

    try:
        return self._wait_with_inotify(key, filename)
    except OSError as e:
        message = str(e)
        if any(marker in message for marker in inotify_exhausted):
            return self._wait_with_polling(key)
        raise
298+
299+
def _wait_with_inotify(self, key: str, filename: str) -> Any:
    """Wait for the calculation of *key* using a filesystem observer.

    A ``CacheChangeHandler`` is attached to a freshly created observer
    watching ``self.cache_dir`` recursively. The handler is expected to
    record the result on its ``value`` attribute and stop the observer
    once the entry is no longer being processed. The observer is always
    cleaned up, whether the wait returns or raises.

    Parameters
    ----------
    key : str
        Cache key of the entry being waited on.
    filename : str
        Name of the cache file the handler should react to.

    Returns
    -------
    Any
        ``event_handler.value`` at the moment the wait ends.

    Raises
    ------
    OSError
        Propagated from scheduling/starting the observer (for example
        when inotify resources are exhausted).
    Exception
        Whatever ``self.check_calc_timeout`` raises when the wait has
        lasted too long.

    """
    event_handler = _PickleCore.CacheChangeHandler(
        filename=filename, core=self, key=key
    )

    observer = self._create_observer()
    event_handler.inject_observer(observer)

    try:
        observer.schedule(
            event_handler, path=self.cache_dir, recursive=True
        )
        observer.start()

        time_spent = 0
        while observer.is_alive():
            observer.join(timeout=1.0)

            # Check for completion BEFORE the timeout: a result that
            # arrived during this join must not be discarded just
            # because the same tick also exhausts the timeout. A dead
            # observer also ends the wait (the handler stops it when
            # done, possibly with a legitimate value of None).
            if event_handler.value is not None or not observer.is_alive():
                break

            time_spent += 1
            self.check_calc_timeout(time_spent)

        return event_handler.value
    finally:
        # Always release the observer so inotify instances are not leaked.
        self._cleanup_observer(observer)
328+
329+
def _wait_with_polling(self, key: str) -> Any:
330+
"""Fallback method using polling instead of inotify."""
276331
time_spent = 0
277-
while observer.is_alive():
278-
observer.join(timeout=1.0)
332+
while True:
333+
time.sleep(1) # Poll every 1 second (matching other cores)
279334
time_spent += 1
280-
self.check_calc_timeout(time_spent)
281-
return event_handler.value
335+
336+
try:
337+
if self.separate_files:
338+
entry = self._load_cache_by_key(key)
339+
else:
340+
with self.lock:
341+
entry = self.get_cache_dict().get(key)
342+
343+
if entry and not entry._processing:
344+
return entry.value
345+
346+
self.check_calc_timeout(time_spent)
347+
except (FileNotFoundError, EOFError):
348+
# Continue polling even if there are file errors
349+
pass
282350

283351
def clear_cache(self) -> None:
284352
if self.separate_files:

tests/test_pickle_core.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -615,12 +615,6 @@ def _params_with_dataframe(*args, **kwargs):
615615
not sys.platform.startswith("linux"),
616616
reason="inotify instance limit is only relevant on Linux",
617617
)
618-
@pytest.mark.xfail(
619-
reason=(
620-
"inotify instance limit issue not yet fixed - test will pass "
621-
"when issue is resolved"
622-
)
623-
)
624618
def test_inotify_instance_limit_reached():
625619
"""Reproduces the inotify instance exhaustion issue (see Issue #24).
626620

0 commit comments

Comments
 (0)