Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file. The format
## Unreleased

- Upgrade `pdfminer.six` from `20251230` to `20260107`. ([07a5ff6](https://github.com/jsvine/pdfplumber/commit/07a5ff6))
- Ensure internally opened file streams are closed when a `PDF` object is garbage-collected, preventing leaked file handles (and the resulting `ResourceWarning`) when `PDF.close()` is not called. ([#1336](https://github.com/jsvine/pdfplumber/issues/1336))

## 0.11.9 — 2026-01-05

Expand Down Expand Up @@ -680,4 +681,3 @@ Whoops.

### Fixed
- Fix find_gutters — should ignore `" "` chars

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,7 @@ Many thanks to the following users who've contributed ideas, features, and fixes
- [Brandon Roberts](https://github.com/brandonrobertz)
- [@ennamarie19](https://github.com/ennamarie19)
- [Anton Ilin](https://github.com/bronislav)
- [@ReinerBRO](https://github.com/ReinerBRO)

## Contributing

Expand Down
19 changes: 17 additions & 2 deletions pdfplumber/pdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import itertools
import logging
import pathlib
import weakref
from io import BufferedReader, BytesIO
from types import TracebackType
from typing import Any, Dict, Generator, List, Literal, Optional, Tuple, Type, Union
Expand All @@ -22,6 +23,18 @@
logger = logging.getLogger(__name__)


def _close_stream_if_internal(
    stream: Union[BufferedReader, BytesIO], stream_is_external: bool
) -> None:
    """Close ``stream`` unless it was supplied by the caller.

    Best-effort cleanup helper: it never raises, and it is safe to call on a
    stream that has already been closed.

    Args:
        stream: The underlying PDF byte stream.
        stream_is_external: ``True`` when the stream belongs to the caller,
            in which case it is left completely untouched.
    """
    if not stream_is_external:
        try:
            still_open = not stream.closed
            if still_open:
                stream.close()
        except Exception:
            # Deliberately swallowed: a failure while closing must not
            # propagate out of cleanup code.
            pass


class PDF(Container):
cached_properties: List[str] = Container.cached_properties + ["_pages"]

Expand All @@ -39,6 +52,9 @@ def __init__(
):
self.stream = stream
self.stream_is_external = stream_is_external
self._stream_finalizer = weakref.finalize(
self, _close_stream_if_internal, stream, stream_is_external
)
self.path = path
self.pages_to_parse = pages
self.laparams = None if laparams is None else LAParams(**laparams)
Expand Down Expand Up @@ -127,8 +143,7 @@ def close(self) -> None:
for page in self.pages:
page.close()

if not self.stream_is_external:
self.stream.close()
self._stream_finalizer()

def __enter__(self) -> "PDF":
return self
Expand Down
42 changes: 42 additions & 0 deletions tests/test_resource_warning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python
import gc
import logging
import os
import unittest
import warnings

import pdfplumber

# Suppress log records of severity ERROR and below while these tests run.
logging.disable(logging.ERROR)

# Directory containing this test module; the sample PDF is resolved against it.
HERE = os.path.abspath(os.path.dirname(__file__))
PDF_PATH = os.path.join(HERE, "pdfs/nics-background-checks-2015-11.pdf")
# Bare file name, used to pick out captured warnings that mention this file.
PDF_NAME = os.path.basename(PDF_PATH)


class Test(unittest.TestCase):
    """Checks how a PDF's underlying stream is handled on garbage collection."""

    def test_internal_stream_no_resource_warning_after_gc(self):
        """Opening by path and dropping the PDF must not leak the file."""
        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter("always", ResourceWarning)
            pdf = pdfplumber.open(PDF_PATH)
            # Touch page content so the stream is actually read.
            _ = pdf.pages[0].chars[0]["text"]
            del pdf
            gc.collect()

        # Keep only ResourceWarnings that mention the sample PDF.
        leaked_file_warnings = []
        for entry in recorded:
            if not issubclass(entry.category, ResourceWarning):
                continue
            if PDF_NAME in str(entry.message):
                leaked_file_warnings.append(entry)
        assert leaked_file_warnings == []

    def test_external_stream_is_not_closed(self):
        """A caller-supplied stream must stay open after the PDF is dropped."""
        with open(PDF_PATH, "rb") as stream:
            pdf = pdfplumber.open(stream)
            _ = pdf.pages[0].chars[0]["text"]
            del pdf
            gc.collect()
            assert stream.closed is False