Skip to content

Commit eb167ef

Browse files
fix: remove unnecessary pi-heif dependency, simplify ImportError catch
Co-Authored-By: Ryan Waskewich <ryan.waskewich@airbyte.io>
1 parent 4b907be commit eb167ef

3 files changed

Lines changed: 4 additions & 65 deletions

File tree

airbyte_cdk/sources/file_based/file_types/unstructured_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ def _import_unstructured() -> None:
88 88
from unstructured.partition.pdf import partition_pdf
89 89

90 90
unstructured_partition_pdf = partition_pdf
91-
except (ImportError, ModuleNotFoundError):
91+
except ImportError:
92 92
logger = logging.getLogger(__name__)
93 93
logger.info(
94 94
"Could not import unstructured.partition.pdf (requires unstructured_inference). PDF parsing will be unavailable."

poetry.lock

Lines changed: 2 additions & 61 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@ python-snappy = { version = "0.7.3", optional = true } # TODO: remove if unused
78 78
tiktoken = { version = "0.8.0", optional = true }
79 79
nltk = { version = "3.9.1", optional = true }
80 80
unstructured = { version = "0.18.32", extras = ["docx", "pptx"], optional = true }
81-
pi-heif = { version = ">=0.16.0", optional = true }
82 81
"unstructured.pytesseract" = { version = ">=0.3.12", optional = true }
83 82
pyjwt = "^2.8.0"
84 83
cryptography = ">=44.0.0,<45.0.0" # Constrained as transitive dependency due to a bug in newer versions
@@ -122,7 +121,7 @@ deptry = "^0.23.0"
122 121
dagger-io = "0.19.0"
123 122

124 123
[tool.poetry.extras]
125-
file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pi-heif", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "openpyxl", "python-snappy"]
124+
file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "openpyxl", "python-snappy"]
126 125
vector-db-based = ["langchain_community", "langchain_core", "langchain_text_splitters", "openai", "cohere", "tiktoken"]
127 126
sql = ["sqlalchemy"]
128 127
dev = ["pytest"]
@@ -262,7 +261,6 @@ DEP002 = [
262 261
"python-snappy",
263 262
"tiktoken",
264 263
"unstructured.pytesseract",
265-
"pi-heif",
266 264
]
267 265

268 266
# DEP003: Project should not use transitive dependencies.

0 commit comments

Comments
 (0)