Unstructured-IO · cragwolfe · Apr 4, 2025 · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,10 +1,11 @@
-## 0.17.6-dev0
+## 0.17.6-dev1
 
 ### Enhancements
 
 ### Features
 
 ### Fixes
+- **Do not use NLP to determine element types for extracted elements with the `hi_res` strategy.** This avoids extraneous `Title` elements in `hi_res` outputs.
 
 ## 0.17.5
 

diff --git a/unstructured/__version__.py b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.17.6-dev0"  # pragma: no cover
+__version__ = "0.17.6-dev1"  # pragma: no cover
diff --git a/unstructured/partition/pdf.py b/unstructured/partition/pdf.py
@@ -362,7 +362,10 @@ def partition_pdf_or_image(
                 table_ocr_agent=table_ocr_agent,
                 **kwargs,
             )
-            out_elements = _process_uncategorized_text_elements(elements)
+            # NOTE(crag): do not call _process_uncategorized_text_elements here, because
+            # extracted elements (text blocks outside of object-detection-determined blocks)
+            # are likely not Titles and should not be identified as such.
+            return elements
 
     elif strategy == PartitionStrategy.FAST:
         out_elements = _partition_pdf_with_pdfparser(
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		__version__ = "0.17.6-dev0" # pragma: no cover
		__version__ = "0.17.6-dev1" # pragma: no cover