Skip to content

Commit cac7c0f

Browse files
lint fix
1 parent 01a456d commit cac7c0f

2 files changed

Lines changed: 16 additions & 9 deletions

File tree

test_unstructured/partition/test_auto.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1074,16 +1074,21 @@ def test_auto_partition_respects_detect_language_per_element_arg():
10741074
)
10751075
def test_auto_partition_respects_language_arg(file_extension: str):
10761076
elements = partition(
1077-
example_doc_path(f"language-docs/eng_spa_mult.{file_extension}"), languages=["deu"]
1077+
example_doc_path(f"language-docs/eng_spa_mult.{file_extension}"),
1078+
languages=["deu"],
10781079
)
10791080
assert all(element.metadata.languages == ["deu"] for element in elements)
10801081

10811082

10821083
def test_auto_partition_language_fallback_flows_through_call_chain():
10831084
"""Integration test: language_fallback must flow partition() -> partitioner -> apply_metadata
1084-
-> apply_lang_metadata -> detect_languages(). A fallback returning None yields no language."""
1085+
-> apply_lang_metadata -> detect_languages(). A fallback returning None yields no language.
1086+
"""
10851087
with tempfile.NamedTemporaryFile(
1086-
mode="w", suffix=".txt", delete=False, encoding="utf-8"
1088+
mode="w",
1089+
suffix=".txt",
1090+
delete=False,
1091+
encoding="utf-8",
10871092
) as f:
10881093
f.write("Hi.")
10891094
path = f.name

unstructured/partition/common/lang.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -384,6 +384,7 @@ def _validate_fallback_languages(
384384
value: Optional[list[str]],
385385
) -> Optional[list[str]]:
386386
"""Validate and normalize language_fallback return value to ISO 639-3 codes.
387+
387388
Returns None for None, non-list, or when no valid codes remain (invalid entries
388389
are logged and skipped).
389390
"""
@@ -417,12 +418,13 @@ def detect_languages(
417418
Detects the list of languages present in the text (in the default "auto" mode),
418419
or formats and passes through the user inputted document languages if provided.
419420
420-
For short ASCII text (fewer than 5 words), language detection is unreliable. By default
421-
such text is assigned English (["eng"]). Use ``language_fallback`` to override:
422-
pass a callable that takes the text and returns a list of ISO 639-3 codes or None.
423-
Return None to leave language unspecified. The caller is responsible for returning
424-
valid ISO 639-3 codes (e.g. "eng", "fra"); invalid entries are filtered out and
425-
a warning is logged; if none remain, this function returns None.
421+
For short ASCII text (fewer than 5 words), language detection is unreliable. By
422+
default such text is assigned English (["eng"]). Use ``language_fallback`` to
423+
override: pass a callable that takes the text and returns a list of ISO 639-3
424+
codes or None. Return None to leave language unspecified. The caller is
425+
responsible for returning valid ISO 639-3 codes (e.g. "eng", "fra"); invalid
426+
entries are filtered out and a warning is logged; if none remain, this function
427+
returns None.
426428
"""
427429
if languages is None:
428430
languages = ["auto"]

0 commit comments

Comments
 (0)