Skip to content

Commit 732a346

Browse files
fix: test
1 parent e36d31d commit 732a346

3 files changed

Lines changed: 25 additions & 16 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
## 0.22.2
22

33
### Enhancements
4-
- **`partition_md` Markdown `extensions` (fixes #4006)**: Optional kwarg passed to `markdown.markdown()`; defaults to `["tables"]`.
4+
- **`partition_md` Markdown `extensions` (fixes #4006)**: Optional kwarg passed to `markdown.markdown()`; defaults to `["tables", "fenced_code"]`. Override with e.g. `extensions=["tables"]` when needed.
55

66
## 0.22.1
77

test_unstructured/partition/test_md.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ def test_partition_md_non_xml_processing_instruction():
309309

310310
def test_partition_fenced_code():
311311
filename = example_doc_path("codeblock.md")
312-
elements = partition_md(filename=filename, extensions=["tables", "fenced_code"])
312+
elements = partition_md(filename=filename)
313313

314314
# Should have 5 elements: 2 titles and 3 code blocks
315315
assert len(elements) == 5
@@ -344,29 +344,32 @@ def test_partition_fenced_code():
344344

345345

346346
def test_partition_md_custom_extensions_parameter():
347-
"""User can override markdown extensions via `extensions` kwarg."""
347+
"""User can override markdown extensions via `extensions` kwarg (fixes #4006)."""
348348
text = """```bash
349349
# create the container
350350
docker run -dt --name unstructured downloads.unstructured.io/unstructured-io/unstructured:latest
351351
```"""
352352

353-
# By default (tables-only), markdown will treat the heading as a Title element.
354-
elements_default = partition_md(text=text)
355-
assert any(isinstance(el, Title) for el in elements_default)
356-
357-
# With fenced_code enabled, the whole block should be a single NarrativeText element.
358-
elements_fenced = partition_md(text=text, extensions=["fenced_code"])
359-
assert len(elements_fenced) == 1
360-
assert elements_fenced[0].category == ElementType.NARRATIVE_TEXT
361-
assert elements_fenced[0].text == (
353+
expected_body = (
362354
"# create the container\n"
363355
"docker run -dt --name unstructured "
364356
"downloads.unstructured.io/unstructured-io/unstructured:latest"
365357
)
366358

359+
# Without fenced_code, ``#`` inside the fence is parsed as a heading (undesired).
360+
elements_tables_only = partition_md(text=text, extensions=["tables"])
361+
assert any(isinstance(el, Title) for el in elements_tables_only)
362+
363+
# Default and explicit fenced_code keep the block as one element (CodeSnippet from HTML).
364+
assert len(partition_md(text=text)) == 1
365+
elements_fenced = partition_md(text=text, extensions=["fenced_code"])
366+
assert len(elements_fenced) == 1
367+
assert elements_fenced[0].category == ElementType.CODE_SNIPPET
368+
assert elements_fenced[0].text == expected_body
369+
367370

368371
def test_partition_md_invalid_extensions_logs_and_falls_back(mocker: MockFixture):
369-
"""Invalid `extensions` value is ignored with a warning and defaults to ['tables']."""
372+
"""Invalid `extensions` value is ignored with a warning and falls back to the default list."""
370373
text = "# Heading"
371374
logger = mocker.patch("unstructured.partition.md.logging.warning")
372375

unstructured/partition/md.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ def partition_md(
5050
languages
5151
The languages present in the document. Use ``["auto"]`` to detect (default when None).
5252
Use ``[""]`` to disable language detection.
53+
54+
Other keyword arguments are forwarded to ``partition_html``. In addition, ``extensions`` may be
55+
passed to select Python-Markdown extensions. The default is ``["tables", "fenced_code"]``.
56+
Pass e.g. ``extensions=["tables"]`` if you need the legacy behavior where ``#`` inside a fenced
57+
code block is parsed as a heading (see #4006).
5358
"""
5459
if text is None:
5560
text = ""
@@ -78,13 +83,14 @@ def partition_md(
7883

7984
text = response.text
8085

81-
# -- caller may override markdown extensions; default is tables-only (backwards compat) --
82-
extensions = kwargs.pop("extensions", ["tables"])
86+
# -- optional markdown extensions; default matches historical partition_md behavior --
87+
_default_extensions = ["tables", "fenced_code"]
88+
extensions = kwargs.pop("extensions", _default_extensions)
8389
if not (isinstance(extensions, list) and all(isinstance(ext, str) for ext in extensions)):
8490
logging.warning(
8591
"Ignoring invalid 'extensions' argument (expected list of strings): %r", extensions
8692
)
87-
extensions = ["tables"]
93+
extensions = _default_extensions
8894

8995
html = markdown.markdown(text, extensions=extensions)
9096

0 commit comments

Comments
 (0)