Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

### Fixes
- **Fix image extraction for PNG files.** When `extract_image_block_to_payload` is True, and the image is a PNG, we get a Pillow error. We need to remove the PNG transparency layer before saving the image.
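- **Throw validation error when JSON is passed with an invalid Unstructured schema.** `partition_json` and `partition_ndjson` now raise a `ValueError` when the input contains JSON items but none of them can be converted to Unstructured elements.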

## 0.17.6

Expand Down
11 changes: 11 additions & 0 deletions test_unstructured/partition/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,11 @@ def test_partition_json_works_with_empty_string():
assert partition_json(text="") == []


def test_partition_json_fails_with_empty_item():
    """`partition_json` raises ValueError when given a bare empty JSON object."""
    empty_item = "{}"
    with pytest.raises(ValueError):
        partition_json(text=empty_item)


def test_partition_json_works_with_empty_list():
    """`partition_json` returns an empty list when given an empty JSON array."""
    elements = partition_json(text="[]")
    assert elements == []

Expand Down Expand Up @@ -288,6 +293,12 @@ def test_partition_json_from_text_prefers_metadata_last_modified():
# ------------------------------------------------------------------------------------------------


def test_partition_json_raises_with_unprocessable_json_array():
    """`partition_json` raises ValueError for a non-empty array of off-schema dicts."""
    with pytest.raises(ValueError):
        partition_json(text='[{"invalid": "schema"}]')


def test_partition_json_raises_with_unprocessable_json():
# NOTE(robinson) - This is unprocessable because it is not a list of dicts,
# per the Unstructured ISD format
Expand Down
16 changes: 14 additions & 2 deletions test_unstructured/partition/test_ndjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,14 @@ def test_partition_ndjson_works_with_empty_string():
assert partition_ndjson(text="") == []


def test_partition_ndjson_works_with_empty_list():
    """Assert that `partition_ndjson` returns an empty list for the text "{}"."""
    elements = partition_ndjson(text="{}")
    assert elements == []
def test_partition_ndjson_fails_with_empty_item():
    """`partition_ndjson` raises ValueError when the only line is an empty object."""
    empty_item = "{}"
    with pytest.raises(ValueError):
        partition_ndjson(text=empty_item)


def test_partition_ndjson_fails_with_empty_list():
    """`partition_ndjson` raises ValueError when the only line is an empty array."""
    empty_list = "[]"
    with pytest.raises(ValueError):
        partition_ndjson(text=empty_list)


def test_partition_ndjson_raises_with_too_many_specified():
Expand Down Expand Up @@ -293,6 +299,12 @@ def test_partition_ndjson_from_text_prefers_metadata_last_modified():
# ------------------------------------------------------------------------------------------------


def test_partition_json_raises_with_unprocessable_json():
    """`partition_ndjson` raises ValueError for a single off-schema JSON object.

    NOTE: despite the `json` in its name, this test exercises `partition_ndjson`.
    """
    with pytest.raises(ValueError):
        partition_ndjson(text='{"invalid": "schema"}')


def test_partition_json_raises_with_invalid_json():
text = '[{"hi": "there"}]]'
with pytest.raises(ValueError):
Expand Down
5 changes: 5 additions & 0 deletions unstructured/partition/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ def partition_json(
try:
element_dicts = json.loads(file_text)
elements = elements_from_dicts(element_dicts)
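# If the input held at least one JSON item but no Unstructured elements were produced, the schema
# does not match: raise ValueError (presumably reported as HTTP 422 by the caller — TODO confirm).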
if len(element_dicts) > 0 and len(elements) == 0:
raise ValueError(
"JSON cannot be partitioned. Schema does not match the Unstructured schema.",
)
except json.JSONDecodeError:
raise ValueError("Not a valid json")

Expand Down
5 changes: 5 additions & 0 deletions unstructured/partition/ndjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ def partition_ndjson(
try:
element_dicts = ndjson_loads(file_text)
elements = elements_from_dicts(element_dicts)
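# If the input held at least one JSON item but no Unstructured elements were produced, the schema
# does not match: raise ValueError (presumably reported as HTTP 422 by the caller — TODO confirm).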
if len(element_dicts) > 0 and len(elements) == 0:
raise ValueError(
"JSON cannot be partitioned. Schema does not match the Unstructured schema.",
)
except json.JSONDecodeError:
raise ValueError("Not a valid ndjson")

Expand Down
Loading