Skip to content

Commit 39b8263

Browse files
authored
SDK correctness and resilience improvements (#336)
<!-- CURSOR_SUMMARY -->
> [!NOTE]
> **Medium Risk**
> Touches core split-PDF execution and retry/timeout cleanup logic; mistakes could impact partition reliability or leak resources, though changes are well-covered by expanded unit/integration tests and logging.
>
> **Overview**
> Improves split-PDF correctness and debuggability by adding **operation-aware observability** (plan/batch/chunk lifecycle logs) and propagating split metadata via `X-Unstructured-Split-*` headers into errors/logs.
>
> Hardens split execution: per-operation state is isolated, transport exceptions/cancellations are handled explicitly (with optional partial-results behavior via `split_pdf_allow_failed`), and timeout/cleanup paths now safely cancel in-flight work even when event loops are closed.
>
> Preserves chunk-level transport retries by deriving a split-specific retry config that always retries `httpx.TransportError` for chunk calls, even when SDK-level connection retries are disabled. CI/test tooling is updated (new platform integration job/target, more verbose integration output, and bumped GitHub Action versions), and the package is released as `0.43.1`.
>
> <sup>Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit e65ce5b. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
1 parent a40df61 commit 39b8263

File tree

13 files changed

+1774
-258
lines changed

13 files changed

+1774
-258
lines changed

.github/workflows/ci.yaml

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ jobs:
2424
python-version: [ "3.11", "3.12", "3.13" ]
2525
runs-on: ubuntu-latest
2626
steps:
27-
- uses: actions/checkout@v4
28-
- uses: astral-sh/setup-uv@v6
27+
- uses: actions/checkout@v5
28+
- uses: astral-sh/setup-uv@v7
2929
- name: Set up Python ${{ matrix.python-version }}
30-
uses: actions/setup-python@v5
30+
uses: actions/setup-python@v6
3131
with:
3232
python-version: ${{ matrix.python-version }}
3333
- name: Install dependencies
@@ -44,10 +44,10 @@ jobs:
4444
lint:
4545
runs-on: ubuntu-latest
4646
steps:
47-
- uses: actions/checkout@v4
48-
- uses: astral-sh/setup-uv@v6
47+
- uses: actions/checkout@v5
48+
- uses: astral-sh/setup-uv@v7
4949
- name: Set up Python 3.13
50-
uses: actions/setup-python@v5
50+
uses: actions/setup-python@v6
5151
with:
5252
python-version: "3.13"
5353
- name: Install dependencies
@@ -68,10 +68,10 @@ jobs:
6868
python-version: [ "3.11", "3.12", "3.13" ]
6969
runs-on: opensource-linux-8core
7070
steps:
71-
- uses: actions/checkout@v4
72-
- uses: astral-sh/setup-uv@v6
71+
- uses: actions/checkout@v5
72+
- uses: astral-sh/setup-uv@v7
7373
- name: Set up Python ${{ matrix.python-version }}
74-
uses: actions/setup-python@v5
74+
uses: actions/setup-python@v6
7575
with:
7676
python-version: ${{ matrix.python-version }}
7777
- name: Install dependencies
@@ -86,17 +86,38 @@ jobs:
8686
run: |
8787
make test-integration-docker
8888
89+
test_platform_integration:
90+
runs-on: opensource-linux-8core
91+
steps:
92+
- uses: actions/checkout@v5
93+
- uses: astral-sh/setup-uv@v7
94+
- name: Set up Python 3.13
95+
uses: actions/setup-python@v6
96+
with:
97+
python-version: "3.13"
98+
- name: Install dependencies
99+
env:
100+
UV_LOCKED: "1"
101+
UV_PYTHON: "3.13"
102+
run: make install
103+
- name: Run platform integration tests
104+
env:
105+
UV_PYTHON: "3.13"
106+
UNSTRUCTURED_API_KEY: ${{ secrets.UNSTRUCTURED_API_KEY }}
107+
run: |
108+
make test-integration-platform
109+
89110
test_contract:
90111
strategy:
91112
fail-fast: false
92113
matrix:
93114
python-version: [ "3.11", "3.12", "3.13" ]
94115
runs-on: opensource-linux-8core
95116
steps:
96-
- uses: actions/checkout@v4
97-
- uses: astral-sh/setup-uv@v6
117+
- uses: actions/checkout@v5
118+
- uses: astral-sh/setup-uv@v7
98119
- name: Set up Python ${{ matrix.python-version }}
99-
uses: actions/setup-python@v5
120+
uses: actions/setup-python@v6
100121
with:
101122
python-version: ${{ matrix.python-version }}
102123
- name: Install dependencies

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
## 0.43.1
2+
3+
### Enhancements
4+
* Add split-PDF observability with operation-aware batch planning, timeout, cancellation, and completion logs.
5+
* Make long-running integration tests stream live progress, timings, and backend failure context for split and single partition phases.
6+
7+
### Features
8+
9+
### Fixes
10+
* Preserve chunk-local transport retries for split-PDF execution even when the SDK-level retry configuration disables connection-error retries for top-level requests.
11+
* Harden split-PDF timeout and cleanup paths against closed event loops and cancelled chunk tasks.
12+
* Stabilize `hi_res` split integration coverage by using a smaller derived multi-page fixture instead of the flaky full `layout-parser-paper.pdf` path for equivalence and caching checks.
13+
114
## 0.42.12
215

316
### Enhancements

Makefile

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ PACKAGE_NAME := unstructured-python-client
22
CURRENT_DIR := $(shell pwd)
33
ARCH := $(shell uname -m)
44
DOCKER_IMAGE ?= downloads.unstructured.io/unstructured-io/unstructured-api:latest
5+
INTEGRATION_IGNORE_ARGS := --ignore=_test_unstructured_client/integration/test_platform_workflow_lifecycle.py
6+
INTEGRATION_PYTEST_ARGS := _test_unstructured_client -vv -k integration $(INTEGRATION_IGNORE_ARGS) -o log_cli=true -o log_cli_level=INFO -o log_cli_format="%(asctime)s %(levelname)s %(message)s" --capture=tee-sys --durations=20 --tb=long
7+
PLATFORM_INTEGRATION_PYTEST_ARGS := _test_unstructured_client/integration/test_platform_workflow_lifecycle.py -v -o log_cli=true -o log_cli_level=INFO --durations=20 --tb=long
58

69
###########
710
# Install #
@@ -36,16 +39,34 @@ test-contract:
3639
# Assumes you have unstructured-api running on localhost:8000
3740
.PHONY: test-integration
3841
test-integration:
39-
PYTHONPATH=. uv run pytest -n auto _test_unstructured_client -v -k "integration"
42+
PYTHONPATH=. uv run pytest $(INTEGRATION_PYTEST_ARGS)
4043

4144
# Runs the unstructured-api in docker for tests
4245
.PHONY: test-integration-docker
4346
test-integration-docker:
44-
-docker stop unstructured-api && docker kill unstructured-api
45-
docker run --name unstructured-api -p 8000:8000 -d --rm ${DOCKER_IMAGE} --host 0.0.0.0 && \
46-
curl -s -o /dev/null --retry 10 --retry-delay 5 --retry-all-errors http://localhost:8000/general/docs && \
47-
PYTHONPATH=. uv run pytest -n auto _test_unstructured_client -v -k "integration" && \
48-
docker kill unstructured-api
47+
@bash -lc 'set -euo pipefail; \
48+
container_name=unstructured-api; \
49+
image="${DOCKER_IMAGE}"; \
50+
cleanup() { \
51+
status=$$?; \
52+
if [ $$status -ne 0 ]; then \
53+
echo "integration diagnostics image=$$image container=$$container_name"; \
54+
docker logs "$$container_name" --tail 200 || true; \
55+
fi; \
56+
docker kill "$$container_name" >/dev/null 2>&1 || true; \
57+
exit $$status; \
58+
}; \
59+
trap cleanup EXIT; \
60+
docker stop "$$container_name" >/dev/null 2>&1 || true; \
61+
docker kill "$$container_name" >/dev/null 2>&1 || true; \
62+
echo "starting integration api image=$$image"; \
63+
docker run --name "$$container_name" -p 8000:8000 -d --rm "$$image" --host 0.0.0.0; \
64+
curl -s -o /dev/null --retry 10 --retry-delay 5 --retry-all-errors http://localhost:8000/general/docs; \
65+
PYTHONPATH=. uv run pytest $(INTEGRATION_PYTEST_ARGS)'
66+
67+
.PHONY: test-integration-platform
68+
test-integration-platform:
69+
PYTHONPATH=. uv run pytest $(PLATFORM_INTEGRATION_PYTEST_ARGS)
4970

5071
.PHONY: lint
5172
lint:

0 commit comments

Comments
 (0)