Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/base-cache/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ runs:
- name: Install dependencies
shell: bash
run: |
uv sync --frozen --all-extras --all-groups
uv sync --locked --all-extras --all-groups
make install-nltk-models
10 changes: 5 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ jobs:
env:
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
run: |
uv sync --frozen --group test
uv sync --locked --group test
make install-nltk-models
make test-no-extras CI=true

Expand Down Expand Up @@ -162,7 +162,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install extra dependencies
run: |
uv sync --frozen ${{ matrix.uv-extras }} --group test
uv sync --locked ${{ matrix.uv-extras }} --group test
make install-nltk-models
- name: Install system dependencies
run: |
Expand Down Expand Up @@ -250,7 +250,7 @@ jobs:
sudo apt-get install -y tesseract-ocr-kor
sudo apt-get install diffstat
tesseract --version
uv run ./test_unstructured_ingest/test-ingest-src.sh
uv run --no-sync ./test_unstructured_ingest/test-ingest-src.sh

test_json_to_html:
strategy:
Expand All @@ -268,7 +268,7 @@ jobs:
OVERWRITE_FIXTURES: "false"
run: |
sudo apt-get install diffstat
uv run ./test_unstructured_ingest/check-diff-expected-output-html.sh
uv run --no-sync ./test_unstructured_ingest/check-diff-expected-output-html.sh

test_json_to_markdown:
strategy:
Expand All @@ -286,7 +286,7 @@ jobs:
OVERWRITE_FIXTURES: "false"
run: |
sudo apt-get install diffstat
uv run ./test_unstructured_ingest/check-diff-expected-output-markdown.sh
uv run --no-sync ./test_unstructured_ingest/check-diff-expected-output-markdown.sh

changelog:
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ingest-test-fixtures-update-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ jobs:
sudo apt-get install -y tesseract-ocr-kor
sudo apt-get install diffstat
tesseract --version
uv run ./test_unstructured_ingest/test-ingest-src.sh
uv run --no-sync ./test_unstructured_ingest/test-ingest-src.sh
- name: Update HTML fixtures
run: make html-fixtures-update
- name: Update markdown fixtures
Expand Down
66 changes: 66 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Release workflow: runs when a GitHub release is published, builds the
# distributions with uv, publishes them to PyPI, and then makes a best-effort
# upload of the same artifacts to an Azure Artifacts feed.
name: Pypi Release

on:
  release:
    types:
      - published

permissions:
  contents: read
  id-token: write

env:
  # Quoted so the version stays a string and is not parsed as a float.
  PYTHON_VERSION: "3.12"

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Set up Python
        run: uv python install

      # Installs the `release` dependency group into the project environment;
      # the Azure upload step below runs twine from that environment.
      - name: Install dependencies
        run: uv sync --locked --group release

      # NOTE(review): confirm that `uv build` accepts `--no-sync` in the uv
      # version installed by setup-uv; left unchanged here.
      - name: Build artifact
        run: |
          uv build --no-sync

      - name: Publish package
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
          skip-existing: true

      # Writes the twine configuration for the `azure` index used below.
      # NOTE(review): AZURE_ARTIFACTS_FEED fills both the organization and the
      # feed segment of the repository URL - confirm that this is intended.
      - name: Create .pypirc for Azure Artifacts
        run: |
          cat <<EOF > ~/.pypirc
          [distutils]
          index-servers =
              azure

          [azure]
          repository: https://pkgs.dev.azure.com/${{ secrets.AZURE_ARTIFACTS_FEED }}/_packaging/${{ secrets.AZURE_ARTIFACTS_FEED }}/pypi/upload/
          username: ${{ secrets.AZURE_ARTIFACTS_USERNAME }}
          password: ${{ secrets.AZURE_ARTIFACTS_PAT }}
          EOF

      # Best-effort upload: a failure is reported but never fails the job,
      # because the PyPI publish above has already succeeded.
      # twine is invoked through `uv run` because it lives in the project
      # environment created by `uv sync`, and no step in this workflow puts
      # that environment on PATH. A bare `twine` would fail as "command not
      # found" and be swallowed by the `else` branch. `--no-sync` reuses the
      # environment exactly as the install step left it.
      - name: Publish package to Azure Artifacts
        run: |
          if uv run --no-sync twine upload -r azure dist/*; then
            echo "✅ Successfully published to Azure Artifacts (or already existed)"
          else
            EXIT_CODE=$?
            echo "❌ Azure Artifacts upload failed, but PyPI upload succeeded"
            if [[ $EXIT_CODE == 1 ]]; then
              echo "⚠️ This may be due to version conflicts or connectivity issues"
            fi
            echo "This is non-critical - the package is available on PyPI"
            exit 0
          fi
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## 0.20.2

### Enhancements
- Add automated PyPI publishing: new `release.yml` GitHub Actions workflow runs when a GitHub release is published, builds the package with `uv build`, publishes to PyPI via `pypa/gh-action-pypi-publish`, and uploads to Azure Artifacts via `twine`
- Replace `uv sync --frozen` with `uv sync --locked` across all CI workflows, Dockerfile, and Makefile to fail fast on stale lockfiles
- Add `--no-sync` to all `uv run` and `uv build` commands that follow a prior `uv sync` step to prevent implicit re-syncing

## 0.20.1

### Fixes
Expand Down
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ ENV UV_COMPILE_BYTECODE=1
ENV UV_PYTHON_DOWNLOADS=never

# Install Python dependencies via uv and download required NLTK packages
RUN uv sync --frozen --all-extras --no-group dev --no-group lint --no-group test && \
RUN uv sync --locked --all-extras --no-group dev --no-group lint --no-group test && \
Comment thread
cursor[bot] marked this conversation as resolved.
Outdated
mkdir -p ${NLTK_DATA} && \
uv run $PYTHON -m nltk.downloader -d ${NLTK_DATA} punkt_tab averaged_perceptron_tagger_eng && \
uv run $PYTHON -c "from unstructured.partition.model_init import initialize; initialize()" && \
uv run $PYTHON -c "from unstructured_inference.models.tables import UnstructuredTableTransformerModel; model = UnstructuredTableTransformerModel(); model.initialize('microsoft/table-transformer-structure-recognition')"
uv run --no-sync $PYTHON -m nltk.downloader -d ${NLTK_DATA} punkt_tab averaged_perceptron_tagger_eng && \
uv run --no-sync $PYTHON -c "from unstructured.partition.model_init import initialize; initialize()" && \
uv run --no-sync $PYTHON -c "from unstructured_inference.models.tables import UnstructuredTableTransformerModel; model = UnstructuredTableTransformerModel(); model.initialize('microsoft/table-transformer-structure-recognition')"

ENV PATH="/app/.venv/bin:${PATH}"
ENV HF_HUB_OFFLINE=1
Expand Down
46 changes: 23 additions & 23 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ help: Makefile
## install: install all dependencies via uv
.PHONY: install
install:
@uv sync --frozen --all-extras --all-groups
@uv sync --locked --all-extras --all-groups
@$(MAKE) install-nltk-models

## lock: update and lock all dependencies
Expand All @@ -23,7 +23,7 @@ lock:

.PHONY: install-nltk-models
install-nltk-models:
uv run --frozen python -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()"
uv run --locked --no-sync python -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()"


#################
Expand All @@ -38,62 +38,62 @@ export UNSTRUCTURED_INCLUDE_DEBUG_METADATA ?= false
test:
CI=$(CI) \
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) \
uv run --frozen --no-sync pytest -n auto test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing --durations=40
uv run --locked --no-sync pytest -n auto test_${PACKAGE_NAME} --cov=${PACKAGE_NAME} --cov-report term-missing --durations=40

.PHONY: test-no-extras
test-no-extras:
CI=$(CI) \
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) \
uv run --frozen --no-sync pytest -n auto \
uv run --locked --no-sync pytest -n auto \
test_${PACKAGE_NAME}/partition/test_text.py \
test_${PACKAGE_NAME}/partition/test_email.py \
test_${PACKAGE_NAME}/partition/html/test_partition.py \
test_${PACKAGE_NAME}/partition/test_xml.py

.PHONY: test-extra-csv
test-extra-csv:
CI=$(CI) uv run --frozen --no-sync pytest -n auto \
CI=$(CI) uv run --locked --no-sync pytest -n auto \
test_unstructured/partition/test_csv.py \
test_unstructured/partition/test_tsv.py

.PHONY: test-extra-docx
test-extra-docx:
CI=$(CI) uv run --frozen --no-sync pytest -n auto \
CI=$(CI) uv run --locked --no-sync pytest -n auto \
test_unstructured/partition/test_doc.py \
test_unstructured/partition/test_docx.py

.PHONY: test-extra-epub
test-extra-epub:
CI=$(CI) uv run --frozen --no-sync pytest -n auto test_unstructured/partition/test_epub.py
CI=$(CI) uv run --locked --no-sync pytest -n auto test_unstructured/partition/test_epub.py

.PHONY: test-extra-markdown
test-extra-markdown:
CI=$(CI) uv run --frozen --no-sync pytest -n auto test_unstructured/partition/test_md.py
CI=$(CI) uv run --locked --no-sync pytest -n auto test_unstructured/partition/test_md.py

.PHONY: test-extra-odt
test-extra-odt:
CI=$(CI) uv run --frozen --no-sync pytest -n auto test_unstructured/partition/test_odt.py
CI=$(CI) uv run --locked --no-sync pytest -n auto test_unstructured/partition/test_odt.py

.PHONY: test-extra-pdf-image
test-extra-pdf-image:
CI=$(CI) uv run --frozen --no-sync pytest -n auto test_unstructured/partition/pdf_image
CI=$(CI) uv run --locked --no-sync pytest -n auto test_unstructured/partition/pdf_image

.PHONY: test-extra-pptx
test-extra-pptx:
CI=$(CI) uv run --frozen --no-sync pytest -n auto \
CI=$(CI) uv run --locked --no-sync pytest -n auto \
test_unstructured/partition/test_ppt.py \
test_unstructured/partition/test_pptx.py

.PHONY: test-extra-pypandoc
test-extra-pypandoc:
CI=$(CI) uv run --frozen --no-sync pytest -n auto \
CI=$(CI) uv run --locked --no-sync pytest -n auto \
test_unstructured/partition/test_org.py \
test_unstructured/partition/test_rst.py \
test_unstructured/partition/test_rtf.py

.PHONY: test-extra-xlsx
test-extra-xlsx:
CI=$(CI) uv run --frozen --no-sync pytest -n auto test_unstructured/partition/test_xlsx.py
CI=$(CI) uv run --locked --no-sync pytest -n auto test_unstructured/partition/test_xlsx.py

## check: runs all linters and checks
.PHONY: check
Expand All @@ -102,8 +102,8 @@ check: check-ruff check-version
## check-ruff: runs ruff linter and formatter check
.PHONY: check-ruff
check-ruff:
uv run --frozen --no-sync ruff check .
uv run --frozen --no-sync ruff format --check .
uv run --locked --no-sync ruff check .
uv run --locked --no-sync ruff format --check .

.PHONY: check-licenses
check-licenses:
Expand All @@ -119,8 +119,8 @@ check-version:
## tidy: auto-format and fix lint issues
.PHONY: tidy
tidy:
uv run --frozen --no-sync ruff format .
uv run --frozen --no-sync ruff check --fix-only --show-fixes .
uv run --locked --no-sync ruff format .
uv run --locked --no-sync ruff check --fix-only --show-fixes .

.PHONY: tidy-shell
tidy-shell:
Expand All @@ -135,7 +135,7 @@ version-sync:
## check-coverage: check test coverage meets threshold
.PHONY: check-coverage
check-coverage:
uv run --frozen --no-sync coverage report --fail-under=90
uv run --locked --no-sync coverage report --fail-under=90

##########
# Docker #
Expand Down Expand Up @@ -166,10 +166,10 @@ docker-test:
-v ${CURRENT_DIR}/test_unstructured_ingest:/home/notebook-user/test_unstructured_ingest \
$(if $(wildcard uns_test_env_file),--env-file uns_test_env_file,) \
$(DOCKER_IMAGE) \
bash -c "uv sync --frozen --all-extras --group test --no-install-project && \
bash -c "uv sync --locked --all-extras --group test --no-install-project && \
CI=$(CI) \
UNSTRUCTURED_INCLUDE_DEBUG_METADATA=$(UNSTRUCTURED_INCLUDE_DEBUG_METADATA) \
uv run pytest -n auto $(if $(TEST_FILE),$(TEST_FILE),test_unstructured)"
uv run --no-sync pytest -n auto $(if $(TEST_FILE),$(TEST_FILE),test_unstructured)"

.PHONY: docker-smoke-test
docker-smoke-test:
Expand All @@ -187,7 +187,7 @@ docker-jupyter-notebook:

.PHONY: run-jupyter
run-jupyter:
uv run jupyter-notebook --NotebookApp.token='' --NotebookApp.password=''
uv run --no-sync jupyter-notebook --NotebookApp.token='' --NotebookApp.password=''


###########
Expand All @@ -197,9 +197,9 @@ run-jupyter:
.PHONY: html-fixtures-update
html-fixtures-update:
rm -r test_unstructured_ingest/expected-structured-output-html && \
uv run test_unstructured_ingest/structured-json-to-html.sh test_unstructured_ingest/expected-structured-output-html
uv run --no-sync test_unstructured_ingest/structured-json-to-html.sh test_unstructured_ingest/expected-structured-output-html

.PHONY: markdown-fixtures-update
markdown-fixtures-update:
rm -r test_unstructured_ingest/expected-structured-output-markdown && \
uv run test_unstructured_ingest/structured-json-to-markdown.sh test_unstructured_ingest/expected-structured-output-markdown
uv run --no-sync test_unstructured_ingest/structured-json-to-markdown.sh test_unstructured_ingest/expected-structured-output-markdown
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ Then install all dependencies (base, extras, dev, test, and lint groups):
make install
```

This runs `uv sync --frozen --all-extras --all-groups`, which creates a virtual environment
This runs `uv sync --locked --all-extras --all-groups`, which creates a virtual environment
and installs everything in one step. No need to manually create or activate a virtualenv.

To install only specific document-type extras:
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ dev = [
lint = [
"ruff>=0.15.0, <1.0.0",
]
release = [
"twine",
]

[tool.uv]
required-environments = [
Expand Down
2 changes: 1 addition & 1 deletion unstructured/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.20.1" # pragma: no cover
__version__ = "0.20.2" # pragma: no cover
Loading
Loading