Skip to content

Commit 649111c

Browse files
authored
Migrate to uv (#540)
1 parent 99554f9 commit 649111c

31 files changed

Lines changed: 3565 additions & 1220 deletions

.github/dependabot.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
11
version: 2
22
updates:
3-
- package-ecosystem: "pip"
4-
directory: "/requirements"
3+
- package-ecosystem: "uv"
4+
directory: "/"
55
schedule:
66
interval: "daily"
77
# Only use this to bump our libraries
88
allow:
9-
- dependency-name: "unstructured[local-inference]"
9+
- dependency-name: "unstructured[all-docs]"
1010

1111
- package-ecosystem: "github-actions"
1212
# NOTE(robinson) - Workflow files stored in the

.github/workflows/bump_libraries.yaml

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,32 +6,31 @@ on:
66
- opened
77
- reopened
88
paths:
9-
- 'requirements/**'
10-
11-
env:
12-
PYTHON_VERSION: "3.8"
9+
- 'uv.lock'
10+
- 'pyproject.toml'
1311

1412
jobs:
1513
bump-changelog:
16-
runs-on: ubuntu-latest
14+
runs-on: opensource-linux-8core
1715
if: ${{ github.actor == 'dependabot[bot]' }}
1816
permissions:
1917
contents: write
2018
steps:
2119
- uses: actions/checkout@v5
20+
- name: Read Python version from .python-version
21+
run: echo "PYTHON_VERSION=$(cat .python-version)" >> $GITHUB_ENV
22+
- name: Install uv
23+
uses: astral-sh/setup-uv@v5
2224
- name: Set up Python ${{ env.PYTHON_VERSION }}
23-
uses: actions/setup-python@v6
24-
with:
25-
python-version: ${{ env.PYTHON_VERSION }}
25+
run: uv python install ${{ env.PYTHON_VERSION }}
2626
- name: Dependabot metadata
2727
id: metadata
2828
uses: dependabot/fetch-metadata@v2
2929
with:
3030
github-token: "${{ secrets.GITHUB_TOKEN }}"
3131
- name: Create release version
3232
run: |
33-
pip install pip-tools
34-
make pip-compile
33+
uv lock --upgrade
3534
package=${{ steps.metadata.outputs.dependency-names }}
3635
# Strip any [extras] from name
3736
package=${package%\[*}
@@ -41,4 +40,3 @@ jobs:
4140
- uses: stefanzweifel/git-auto-commit-action@v6
4241
with:
4342
commit_message: "Bump libraries and release"
44-

.github/workflows/ci.yml

Lines changed: 36 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -7,85 +7,62 @@ on:
77
branches: [ main ]
88

99
env:
10-
PYTHON_VERSION: "3.12"
1110
PIPELINE_FAMILY: "general"
1211

1312
jobs:
14-
setup:
15-
runs-on: ubuntu-latest
16-
steps:
17-
- uses: actions/checkout@v5
18-
- uses: actions/cache@v5
19-
id: virtualenv-cache
20-
with:
21-
path: |
22-
.venv
23-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/*.txt') }}
24-
- name: Set up Python ${{ env.PYTHON_VERSION }}
25-
uses: actions/setup-python@v6
26-
with:
27-
python-version: ${{ env.PYTHON_VERSION }}
28-
- name: Setup virtual environment (no cache hit)
29-
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
30-
run: |
31-
python${{ env.PYTHON_VERSION }} -m venv .venv
32-
source .venv/bin/activate
33-
make install-ci
34-
3513
lint:
36-
runs-on: ubuntu-latest
37-
needs: setup
14+
runs-on: opensource-linux-8core
3815
steps:
3916
- uses: actions/checkout@v5
40-
- uses: actions/cache@v5
41-
id: virtualenv-cache
17+
- name: Read Python version from .python-version
18+
run: echo "PYTHON_VERSION=$(cat .python-version)" >> $GITHUB_ENV
19+
- name: Install uv
20+
uses: astral-sh/setup-uv@v5
4221
with:
43-
path: |
44-
.venv
45-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/*.txt') }}
22+
enable-cache: true
23+
cache-dependency-glob: "uv.lock"
24+
- name: Set up Python ${{ env.PYTHON_VERSION }}
25+
run: uv python install ${{ env.PYTHON_VERSION }}
26+
- name: Install lint dependencies
27+
run: uv sync --only-group lint --frozen
4628
- name: Lint
47-
run: |
48-
source .venv/bin/activate
49-
make check
29+
run: make check
5030

5131
shellcheck:
52-
runs-on: ubuntu-latest
32+
runs-on: opensource-linux-8core
5333
steps:
5434
- uses: actions/checkout@v5
5535
- name: ShellCheck
5636
uses: ludeeus/action-shellcheck@master
5737

5838
test:
59-
runs-on: ubuntu-latest
60-
needs: [setup, lint]
39+
runs-on: opensource-linux-8core
40+
needs: lint
6141
steps:
6242
- uses: actions/checkout@v5
63-
- uses: actions/cache@v5
64-
id: virtualenv-cache
43+
- name: Read Python version from .python-version
44+
run: echo "PYTHON_VERSION=$(cat .python-version)" >> $GITHUB_ENV
45+
- name: Install uv
46+
uses: astral-sh/setup-uv@v5
6547
with:
66-
path: |
67-
.venv
68-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
69-
48+
enable-cache: true
49+
cache-dependency-glob: "uv.lock"
7050
- name: Set up Python ${{ env.PYTHON_VERSION }}
71-
uses: actions/setup-python@v6
72-
with:
73-
python-version: ${{ env.PYTHON_VERSION }}
74-
- name: Run core tests
51+
run: uv python install ${{ env.PYTHON_VERSION }}
52+
- name: Install dependencies and run core tests
7553
run: |
76-
python${{ env.PYTHON_VERSION }} -m venv .venv
77-
source .venv/bin/activate
7854
sudo apt-get update && sudo apt-get install --yes poppler-utils libreoffice
79-
make install-test
55+
uv sync --group test --frozen
8056
make install-pandoc
57+
make install-nltk-models
8158
sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
8259
sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
8360
tesseract --version
8461
make test
8562
make check-coverage
8663
8764
changelog:
88-
runs-on: ubuntu-latest
65+
runs-on: opensource-linux-8core
8966
steps:
9067
- uses: actions/checkout@v5
9168
- if: github.ref != 'refs/heads/main'
@@ -101,23 +78,20 @@ jobs:
10178
uses: dangoslen/changelog-enforcer@v3
10279

10380
# TODO - figure out best practice for caching docker images
104-
# (Using the virtualenv to get pytest)
10581
test_dockerfile:
106-
runs-on: ubuntu-latest
107-
needs: [setup, lint]
82+
runs-on: opensource-linux-8core
83+
needs: lint
10884
steps:
10985
- uses: actions/checkout@v5
110-
- uses: actions/cache@v5
111-
id: virtualenv-cache
86+
- name: Read Python version from .python-version
87+
run: echo "PYTHON_VERSION=$(cat .python-version)" >> $GITHUB_ENV
88+
- name: Install uv
89+
uses: astral-sh/setup-uv@v5
11290
with:
113-
python-version: ${{ env.PYTHON_VERSION }}
114-
path: |
115-
.venv
116-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
91+
enable-cache: true
92+
cache-dependency-glob: "uv.lock"
11793
- name: Set up Python ${{ env.PYTHON_VERSION }}
118-
uses: actions/setup-python@v6
119-
with:
120-
python-version: ${{ env.PYTHON_VERSION }}
94+
run: uv python install ${{ env.PYTHON_VERSION }}
12195
- name: Free up disk space
12296
run: |
12397
# Clear some space (https://github.com/actions/runner-images/issues/2840)
@@ -140,14 +114,6 @@ jobs:
140114
df -h
141115
- name: Test Dockerfile
142116
run: |
143-
python${{ env.PYTHON_VERSION }} -m venv .venv
144-
source .venv/bin/activate
145-
make install-test
117+
uv sync --group test --frozen
146118
make docker-build
147119
make docker-test
148-
# - name: Scan image
149-
# uses: anchore/scan-action@v3
150-
# with:
151-
# image: "pipeline-family-${{ env.PIPELINE_FAMILY }}-dev"
152-
# # NOTE(robinson) - revert this to medium when we bump libreoffice
153-
# severity-cutoff: critical

.github/workflows/claude.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ jobs:
1717
(github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
1818
(github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
1919
(github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
20-
runs-on: ubuntu-latest
20+
runs-on: opensource-linux-8core
2121
permissions:
2222
contents: read
2323
pull-requests: read

.github/workflows/docker-publish.yml

Lines changed: 26 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -10,32 +10,10 @@ env:
1010
DOCKER_BUILD_REPOSITORY: quay.io/unstructured-io/build-unstructured-api
1111
PACKAGE: "unstructured-api"
1212
PIPELINE_FAMILY: "general"
13-
PIP_VERSION: "25.1.1"
14-
PYTHON_VERSION: "3.12"
1513

1614
jobs:
17-
setup:
18-
runs-on: ubuntu-latest
19-
steps:
20-
- uses: actions/checkout@v5
21-
- uses: actions/cache@v5
22-
id: virtualenv-cache
23-
with:
24-
path: |
25-
.venv
26-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
27-
- name: Set up Python ${{ env.PYTHON_VERSION }}
28-
uses: actions/setup-python@v6
29-
with:
30-
python-version: ${{ env.PYTHON_VERSION }}
31-
- name: Setup virtual environment (no cache hit)
32-
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
33-
run: |
34-
python${{ env.PYTHON_VERSION }} -m venv .venv
35-
source .venv/bin/activate
36-
make install-ci
3715
set-short-sha:
38-
runs-on: ubuntu-latest
16+
runs-on: opensource-linux-8core
3917
outputs:
4018
short_sha: ${{ steps.set_short_sha.outputs.short_sha }}
4119
steps:
@@ -45,21 +23,21 @@ jobs:
4523
build-images:
4624
strategy:
4725
matrix:
48-
#arch: ["arm64", "amd64"]
49-
# NOTE(luke): temporary disable arm64 since its failing the smoke test
50-
arch: ["amd64"]
51-
runs-on: ubuntu-latest
52-
needs: [setup, set-short-sha]
26+
arch: ["arm64", "amd64"]
27+
runs-on: ${{ matrix.arch == 'arm64' && 'opensource-linux-arm64-4core' || 'opensource-linux-8core' }}
28+
needs: set-short-sha
5329
env:
5430
SHORT_SHA: ${{ needs.set-short-sha.outputs.short_sha }}
5531
DOCKER_PLATFORM: linux/${{ matrix.arch }}
5632
steps:
5733
- name: Set up Docker Buildx
5834
uses: docker/setup-buildx-action@v3
5935
with:
60-
driver: ${{ matrix.arch == 'amd64' && 'docker' || 'docker-container' }}
36+
driver: docker
6137
- name: Checkout code
6238
uses: actions/checkout@v5
39+
- name: Read Python version from .python-version
40+
run: echo "PYTHON_VERSION=$(cat .python-version)" >> $GITHUB_ENV
6341
- name: Login to Quay.io
6442
uses: docker/login-action@v3
6543
with:
@@ -90,25 +68,23 @@ jobs:
9068
run: |
9169
DOCKER_BUILDKIT=1 docker buildx build --load -f Dockerfile \
9270
--platform=$DOCKER_PLATFORM \
93-
--build-arg PIP_VERSION=$PIP_VERSION \
9471
--build-arg BUILDKIT_INLINE_CACHE=1 \
9572
--build-arg PIPELINE_PACKAGE=${{ env.PIPELINE_FAMILY }} \
9673
--provenance=false \
9774
--progress plain \
9875
--cache-from $DOCKER_BUILD_REPOSITORY:${{ matrix.arch }} \
9976
-t $DOCKER_BUILD_REPOSITORY:${{ matrix.arch }}-$SHORT_SHA .
100-
- name: Set virtualenv cache
101-
uses: actions/cache@v5
102-
id: virtualenv-cache
77+
- name: Install uv
78+
uses: astral-sh/setup-uv@v5
10379
with:
104-
path: |
105-
.venv
106-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
107-
- name: Set up QEMU
108-
uses: docker/setup-qemu-action@v3
80+
enable-cache: true
81+
cache-dependency-glob: "uv.lock"
82+
- name: Set up Python ${{ env.PYTHON_VERSION }}
83+
run: uv python install ${{ env.PYTHON_VERSION }}
84+
- name: Install test dependencies
85+
run: uv sync --group test --frozen
10986
- name: Test image
11087
run: |
111-
source .venv/bin/activate
11288
export DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:${{ matrix.arch }}-$SHORT_SHA"
11389
if [ "$DOCKER_PLATFORM" == "linux/arm64" ]; then
11490
SKIP_INFERENCE_TESTS=true make docker-test
@@ -120,8 +96,8 @@ jobs:
12096
# write to the build repository to cache for the publish-images job
12197
docker push $DOCKER_BUILD_REPOSITORY:${{ matrix.arch }}-$SHORT_SHA
12298
publish-images:
123-
runs-on: ubuntu-latest
124-
needs: [setup, set-short-sha, build-images]
99+
runs-on: opensource-linux-8core
100+
needs: [set-short-sha, build-images]
125101
env:
126102
SHORT_SHA: ${{ needs.set-short-sha.outputs.short_sha }}
127103
steps:
@@ -138,25 +114,22 @@ jobs:
138114
- name: Pull AMD image
139115
run: |
140116
docker pull $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA
141-
# - name: Pull ARM image
142-
# run: |
143-
# docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA
117+
- name: Pull ARM image
118+
run: |
119+
docker pull $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA
144120
- name: Push AMD and ARM tags
145121
run: |
146122
# these are used to construct the final manifest but also cache-from in subsequent runs
147123
docker tag $DOCKER_BUILD_REPOSITORY:amd64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64
148124
docker push $DOCKER_BUILD_REPOSITORY:amd64
149-
#docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64
150-
#docker push $DOCKER_BUILD_REPOSITORY:arm64
125+
docker tag $DOCKER_BUILD_REPOSITORY:arm64-$SHORT_SHA $DOCKER_BUILD_REPOSITORY:arm64
126+
docker push $DOCKER_BUILD_REPOSITORY:arm64
151127
- name: Push multiarch manifest
152128
run: |
153-
#docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
154-
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64
129+
docker manifest create ${DOCKER_REPOSITORY}:latest $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
155130
docker manifest push $DOCKER_REPOSITORY:latest
156-
#docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
157-
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64
131+
docker manifest create ${DOCKER_REPOSITORY}:$SHORT_SHA $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
158132
docker manifest push $DOCKER_REPOSITORY:$SHORT_SHA
159-
VERSION=$(grep -m1 version preprocessing-pipeline-family.yaml | cut -d ' ' -f2)
160-
#docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
161-
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64
133+
VERSION=$(grep -oP '(?<=__version__ = ")[^"]+' prepline_general/api/__version__.py)
134+
docker manifest create ${DOCKER_REPOSITORY}:$VERSION $DOCKER_BUILD_REPOSITORY:amd64 $DOCKER_BUILD_REPOSITORY:arm64
162135
docker manifest push ${DOCKER_REPOSITORY}:$VERSION

.gitignore

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -81,9 +81,6 @@ target/
8181
profile_default/
8282
ipython_config.py
8383

84-
# pyenv
85-
.python-version
86-
8784
# pipenv
8885
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
8986
# However, in case of collaboration, if having platform-specific dependencies or dependencies
@@ -120,6 +117,9 @@ venv.bak/
120117
# mkdocs documentation
121118
/site
122119

120+
# ruff
121+
.ruff_cache/
122+
123123
# mypy
124124
.mypy_cache/
125125
.dmypy.json

.python-version

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
3.12

0 commit comments

Comments
 (0)