Skip to content

Commit 90c992a

Browse files
authored
Merge branch 'main' into crag/claude
2 parents da65419 + 532a4fe commit 90c992a

9 files changed

Lines changed: 272 additions & 226 deletions

File tree

.github/workflows/ci.yml

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ on:
77
branches: [ main ]
88

99
env:
10-
PYTHON_VERSION: "3.10"
10+
PYTHON_VERSION: "3.12"
1111
PIPELINE_FAMILY: "general"
1212

1313
jobs:
@@ -20,7 +20,7 @@ jobs:
2020
with:
2121
path: |
2222
.venv
23-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/base.txt') }}
23+
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/*.txt') }}
2424
- name: Set up Python ${{ env.PYTHON_VERSION }}
2525
uses: actions/setup-python@v5
2626
with:
@@ -42,7 +42,7 @@ jobs:
4242
with:
4343
path: |
4444
.venv
45-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/base.txt') }}
45+
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/*.txt') }}
4646
- name: Lint
4747
run: |
4848
source .venv/bin/activate
@@ -56,7 +56,7 @@ jobs:
5656
uses: ludeeus/action-shellcheck@master
5757

5858
test:
59-
runs-on: ubuntu-latest-m
59+
runs-on: ubuntu-latest
6060
needs: [setup, lint]
6161
steps:
6262
- uses: actions/checkout@v4
@@ -65,16 +65,22 @@ jobs:
6565
with:
6666
path: |
6767
.venv
68-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/base.txt') }}
68+
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
69+
70+
- name: Set up Python ${{ env.PYTHON_VERSION }}
71+
uses: actions/setup-python@v5
72+
with:
73+
python-version: ${{ env.PYTHON_VERSION }}
6974
- name: Run core tests
7075
run: |
76+
python${{ env.PYTHON_VERSION }} -m venv .venv
7177
source .venv/bin/activate
7278
sudo apt-get update && sudo apt-get install --yes poppler-utils libreoffice
79+
make install-test
7380
make install-pandoc
7481
sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
7582
sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
7683
tesseract --version
77-
make install-nltk-models
7884
make test
7985
make check-coverage
8086
@@ -97,19 +103,26 @@ jobs:
97103
# TODO - figure out best practice for caching docker images
98104
# (Using the virtualenv to get pytest)
99105
test_dockerfile:
100-
runs-on: ubuntu-latest-m
106+
runs-on: ubuntu-latest
101107
needs: [setup, lint]
102108
steps:
103109
- uses: actions/checkout@v4
104110
- uses: actions/cache@v4
105111
id: virtualenv-cache
106112
with:
113+
python-version: ${{ env.PYTHON_VERSION }}
107114
path: |
108115
.venv
109-
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/base.txt') }}
116+
key: ci-venv-${{ env.PIPELINE_FAMILY }}-${{ hashFiles('requirements/test.txt') }}
117+
- name: Set up Python ${{ env.PYTHON_VERSION }}
118+
uses: actions/setup-python@v5
119+
with:
120+
python-version: ${{ env.PYTHON_VERSION }}
110121
- name: Test Dockerfile
111122
run: |
123+
python${{ env.PYTHON_VERSION }} -m venv .venv
112124
source .venv/bin/activate
125+
make install-test
113126
make docker-build
114127
make docker-test
115128
# - name: Scan image

.github/workflows/docker-publish.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ env:
1111
PACKAGE: "unstructured-api"
1212
PIPELINE_FAMILY: "general"
1313
PIP_VERSION: "25.1.1"
14-
PYTHON_VERSION: "3.10"
14+
PYTHON_VERSION: "3.12"
1515

1616
jobs:
1717
setup:
@@ -48,7 +48,7 @@ jobs:
4848
#arch: ["arm64", "amd64"]
4949
# NOTE(luke): temporarily disable arm64 since it's failing the smoke test
5050
arch: ["amd64"]
51-
runs-on: ubuntu-latest-m
51+
runs-on: ubuntu-latest
5252
needs: [setup, set-short-sha]
5353
env:
5454
SHORT_SHA: ${{ needs.set-short-sha.outputs.short_sha }}
@@ -103,7 +103,7 @@ jobs:
103103
# write to the build repository to cache for the publish-images job
104104
docker push $DOCKER_BUILD_REPOSITORY:${{ matrix.arch }}-$SHORT_SHA
105105
publish-images:
106-
runs-on: ubuntu-latest-m
106+
runs-on: ubuntu-latest
107107
needs: [setup, set-short-sha, build-images]
108108
env:
109109
SHORT_SHA: ${{ needs.set-short-sha.outputs.short_sha }}

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ run-web-app:
9595
## test: runs core tests
9696
.PHONY: test
9797
test:
98-
PYTHONPATH=. pytest -v test_${PIPELINE_PACKAGE} --cov=${PACKAGE_NAME} --cov-report term-missing
98+
PYTHONPATH=. pytest -n auto -v test_${PIPELINE_PACKAGE} --cov=${PACKAGE_NAME} --cov-report term-missing
9999

100100
# Setting a low bar here - need more tests!
101101
.PHONY: check-coverage

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,12 +289,13 @@ curl -X 'POST'
289289
* Using `pyenv` to manage virtualenvs is recommended
290290
* Mac install instructions. See [here](https://github.com/Unstructured-IO/community#mac--homebrew) for more detailed instructions.
291291
* `brew install pyenv-virtualenv`
292-
* `pyenv install 3.10.12`
292+
* `pyenv install 3.12`
293293
* Linux instructions are available [here](https://github.com/Unstructured-IO/community#linux).
294294

295295
* Create a virtualenv to work in and activate it, e.g. for one named `unstructured-api`:
296296

297-
`pyenv virtualenv 3.10.12 unstructured-api` <br />
297+
`pyenv virtualenv 3.12 unstructured-api` <br />
298299
`pyenv activate unstructured-api`
299300

300301
See the [Unstructured Quick Start](https://github.com/Unstructured-IO/unstructured#eight_pointed_black_star-quick-start) for the many OS dependencies that are required, if the ability to process all file types is desired.

requirements/base.in

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
-c constraints.in
22
unstructured[all-docs]
3-
# Pinning click due to a unicode issue in black
4-
# can remove after black drops support for Python 3.6
5-
# ref: https://github.com/psf/black/issues/2964
6-
click==8.2.1
3+
click
74
fastapi
85
uvicorn
96
ratelimit

0 commit comments

Comments
 (0)