File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 2424 - name : Set up Python ${{ env.PYTHON_VERSION }}
2525 run : uv python install ${{ env.PYTHON_VERSION }}
2626 - name : Install lint dependencies
27- run : uv sync --only-group lint --frozen
27+ run : uv sync --only-group lint --locked
2828 - name : Lint
2929 run : make check
3030
5252 - name : Install dependencies and run core tests
5353 run : |
5454 sudo apt-get update && sudo apt-get install --yes poppler-utils libreoffice
55- uv sync --group test --frozen
55+ uv sync --group test --locked
5656 make install-pandoc
57- make install-nltk -models
57+ make install-nlp -models
5858 sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
5959 sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
6060 tesseract --version
@@ -114,6 +114,6 @@ jobs:
114114 df -h
115115 - name : Test Dockerfile
116116 run : |
117- uv sync --group test --frozen
117+ uv sync --group test --locked
118118 make docker-build
119119 make docker-test
Original file line number Diff line number Diff line change 8383 - name : Set up Python ${{ env.PYTHON_VERSION }}
8484 run : uv python install ${{ env.PYTHON_VERSION }}
8585 - name : Install test dependencies
86- run : uv sync --group test --frozen
86+ run : uv sync --group test --locked
8787 - name : Test image
8888 run : |
8989 export DOCKER_IMAGE="$DOCKER_BUILD_REPOSITORY:${{ matrix.arch }}-$SHORT_SHA"
Original file line number Diff line number Diff line change 1+ ## 0.1.2
2+ * Bump all packages (refresh uv.lock), pulling in `unstructured==0.22.12`, which replaces NLTK with spaCy
3+ * Replace `download_nltk_packages` calls with a spaCy model pre-download in the Makefile, Dockerfile, and CI
4+ * Switch `uv sync --frozen` to `uv sync --locked` across the Dockerfile, Makefile, and CI workflows
5+
16## 0.1.1
27* Switch arm64 Docker build runner from custom `opensource-linux-arm64-4core` to GitHub-hosted `ubuntu-24.04-arm`
38* Consolidate multiarch Docker manifest creation into a single `docker buildx imagetools create` call
Original file line number Diff line number Diff line change @@ -61,7 +61,7 @@ ENV UV_PROJECT_ENVIRONMENT="${HOME}/.local"
6161
6262COPY --chown=${NB_USER}:${NB_USER} pyproject.toml pyproject.toml
6363COPY --chown=${NB_USER}:${NB_USER} uv.lock uv.lock
64- RUN uv sync --no-dev --no-install-project --frozen
64+ RUN uv sync --no-dev --no-install-project --locked
6565
6666ARG PANDOC_VERSION="3.9"
6767RUN ARCH=$(uname -m) && \
@@ -71,7 +71,7 @@ RUN ARCH=$(uname -m) && \
7171 cp /tmp/pandoc-${PANDOC_VERSION}/bin/pandoc /home/${USER}/.local/bin/ && \
7272 rm -rf /tmp/pandoc*
7373
74- RUN ${PYTHON} -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages ()" && \
74+ RUN ${PYTHON} -c "from unstructured.nlp.tokenize import _load_spacy_model; _load_spacy_model ()" && \
7575 ${PYTHON} -c "from unstructured.partition.model_init import initialize; initialize()" && \
7676 ${PYTHON} -c "from unstructured_inference.models.tables import UnstructuredTableTransformerModel; model = UnstructuredTableTransformerModel(); model.initialize('microsoft/table-transformer-structure-recognition')"
7777
Original file line number Diff line number Diff line change @@ -13,23 +13,23 @@ help: Makefile
1313
1414# # install-base: installs minimum requirements to run the API
1515.PHONY : install-base
16- install-base : install-base-packages install-nltk -models
16+ install-base : install-base-packages install-nlp -models
1717
1818# # install: installs all test and dev requirements
1919.PHONY : install
2020install : install-base install-test
2121
2222.PHONY : install-base-packages
2323install-base-packages :
24- uv sync --no-dev --frozen
24+ uv sync --no-dev --locked
2525
2626.PHONY : install-test
2727install-test :
28- uv sync --group test --frozen
28+ uv sync --group test --locked
2929
30- .PHONY : install-nltk -models
31- install-nltk -models :
32- uv run python -c " from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages ()"
30+ .PHONY : install-nlp -models
31+ install-nlp -models :
32+ uv run python -c " from unstructured.nlp.tokenize import _load_spacy_model; _load_spacy_model ()"
3333
3434# # lock: regenerates uv.lock
3535.PHONY : lock
Original file line number Diff line number Diff line change 1- __version__ = "0.1.1 " # pragma: no cover
1+ __version__ = "0.1.2 " # pragma: no cover
Original file line number Diff line number Diff line change @@ -145,7 +145,7 @@ def partition_file_via_api(
145145 if not request_url :
146146 raise HTTPException (status_code = 500 , detail = "Parallel mode enabled but no url set!" )
147147
148- api_key = request .headers .get ("unstructured-api-key" , default = "" )
148+ api_key = request .headers .get ("unstructured-api-key" , "" )
149149 partition_kwargs ["starting_page_number" ] = (
150150 partition_kwargs .get ("starting_page_number" , 1 ) + page_offset
151151 )
Original file line number Diff line number Diff line change @@ -6,6 +6,7 @@ requires-python = ">=3.12"
66dependencies = [
77 " unstructured[all-docs] >=0.18.31, <1.0.0" ,
88 " fastapi >=0.128.4, <1.0.0" ,
9+ " python-multipart >=0.0.18" ,
910 " uvicorn >=0.40.0, <1.0.0" ,
1011 " backoff >=2.2.1, <3.0.0" ,
1112 " pandas >=3.0.0, <4.0.0" ,
You can’t perform that action at this time.
0 commit comments