Skip to content

Commit 0d7de1c

Browse files
committed
chore: add Dockerfile, Makefile, and .gitignore; implement initial tests and update README
1 parent f8a67fd commit 0d7de1c

9 files changed

Lines changed: 326 additions & 1 deletion

File tree

confluence-updater/.gitignore

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
.openapi-generator/
2+
3+
# Byte-compiled / optimized / DLL files
4+
__pycache__/
5+
*.py[cod]
6+
*$py.class
7+
8+
# C extensions
9+
*.so
10+
11+
# Distribution / packaging
12+
.Python
13+
build/
14+
develop-eggs/
15+
dist/
16+
downloads/
17+
eggs/
18+
.eggs/
19+
lib/
20+
lib64/
21+
parts/
22+
sdist/
23+
var/
24+
wheels/
25+
share/python-wheels/
26+
*.egg-info/
27+
.installed.cfg
28+
*.egg
29+
MANIFEST
30+
31+
# PyInstaller
32+
# Usually these files are written by a python script from a template
33+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
34+
*.manifest
35+
*.spec
36+
37+
# Installer logs
38+
pip-log.txt
39+
pip-delete-this-directory.txt
40+
41+
# Unit test / coverage reports
42+
htmlcov/
43+
.tox/
44+
.nox/
45+
.coverage
46+
.coverage.*
47+
.cache
48+
nosetests.xml
49+
coverage.xml
50+
*.cover
51+
*.py,cover
52+
.hypothesis/
53+
.pytest_cache/
54+
cover/
55+
56+
# Translations
57+
*.mo
58+
*.pot
59+
60+
# Django stuff:
61+
*.log
62+
local_settings.py
63+
db.sqlite3
64+
db.sqlite3-journal
65+
66+
# Flask stuff:
67+
instance/
68+
.webassets-cache
69+
70+
# Scrapy stuff:
71+
.scrapy
72+
73+
# Sphinx documentation
74+
docs/_build/
75+
76+
# PyBuilder
77+
.pybuilder/
78+
target/
79+
80+
# Jupyter Notebook
81+
.ipynb_checkpoints
82+
83+
# IPython
84+
profile_default/
85+
ipython_config.py
86+
87+
# pyenv
88+
# For a library or package, you might want to ignore these files since the code is
89+
# intended to run in multiple environments; otherwise, check them in:
90+
# .python-version
91+
92+
# pipenv
93+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
95+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
96+
# install all needed dependencies.
97+
#Pipfile.lock
98+
99+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
100+
__pypackages__/
101+
102+
# Celery stuff
103+
celerybeat-schedule
104+
celerybeat.pid
105+
106+
# SageMath parsed files
107+
*.sage.py
108+
109+
# Environments
110+
.env
111+
.venv
112+
env/
113+
venv/
114+
ENV/
115+
env.bak/
116+
venv.bak/
117+
118+
# Spyder project settings
119+
.spyderproject
120+
.spyproject
121+
122+
# Rope project settings
123+
.ropeproject
124+
125+
# mkdocs documentation
126+
/site
127+
128+
# mypy
129+
.mypy_cache/
130+
.dmypy.json
131+
dmypy.json
132+
133+
# Pyre type checker
134+
.pyre/
135+
136+
# pytype static type analyzer
137+
.pytype/
138+
139+
# Cython debug symbols
140+
cython_debug/

confluence-updater/Dockerfile

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Build stage: installs the OS packages, creates the virtualenv that hosts
# Poetry, and resolves the Python dependencies into it.
FROM --platform=linux/amd64 python:3.11.7-bookworm AS build

# dev=1 additionally installs the Poetry "dev" dependency group.
ARG dev=0
ENV POETRY_VIRTUALENVS_PATH=/app/document-extractor/.venv \
    POETRY_VERSION=1.8.3

# OS packages first, then a venv with a pinned Poetry inside it.
RUN DEBIAN_FRONTEND=noninteractive apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        make \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-deu \
        tesseract-ocr-eng \
    && python3 -m venv "${POETRY_VIRTUALENVS_PATH}" \
    && "${POETRY_VIRTUALENVS_PATH}/bin/pip" install "poetry==${POETRY_VERSION}"
# Put the venv's executables (including poetry) ahead of the system ones.
ENV PATH="${POETRY_VIRTUALENVS_PATH}/bin:$PATH"

# The path dependency must be present before `poetry install` runs.
COPY rag-core-library/extractor-api-lib /app/rag-core-library/extractor-api-lib

WORKDIR /app/document-extractor
# Only the dependency manifests are copied here, so the install layer below
# is reused while these two files stay unchanged.
COPY document-extractor/pyproject.toml document-extractor/poetry.lock ./

# Log directory and log file with owner-only permissions; the runtime stage
# copies this directory over.
RUN mkdir log \
    && chmod 700 log \
    && touch /app/document-extractor/log/logfile.log \
    && chmod 600 /app/document-extractor/log/logfile.log

# Install dependencies without creating another virtualenv; the "dev" group
# is appended to the argument list only when dev=1.
RUN poetry config virtualenvs.create false \
    && set -- --no-interaction --no-ansi --no-root \
    && if [ "$dev" = "1" ]; then set -- "$@" --with dev; fi \
    && poetry install "$@"
32+
33+
# Runtime stage: starts from the same base image, copies the virtualenv and
# the binaries/libraries installed in the build stage, adds the application
# sources and runs as an unprivileged user.
# NOTE(review): this Dockerfile lives under confluence-updater/ but every path
# refers to document-extractor — presumably copied from that service; confirm.
FROM --platform=linux/amd64 python:3.11.7-bookworm
# dev=0 (default) hardens the image; any other value keeps shells available.
ARG dev=0

# Fixed numeric UID so the runtime can verify the non-root user.
RUN adduser --disabled-password --gecos "" --uid 65532 nonroot

ENV POETRY_VIRTUALENVS_PATH=/app/document-extractor/.venv
COPY --from=build --chown=nonroot:nonroot ${POETRY_VIRTUALENVS_PATH} ${POETRY_VIRTUALENVS_PATH}
# Executables, shared libraries and tesseract language data from the build stage.
COPY --from=build /usr/local/bin/ /usr/local/bin/
COPY --from=build /usr/bin/ /usr/bin/
COPY --from=build /usr/local/lib/ /usr/local/lib/
COPY --from=build /usr/lib/x86_64-linux-gnu/ /usr/lib/x86_64-linux-gnu/
COPY --from=build /usr/share/tesseract-ocr/5/tessdata /usr/share/tesseract-ocr/5/tessdata

COPY --chown=nonroot:nonroot rag-core-library/extractor-api-lib /app/rag-core-library/extractor-api-lib

WORKDIR /app/document-extractor

COPY --chown=nonroot:nonroot document-extractor .

# cleanup
RUN apt-get clean autoclean \
    && apt-get autoremove --yes

# Final hardening / dev setup. RUN uses the default `/bin/sh -c`, so the
# directories are listed explicitly instead of relying on bash brace expansion,
# and every command is terminated with `;` so that `else` starts a new branch
# instead of being passed to `rm` as an argument.
#   dev = 0 : delete every path listed in /etc/shells, then the package-manager
#             state directories.
#   otherwise: record the virtualenv location in /etc/environment. PATH and
#             POETRY_VIRTUALENVS_PATH themselves are set through ENV; an
#             `export` inside RUN would not outlive this instruction.
# This has to stay the last RUN instruction: with dev=0 no shell is left to
# execute a later one.
# NOTE(review): /var/lib/cache and /var/lib/log are kept from the original;
# /var/cache and /var/log may have been intended — confirm.
RUN if [ "$dev" = "0" ]; then \
        while read -r shell; do rm -f "$shell"; done < /etc/shells; \
        rm -rf /var/lib/apt/ /var/lib/dpkg/ /var/lib/cache/ /var/lib/log/; \
    else \
        echo "POETRY_VIRTUALENVS_PATH=/app/document-extractor/.venv" >> /etc/environment; \
    fi

USER nonroot
# Log directory prepared in the build stage (owner-only permissions).
COPY --from=build --chown=nonroot:nonroot /app/document-extractor/log /app/document-extractor/log

ENV PATH="${POETRY_VIRTUALENVS_PATH}/bin:${PATH}"

confluence-updater/Makefile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Developer tasks; every command runs through `poetry run`.
.PHONY: lint coverage test

# Static analysis with flake8 over the current directory.
lint:
	poetry run flake8 .

# Run the test suite located in tests/.
test:
	poetry run python -m pytest tests

# Run the tests under coverage, then print a terminal report including missing
# lines and write an HTML report.
# The omit patterns are passed as ONE quoted, comma-separated value: --omit
# expects a single comma-separated list, and the quotes keep the shell from
# expanding the globs before coverage sees them.
coverage:
	poetry run coverage run --omit='*.pyc,*__init__.py' --source src/rag_core -m pytest tests
	poetry run coverage report -m
	poetry run coverage html

confluence-updater/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Confluence Updater
2+
3+
4+
# Requirements
5+
6+
7+
# Deployment
8+
A detailed explanation of the deployment can be found in the [Readme](../README.md) of the project.
9+
The Helm chart used for the deployment can be found [here](../helm-chart/charts/admin-backend/).
10+
11+

confluence-updater/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from extractor_api_lib.main import app as perfect_extractor_app # noqa: F401

confluence-updater/pyproject.toml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
[tool.flake8]
2+
exclude= [".eggs", "./src/openapi_server/models/*", ".git", ".hg", ".mypy_cache", ".tox", ".venv", ".devcontainer", "venv", "_build", "buck-out", "build", "dist"]
3+
statistics = true
4+
show-source = false
5+
max-complexity = 10
6+
max-annotations-complexity = 3
7+
# Key spelled as flake8-docstrings names the option (docstring-convention).
docstring-convention = 'numpy'
8+
max-line-length = 120
9+
ignore = ["E203", "W503", "E704"]
10+
inline-quotes = '"'
11+
docstring-quotes = '"""'
12+
multiline-quotes = '"""'
13+
dictionaries = ["en_US", "python", "technical", "pandas"]
14+
ban-relative-imports = true
15+
16+
[tool.black]
17+
line-length = 120
18+
exclude = """
19+
/(
20+
| .eggs
21+
| .git
22+
| .hg
23+
| .mypy_cache
24+
| .nox
25+
| .pants.d
26+
| .tox
27+
| .venv
28+
| _build
29+
| buck-out
30+
| build
31+
| dist
32+
| node_modules
33+
| venv
34+
)/
35+
"""
36+
37+
[tool.isort]
38+
profile = "black"
39+
skip = ['.eggs', '.git', '.hg', '.mypy_cache', '.nox', '.pants.d', '.tox', '.venv', '_build', 'buck-out', 'build', 'dist', 'node_modules', 'venv']
40+
skip_gitignore = true
41+
42+
[tool.pylint]
43+
max-line-length = 120
44+
45+
[tool.poetry]
46+
name = "pdfextractor_server"
47+
version = "0.0.0"
48+
description = "Extracts the content of pdf documents."
49+
authors = ["STACKIT Data and AI Consulting <data-ai-consulting@stackit.cloud>"]
50+
readme = "README.md"
51+
52+
[tool.poetry.dependencies]
53+
python = ">=3.11,<3.12"
54+
extractor-api-lib = {path = "../rag-core-library/extractor-api-lib", develop = true}
55+
56+
[tool.poetry.group.dev.dependencies]
57+
flake8 = "^7.0.0"
58+
pytest = "^8.0.1"
59+
black = "^24.2.0"
60+
flake8-black = "^0.3.6"
61+
flake8-pyproject = "^1.2.3"
62+
coverage = "^7.5.4"
63+
flake8-quotes = "^3.4.0"
64+
flake8-return = "^1.2.0"
65+
flake8-annotations-complexity = "^0.0.8"
66+
flake8-bandit = "^4.1.1"
67+
flake8-bugbear = "^24.8.19"
68+
flake8-builtins = "^2.5.0"
69+
flake8-comprehensions = "^3.15.0"
70+
flake8-eradicate = "^1.5.0"
71+
flake8-expression-complexity = "^0.0.11"
72+
flake8-pytest-style = "^2.0.0"
73+
pep8-naming = "^0.14.1"
74+
flake8-eol = "^0.0.8"
75+
flake8-exceptions = "^0.0.1a0"
76+
flake8-simplify = "^0.21.0"
77+
flake8-wot = "^0.2.0"
78+
flake8-function-order = "^0.0.5"
79+
flake8-tidy-imports = "^4.10.0"
80+
# flake8-logging-format = "^2024.24.12"
81+
# flake8-docstrings = "^1.7.0"
82+
83+
[tool.poetry.group.tests.dependencies]
84+
httpx = "^0.26.0"
85+
86+
[build-system]
87+
requires = ["poetry-core"]
88+
build-backend = "poetry.core.masonry.api"

confluence-updater/tests/__init__.py

Whitespace-only changes.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def test_dummy() -> None:
    """Placeholder test that prints a message and always passes."""
    print("Dummy test.")
    # The colon is required for a targeted suppression; without it flake8
    # treats the comment as a blanket `noqa` for the whole line.
    assert True  # noqa: S101

rag-core-library

Submodule rag-core-library updated 31 files

0 commit comments

Comments
 (0)