diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ba86c5ee..0a52fb18 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9"]
+        python-version: ["3.10", "3.12", "3.14"]
 
     steps:
       - uses: actions/checkout@v5
@@ -24,15 +24,23 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install -r requirements.txt
-          python -m pip install -e .[all]
+          python -m pip install -e . --group=test
 
       - name: Run tests
-        run: pytest -ra --cov=tika
+        run: pytest -ra --cov
 
-      - name: Upload coverage to Coveralls
-        if: success()
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          coveralls
+      - name: Coveralls parallel
+        uses: coverallsapp/github-action@v2
+        with:
+          flag-name: coverage-python-${{ matrix.python-version }}
+          parallel: true
+
+  finish-coverage:
+    needs: test
+    if: ${{ always() }}
+    runs-on: ubuntu-slim
+    steps:
+      - name: Coveralls finished
+        uses: coverallsapp/github-action@v2
+        with:
+          parallel-finished: true
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index f701990b..b1c81727 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -4,7 +4,7 @@ name: Generate and deploy documentation
 on:
   # Runs on pushes targeting the default branch
   push:
-    branches: ["main", "master", "add-automated-documentation"]
+    branches: ["master"]
 
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
@@ -29,11 +29,15 @@ jobs:
       url: ${{ steps.deployment.outputs.page_url }}
     runs-on: ubuntu-latest
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.14"
+      - run: python -m pip install --upgrade pip
+      - run: pip --version
       - name: Install dependencies
         run: |
-          pip install sphinx furo myst-parser
+          pip install . --group=docs
       - name: Sphinx APIDoc
         run: |
           sphinx-apidoc -f -o docs/source/ .
@@ -43,7 +47,7 @@ jobs:
       - name: Setup Pages
         uses: actions/configure-pages@v5
       - name: Upload artifact
-        uses: actions/upload-pages-artifact@v3
+        uses: actions/upload-pages-artifact@v4
         with:
           # Upload entire repository
           path: './docs/build/html'
diff --git a/.gitignore b/.gitignore
index 04e07aa5..f45b1642 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@
 .idea
 __pycache__/
 .coverage
+docs/build
diff --git a/MANIFEST.IN b/MANIFEST.IN
deleted file mode 100644
index 42eb4101..00000000
--- a/MANIFEST.IN
+++ /dev/null
@@ -1 +0,0 @@
-include LICENSE.txt
diff --git a/README.md b/README.md
index b23d52ea..c673452d 100755
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ library that makes Tika available using the
 [Tika REST Server](https://cwiki.apache.org/confluence/display/TIKA/TikaServer).
 
 This makes Apache Tika available as a Python library,
-installable via Setuptools, Pip and Easy Install.
+installable via pip.
 
 To use this library, you need to have Java 11+ installed on your
 system as tika-python starts up the Tika REST server in the
@@ -20,11 +20,6 @@ Installation (with pip)
 -----------------------
 1. `pip install tika`
 
-Installation (without pip)
---------------------------
-1. `python setup.py build`
-2. `python setup.py install`
-
 Airgap Environment Setup
 ------------------------
 To get this working in a disconnected environment, download a tika server file (both tika-server.jar and tika-server.jar.md5, which can be found [here](https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/)) and set the TIKA_SERVER_JAR environment variable to TIKA_SERVER_JAR="file:////tika-server-standard.jar" which successfully tells `python-tika` to "download" this file and move it to `/tmp/tika-server-standard.jar` and run as background process.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 52d5608f..807e089c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -5,14 +5,9 @@
 
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
-import os
-import sys
-
-# Add the parent directory of the documentation root to sys.path
-sys.path.insert(0, os.path.abspath("../.."))
 
 project = 'tika-python'
-copyright = '2024, Chris A. Mattmann'
+copyright = '2026, Chris A. Mattmann'
 author = 'Chris A. Mattmann'
 
 # -- General configuration ---------------------------------------------------
@@ -26,16 +21,13 @@
     "sphinx.ext.autosectionlabel",
     "sphinx.ext.todo",
     "sphinx.ext.duration",
-    "myst_parser"
+    "myst_parser",
 ]
 
-templates_path = ['_templates']
-exclude_patterns = ['tika.tests*']
-
-
+exclude_patterns = ['_build']
 
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
 html_theme = 'furo'
-html_static_path = ['_static']
+
diff --git a/docs/source/index.md b/docs/source/index.md
new file mode 100644
index 00000000..513d5bcc
--- /dev/null
+++ b/docs/source/index.md
@@ -0,0 +1,15 @@
+# Welcome to tika-python's documentation!
+
+```{toctree}
+:maxdepth: 7
+:caption: Contents
+readme.md
+tika.md
+```
+
+## Indices and tables
+
+- {ref}`genindex`
+- {ref}`modindex`
+- {ref}`search`
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
deleted file mode 100644
index dfc67b61..00000000
--- a/docs/source/index.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-.. tika-python documentation master file, created by
-   sphinx-quickstart on Sun Apr 14 20:07:31 2024.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Welcome to tika-python's documentation!
-=======================================
-
-.. toctree::
-   :maxdepth: 7
-   :caption: Contents:
-
-   readme
-   tika
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/docs/source/setup.rst b/docs/source/setup.rst
deleted file mode 100644
index 552eb49d..00000000
--- a/docs/source/setup.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-setup module
-============
-
-.. automodule:: setup
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/source/tika.tests.rst b/docs/source/tika.tests.rst
deleted file mode 100644
index 70f525c3..00000000
--- a/docs/source/tika.tests.rst
+++ /dev/null
@@ -1,77 +0,0 @@
-tika.tests package
-==================
-
-Submodules
-----------
-
-tika.tests.memory\_benchmark module
------------------------------------
-
-.. automodule:: tika.tests.memory_benchmark
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tika.tests.test\_benchmark module
----------------------------------
-
-.. automodule:: tika.tests.test_benchmark
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tika.tests.test\_from\_file\_service module
--------------------------------------------
-
-.. automodule:: tika.tests.test_from_file_service
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tika.tests.test\_ssl\_link module
----------------------------------
-
-.. automodule:: tika.tests.test_ssl_link
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tika.tests.test\_tika module
-----------------------------
-
-.. automodule:: tika.tests.test_tika
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tika.tests.tests\_params module
--------------------------------
-
-.. automodule:: tika.tests.tests_params
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tika.tests.tests\_unpack module
--------------------------------
-
-.. automodule:: tika.tests.tests_unpack
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tika.tests.utils module
------------------------
-
-.. automodule:: tika.tests.utils
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: tika.tests
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..649e4a0c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,64 @@
+[build-system]
+build-backend = "setuptools.build_meta"
+requires = [ "setuptools>=77.0.3" ]
+
+[project]
+name = "tika"
+description = "Apache Tika Python library"
+readme = "README.md"
+keywords = [ "tika", "digital", "babel fish", "apache" ]
+license = "Apache-2.0"
+authors = [ { name = "Chris Mattmann", email = "chris.a.mattmann@jpl.nasa.gov" } ]
+requires-python = ">=3.10"
+classifiers = [
+  "Development Status :: 3 - Alpha",
+  "Environment :: Console",
+  "Intended Audience :: Developers",
+  "Intended Audience :: Information Technology",
+  "Intended Audience :: Science/Research",
+  "Operating System :: OS Independent",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3 :: Only",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+  "Programming Language :: Python :: 3.14",
+  "Topic :: Database :: Front-Ends",
+  "Topic :: Scientific/Engineering",
+  "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dynamic = [ "version" ]
+dependencies = [
+  "beautifulsoup4==4.13.3",
+  "requests",
+]
+
+[dependency-groups]
+test = [
+  "memory-profiler",
+  "pytest-benchmark",
+  "pytest-cov",
+]
+docs = [
+  "furo",
+  "myst-parser",
+  "sphinx",
+]
+
+[project.urls]
+homepage = "http://github.com/chrismattmann/tika-python"
+repository = "http://github.com/chrismattmann/tika-python.git"
+
+[project.scripts]
+tika-python = "tika.tika:main"
+
+[tool.setuptools]
+packages.find.include = [ "tika*" ]
+
+[tool.setuptools.dynamic]
+version = {attr = "tika.__version__"}
+
+[tool.coverage.run]
+source = [ "tika" ]
+branch = true
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 8f52d2e5..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-setuptools
-requests
-pyyaml==5.4.1
-coveralls
-pytest-cov
-memory_profiler
-beautifulsoup4==4.13.3
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 6695db28..00000000
--- a/setup.py
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# $Id$
-
-import os.path
-from io import open
-
-import tika
-
-try:
-    from ez_setup import use_setuptools
-
-    use_setuptools()
-except ImportError:
-    pass
-
-from setuptools import setup, find_packages, find_namespace_packages
-
-version = tika.__version__
-
-with open(os.path.join(os.path.dirname(__file__), 'README.md'), encoding='utf-8') as f:
-    _descr = f.read()
-
-_keywords = 'tika digital babel fish apache'
-_classifiers = [
-    'Development Status :: 3 - Alpha',
-    'Environment :: Console',
-    'Intended Audience :: Developers',
-    'Intended Audience :: Information Technology',
-    'Intended Audience :: Science/Research',
-    'License :: OSI Approved :: Apache Software License',
-    'Operating System :: OS Independent',
-    'Programming Language :: Python',
-    'Topic :: Database :: Front-Ends',
-    'Topic :: Scientific/Engineering',
-    'Topic :: Software Development :: Libraries :: Python Modules',
-]
-
-
-def read(*rnames):
-    return open(os.path.join(os.path.dirname(__file__), *rnames)).read()
-
-
-long_description = _descr
-
-extras_require = {
-    'tests': [
-        'memory-profiler>=0.57.0',
-        'pytest-benchmark>=3.2.2'
-    ],
-    'all': [
-    ]
-}
-
-for reqs in extras_require.values():
-    extras_require['all'].extend(reqs)
-
-setup(
-    name='tika',
-    version=version,
-    description='Apache Tika Python library',
-    long_description_content_type='text/markdown',
-    long_description=long_description,
-    classifiers=_classifiers,
-    keywords=_keywords,
-    author='Chris Mattmann',
-    author_email='chris.a.mattmann@jpl.nasa.gov',
-    url='http://github.com/chrismattmann/tika-python',
-    download_url='http://github.com/chrismattmann/tika-python',
-    license='Apache License version 2 ("ALv2")',
-    packages=find_packages(exclude=['ez_setup']) + find_namespace_packages(include=['tika.tests']),
-    include_package_data=True,
-    zip_safe=True,
-    test_suite='tika.tests',
-    entry_points={
-        'console_scripts': [
-            'tika-python = tika.tika:main'
-        ],
-    },
-    package_data={
-        # And include any *.conf files found in the 'conf' subdirectory
-        # for the package
-    },
-    install_requires=[
-        'setuptools>=40.1',
-        'requests'
-    ],
-    extras_require=extras_require,
-)