deepset-ai · anakin87 · Jan 12, 2026 · Jan 12, 2026
@@ -7,15 +7,14 @@ name = "hanlp-haystack"
 dynamic = ["version"]
 description = 'An integration of Han Language Processing - HanLP as a ChineseDocumentSplitter component.'
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 license = "Apache-2.0"
 keywords = []
 authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
 classifiers = [
   "License :: OSI Approved :: Apache Software License",
   "Development Status :: 4 - Beta",
   "Programming Language :: Python",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
@@ -24,7 +23,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
 dependencies = [
-  "haystack-ai>=2.13.1",
+  "haystack-ai>=2.22.0",
   "hanlp>=2.1.1"
 ]
 
@@ -88,7 +87,6 @@ line-length = 120
 skip-string-normalization = true
 
 [tool.ruff]
-target-version = "py39"
 line-length = 120
 
 [tool.ruff.lint]
@@ -138,10 +136,6 @@ ignore = [
   "RUF001",
   "RUF002",
 ]
-unfixable = [
-  # Don't touch unused imports
-  "F401",
-]
 
 [tool.ruff.lint.isort]
 known-first-party = ["haystack_integrations"]

@@ -2,8 +2,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from collections.abc import Callable
 from copy import deepcopy
-from typing import Any, Callable, Literal, Optional
+from typing import Any, Literal
 
 from haystack import Document, component, logging
 from haystack.core.serialization import default_from_dict, default_to_dict
@@ -59,7 +60,7 @@ def __init__(
         split_overlap: int = 200,
         split_threshold: int = 0,
         respect_sentence_boundary: bool = False,
-        splitting_function: Optional[Callable] = None,
+        splitting_function: Callable | None = None,
         granularity: Literal["coarse", "fine"] = "coarse",
     ):
         """
@@ -406,7 +407,7 @@ def _create_docs_from_splits(
         """
         documents: list[Document] = []
 
-        for i, (txt, split_idx) in enumerate(zip(text_splits, splits_start_idxs)):
+        for i, (txt, split_idx) in enumerate(zip(text_splits, splits_start_idxs, strict=True)):
             copied_meta = deepcopy(meta)
             copied_meta["page_number"] = splits_pages[i]
             copied_meta["split_id"] = i

@@ -123,7 +123,7 @@ def test_metadata_copied_to_split_documents(self):
         splitter.warm_up()
         result = splitter.run(documents=documents)
         assert len(result["documents"]) == 2
-        for doc, split_doc in zip(documents, result["documents"]):
+        for doc, split_doc in zip(documents, result["documents"], strict=True):
             assert doc.meta.items() <= split_doc.meta.items()
 
     @pytest.mark.integration
@@ -136,7 +136,7 @@ def test_source_id_stored_in_metadata(self):
         splitter.warm_up()
         result = splitter.run(documents=documents)
         assert len(result["documents"]) == 2
-        for doc, split_doc in zip(documents, result["documents"]):
+        for doc, split_doc in zip(documents, result["documents"], strict=True):
             assert doc.id == split_doc.meta["source_id"]
 
     @pytest.mark.integration