Skip to content

Commit f639abf

Browse files
committed
chore!: hanlp - drop Python 3.9 and use X|Y typing
1 parent 44426de commit f639abf

3 files changed

Lines changed: 8 additions & 13 deletions

File tree

integrations/hanlp/pyproject.toml

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,14 @@ name = "hanlp-haystack"
77
dynamic = ["version"]
88
description = 'An integration of Han Language Processing - HanLP as a ChineseDocumentSplitter component.'
99
readme = "README.md"
10-
requires-python = ">=3.9"
10+
requires-python = ">=3.10"
1111
license = "Apache-2.0"
1212
keywords = []
1313
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
1414
classifiers = [
1515
"License :: OSI Approved :: Apache Software License",
1616
"Development Status :: 4 - Beta",
1717
"Programming Language :: Python",
18-
"Programming Language :: Python :: 3.9",
1918
"Programming Language :: Python :: 3.10",
2019
"Programming Language :: Python :: 3.11",
2120
"Programming Language :: Python :: 3.12",
@@ -24,7 +23,7 @@ classifiers = [
2423
"Programming Language :: Python :: Implementation :: PyPy",
2524
]
2625
dependencies = [
27-
"haystack-ai>=2.13.1",
26+
"haystack-ai>=2.22.0",
2827
"hanlp>=2.1.1"
2928
]
3029

@@ -88,7 +87,6 @@ line-length = 120
8887
skip-string-normalization = true
8988

9089
[tool.ruff]
91-
target-version = "py39"
9290
line-length = 120
9391

9492
[tool.ruff.lint]
@@ -138,10 +136,6 @@ ignore = [
138136
"RUF001",
139137
"RUF002",
140138
]
141-
unfixable = [
142-
# Don't touch unused imports
143-
"F401",
144-
]
145139

146140
[tool.ruff.lint.isort]
147141
known-first-party = ["haystack_integrations"]

integrations/hanlp/src/haystack_integrations/components/preprocessors/hanlp/chinese_document_splitter.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
from collections.abc import Callable
56
from copy import deepcopy
6-
from typing import Any, Callable, Literal, Optional
7+
from typing import Any, Literal
78

89
from haystack import Document, component, logging
910
from haystack.core.serialization import default_from_dict, default_to_dict
@@ -59,7 +60,7 @@ def __init__(
5960
split_overlap: int = 200,
6061
split_threshold: int = 0,
6162
respect_sentence_boundary: bool = False,
62-
splitting_function: Optional[Callable] = None,
63+
splitting_function: Callable | None = None,
6364
granularity: Literal["coarse", "fine"] = "coarse",
6465
):
6566
"""
@@ -406,7 +407,7 @@ def _create_docs_from_splits(
406407
"""
407408
documents: list[Document] = []
408409

409-
for i, (txt, split_idx) in enumerate(zip(text_splits, splits_start_idxs)):
410+
for i, (txt, split_idx) in enumerate(zip(text_splits, splits_start_idxs, strict=True)):
410411
copied_meta = deepcopy(meta)
411412
copied_meta["page_number"] = splits_pages[i]
412413
copied_meta["split_id"] = i

integrations/hanlp/tests/test_chinese_document_splitter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def test_metadata_copied_to_split_documents(self):
123123
splitter.warm_up()
124124
result = splitter.run(documents=documents)
125125
assert len(result["documents"]) == 2
126-
for doc, split_doc in zip(documents, result["documents"]):
126+
for doc, split_doc in zip(documents, result["documents"], strict=True):
127127
assert doc.meta.items() <= split_doc.meta.items()
128128

129129
@pytest.mark.integration
@@ -136,7 +136,7 @@ def test_source_id_stored_in_metadata(self):
136136
splitter.warm_up()
137137
result = splitter.run(documents=documents)
138138
assert len(result["documents"]) == 2
139-
for doc, split_doc in zip(documents, result["documents"]):
139+
for doc, split_doc in zip(documents, result["documents"], strict=True):
140140
assert doc.id == split_doc.meta["source_id"]
141141

142142
@pytest.mark.integration

0 commit comments

Comments
 (0)