Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions integrations/hanlp/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@ name = "hanlp-haystack"
dynamic = ["version"]
description = 'An integration of Han Language Processing - HanLP as a ChineseDocumentSplitter component.'
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = "Apache-2.0"
keywords = []
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
classifiers = [
"License :: OSI Approved :: Apache Software License",
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
Expand All @@ -24,7 +23,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"haystack-ai>=2.13.1",
"haystack-ai>=2.22.0",
"hanlp>=2.1.1"
]

Expand Down Expand Up @@ -88,7 +87,6 @@ line-length = 120
skip-string-normalization = true

[tool.ruff]
target-version = "py39"
line-length = 120

[tool.ruff.lint]
Expand Down Expand Up @@ -138,10 +136,6 @@ ignore = [
"RUF001",
"RUF002",
]
unfixable = [
# Don't touch unused imports
"F401",
]

[tool.ruff.lint.isort]
known-first-party = ["haystack_integrations"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
#
# SPDX-License-Identifier: Apache-2.0

from collections.abc import Callable
from copy import deepcopy
from typing import Any, Callable, Literal, Optional
from typing import Any, Literal

from haystack import Document, component, logging
from haystack.core.serialization import default_from_dict, default_to_dict
Expand Down Expand Up @@ -59,7 +60,7 @@ def __init__(
split_overlap: int = 200,
split_threshold: int = 0,
respect_sentence_boundary: bool = False,
splitting_function: Optional[Callable] = None,
splitting_function: Callable | None = None,
granularity: Literal["coarse", "fine"] = "coarse",
):
"""
Expand Down Expand Up @@ -406,7 +407,7 @@ def _create_docs_from_splits(
"""
documents: list[Document] = []

for i, (txt, split_idx) in enumerate(zip(text_splits, splits_start_idxs)):
for i, (txt, split_idx) in enumerate(zip(text_splits, splits_start_idxs, strict=True)):
copied_meta = deepcopy(meta)
copied_meta["page_number"] = splits_pages[i]
copied_meta["split_id"] = i
Expand Down
4 changes: 2 additions & 2 deletions integrations/hanlp/tests/test_chinese_document_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def test_metadata_copied_to_split_documents(self):
splitter.warm_up()
result = splitter.run(documents=documents)
assert len(result["documents"]) == 2
for doc, split_doc in zip(documents, result["documents"]):
for doc, split_doc in zip(documents, result["documents"], strict=True):
assert doc.meta.items() <= split_doc.meta.items()

@pytest.mark.integration
Expand All @@ -136,7 +136,7 @@ def test_source_id_stored_in_metadata(self):
splitter.warm_up()
result = splitter.run(documents=documents)
assert len(result["documents"]) == 2
for doc, split_doc in zip(documents, result["documents"]):
for doc, split_doc in zip(documents, result["documents"], strict=True):
assert doc.id == split_doc.meta["source_id"]

@pytest.mark.integration
Expand Down