diff --git a/integrations/llama_cpp/pyproject.toml b/integrations/llama_cpp/pyproject.toml index 3796ebb2f0..a6329fbd2d 100644 --- a/integrations/llama_cpp/pyproject.toml +++ b/integrations/llama_cpp/pyproject.toml @@ -7,7 +7,7 @@ name = "llama-cpp-haystack" dynamic = ["version"] description = 'An integration between the llama.cpp LLM framework and Haystack' readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = "Apache-2.0" keywords = [] authors = [ @@ -18,7 +18,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -26,7 +25,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.19.0", "llama-cpp-python>=0.2.87"] +dependencies = ["haystack-ai>=2.22.0", "llama-cpp-python>=0.2.87"] # On macOS GitHub runners, we use a custom index to download pre-built wheels. # Installing from source might fail due to missing dependencies (CMake fails with "OpenMP not found") @@ -92,7 +91,6 @@ known-first-party = ["haystack_integrations"] [tool.ruff] -target-version = "py39" line-length = 120 [tool.ruff.lint] @@ -136,10 +134,6 @@ ignore = [ "PLR0913", "PLR0915", ] -unfixable = [ - # Don't touch unused imports - "F401", -] [tool.ruff.lint.flake8-tidy-imports] ban-relative-imports = "parents" diff --git a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py index 93ed5ca918..30d827907d 100644 --- a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py +++ b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py @@ -1,7 +1,7 @@ import json from collections.abc import Iterator from datetime import datetime, timezone -from typing import Any, Optional, Union +from typing import Any from haystack import component, default_from_dict, default_to_dict, logging from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message @@ -192,15 +192,15 @@ class LlamaCppChatGenerator: def __init__( self, model: str, - n_ctx: Optional[int] = 0, - n_batch: Optional[int] = 512, - model_kwargs: Optional[dict[str, Any]] = None, - generation_kwargs: Optional[dict[str, Any]] = None, + n_ctx: int | None = 0, + n_batch: int | None = 512, + model_kwargs: dict[str, Any] | None = None, + generation_kwargs: dict[str, Any] | None = None, *, - tools: Optional[ToolsType] = None, - streaming_callback: Optional[StreamingCallbackT] = None, - chat_handler_name: Optional[str] = None, - model_clip_path: Optional[str] = None, + tools: ToolsType | None = None, + streaming_callback: StreamingCallbackT | None = None, + chat_handler_name: str | None = None, + model_clip_path: str | None = None, ): """ :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf". @@ -238,7 +238,7 @@ def __init__( _check_duplicate_tool_names(flatten_tools_or_toolsets(tools)) - handler: Optional[Llava15ChatHandler] = None + handler: Llava15ChatHandler | None = None # Validate multimodal requirements if chat_handler_name is not None: if model_clip_path is None: @@ -256,7 +256,7 @@ def __init__( self.n_batch = n_batch self.model_kwargs = model_kwargs self.generation_kwargs = generation_kwargs - self._model: Optional[Llama] = None + self._model: Llama | None = None self.tools = tools self.streaming_callback = streaming_callback self.chat_handler_name = chat_handler_name @@ -324,10 +324,10 @@ def from_dict(cls, data: dict[str, Any]) -> "LlamaCppChatGenerator": def run( self, messages: list[ChatMessage], - generation_kwargs: Optional[dict[str, Any]] = None, + generation_kwargs: dict[str, Any] | None = None, *, - tools: Optional[ToolsType] = None, - streaming_callback: Optional[StreamingCallbackT] = None, + tools: ToolsType | None = None, + streaming_callback: StreamingCallbackT | None = None, ) -> dict[str, list[ChatMessage]]: """ Run the text generation model on the given list of ChatMessages. @@ -435,8 +435,8 @@ def _handle_streaming_response( if chunk.get("choices") and len(chunk["choices"]) > 0: choice = chunk["choices"][0] - delta: Union[ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty, dict] = ( - choice.get("delta", {}) + delta: ChatCompletionStreamResponseDelta | ChatCompletionStreamResponseDeltaEmpty | dict = choice.get( + "delta", {} ) finish_reason = choice.get("finish_reason") diff --git a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py index 0a3fee97c4..0dbd013522 100644 --- a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py +++ b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Union +from typing import Any from haystack import component, logging @@ -28,10 +28,10 @@ class LlamaCppGenerator: def __init__( self, model: str, - n_ctx: Optional[int] = 0, - n_batch: Optional[int] = 512, - model_kwargs: Optional[dict[str, Any]] = None, - generation_kwargs: Optional[dict[str, Any]] = None, + n_ctx: int | None = 0, + n_batch: int | None = 512, + model_kwargs: dict[str, Any] | None = None, + generation_kwargs: dict[str, Any] | None = None, ): """ :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf". @@ -62,7 +62,7 @@ def __init__( self.n_batch = n_batch self.model_kwargs = model_kwargs self.generation_kwargs = generation_kwargs - self.model: Optional[Llama] = None + self.model: Llama | None = None def warm_up(self): if self.model is None: @@ -70,8 +70,8 @@ def warm_up(self): @component.output_types(replies=list[str], meta=list[dict[str, Any]]) def run( - self, prompt: str, generation_kwargs: Optional[dict[str, Any]] = None - ) -> dict[str, Union[list[str], list[dict[str, Any]]]]: + self, prompt: str, generation_kwargs: dict[str, Any] | None = None + ) -> dict[str, list[str] | list[dict[str, Any]]]: """ Run the text generation model on the given prompt.