Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions integrations/llama_cpp/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "llama-cpp-haystack"
dynamic = ["version"]
description = 'An integration between the llama.cpp LLM framework and Haystack'
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = "Apache-2.0"
keywords = []
authors = [
Expand All @@ -18,15 +18,14 @@ classifiers = [
"License :: OSI Approved :: Apache Software License",
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.19.0", "llama-cpp-python>=0.2.87"]
dependencies = ["haystack-ai>=2.22.0", "llama-cpp-python>=0.2.87"]

# On macOS GitHub runners, we use a custom index to download pre-built wheels.
# Installing from source might fail due to missing dependencies (CMake fails with "OpenMP not found")
Expand Down Expand Up @@ -92,7 +91,6 @@ known-first-party = ["haystack_integrations"]


[tool.ruff]
target-version = "py39"
line-length = 120

[tool.ruff.lint]
Expand Down Expand Up @@ -136,10 +134,6 @@ ignore = [
"PLR0913",
"PLR0915",
]
unfixable = [
# Don't touch unused imports
"F401",
]

[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "parents"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
from collections.abc import Iterator
from datetime import datetime, timezone
from typing import Any, Optional, Union
from typing import Any

from haystack import component, default_from_dict, default_to_dict, logging
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
Expand Down Expand Up @@ -192,15 +192,15 @@ class LlamaCppChatGenerator:
def __init__(
self,
model: str,
n_ctx: Optional[int] = 0,
n_batch: Optional[int] = 512,
model_kwargs: Optional[dict[str, Any]] = None,
generation_kwargs: Optional[dict[str, Any]] = None,
n_ctx: int | None = 0,
n_batch: int | None = 512,
model_kwargs: dict[str, Any] | None = None,
generation_kwargs: dict[str, Any] | None = None,
*,
tools: Optional[ToolsType] = None,
streaming_callback: Optional[StreamingCallbackT] = None,
chat_handler_name: Optional[str] = None,
model_clip_path: Optional[str] = None,
tools: ToolsType | None = None,
streaming_callback: StreamingCallbackT | None = None,
chat_handler_name: str | None = None,
model_clip_path: str | None = None,
):
"""
:param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
Expand Down Expand Up @@ -238,7 +238,7 @@ def __init__(

_check_duplicate_tool_names(flatten_tools_or_toolsets(tools))

handler: Optional[Llava15ChatHandler] = None
handler: Llava15ChatHandler | None = None
# Validate multimodal requirements
if chat_handler_name is not None:
if model_clip_path is None:
Expand All @@ -256,7 +256,7 @@ def __init__(
self.n_batch = n_batch
self.model_kwargs = model_kwargs
self.generation_kwargs = generation_kwargs
self._model: Optional[Llama] = None
self._model: Llama | None = None
self.tools = tools
self.streaming_callback = streaming_callback
self.chat_handler_name = chat_handler_name
Expand Down Expand Up @@ -324,10 +324,10 @@ def from_dict(cls, data: dict[str, Any]) -> "LlamaCppChatGenerator":
def run(
self,
messages: list[ChatMessage],
generation_kwargs: Optional[dict[str, Any]] = None,
generation_kwargs: dict[str, Any] | None = None,
*,
tools: Optional[ToolsType] = None,
streaming_callback: Optional[StreamingCallbackT] = None,
tools: ToolsType | None = None,
streaming_callback: StreamingCallbackT | None = None,
) -> dict[str, list[ChatMessage]]:
"""
Run the text generation model on the given list of ChatMessages.
Expand Down Expand Up @@ -435,8 +435,8 @@ def _handle_streaming_response(

if chunk.get("choices") and len(chunk["choices"]) > 0:
choice = chunk["choices"][0]
delta: Union[ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty, dict] = (
choice.get("delta", {})
delta: ChatCompletionStreamResponseDelta | ChatCompletionStreamResponseDeltaEmpty | dict = choice.get(
"delta", {}
)

finish_reason = choice.get("finish_reason")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Optional, Union
from typing import Any

from haystack import component, logging

Expand Down Expand Up @@ -28,10 +28,10 @@ class LlamaCppGenerator:
def __init__(
self,
model: str,
n_ctx: Optional[int] = 0,
n_batch: Optional[int] = 512,
model_kwargs: Optional[dict[str, Any]] = None,
generation_kwargs: Optional[dict[str, Any]] = None,
n_ctx: int | None = 0,
n_batch: int | None = 512,
model_kwargs: dict[str, Any] | None = None,
generation_kwargs: dict[str, Any] | None = None,
):
"""
:param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
Expand Down Expand Up @@ -62,16 +62,16 @@ def __init__(
self.n_batch = n_batch
self.model_kwargs = model_kwargs
self.generation_kwargs = generation_kwargs
self.model: Optional[Llama] = None
self.model: Llama | None = None

def warm_up(self):
if self.model is None:
self.model = Llama(**self.model_kwargs)

@component.output_types(replies=list[str], meta=list[dict[str, Any]])
def run(
self, prompt: str, generation_kwargs: Optional[dict[str, Any]] = None
) -> dict[str, Union[list[str], list[dict[str, Any]]]]:
self, prompt: str, generation_kwargs: dict[str, Any] | None = None
) -> dict[str, list[str] | list[dict[str, Any]]]:
"""
Run the text generation model on the given prompt.

Expand Down