deepset-ai
diff --git a/.github/utils/check_imports.py b/.github/utils/check_imports.py
Lines changed: 2 additions & 2 deletions
diff --git a/.github/utils/delete_outdated_docs.py b/.github/utils/delete_outdated_docs.py
Lines changed: 1 addition & 2 deletions
diff --git a/e2e/pipelines/test_dense_doc_search.py b/e2e/pipelines/test_dense_doc_search.py
Lines changed: 4 additions & 1 deletion
diff --git a/e2e/pipelines/test_evaluation_pipeline.py b/e2e/pipelines/test_evaluation_pipeline.py
Lines changed: 1 addition & 2 deletions
diff --git a/e2e/pipelines/test_extractive_qa_pipeline.py b/e2e/pipelines/test_extractive_qa_pipeline.py
Lines changed: 4 additions & 1 deletion
diff --git a/e2e/pipelines/test_hybrid_doc_search_pipeline.py b/e2e/pipelines/test_hybrid_doc_search_pipeline.py
Lines changed: 4 additions & 1 deletion
diff --git a/e2e/pipelines/test_named_entity_extractor.py b/e2e/pipelines/test_named_entity_extractor.py
Lines changed: 12 additions & 3 deletions
diff --git a/e2e/pipelines/test_pdf_content_extraction_pipeline.py b/e2e/pipelines/test_pdf_content_extraction_pipeline.py
Lines changed: 8 additions & 0 deletions
diff --git a/e2e/pipelines/test_preprocessing_pipeline.py b/e2e/pipelines/test_preprocessing_pipeline.py
Lines changed: 4 additions & 1 deletion
diff --git a/haystack/components/agents/agent.py b/haystack/components/agents/agent.py
Lines changed: 15 additions & 15 deletions
@@ -3,12 +3,12 @@
 import sys
 import traceback
 from pathlib import Path
-from typing import List, Optional
+from typing import Optional
 
 from haystack import logging  # pylint: disable=unused-import  # this is needed to avoid circular imports
 
 
-def validate_module_imports(root_dir: str, exclude_subdirs: Optional[List[str]] = None) -> tuple[list, list]:
+def validate_module_imports(root_dir: str, exclude_subdirs: Optional[list[str]] = None) -> tuple[list, list]:
     """
     Recursively search for all Python modules and attempt to import them.
 
 
@@ -3,7 +3,6 @@
 import os
 import re
 from pathlib import Path
-from typing import List
 
 import requests
 import yaml
@@ -30,7 +29,7 @@ def create_headers(version: str):
     return {"authorization": f"Basic {readme_token()}", "x-readme-version": version}
 
 
-def get_docs_in_category(category_slug: str, version: str) -> List[str]:
+def get_docs_in_category(category_slug: str, version: str) -> list[str]:
     """
     Returns the slugs of all documents in a category for the specific version.
     """
 
@@ -15,7 +15,10 @@
 from haystack.document_stores.in_memory import InMemoryDocumentStore
 
 
-def test_dense_doc_search_pipeline(tmp_path, samples_path):
+def test_dense_doc_search_pipeline(tmp_path, samples_path, monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+    monkeypatch.delenv("HF_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+
     # Create the indexing pipeline
     indexing_pipeline = Pipeline()
     indexing_pipeline.add_component(
 
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import os
-from typing import List
 
 import pytest
 
@@ -30,7 +29,7 @@
 EMBEDDINGS_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 
 
-def indexing_pipeline(documents: List[Document]):
+def indexing_pipeline(documents: list[Document]):
     """Indexing the documents"""
     document_store = InMemoryDocumentStore()
     doc_writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)
 
@@ -8,7 +8,10 @@
 from haystack.document_stores.in_memory import InMemoryDocumentStore
 
 
-def test_extractive_qa_pipeline(tmp_path):
+def test_extractive_qa_pipeline(tmp_path, monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+    monkeypatch.delenv("HF_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+
     # Create the pipeline
     qa_pipeline = Pipeline()
     qa_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=InMemoryDocumentStore()), name="retriever")
 
@@ -11,7 +11,10 @@
 from haystack.document_stores.in_memory import InMemoryDocumentStore
 
 
-def test_hybrid_doc_search_pipeline(tmp_path):
+def test_hybrid_doc_search_pipeline(tmp_path, monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+    monkeypatch.delenv("HF_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+
     # Create the pipeline
     document_store = InMemoryDocumentStore()
     hybrid_pipeline = Pipeline()
 
@@ -47,7 +47,10 @@ def spacy_annotations():
     ]
 
 
-def test_ner_extractor_init():
+def test_ner_extractor_init(monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+    monkeypatch.delenv("HF_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+
     extractor = NamedEntityExtractor(backend=NamedEntityExtractorBackend.HUGGING_FACE, model="dslim/bert-base-NER")
 
     with pytest.raises(RuntimeError, match=r"not warmed up"):
@@ -59,7 +62,10 @@ def test_ner_extractor_init():
 
 
 @pytest.mark.parametrize("batch_size", [1, 3])
-def test_ner_extractor_hf_backend(raw_texts, hf_annotations, batch_size):
+def test_ner_extractor_hf_backend(raw_texts, hf_annotations, batch_size, monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+    monkeypatch.delenv("HF_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+
     extractor = NamedEntityExtractor(backend=NamedEntityExtractorBackend.HUGGING_FACE, model="dslim/bert-base-NER")
     extractor.warm_up()
 
@@ -87,7 +93,10 @@ def test_ner_extractor_spacy_backend(raw_texts, spacy_annotations, batch_size):
 
 
 @pytest.mark.parametrize("batch_size", [1, 3])
-def test_ner_extractor_in_pipeline(raw_texts, hf_annotations, batch_size):
+def test_ner_extractor_in_pipeline(raw_texts, hf_annotations, batch_size, monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+    monkeypatch.delenv("HF_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+
     pipeline = Pipeline()
     pipeline.add_component(
         name="ner_extractor",
 
@@ -2,6 +2,10 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import os
+
+import pytest
+
 from haystack import Pipeline
 from haystack.components.converters.pypdf import PyPDFToDocument
 from haystack.components.joiners import DocumentJoiner
@@ -13,6 +17,10 @@
 from haystack.components.routers.document_length_router import DocumentLengthRouter
 
 
+@pytest.mark.skipif(
+    not os.environ.get("OPENAI_API_KEY", None),
+    reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+)
 def test_pdf_content_extraction_pipeline():
     """
     Test a pipeline that processes PDFs with the following steps:
 
@@ -12,7 +12,10 @@
 from haystack.document_stores.in_memory import InMemoryDocumentStore
 
 
-def test_preprocessing_pipeline(tmp_path):
+def test_preprocessing_pipeline(tmp_path, monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+    monkeypatch.delenv("HF_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
+
     # Create the pipeline and its components
     document_store = InMemoryDocumentStore()
     preprocessing_pipeline = Pipeline()
 
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import inspect
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union
 
 from haystack import logging, tracing
 from haystack.components.generators.chat.types import ChatGenerator
@@ -71,14 +71,14 @@ def __init__(
         self,
         *,
         chat_generator: ChatGenerator,
-        tools: Optional[Union[List[Tool], Toolset]] = None,
+        tools: Optional[Union[list[Tool], Toolset]] = None,
         system_prompt: Optional[str] = None,
-        exit_conditions: Optional[List[str]] = None,
-        state_schema: Optional[Dict[str, Any]] = None,
+        exit_conditions: Optional[list[str]] = None,
+        state_schema: Optional[dict[str, Any]] = None,
         max_agent_steps: int = 100,
         streaming_callback: Optional[StreamingCallbackT] = None,
         raise_on_tool_invocation_failure: bool = False,
-        tool_invoker_kwargs: Optional[Dict[str, Any]] = None,
+        tool_invoker_kwargs: Optional[dict[str, Any]] = None,
     ) -> None:
         """
         Initialize the agent component.
@@ -126,7 +126,7 @@ def __init__(
         # Initialize state schema
         resolved_state_schema = _deepcopy_with_exceptions(self._state_schema)
         if resolved_state_schema.get("messages") is None:
-            resolved_state_schema["messages"] = {"type": List[ChatMessage], "handler": merge_lists}
+            resolved_state_schema["messages"] = {"type": list[ChatMessage], "handler": merge_lists}
         self.state_schema = resolved_state_schema
 
         self.chat_generator = chat_generator
@@ -172,7 +172,7 @@ def warm_up(self) -> None:
                 self.chat_generator.warm_up()
             self._is_warmed_up = True
 
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serialize the component to a dictionary.
 
@@ -198,7 +198,7 @@ def to_dict(self) -> Dict[str, Any]:
         )
 
     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "Agent":
+    def from_dict(cls, data: dict[str, Any]) -> "Agent":
         """
         Deserialize the agent from a dictionary.
 
@@ -219,9 +219,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "Agent":
 
         return default_from_dict(cls, data)
 
-    def _prepare_generator_inputs(self, streaming_callback: Optional[StreamingCallbackT] = None) -> Dict[str, Any]:
+    def _prepare_generator_inputs(self, streaming_callback: Optional[StreamingCallbackT] = None) -> dict[str, Any]:
         """Prepare inputs for the chat generator."""
-        generator_inputs: Dict[str, Any] = {"tools": self.tools}
+        generator_inputs: dict[str, Any] = {"tools": self.tools}
         if streaming_callback is not None:
             generator_inputs["streaming_callback"] = streaming_callback
         return generator_inputs
@@ -240,13 +240,13 @@ def _create_agent_span(self) -> Any:
 
     def run(  # noqa: PLR0915
         self,
-        messages: List[ChatMessage],
+        messages: list[ChatMessage],
         streaming_callback: Optional[StreamingCallbackT] = None,
         *,
         break_point: Optional[AgentBreakpoint] = None,
         snapshot: Optional[AgentSnapshot] = None,
         **kwargs: Any,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """
         Process messages and execute tools until an exit condition is met.
 
@@ -431,13 +431,13 @@ def run(  # noqa: PLR0915
 
     async def run_async(  # noqa: PLR0915
         self,
-        messages: List[ChatMessage],
+        messages: list[ChatMessage],
         streaming_callback: Optional[StreamingCallbackT] = None,
         *,
         break_point: Optional[AgentBreakpoint] = None,
         snapshot: Optional[AgentSnapshot] = None,
         **kwargs: Any,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """
         Asynchronously process messages and execute tools until the exit condition is met.
 
@@ -626,7 +626,7 @@ async def run_async(  # noqa: PLR0915
             result.update({"last_message": all_messages[-1]})
         return result
 
-    def _check_exit_conditions(self, llm_messages: List[ChatMessage], tool_messages: List[ChatMessage]) -> bool:
+    def _check_exit_conditions(self, llm_messages: list[ChatMessage], tool_messages: list[ChatMessage]) -> bool:
         """
         Check if any of the LLM messages' tool calls match an exit condition and if there are no errors.