Skip to content

Commit 197032e

Browse files
authored
test: improve del_hf_env_vars fixture (#3428)
1 parent 72a2ec5 commit 197032e

9 files changed

Lines changed: 76 additions & 62 deletions

integrations/huggingface_api/tests/conftest.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import os
56
from pathlib import Path
67

78
import pytest
@@ -13,15 +14,16 @@ def test_files_path():
1314

1415

1516
@pytest.fixture()
16-
def del_hf_env_vars(monkeypatch):
17+
def del_hf_env_vars_if_empty(monkeypatch):
1718
"""
18-
Delete Hugging Face environment variables for tests.
19+
Delete Hugging Face environment variables for tests if empty.
1920
2021
Prevents passing empty tokens to Hugging Face, which would cause API calls to fail.
2122
This is particularly relevant for PRs opened from forks, where secrets are not available
2223
and empty environment variables might be set instead of being removed.
2324
2425
See https://github.com/deepset-ai/haystack/issues/8811 for more details.
2526
"""
26-
monkeypatch.delenv("HF_API_TOKEN", raising=False)
27-
monkeypatch.delenv("HF_TOKEN", raising=False)
27+
for var in ("HF_API_TOKEN", "HF_TOKEN"):
28+
if not os.environ.get(var, "").strip():
29+
monkeypatch.delenv(var, raising=False)

integrations/huggingface_api/tests/test_ranker.py

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -13,14 +13,14 @@
1313

1414

1515
class TestHuggingFaceTEIRanker:
16-
def test_init(self, del_hf_env_vars):
16+
def test_init(self, del_hf_env_vars_if_empty):
1717
"""Test initialization with default and custom parameters"""
1818
# Default parameters
1919
ranker = HuggingFaceTEIRanker(url="https://api.my-tei-service.com")
2020
assert ranker.url == "https://api.my-tei-service.com"
2121
assert ranker.top_k == 10
2222
assert ranker.timeout == 30
23-
assert not ranker.token.resolve_value()
23+
assert ranker.token == Secret.from_env_var(["HF_API_TOKEN", "HF_TOKEN"], strict=False)
2424
assert ranker.max_retries == 3
2525
assert ranker.retry_status_codes is None
2626

@@ -41,7 +41,7 @@ def test_init(self, del_hf_env_vars):
4141
assert ranker.max_retries == 5
4242
assert ranker.retry_status_codes == [500, 502, 503]
4343

44-
def test_to_dict(self, del_hf_env_vars):
44+
def test_to_dict(self, del_hf_env_vars_if_empty):
4545
"""Test serialization to dict with Secret token"""
4646
component = HuggingFaceTEIRanker(
4747
url="https://api.my-tei-service.com", top_k=5, timeout=30, max_retries=4, retry_status_codes=[500, 502]
@@ -60,7 +60,7 @@ def test_to_dict(self, del_hf_env_vars):
6060
assert data["init_parameters"]["max_retries"] == 4
6161
assert data["init_parameters"]["retry_status_codes"] == [500, 502]
6262

63-
def test_from_dict(self, del_hf_env_vars):
63+
def test_from_dict(self, del_hf_env_vars_if_empty):
6464
"""Test deserialization from dict with environment variable token"""
6565
data = {
6666
"type": "haystack_integrations.components.rankers.huggingface_api.ranker.HuggingFaceTEIRanker",
@@ -82,14 +82,14 @@ def test_from_dict(self, del_hf_env_vars):
8282
assert component.max_retries == 4
8383
assert component.retry_status_codes == [500, 502]
8484

85-
def test_empty_documents(self, del_hf_env_vars):
85+
def test_empty_documents(self, del_hf_env_vars_if_empty):
8686
"""Test that empty documents list returns empty result"""
8787
ranker = HuggingFaceTEIRanker(url="https://api.my-tei-service.com")
8888
result = ranker.run(query="test query", documents=[])
8989
assert result == {"documents": []}
9090

9191
@patch("haystack_integrations.components.rankers.huggingface_api.ranker.request_with_retry")
92-
def test_run_with_mock(self, mock_request, del_hf_env_vars):
92+
def test_run_with_mock(self, mock_request, del_hf_env_vars_if_empty):
9393
"""Test run method with mocked API response"""
9494
# Setup mock response
9595
mock_response = MagicMock(spec=httpx.Response)
@@ -137,7 +137,7 @@ def test_run_with_mock(self, mock_request, del_hf_env_vars):
137137
assert result["documents"][2].score == 0.75
138138

139139
@patch("haystack_integrations.components.rankers.huggingface_api.ranker.request_with_retry")
140-
def test_run_with_truncation_direction(self, mock_request, del_hf_env_vars):
140+
def test_run_with_truncation_direction(self, mock_request, del_hf_env_vars_if_empty):
141141
"""Test run method with truncation direction parameter"""
142142
# Setup mock response
143143
mock_response = MagicMock(spec=httpx.Response)
@@ -170,7 +170,7 @@ def test_run_with_truncation_direction(self, mock_request, del_hf_env_vars):
170170
)
171171

172172
@patch("haystack_integrations.components.rankers.huggingface_api.ranker.request_with_retry")
173-
def test_run_with_custom_top_k(self, mock_request, del_hf_env_vars):
173+
def test_run_with_custom_top_k(self, mock_request, del_hf_env_vars_if_empty):
174174
"""Test run method with custom top_k parameter"""
175175
# Setup mock response with 5 documents
176176
mock_response = MagicMock(spec=httpx.Response)
@@ -207,7 +207,7 @@ def test_run_with_custom_top_k(self, mock_request, del_hf_env_vars):
207207
assert result["documents"][1].content == "Document 3"
208208

209209
@patch("haystack_integrations.components.rankers.huggingface_api.ranker.request_with_retry")
210-
def test_run_deduplicates_documents(self, mock_request, del_hf_env_vars):
210+
def test_run_deduplicates_documents(self, mock_request, del_hf_env_vars_if_empty):
211211
"""Test that duplicate documents are removed before sending to the API."""
212212
mock_response = MagicMock(spec=httpx.Response)
213213
mock_response.json.return_value = [{"index": 1, "score": 0.9}, {"index": 0, "score": 0.2}]
@@ -228,7 +228,7 @@ def test_run_deduplicates_documents(self, mock_request, del_hf_env_vars):
228228
url="https://api.my-tei-service.com/rerank",
229229
json={"query": "test query", "texts": ["keep me", "unique"], "raw_scores": False},
230230
timeout=30,
231-
headers={},
231+
headers={"Authorization": f"Bearer {ranker.token.resolve_value()}"} if ranker.token.resolve_value() else {},
232232
attempts=3,
233233
status_codes_to_retry=None,
234234
)
@@ -237,7 +237,7 @@ def test_run_deduplicates_documents(self, mock_request, del_hf_env_vars):
237237
assert result["documents"][1].content == "keep me"
238238

239239
@patch("haystack_integrations.components.rankers.huggingface_api.ranker.request_with_retry")
240-
def test_error_handling(self, mock_request, del_hf_env_vars):
240+
def test_error_handling(self, mock_request, del_hf_env_vars_if_empty):
241241
"""Test error handling in the ranker"""
242242
# Setup mock response with error
243243
mock_response = MagicMock(spec=httpx.Response)
@@ -261,7 +261,7 @@ def test_error_handling(self, mock_request, del_hf_env_vars):
261261

262262
@pytest.mark.asyncio
263263
@patch("haystack_integrations.components.rankers.huggingface_api.ranker.async_request_with_retry")
264-
async def test_run_async_with_mock(self, mock_request, del_hf_env_vars):
264+
async def test_run_async_with_mock(self, mock_request, del_hf_env_vars_if_empty):
265265
"""Test run_async method with mocked API response"""
266266
# Setup mock response
267267
mock_response = MagicMock(spec=httpx.Response)
@@ -310,7 +310,7 @@ async def test_run_async_with_mock(self, mock_request, del_hf_env_vars):
310310

311311
@pytest.mark.asyncio
312312
@patch("haystack_integrations.components.rankers.huggingface_api.ranker.async_request_with_retry")
313-
async def test_run_async_deduplicates_documents(self, mock_request, del_hf_env_vars):
313+
async def test_run_async_deduplicates_documents(self, mock_request, del_hf_env_vars_if_empty):
314314
"""Test that duplicate documents are removed before sending to the API."""
315315
mock_response = MagicMock(spec=httpx.Response)
316316
mock_response.json.return_value = [{"index": 1, "score": 0.9}, {"index": 0, "score": 0.2}]
@@ -331,7 +331,7 @@ async def test_run_async_deduplicates_documents(self, mock_request, del_hf_env_v
331331
url="https://api.my-tei-service.com/rerank",
332332
json={"query": "test query", "texts": ["keep me", "unique"], "raw_scores": False},
333333
timeout=30,
334-
headers={},
334+
headers={"Authorization": f"Bearer {ranker.token.resolve_value()}"} if ranker.token.resolve_value() else {},
335335
attempts=3,
336336
status_codes_to_retry=None,
337337
)
@@ -341,7 +341,7 @@ async def test_run_async_deduplicates_documents(self, mock_request, del_hf_env_v
341341

342342
@pytest.mark.asyncio
343343
@patch("haystack_integrations.components.rankers.huggingface_api.ranker.async_request_with_retry")
344-
async def test_run_async_empty_documents(self, mock_request, del_hf_env_vars):
344+
async def test_run_async_empty_documents(self, mock_request, del_hf_env_vars_if_empty):
345345
"""Test run_async with empty documents list"""
346346
ranker = HuggingFaceTEIRanker(url="https://api.my-tei-service.com")
347347
result = await ranker.run_async(query="test query", documents=[])

integrations/transformers/tests/conftest.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,23 +2,26 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import os
6+
57
import pytest
68
from haystack.document_stores.in_memory import InMemoryDocumentStore
79

810

911
@pytest.fixture()
10-
def del_hf_env_vars(monkeypatch):
12+
def del_hf_env_vars_if_empty(monkeypatch):
1113
"""
12-
Delete Hugging Face environment variables for tests.
14+
Delete Hugging Face environment variables for tests if empty.
1315
1416
Prevents passing empty tokens to Hugging Face, which would cause API calls to fail.
1517
This is particularly relevant for PRs opened from forks, where secrets are not available
1618
and empty environment variables might be set instead of being removed.
1719
1820
See https://github.com/deepset-ai/haystack/issues/8811 for more details.
1921
"""
20-
monkeypatch.delenv("HF_API_TOKEN", raising=False)
21-
monkeypatch.delenv("HF_TOKEN", raising=False)
22+
for var in ("HF_API_TOKEN", "HF_TOKEN"):
23+
if not os.environ.get(var, "").strip():
24+
monkeypatch.delenv(var, raising=False)
2225

2326

2427
@pytest.fixture()

integrations/transformers/tests/test_chat_generator.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -256,7 +256,7 @@ def test_from_dict(self, model_info_mock, tools):
256256
}
257257

258258
@patch("haystack_integrations.components.generators.transformers.chat.chat_generator.pipeline")
259-
def test_warm_up(self, pipeline_mock, del_hf_env_vars):
259+
def test_warm_up(self, pipeline_mock, del_hf_env_vars_if_empty):
260260
generator = TransformersChatGenerator(
261261
model="mistralai/Mistral-7B-Instruct-v0.2", task="text-generation", device=ComponentDevice.from_str("cpu")
262262
)
@@ -266,11 +266,14 @@ def test_warm_up(self, pipeline_mock, del_hf_env_vars):
266266
generator.warm_up()
267267

268268
pipeline_mock.assert_called_once_with(
269-
model="mistralai/Mistral-7B-Instruct-v0.2", task="text-generation", token=None, device="cpu"
269+
model="mistralai/Mistral-7B-Instruct-v0.2",
270+
task="text-generation",
271+
token=generator.token.resolve_value(),
272+
device="cpu",
270273
)
271274

272275
@patch("haystack_integrations.components.generators.transformers.chat.chat_generator.pipeline")
273-
def test_warm_up_with_tools(self, pipeline_mock, del_hf_env_vars):
276+
def test_warm_up_with_tools(self, pipeline_mock, del_hf_env_vars_if_empty):
274277
"""Test that warm_up() calls warm_up on tools and is idempotent."""
275278

276279
# Create a mock tool that tracks if warm_up() was called
@@ -324,7 +327,7 @@ def warm_up(self):
324327
pipeline_mock.assert_called_once()
325328

326329
@patch("haystack_integrations.components.generators.transformers.chat.chat_generator.pipeline")
327-
def test_warm_up_with_no_tools(self, pipeline_mock, del_hf_env_vars):
330+
def test_warm_up_with_no_tools(self, pipeline_mock, del_hf_env_vars_if_empty):
328331
"""Test that warm_up() works when no tools are provided."""
329332

330333
generator = TransformersChatGenerator(
@@ -349,7 +352,7 @@ def test_warm_up_with_no_tools(self, pipeline_mock, del_hf_env_vars):
349352
pipeline_mock.assert_called_once()
350353

351354
@patch("haystack_integrations.components.generators.transformers.chat.chat_generator.pipeline")
352-
def test_warm_up_with_multiple_tools(self, pipeline_mock, del_hf_env_vars):
355+
def test_warm_up_with_multiple_tools(self, pipeline_mock, del_hf_env_vars_if_empty):
353356
"""Test that warm_up() works with multiple tools."""
354357

355358
# Track warm_up calls
@@ -507,7 +510,7 @@ def test_messages_conversion_is_called(self, mock_convert, model_info_mock):
507510

508511
@pytest.mark.integration
509512
@pytest.mark.flaky(reruns=3, reruns_delay=10)
510-
def test_live_run(self, del_hf_env_vars):
513+
def test_live_run(self, del_hf_env_vars_if_empty):
511514
"""Test live run with default behavior (no thinking)."""
512515
messages = [ChatMessage.from_user("Please create a summary about the following topic: Climate change")]
513516

@@ -521,7 +524,7 @@ def test_live_run(self, del_hf_env_vars):
521524

522525
@pytest.mark.integration
523526
@pytest.mark.flaky(reruns=3, reruns_delay=10)
524-
def test_live_run_thinking(self, del_hf_env_vars):
527+
def test_live_run_thinking(self, del_hf_env_vars_if_empty):
525528
"""Test live run with enable_thinking=True."""
526529
messages = [ChatMessage.from_user("What is 2+2?")]
527530

@@ -865,7 +868,7 @@ def mock_pipeline_call(*args, **kwargs):
865868
@pytest.mark.integration
866869
@pytest.mark.flaky(reruns=3, reruns_delay=10)
867870
@pytest.mark.asyncio
868-
async def test_live_run_async_with_streaming(self, del_hf_env_vars):
871+
async def test_live_run_async_with_streaming(self, del_hf_env_vars_if_empty):
869872
"""Test async streaming with a live model."""
870873
streaming_chunks = []
871874

integrations/transformers/tests/test_extractive_reader.py

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -640,7 +640,7 @@ def __init__(self):
640640
"haystack_integrations.components.readers.transformers.extractive_reader."
641641
"AutoModelForQuestionAnswering.from_pretrained"
642642
)
643-
def test_device_map_auto(mocked_automodel, _mocked_autotokenizer, del_hf_env_vars):
643+
def test_device_map_auto(mocked_automodel, _mocked_autotokenizer, del_hf_env_vars_if_empty):
644644
reader = TransformersExtractiveReader("deepset/roberta-base-squad2", model_kwargs={"device_map": "auto"})
645645
auto_device = ComponentDevice.resolve_device(None)
646646

@@ -651,7 +651,9 @@ def __init__(self):
651651
mocked_automodel.return_value = MockedModel()
652652
reader.warm_up()
653653

654-
mocked_automodel.assert_called_once_with("deepset/roberta-base-squad2", token=None, device_map="auto")
654+
mocked_automodel.assert_called_once_with(
655+
"deepset/roberta-base-squad2", token=reader.token.resolve_value(), device_map="auto"
656+
)
655657
assert reader.device == ComponentDevice.from_multiple(DeviceMap.from_hf({"": auto_device.to_hf()}))
656658

657659

@@ -660,7 +662,7 @@ def __init__(self):
660662
"haystack_integrations.components.readers.transformers.extractive_reader."
661663
"AutoModelForQuestionAnswering.from_pretrained"
662664
)
663-
def test_device_map_str(mocked_automodel, _mocked_autotokenizer, del_hf_env_vars):
665+
def test_device_map_str(mocked_automodel, _mocked_autotokenizer, del_hf_env_vars_if_empty):
664666
reader = TransformersExtractiveReader("deepset/roberta-base-squad2", model_kwargs={"device_map": "cpu:0"})
665667

666668
class MockedModel:
@@ -670,7 +672,9 @@ def __init__(self):
670672
mocked_automodel.return_value = MockedModel()
671673
reader.warm_up()
672674

673-
mocked_automodel.assert_called_once_with("deepset/roberta-base-squad2", token=None, device_map="cpu:0")
675+
mocked_automodel.assert_called_once_with(
676+
"deepset/roberta-base-squad2", token=reader.token.resolve_value(), device_map="cpu:0"
677+
)
674678
assert reader.device == ComponentDevice.from_multiple(DeviceMap.from_hf({"": "cpu:0"}))
675679

676680

@@ -679,7 +683,7 @@ def __init__(self):
679683
"haystack_integrations.components.readers.transformers.extractive_reader."
680684
"AutoModelForQuestionAnswering.from_pretrained"
681685
)
682-
def test_device_map_dict(mocked_automodel, _mocked_autotokenizer, del_hf_env_vars):
686+
def test_device_map_dict(mocked_automodel, _mocked_autotokenizer, del_hf_env_vars_if_empty):
683687
reader = TransformersExtractiveReader(
684688
"deepset/roberta-base-squad2", model_kwargs={"device_map": {"layer_1": 1, "classifier": "cpu"}}
685689
)
@@ -692,7 +696,9 @@ def __init__(self):
692696
reader.warm_up()
693697

694698
mocked_automodel.assert_called_once_with(
695-
"deepset/roberta-base-squad2", token=None, device_map={"layer_1": 1, "classifier": "cpu"}
699+
"deepset/roberta-base-squad2",
700+
token=reader.token.resolve_value(),
701+
device_map={"layer_1": 1, "classifier": "cpu"},
696702
)
697703
assert reader.device == ComponentDevice.from_multiple(DeviceMap.from_hf({"layer_1": 1, "classifier": "cpu"}))
698704

@@ -907,7 +913,7 @@ def test_deduplicate_by_overlap(
907913

908914

909915
@pytest.mark.integration
910-
def test_t5(del_hf_env_vars):
916+
def test_t5(del_hf_env_vars_if_empty):
911917
reader = TransformersExtractiveReader("sjrhuschlee/flan-t5-base-squad2")
912918
answers = reader.run(example_queries[0], example_documents[0], top_k=2)[
913919
"answers"
@@ -930,7 +936,7 @@ def test_t5(del_hf_env_vars):
930936

931937

932938
@pytest.mark.integration
933-
def test_roberta(del_hf_env_vars):
939+
def test_roberta(del_hf_env_vars_if_empty):
934940
reader = TransformersExtractiveReader("deepset/tinyroberta-squad2")
935941
answers = reader.run(example_queries[0], example_documents[0], top_k=2)[
936942
"answers"

integrations/transformers/tests/test_named_entity_extractor.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ def test_named_entity_extractor_serde():
101101
_ = TransformersNamedEntityExtractor.from_dict(serde_data)
102102

103103

104-
def test_to_dict_default(del_hf_env_vars):
104+
def test_to_dict_default(del_hf_env_vars_if_empty):
105105
component = TransformersNamedEntityExtractor(
106106
model="dslim/bert-base-NER",
107107
device=ComponentDevice.from_str("mps"),
@@ -144,7 +144,7 @@ def test_to_dict_with_parameters():
144144
}
145145

146146

147-
def test_named_entity_extractor_from_dict_no_default_parameters(del_hf_env_vars):
147+
def test_named_entity_extractor_from_dict_no_default_parameters(del_hf_env_vars_if_empty):
148148
data = {
149149
"type": COMPONENT_TYPE,
150150
"init_parameters": {"model": "dslim/bert-base-NER"},
@@ -226,15 +226,15 @@ def test_named_entity_extractor_run_fails_with_wrong_number_of_annotations():
226226

227227

228228
@pytest.mark.integration
229-
def test_ner_extractor_init(del_hf_env_vars):
229+
def test_ner_extractor_init(del_hf_env_vars_if_empty):
230230
extractor = TransformersNamedEntityExtractor(model="dslim/bert-base-NER")
231231
extractor.warm_up()
232232
assert extractor.initialized
233233

234234

235235
@pytest.mark.integration
236236
@pytest.mark.parametrize("batch_size", [1, 3])
237-
def test_ner_extractor(raw_texts, hf_annotations, batch_size, del_hf_env_vars):
237+
def test_ner_extractor(raw_texts, hf_annotations, batch_size, del_hf_env_vars_if_empty):
238238
extractor = TransformersNamedEntityExtractor(model="dslim/bert-base-NER")
239239
extractor.warm_up()
240240

@@ -256,7 +256,7 @@ def test_ner_extractor_private_models(raw_texts, hf_annotations, batch_size):
256256

257257
@pytest.mark.integration
258258
@pytest.mark.parametrize("batch_size", [1, 3])
259-
def test_ner_extractor_in_pipeline(raw_texts, hf_annotations, batch_size, del_hf_env_vars):
259+
def test_ner_extractor_in_pipeline(raw_texts, hf_annotations, batch_size, del_hf_env_vars_if_empty):
260260
pipeline = Pipeline()
261261
pipeline.add_component(
262262
name="ner_extractor",

0 commit comments

Comments
 (0)