1414from haystack .components .writers import DocumentWriter
1515from haystack .core .serialization import component_to_dict
1616from haystack .dataclasses .chat_message import ChatMessage , ImageContent
17- from haystack .document_stores .in_memory import InMemoryDocumentStore
1817
1918
2019class TestLLMDocumentContentExtractor :
@@ -403,18 +402,17 @@ def test_run_on_thread_with_none_prompt(self, monkeypatch):
403402 not os .environ .get ("OPENAI_API_KEY" , None ),
404403 reason = "Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test." ,
405404 )
406- def test_live_run (self ):
405+ def test_live_run (self , in_memory_doc_store ):
407406 docs = [Document (content = "" , meta = {"file_path" : "./test/test_files/images/apple.jpg" })]
408- doc_store = InMemoryDocumentStore ()
409407 extractor = LLMDocumentContentExtractor (chat_generator = OpenAIChatGenerator (model = "gpt-4.1-nano" ))
410- writer = DocumentWriter (document_store = doc_store )
408+ writer = DocumentWriter (document_store = in_memory_doc_store )
411409 pipeline = Pipeline ()
412410 pipeline .add_component ("extractor" , extractor )
413411 pipeline .add_component ("doc_writer" , writer )
414412 pipeline .connect ("extractor.documents" , "doc_writer.documents" )
415413 pipeline .run (data = {"documents" : docs })
416414
417- doc_store_docs = doc_store .filter_documents ()
415+ doc_store_docs = in_memory_doc_store .filter_documents ()
418416 assert len (doc_store_docs ) >= 1
419417 assert len (doc_store_docs [0 ].content ) > 0
420418
@@ -423,7 +421,7 @@ def test_live_run(self):
423421 not os .environ .get ("OPENAI_API_KEY" , None ),
424422 reason = "Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test." ,
425423 )
426- def test_live_run_on_image_with_metadata (self ):
424+ def test_live_run_on_image_with_metadata (self , in_memory_doc_store ):
427425 """
428426 Live test using image_metadata.png: single prompt; LLM can return JSON with document_content
429427 and metadata keys (author, date, document_type, topic) in one response.
@@ -469,7 +467,6 @@ def test_live_run_on_image_with_metadata(self):
469467
470468 image_path = "./test/test_files/images/image_metadata.png"
471469 docs = [Document (content = "" , meta = {"file_path" : image_path })]
472- doc_store = InMemoryDocumentStore ()
473470 extractor = LLMDocumentContentExtractor (
474471 prompt = prompt ,
475472 chat_generator = OpenAIChatGenerator (
@@ -494,14 +491,14 @@ def test_live_run_on_image_with_metadata(self):
494491 },
495492 ),
496493 )
497- writer = DocumentWriter (document_store = doc_store )
494+ writer = DocumentWriter (document_store = in_memory_doc_store )
498495 pipeline = Pipeline ()
499496 pipeline .add_component ("extractor" , extractor )
500497 pipeline .add_component ("doc_writer" , writer )
501498 pipeline .connect ("extractor.documents" , "doc_writer.documents" )
502499 pipeline .run (data = {"documents" : docs })
503500
504- doc_store_docs = doc_store .filter_documents ()
501+ doc_store_docs = in_memory_doc_store .filter_documents ()
505502 assert len (doc_store_docs ) >= 1
506503 doc = doc_store_docs [0 ]
507504 assert len (doc .content ) > 0 , "Expected non-empty content (image/document description)"
0 commit comments