|
| 1 | +# To run this example, you will need to: |
| 2 | +# 1) Set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_DEFAULT_REGION` environment variables |
| 3 | +# 2) Place a document image named `medical_form.png` in the same directory as this script |
| 4 | +# |
| 5 | +# This example demonstrates natural-language queries using AWS Textract. |
| 6 | +# The QUERIES feature type is enabled automatically when you pass the `queries` |
| 7 | +# parameter at runtime. Textract will attempt to find answers to each question |
| 8 | +# in the document. |
| 9 | + |
| 10 | +from haystack_integrations.components.converters.amazon_textract import AmazonTextractConverter |
| 11 | + |
def pair_queries_with_answers(blocks: list) -> list:
    """Pair every Textract QUERY block with the answers that belong to it.

    A QUERY block points at its QUERY_RESULT blocks through a relationship of
    type ``ANSWER`` whose ``Ids`` are the result block IDs. Listing questions
    and answers independently loses that link, so a question without an answer
    would shift every following answer onto the wrong question.

    Args:
        blocks: The ``Blocks`` list of one raw Textract response (dicts).

    Returns:
        A list of ``(question, answers)`` tuples in the order the QUERY blocks
        appear, where ``answers`` is a list of ``(text, confidence)`` tuples.
        ``answers`` is empty when the query has no linked result.
    """
    # Index the results first: a result block may appear before or after the
    # query that references it.
    results_by_id = {
        block.get("Id"): block
        for block in blocks
        if block.get("BlockType") == "QUERY_RESULT"
    }

    pairs = []
    for block in blocks:
        if block.get("BlockType") != "QUERY":
            continue
        question = block.get("Query", {}).get("Text", "")
        answer_ids = [
            block_id
            for relationship in block.get("Relationships") or []
            if relationship.get("Type") == "ANSWER"
            for block_id in relationship.get("Ids") or []
        ]
        answers = [
            (results_by_id[block_id].get("Text", ""), results_by_id[block_id].get("Confidence", 0))
            for block_id in answer_ids
            if block_id in results_by_id
        ]
        pairs.append((question, answers))
    return pairs


def main() -> None:
    """Run the queries against `medical_form.png` and print text plus answers."""
    # `AmazonTextractConverter` is provided by the import at the top of this file.
    converter = AmazonTextractConverter()

    results = converter.run(
        sources=["medical_form.png"],
        queries=["What is the patient name?", "What is the date of birth?", "What is the diagnosis?"],
    )

    for doc in results["documents"]:
        print("--- Extracted text ---")
        print(doc.content)
        print()

    # Iterate instead of indexing `[0]` so an empty response list does not
    # raise IndexError (assumes one raw response dict per converted source).
    for raw in results["raw_textract_response"]:
        for question, answers in pair_queries_with_answers(raw.get("Blocks", [])):
            print(f"Q: {question}")
            if not answers:
                print("A: (no answer found)")
            for answer, confidence in answers:
                print(f"A: {answer} (confidence: {confidence:.1f}%)")


if __name__ == "__main__":
    main()
0 commit comments