forked from microsoft/agent-framework
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path04_invoice_processing.py
More file actions
193 lines (161 loc) · 6.96 KB
/
04_invoice_processing.py
File metadata and controls
193 lines (161 loc) · 6.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Copyright (c) Microsoft. All rights reserved.
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "agent-framework-azure-contentunderstanding",
# "agent-framework-foundry",
# "azure-identity",
# "pydantic",
# ]
# ///
# Run with: uv run packages/azure-contentunderstanding/samples/01-get-started/04_invoice_processing.py
import asyncio
import os
from pathlib import Path
from agent_framework import Agent, AgentSession, Content, Message
from agent_framework.foundry import ContentUnderstandingContextProvider, FoundryChatClient
from azure.identity import AzureCliCredential
from dotenv import load_dotenv
from pydantic import BaseModel, Field
load_dotenv()
"""
Invoice Processing — Structured output with prebuilt-invoice analyzer
This sample demonstrates CU's structured field extraction combined with
LLM structured output (Pydantic model). The prebuilt-invoice analyzer extracts
typed fields (VendorName, InvoiceTotal, DueDate, LineItems, etc.) with
confidence scores. We use output_sections=["fields"] only (no markdown needed)
since we want the LLM to produce a structured JSON response from the extracted
fields, not summarize document text.
Environment variables:
FOUNDRY_PROJECT_ENDPOINT — Azure AI Foundry project endpoint
FOUNDRY_MODEL — Model deployment name (e.g. gpt-4.1)
AZURE_CONTENTUNDERSTANDING_ENDPOINT — CU endpoint URL
"""
SAMPLE_PDF_PATH = Path(__file__).resolve().parents[1] / "shared" / "sample_assets" / "invoice.pdf"
# Structured output model — the LLM will return JSON matching this schema
# Structured output models — the LLM returns JSON matching this schema.
#
# Note: the prebuilt-invoice analyzer extracts an extensive set of fields
# (VendorName, BillingAddress, ShippingAddress, TaxDetails, PONumber, etc.).
# This sample defines a simplified schema to extract only the fields of
# interest to the caller. The LLM maps the full CU field output to this
# subset automatically.
# Learn more about prebuilt analyzers: https://learn.microsoft.com/azure/ai-services/content-understanding/concepts/prebuilt-analyzers
class LineItem(BaseModel):
description: str
quantity: float | None = None
unit_price: float | None = None
amount: float | None = None
class LowConfidenceField(BaseModel):
field_name: str
confidence: float
class InvoiceResult(BaseModel):
vendor_name: str
total_amount: float | None = None
currency: str = "USD"
due_date: str | None = None
line_items: list[LineItem] = Field(default_factory=list)
low_confidence_fields: list[LowConfidenceField] = Field(
default_factory=list,
description="Fields with confidence < 0.8, including their confidence score",
)
async def main() -> None:
# 1. Set up credentials and CU context provider
credential = AzureCliCredential()
# Default analyzer is prebuilt-documentSearch (RAG-optimized).
# Per-file override via additional_properties["analyzer_id"] lets us
# use prebuilt-invoice for structured field extraction on specific files.
#
# Only request "fields" (not "markdown") — we want the extracted typed
# fields for structured output, not the raw document text.
cu = ContentUnderstandingContextProvider(
endpoint=os.environ["AZURE_CONTENTUNDERSTANDING_ENDPOINT"],
credential=credential,
analyzer_id="prebuilt-documentSearch", # default for all files
max_wait=None, # wait until CU analysis finishes
output_sections=["fields"], # fields only — structured output doesn't need markdown
)
# 2. Set up the LLM client
client = FoundryChatClient(
project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
model=os.environ["FOUNDRY_MODEL"],
credential=credential,
)
# 3. Create agent and session
async with cu:
agent = Agent(
client=client,
name="InvoiceProcessor",
instructions=(
"You are an invoice processing assistant. Extract invoice data from "
"the provided CU fields (JSON with confidence scores). Return structured "
"output matching the requested schema. Flag fields with confidence < 0.8 "
"in the low_confidence_fields list."
),
context_providers=[cu],
)
session = AgentSession()
# 4. Upload an invoice PDF — uses structured output (Pydantic model)
print("--- Upload Invoice (Structured Output) ---")
pdf_bytes = SAMPLE_PDF_PATH.read_bytes()
response = await agent.run(
Message(
role="user",
contents=[
Content.from_text(
"Process this invoice. Extract the vendor name, total amount, due date, and all line items."
),
Content.from_data(
pdf_bytes,
"application/pdf",
# Per-file analyzer override: use prebuilt-invoice for
# structured field extraction (VendorName, InvoiceTotal, etc.)
# instead of the provider default (prebuilt-documentSearch).
additional_properties={
"filename": SAMPLE_PDF_PATH.name,
"analyzer_id": "prebuilt-invoice",
},
),
],
),
session=session,
options={"response_format": InvoiceResult},
)
# Parse the structured output from JSON text
try:
invoice = InvoiceResult.model_validate_json(response.text)
print(f"Vendor: {invoice.vendor_name}")
print(f"Total: {invoice.currency} {invoice.total_amount}")
print(f"Due date: {invoice.due_date}")
print(f"Line items ({len(invoice.line_items)}):")
for item in invoice.line_items:
print(f" - {item.description}: {item.amount}")
if invoice.low_confidence_fields:
print("⚠ Low confidence fields:")
for f in invoice.low_confidence_fields:
print(f" - {f.field_name}: {f.confidence:.3f}")
except Exception:
print(f"Agent (raw): {response.text}\n")
# 5. Follow-up: free-text question about the invoice
print("\n--- Follow-up (Free Text) ---")
response = await agent.run(
"What is the payment term? Are there any fields with low confidence?",
session=session,
)
print(f"Agent: {response}\n")
if __name__ == "__main__":
asyncio.run(main())
"""
Sample output:
--- Upload Invoice (Structured Output) ---
Vendor: CONTOSO LTD.
Total: USD 110.0
Due date: 2019-12-15
Line items (3):
- Consulting Services: 60.0
- Document Fee: 30.0
- Printing Fee: 10.0
⚠ Low confidence: VendorName, CustomerName
--- Follow-up (Free Text) ---
Agent: The payment terms are not explicitly stated on the invoice...
"""