Skip to content

Commit fb97fc6

Browse files
refactor: promote _is_reasoning_model and _resolve_model_name to public API
Remove underscore prefix from helper functions in agent_builder.py to make them part of the supported public API, eliminating fragile cross-module dependency on private internals from map_handler.py. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 83e189a commit fb97fc6

5 files changed

Lines changed: 751 additions & 8 deletions

File tree

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Standalone deployment for LLM Token Usage Workbook
2+
// Connects to an existing Application Insights instance from any content processing RG
3+
4+
targetScope = 'resourceGroup'
5+
6+
@description('Full resource ID of the Application Insights instance to query.')
7+
param appInsightsResourceId string
8+
9+
@description('Azure region for the workbook resource.')
10+
param location string = resourceGroup().location
11+
12+
var workbookId = guid(resourceGroup().id, 'token-usage-workbook')
13+
var workbookTemplate = loadTextContent('token-usage-workbook.json')
14+
var workbookContent = replace(workbookTemplate, '__APP_INSIGHTS_RESOURCE_ID__', appInsightsResourceId)
15+
16+
resource workbook 'Microsoft.Insights/workbooks@2022-04-01' = {
17+
name: workbookId
18+
location: location
19+
kind: 'shared'
20+
properties: {
21+
displayName: 'LLM Token Usage Dashboard'
22+
category: 'workbook'
23+
sourceId: appInsightsResourceId
24+
serializedData: workbookContent
25+
}
26+
}
27+
28+
output workbookName string = workbook.name
29+
output workbookId string = workbook.id
Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
// ============================================================
2+
// KQL Queries for LLM Token Usage Monitoring
3+
// Content Processing Solution Accelerator
4+
// Run these in Application Insights > Logs
5+
//
6+
// DEDUPLICATION: Each event includes a deterministic event_id
7+
// (SHA-256 of event_name|process_id|agent_name|model). Queries
8+
// use take_any() grouped by event_id to naturally eliminate duplicates
9+
// without accelerator-specific logic.
10+
// ============================================================
11+
12+
// 1. Overall token usage summary
13+
// Dedup: deduplicate by event_id, then aggregate
14+
customEvents
15+
| where name == 'LLM_Agent_Token_Usage'
16+
| where timestamp > ago(7d)
17+
| extend event_id = tostring(customDimensions['event_id'])
18+
| extend process_id = tostring(customDimensions['process_id'])
19+
| extend input_tokens = toint(customDimensions['input_tokens'])
20+
| extend output_tokens = toint(customDimensions['output_tokens'])
21+
| extend total_tokens = toint(customDimensions['total_tokens'])
22+
| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), process_id=take_any(process_id) by event_id
23+
| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), total_tokens=sum(total_tokens) by process_id
24+
| summarize
25+
TotalDocuments = count(),
26+
TotalInputTokens = sum(input_tokens),
27+
TotalOutputTokens = sum(output_tokens),
28+
TotalTokens = sum(total_tokens),
29+
AvgTokensPerDocument = round(avg(total_tokens), 0)
30+
31+
// 2. Token usage by pipeline step (agent)
32+
// Dedup: deduplicate by event_id, then sum by agent
33+
customEvents
34+
| where name == 'LLM_Agent_Token_Usage'
35+
| where timestamp > ago(7d)
36+
| extend event_id = tostring(customDimensions['event_id'])
37+
| extend agent = tostring(customDimensions['agent_name'])
38+
| extend input_tokens = toint(customDimensions['input_tokens'])
39+
| extend output_tokens = toint(customDimensions['output_tokens'])
40+
| extend total_tokens = toint(customDimensions['total_tokens'])
41+
| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), agent=take_any(agent) by event_id
42+
| summarize
43+
InputTokens = sum(input_tokens),
44+
OutputTokens = sum(output_tokens),
45+
TotalTokens = sum(total_tokens),
46+
Invocations = count()
47+
by Step = agent
48+
| project Step, InputTokens, OutputTokens, TotalTokens, Invocations
49+
| order by TotalTokens desc
50+
51+
// 3. Token usage over time (hourly)
52+
customEvents
53+
| where name == 'LLM_Agent_Token_Usage'
54+
| where timestamp > ago(7d)
55+
| extend event_id = tostring(customDimensions['event_id'])
56+
| extend process_id = tostring(customDimensions['process_id'])
57+
| extend input_tokens = toint(customDimensions['input_tokens'])
58+
| extend output_tokens = toint(customDimensions['output_tokens'])
59+
| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), timestamp=min(timestamp), process_id=take_any(process_id) by event_id
60+
| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), timestamp=min(timestamp) by process_id
61+
| summarize InputTokens = sum(input_tokens), OutputTokens = sum(output_tokens) by bin(timestamp, 1h)
62+
| order by timestamp asc
63+
| render areachart
64+
65+
// 4. Token distribution by agent (pie chart)
66+
customEvents
67+
| where name == 'LLM_Agent_Token_Usage'
68+
| where timestamp > ago(7d)
69+
| extend event_id = tostring(customDimensions['event_id'])
70+
| extend agent = tostring(customDimensions['agent_name'])
71+
| extend total_tokens = toint(customDimensions['total_tokens'])
72+
| summarize total_tokens=take_any(total_tokens), agent=take_any(agent) by event_id
73+
| summarize TotalTokens = sum(total_tokens) by agent
74+
| render piechart
75+
76+
// 5. Estimated daily cost (GPT-4o pricing: $2.50/1M input, $10.00/1M output)
77+
let input_price_per_million = 2.50;
78+
let output_price_per_million = 10.00;
79+
customEvents
80+
| where name == 'LLM_Agent_Token_Usage'
81+
| where timestamp > ago(30d)
82+
| extend event_id = tostring(customDimensions['event_id'])
83+
| extend process_id = tostring(customDimensions['process_id'])
84+
| extend input_tokens = toint(customDimensions['input_tokens'])
85+
| extend output_tokens = toint(customDimensions['output_tokens'])
86+
| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), timestamp=min(timestamp), process_id=take_any(process_id) by event_id
87+
| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), timestamp=min(timestamp) by process_id
88+
| summarize TotalInput = sum(input_tokens), TotalOutput = sum(output_tokens) by bin(timestamp, 1d)
89+
| extend InputCost = round(TotalInput * input_price_per_million / 1000000.0, 4)
90+
| extend OutputCost = round(TotalOutput * output_price_per_million / 1000000.0, 4)
91+
| extend TotalCost = InputCost + OutputCost
92+
| project Day = timestamp, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost
93+
| order by Day desc
94+
95+
// 6. Estimated cost by model (adjust pricing per model)
96+
let gpt4o_input = 2.50;
97+
let gpt4o_output = 10.00;
98+
let gpt4o_mini_input = 0.15;
99+
let gpt4o_mini_output = 0.60;
100+
customEvents
101+
| where name == 'LLM_Model_Token_Usage'
102+
| where timestamp > ago(30d)
103+
| extend event_id = tostring(customDimensions['event_id'])
104+
| extend model = tostring(customDimensions['model_deployment_name'])
105+
| extend input_tokens = toint(customDimensions['input_tokens'])
106+
| extend output_tokens = toint(customDimensions['output_tokens'])
107+
| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), model=take_any(model) by event_id
108+
| summarize TotalInput = sum(input_tokens), TotalOutput = sum(output_tokens) by model
109+
| extend InputPrice = case(
110+
model has "mini", gpt4o_mini_input,
111+
gpt4o_input)
112+
| extend OutputPrice = case(
113+
model has "mini", gpt4o_mini_output,
114+
gpt4o_output)
115+
| extend InputCost = round(TotalInput * InputPrice / 1000000.0, 4)
116+
| extend OutputCost = round(TotalOutput * OutputPrice / 1000000.0, 4)
117+
| extend TotalCost = InputCost + OutputCost
118+
| project Model = model, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost
119+
| order by TotalCost desc
120+
121+
// 7. Token usage by model deployment
122+
customEvents
123+
| where name == 'LLM_Model_Token_Usage'
124+
| where timestamp > ago(7d)
125+
| extend event_id = tostring(customDimensions['event_id'])
126+
| extend model = tostring(customDimensions['model_deployment_name'])
127+
| extend process_id = tostring(customDimensions['process_id'])
128+
| extend input_tokens = toint(customDimensions['input_tokens'])
129+
| extend output_tokens = toint(customDimensions['output_tokens'])
130+
| extend total_tokens = toint(customDimensions['total_tokens'])
131+
| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), model=take_any(model), process_id=take_any(process_id) by event_id
132+
| summarize
133+
InputTokens = sum(input_tokens),
134+
OutputTokens = sum(output_tokens),
135+
TotalTokens = sum(total_tokens),
136+
Invocations = dcount(process_id)
137+
by Model = model
138+
| order by TotalTokens desc
139+
140+
// 8. Step-to-model mapping with token usage
141+
customEvents
142+
| where name == 'LLM_Agent_Token_Usage'
143+
| where timestamp > ago(7d)
144+
| extend event_id = tostring(customDimensions['event_id'])
145+
| extend agent = tostring(customDimensions['agent_name'])
146+
| extend model = tostring(customDimensions['model_deployment_name'])
147+
| extend process_id = tostring(customDimensions['process_id'])
148+
| extend input_tokens = toint(customDimensions['input_tokens'])
149+
| extend output_tokens = toint(customDimensions['output_tokens'])
150+
| extend total_tokens = toint(customDimensions['total_tokens'])
151+
| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), agent=take_any(agent), model=take_any(model), process_id=take_any(process_id) by event_id
152+
| summarize
153+
InputTokens = sum(input_tokens),
154+
OutputTokens = sum(output_tokens),
155+
TotalTokens = sum(total_tokens),
156+
Invocations = dcount(process_id)
157+
by Step = agent, Model = model
158+
| order by TotalTokens desc
159+
160+
// 9. Top 20 token consumers by document
161+
customEvents
162+
| where name == 'LLM_Agent_Token_Usage'
163+
| where timestamp > ago(7d)
164+
| extend event_id = tostring(customDimensions['event_id'])
165+
| extend process_id = tostring(customDimensions['process_id'])
166+
| extend total_tokens = toint(customDimensions['total_tokens'])
167+
| summarize total_tokens=take_any(total_tokens), process_id=take_any(process_id) by event_id
168+
| summarize TotalTokens = sum(total_tokens) by process_id
169+
| join kind=leftouter (
170+
customEvents
171+
| where name == 'LLM_Token_Usage_Summary'
172+
| where timestamp > ago(7d)
173+
| extend process_id = tostring(customDimensions['process_id'])
174+
| extend file_name = tostring(customDimensions['file_name'])
175+
| summarize file_name=take_any(file_name) by process_id
176+
) on process_id
177+
| project process_id, file_name, TotalTokens
178+
| order by TotalTokens desc
179+
| take 20
180+
181+
// 10. Token usage by file type (PDF, DOCX, image, etc.)
182+
customEvents
183+
| where name == 'LLM_Agent_Token_Usage'
184+
| where timestamp > ago(7d)
185+
| extend event_id = tostring(customDimensions['event_id'])
186+
| extend process_id = tostring(customDimensions['process_id'])
187+
| extend input_tokens = toint(customDimensions['input_tokens'])
188+
| extend output_tokens = toint(customDimensions['output_tokens'])
189+
| extend total_tokens = toint(customDimensions['total_tokens'])
190+
| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), process_id=take_any(process_id) by event_id
191+
| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), total_tokens=sum(total_tokens) by process_id
192+
| join kind=leftouter (
193+
customEvents
194+
| where name == 'LLM_Token_Usage_Summary'
195+
| where timestamp > ago(7d)
196+
| extend process_id = tostring(customDimensions['process_id'])
197+
| extend mime_type = tostring(customDimensions['file_mime_type'])
198+
| summarize mime_type=take_any(mime_type) by process_id
199+
) on process_id
200+
| extend file_type = case(
201+
mime_type has "pdf", "PDF",
202+
mime_type has "image", "Image",
203+
mime_type contains "word" or mime_type has "docx", "Word", // contains: 'has' is whole-term and misses "msword" / "wordprocessingml"
204+
mime_type contains "excel" or mime_type contains "spreadsheetml" or mime_type has "xlsx", "Excel", // also covers the OOXML spreadsheet MIME type
205+
mime_type has "text", "Text",
206+
"Other")
207+
| summarize
208+
Documents = count(),
209+
TotalInputTokens = sum(input_tokens),
210+
TotalOutputTokens = sum(output_tokens),
211+
TotalTokens = sum(total_tokens),
212+
AvgTokensPerDoc = round(avg(total_tokens), 0)
213+
by FileType = file_type
214+
| order by TotalTokens desc
215+
216+
// ============================================================
217+
// Processing Time Queries
218+
// ============================================================
219+
220+
// 11. Step completion time (seconds from document start to step completion)
221+
customEvents
222+
| where name == 'LLM_Agent_Token_Usage'
223+
| where timestamp > ago(7d)
224+
| extend agent = tostring(customDimensions['agent_name'])
225+
| extend process_id = tostring(customDimensions['process_id'])
226+
| join kind=inner (
227+
customEvents
228+
| where name == 'LLM_Agent_Token_Usage'
229+
| where timestamp > ago(7d)
230+
| extend process_id = tostring(customDimensions['process_id'])
231+
| summarize DocStartTime = min(timestamp) by process_id
232+
) on process_id
233+
| extend StepDurationSeconds = round(datetime_diff('millisecond', timestamp, DocStartTime) / 1000.0, 2)
234+
| summarize
235+
AvgCompletionTime = round(avg(StepDurationSeconds), 2),
236+
P50CompletionTime = round(percentile(StepDurationSeconds, 50), 2),
237+
P90CompletionTime = round(percentile(StepDurationSeconds, 90), 2),
238+
MaxCompletionTime = round(max(StepDurationSeconds), 2),
239+
Invocations = count()
240+
by Step = agent
241+
| order by AvgCompletionTime desc
242+
243+
// 12. OpenAI API call durations from dependencies table
244+
dependencies
245+
| where timestamp > ago(7d)
246+
| where target has "openai" or name has "openai" or name has "chat" // OpenAI calls only; a bare type == "HTTP" clause would admit every HTTP dependency
247+
| where success == true
248+
| extend durationSeconds = round(duration / 1000.0, 2)
249+
| summarize
250+
TotalCalls = count(),
251+
AvgSeconds = round(avg(durationSeconds), 2),
252+
P50Seconds = round(percentile(durationSeconds, 50), 2),
253+
P90Seconds = round(percentile(durationSeconds, 90), 2),
254+
MaxSeconds = round(max(durationSeconds), 2)
255+
by OperationName = name
256+
| order by TotalCalls desc
257+
| take 10
258+
259+
// 13. Per-document step timeline
260+
customEvents
261+
| where name == 'LLM_Agent_Token_Usage'
262+
| where timestamp > ago(7d)
263+
| extend agent = tostring(customDimensions['agent_name'])
264+
| extend process_id = tostring(customDimensions['process_id'])
265+
| join kind=inner (
266+
customEvents
267+
| where name == 'LLM_Agent_Token_Usage'
268+
| where timestamp > ago(7d)
269+
| extend process_id = tostring(customDimensions['process_id'])
270+
| summarize DocStartTime = min(timestamp) by process_id
271+
) on process_id
272+
| extend StepCompletedAt = round(datetime_diff('millisecond', timestamp, DocStartTime) / 1000.0, 2)
273+
| project timestamp, process_id, Step=agent, StepCompletedAtSeconds=StepCompletedAt
274+
| order by process_id, timestamp asc
275+
276+
// 14. Total document processing time (first to last step)
277+
customEvents
278+
| where name == 'LLM_Agent_Token_Usage'
279+
| where timestamp > ago(7d)
280+
| extend process_id = tostring(customDimensions['process_id'])
281+
| summarize StartTime = min(timestamp), EndTime = max(timestamp) by process_id
282+
| extend TotalSeconds = round(datetime_diff('millisecond', EndTime, StartTime) / 1000.0, 2)
283+
| summarize
284+
DocumentsProcessed = count(),
285+
AvgSeconds = round(avg(TotalSeconds), 2),
286+
P50Seconds = round(percentile(TotalSeconds, 50), 2),
287+
P90Seconds = round(percentile(TotalSeconds, 90), 2),
288+
MaxSeconds = round(max(TotalSeconds), 2)
289+
290+
// ============================================================
291+
// Percentiles & Trends
292+
// ============================================================
293+
294+
// 15. Token usage percentiles per document
295+
customEvents
296+
| where name == 'LLM_Agent_Token_Usage'
297+
| where timestamp > ago(7d)
298+
| extend process_id = tostring(customDimensions['process_id'])
299+
| extend agent = tostring(customDimensions['agent_name'])
300+
| extend total_tokens = toint(customDimensions['total_tokens'])
301+
| summarize total_tokens=max(total_tokens) by agent, process_id
302+
| summarize total_tokens=sum(total_tokens) by process_id
303+
| summarize
304+
p50 = percentile(total_tokens, 50),
305+
p90 = percentile(total_tokens, 90),
306+
p95 = percentile(total_tokens, 95),
307+
p99 = percentile(total_tokens, 99),
308+
Max = max(total_tokens)
309+
310+
// 16. Daily processing volume with token usage
311+
customEvents
312+
| where name == 'LLM_Agent_Token_Usage'
313+
| where timestamp > ago(30d)
314+
| extend process_id = tostring(customDimensions['process_id'])
315+
| extend agent = tostring(customDimensions['agent_name'])
316+
| extend total_tokens = toint(customDimensions['total_tokens'])
317+
| summarize total_tokens=max(total_tokens), timestamp=min(timestamp) by agent, process_id
318+
| summarize total_tokens=sum(total_tokens), timestamp=min(timestamp) by process_id
319+
| summarize
320+
DocumentsProcessed = count(),
321+
TotalTokens = sum(total_tokens),
322+
AvgTokensPerDoc = round(avg(total_tokens), 0),
323+
MaxTokensPerDoc = max(total_tokens)
324+
by Day = bin(timestamp, 1d)
325+
| order by Day desc

0 commit comments

Comments
 (0)