Skip to content

Commit 7520e16

Browse files
Fix token usage workbook: dedup events, time range params, portable resource IDs
- All token queries now use max() by (agent, process_id) dedup pattern to handle duplicate custom events from ContentProcessor/Workflow
- Replaced hardcoded ago(7d) with TimeRange workbook parameter so the time picker actually filters data correctly
- Removed hardcoded App Insights resource IDs from workbook JSON, making it portable for redeployment to any RG via deploy-workbook.bicep
- Updated token-usage-queries.kql to match all workbook queries with dedup pattern and reorganized (16 queries total)
- Fixed registries: null in main.bicep/main.json for container apps

Queries fixed with dedup: Overall Summary, Pipeline Step, Token Over Time, Daily Cost, Step-to-Model, Top 20 Consumers, Token by File Type, Percentiles, Daily Volume, Pie Chart

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent b49f863 commit 7520e16

4 files changed

Lines changed: 197 additions & 190 deletions

File tree

infra/dashboards/token-usage-queries.kql

Lines changed: 93 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,25 @@
22
// KQL Queries for LLM Token Usage Monitoring
33
// Content Processing Solution Accelerator
44
// Run these in Application Insights > Logs
5+
//
6+
// IMPORTANT: All LLM_Agent_Token_Usage token queries use a deduplication pattern:
7+
// max() by (agent, process_id) first, then sum()
8+
// This handles duplicate custom events that can occur when
9+
// both ContentProcessor and ContentProcessorWorkflow emit
10+
// telemetry through the same Application Insights instance.
511
// ============================================================
612

7-
// 1. Overall token usage summary (last 7 days)
13+
// 1. Overall token usage summary
14+
// Dedup: max by (agent, process_id) → sum by process_id → aggregate
815
customEvents
916
| where name == 'LLM_Agent_Token_Usage'
1017
| where timestamp > ago(7d)
1118
| extend process_id = tostring(customDimensions['process_id'])
19+
| extend agent = tostring(customDimensions['agent_name'])
1220
| extend input_tokens = toint(customDimensions['input_tokens'])
1321
| extend output_tokens = toint(customDimensions['output_tokens'])
1422
| extend total_tokens = toint(customDimensions['total_tokens'])
23+
| summarize input_tokens=max(input_tokens), output_tokens=max(output_tokens), total_tokens=max(total_tokens) by agent, process_id
1524
| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), total_tokens=sum(total_tokens) by process_id
1625
| summarize
1726
TotalDocuments = count(),
@@ -21,42 +30,61 @@ customEvents
2130
AvgTokensPerDocument = round(avg(total_tokens), 0)
2231

2332
// 2. Token usage by pipeline step (agent)
33+
// Dedup: max by (agent, process_id) → sum by agent
2434
customEvents
2535
| where name == 'LLM_Agent_Token_Usage'
2636
| where timestamp > ago(7d)
2737
| extend agent = tostring(customDimensions['agent_name'])
38+
| extend process_id = tostring(customDimensions['process_id'])
2839
| extend input_tokens = toint(customDimensions['input_tokens'])
2940
| extend output_tokens = toint(customDimensions['output_tokens'])
3041
| extend total_tokens = toint(customDimensions['total_tokens'])
42+
| summarize input_tokens=max(input_tokens), output_tokens=max(output_tokens), total_tokens=max(total_tokens) by agent, process_id
3143
| summarize
3244
InputTokens = sum(input_tokens),
3345
OutputTokens = sum(output_tokens),
3446
TotalTokens = sum(total_tokens),
3547
Invocations = count()
3648
by Step = agent
49+
| project Step, InputTokens, OutputTokens, TotalTokens, Invocations
3750
| order by TotalTokens desc
3851

3952
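// 3. Token usage over time (hourly)
// Dedup: max by (agent, process_id) → sum by process_id → bin by each document's earliest event timestamp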
4053
customEvents
4154
| where name == 'LLM_Agent_Token_Usage'
4255
| where timestamp > ago(7d)
4356
| extend process_id = tostring(customDimensions['process_id'])
57+
| extend agent = tostring(customDimensions['agent_name'])
4458
| extend input_tokens = toint(customDimensions['input_tokens'])
4559
| extend output_tokens = toint(customDimensions['output_tokens'])
60+
| summarize input_tokens=max(input_tokens), output_tokens=max(output_tokens), timestamp=min(timestamp) by agent, process_id
4661
| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), timestamp=min(timestamp) by process_id
4762
| summarize InputTokens = sum(input_tokens), OutputTokens = sum(output_tokens) by bin(timestamp, 1h)
4863
| order by timestamp asc
4964
| render areachart
5065

51-
// 4. Estimated cost (GPT-4o pricing: $2.50/1M input, $10.00/1M output)
66+
// 4. Token distribution by agent (pie chart)
67+
customEvents
68+
| where name == 'LLM_Agent_Token_Usage'
69+
| where timestamp > ago(7d)
70+
| extend agent = tostring(customDimensions['agent_name'])
71+
| extend process_id = tostring(customDimensions['process_id'])
72+
| extend total_tokens = toint(customDimensions['total_tokens'])
73+
| summarize total_tokens=max(total_tokens) by agent, process_id
74+
| summarize TotalTokens = sum(total_tokens) by agent
75+
| render piechart
76+
77+
// 5. Estimated daily cost (GPT-4o pricing: $2.50/1M input, $10.00/1M output)
5278
let input_price_per_million = 2.50;
5379
let output_price_per_million = 10.00;
5480
customEvents
5581
| where name == 'LLM_Agent_Token_Usage'
5682
| where timestamp > ago(30d)
5783
| extend process_id = tostring(customDimensions['process_id'])
84+
| extend agent = tostring(customDimensions['agent_name'])
5885
| extend input_tokens = toint(customDimensions['input_tokens'])
5986
| extend output_tokens = toint(customDimensions['output_tokens'])
87+
| summarize input_tokens=max(input_tokens), output_tokens=max(output_tokens), timestamp=min(timestamp) by agent, process_id
6088
| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), timestamp=min(timestamp) by process_id
6189
| summarize TotalInput = sum(input_tokens), TotalOutput = sum(output_tokens) by bin(timestamp, 1d)
6290
| extend InputCost = round(TotalInput * input_price_per_million / 1000000.0, 4)
@@ -65,109 +93,7 @@ customEvents
6593
| project Day = timestamp, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost
6694
| order by Day desc
6795

68-
// 5. Top token consumers by document
69-
customEvents
70-
| where name == 'LLM_Agent_Token_Usage'
71-
| where timestamp > ago(7d)
72-
| extend process_id = tostring(customDimensions['process_id'])
73-
| extend total_tokens = toint(customDimensions['total_tokens'])
74-
| summarize TotalTokens = sum(total_tokens) by process_id
75-
| join kind=leftouter (
76-
customEvents
77-
| where name == 'LLM_Token_Usage_Summary'
78-
| where timestamp > ago(7d)
79-
| extend process_id = tostring(customDimensions['process_id'])
80-
| extend file_name = tostring(customDimensions['file_name'])
81-
| summarize file_name=take_any(file_name) by process_id
82-
) on process_id
83-
| project process_id, file_name, TotalTokens
84-
| order by TotalTokens desc
85-
| take 20
86-
87-
// 6. Pipeline step token distribution (pie chart)
88-
customEvents
89-
| where name == 'LLM_Agent_Token_Usage'
90-
| where timestamp > ago(7d)
91-
| extend agent = tostring(customDimensions['agent_name'])
92-
| extend total_tokens = toint(customDimensions['total_tokens'])
93-
| summarize TotalTokens = sum(total_tokens) by agent
94-
| render piechart
95-
96-
// 7. Token usage percentiles per document
97-
customEvents
98-
| where name == 'LLM_Agent_Token_Usage'
99-
| where timestamp > ago(7d)
100-
| extend process_id = tostring(customDimensions['process_id'])
101-
| extend total_tokens = toint(customDimensions['total_tokens'])
102-
| summarize total_tokens=sum(total_tokens) by process_id
103-
| summarize
104-
p50 = percentile(total_tokens, 50),
105-
p90 = percentile(total_tokens, 90),
106-
p95 = percentile(total_tokens, 95),
107-
p99 = percentile(total_tokens, 99),
108-
Max = max(total_tokens)
109-
110-
// 8. Token usage by step grouping (Extraction vs Analysis vs Safety)
111-
let StepGroupMapping = datatable(agent:string, StepGroup:string) [
112-
"MapHandler", "Extraction",
113-
"RAI", "Safety",
114-
"Summarize", "Analysis",
115-
"GapAnalysis", "Analysis"
116-
];
117-
customEvents
118-
| where name == 'LLM_Agent_Token_Usage'
119-
| where timestamp > ago(7d)
120-
| extend agent = tostring(customDimensions['agent_name'])
121-
| extend input_tokens = toint(customDimensions['input_tokens'])
122-
| extend output_tokens = toint(customDimensions['output_tokens'])
123-
| extend total_tokens = toint(customDimensions['total_tokens'])
124-
| lookup kind=leftouter StepGroupMapping on agent
125-
| extend StepGroup = iff(isempty(StepGroup), "Unknown", StepGroup)
126-
| summarize
127-
TotalRequests = count(),
128-
TotalInputTokens = sum(input_tokens),
129-
TotalOutputTokens = sum(output_tokens),
130-
TotalTokens = sum(total_tokens),
131-
AvgTokensPerRequest = round(avg(total_tokens), 0)
132-
by StepGroup
133-
| order by TotalTokens desc
134-
135-
// 9. Token usage by model deployment
136-
customEvents
137-
| where name == 'LLM_Model_Token_Usage'
138-
| where timestamp > ago(7d)
139-
| extend model = tostring(customDimensions['model_deployment_name'])
140-
| extend input_tokens = toint(customDimensions['input_tokens'])
141-
| extend output_tokens = toint(customDimensions['output_tokens'])
142-
| extend total_tokens = toint(customDimensions['total_tokens'])
143-
| summarize
144-
InputTokens = sum(input_tokens),
145-
OutputTokens = sum(output_tokens),
146-
TotalTokens = sum(total_tokens),
147-
Invocations = count()
148-
by Model = model
149-
| order by TotalTokens desc
150-
151-
// 10. Token usage by model over time (hourly)
152-
customEvents
153-
| where name == 'LLM_Model_Token_Usage'
154-
| where timestamp > ago(7d)
155-
| extend model = tostring(customDimensions['model_deployment_name'])
156-
| extend total_tokens = toint(customDimensions['total_tokens'])
157-
| summarize TotalTokens = sum(total_tokens) by bin(timestamp, 1h), model
158-
| order by timestamp asc
159-
| render areachart
160-
161-
// 11. Model token distribution (pie chart)
162-
customEvents
163-
| where name == 'LLM_Model_Token_Usage'
164-
| where timestamp > ago(7d)
165-
| extend model = tostring(customDimensions['model_deployment_name'])
166-
| extend total_tokens = toint(customDimensions['total_tokens'])
167-
| summarize TotalTokens = sum(total_tokens) by model
168-
| render piechart
169-
170-
// 12. Estimated cost by model (adjust pricing per model)
96+
// 6. Estimated cost by model (adjust pricing per model)
17197
let gpt4o_input = 2.50;
17298
let gpt4o_output = 10.00;
17399
let gpt4o_mini_input = 0.15;
@@ -191,11 +117,10 @@ customEvents
191117
| project Model = model, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost
192118
| order by TotalCost desc
193119

194-
// 13. Step-to-model mapping with token usage
120+
// 7. Token usage by model deployment
195121
customEvents
196-
| where name == 'LLM_Agent_Token_Usage'
122+
| where name == 'LLM_Model_Token_Usage'
197123
| where timestamp > ago(7d)
198-
| extend agent = tostring(customDimensions['agent_name'])
199124
| extend model = tostring(customDimensions['model_deployment_name'])
200125
| extend input_tokens = toint(customDimensions['input_tokens'])
201126
| extend output_tokens = toint(customDimensions['output_tokens'])
@@ -205,53 +130,59 @@ customEvents
205130
OutputTokens = sum(output_tokens),
206131
TotalTokens = sum(total_tokens),
207132
Invocations = count()
208-
by Step = agent, Model = model
133+
by Model = model
209134
| order by TotalTokens desc
210135

211-
// 14. RAI agent specific token usage
136+
// 8. Step-to-model mapping with token usage
212137
customEvents
213138
| where name == 'LLM_Agent_Token_Usage'
214139
| where timestamp > ago(7d)
215140
| extend agent = tostring(customDimensions['agent_name'])
216-
| where agent == "RAI"
141+
| extend model = tostring(customDimensions['model_deployment_name'])
142+
| extend process_id = tostring(customDimensions['process_id'])
217143
| extend input_tokens = toint(customDimensions['input_tokens'])
218144
| extend output_tokens = toint(customDimensions['output_tokens'])
219145
| extend total_tokens = toint(customDimensions['total_tokens'])
220-
| extend model = tostring(customDimensions['model_deployment_name'])
146+
| summarize input_tokens=max(input_tokens), output_tokens=max(output_tokens), total_tokens=max(total_tokens) by agent, model, process_id
221147
| summarize
222148
InputTokens = sum(input_tokens),
223149
OutputTokens = sum(output_tokens),
224150
TotalTokens = sum(total_tokens),
225-
Invocations = count()
226-
by Model = model
151+
Invocations = dcount(process_id)
152+
by Step = agent, Model = model
153+
| order by TotalTokens desc
227154

228-
// 15. OpenTelemetry auto-instrumented OpenAI calls (if available)
229-
dependencies
230-
| where name has "openai" or target has "openai"
155+
// 9. Top 20 token consumers by document
156+
customEvents
157+
| where name == 'LLM_Agent_Token_Usage'
231158
| where timestamp > ago(7d)
232-
| extend input_tokens = tolong(customDimensions["gen_ai.usage.input_tokens"])
233-
| extend output_tokens = tolong(customDimensions["gen_ai.usage.output_tokens"])
234-
| extend model = tostring(customDimensions["gen_ai.request.model"])
235-
| where isnotnull(input_tokens)
236-
| summarize
237-
Calls = count(),
238-
TotalInput = sum(input_tokens),
239-
TotalOutput = sum(output_tokens)
240-
by model
241-
| order by TotalInput desc
242-
243-
// ============================================================
244-
// Content Processing Specific Queries
245-
// ============================================================
159+
| extend process_id = tostring(customDimensions['process_id'])
160+
| extend agent = tostring(customDimensions['agent_name'])
161+
| extend total_tokens = toint(customDimensions['total_tokens'])
162+
| summarize total_tokens=max(total_tokens) by agent, process_id
163+
| summarize TotalTokens = sum(total_tokens) by process_id
164+
| join kind=leftouter (
165+
customEvents
166+
| where name == 'LLM_Token_Usage_Summary'
167+
| where timestamp > ago(7d)
168+
| extend process_id = tostring(customDimensions['process_id'])
169+
| extend file_name = tostring(customDimensions['file_name'])
170+
| summarize file_name=take_any(file_name) by process_id
171+
) on process_id
172+
| project process_id, file_name, TotalTokens
173+
| order by TotalTokens desc
174+
| take 20
246175

247-
// 16. Token usage by file type (PDF, DOCX, image, etc.)
176+
// 10. Token usage by file type (PDF, DOCX, image, etc.)
248177
customEvents
249178
| where name == 'LLM_Agent_Token_Usage'
250179
| where timestamp > ago(7d)
251180
| extend process_id = tostring(customDimensions['process_id'])
181+
| extend agent = tostring(customDimensions['agent_name'])
252182
| extend input_tokens = toint(customDimensions['input_tokens'])
253183
| extend output_tokens = toint(customDimensions['output_tokens'])
254184
| extend total_tokens = toint(customDimensions['total_tokens'])
185+
| summarize input_tokens=max(input_tokens), output_tokens=max(output_tokens), total_tokens=max(total_tokens) by agent, process_id
255186
| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), total_tokens=sum(total_tokens) by process_id
256187
| join kind=leftouter (
257188
customEvents
@@ -277,23 +208,11 @@ customEvents
277208
by FileType = file_type
278209
| order by TotalTokens desc
279210

280-
// 17. Per-document token breakdown by step
281-
customEvents
282-
| where name == 'LLM_Agent_Token_Usage'
283-
| where timestamp > ago(7d)
284-
| extend agent = tostring(customDimensions['agent_name'])
285-
| extend process_id = tostring(customDimensions['process_id'])
286-
| extend input_tokens = toint(customDimensions['input_tokens'])
287-
| extend output_tokens = toint(customDimensions['output_tokens'])
288-
| extend total_tokens = toint(customDimensions['total_tokens'])
289-
| summarize
290-
InputTokens = sum(input_tokens),
291-
OutputTokens = sum(output_tokens),
292-
TotalTokens = sum(total_tokens)
293-
by process_id, Step = agent
294-
| order by process_id, TotalTokens desc
211+
// ============================================================
212+
// Processing Time Queries
213+
// ============================================================
295214

296-
// 18. Step completion time (seconds from document start to step completion)
215+
// 11. Step completion time (seconds from document start to step completion)
297216
customEvents
298217
| where name == 'LLM_Agent_Token_Usage'
299218
| where timestamp > ago(7d)
@@ -316,7 +235,7 @@ customEvents
316235
by Step = agent
317236
| order by AvgCompletionTime desc
318237

319-
// 19. OpenAI API call durations from dependencies table
238+
// 12. OpenAI API call durations from dependencies table
320239
dependencies
321240
| where timestamp > ago(7d)
322241
| where target has "openai" or name has "chat" or type == "HTTP" or name has "openai"
@@ -332,7 +251,7 @@ dependencies
332251
| order by TotalCalls desc
333252
| take 10
334253

335-
// 20. Per-document step timeline
254+
// 13. Per-document step timeline
336255
customEvents
337256
| where name == 'LLM_Agent_Token_Usage'
338257
| where timestamp > ago(7d)
@@ -349,7 +268,7 @@ customEvents
349268
| project timestamp, process_id, Step=agent, StepCompletedAtSeconds=StepCompletedAt
350269
| order by process_id, timestamp asc
351270

352-
// 21. Total document processing time (first to last step)
271+
// 14. Total document processing time (first to last step)
353272
customEvents
354273
| where name == 'LLM_Agent_Token_Usage'
355274
| where timestamp > ago(7d)
@@ -363,12 +282,34 @@ customEvents
363282
P90Seconds = round(percentile(TotalSeconds, 90), 2),
364283
MaxSeconds = round(max(TotalSeconds), 2)
365284

366-
// 21. Daily processing volume with token costs
285+
// ============================================================
286+
// Percentiles & Trends
287+
// ============================================================
288+
289+
// 15. Token usage percentiles per document
290+
customEvents
291+
| where name == 'LLM_Agent_Token_Usage'
292+
| where timestamp > ago(7d)
293+
| extend process_id = tostring(customDimensions['process_id'])
294+
| extend agent = tostring(customDimensions['agent_name'])
295+
| extend total_tokens = toint(customDimensions['total_tokens'])
296+
| summarize total_tokens=max(total_tokens) by agent, process_id
297+
| summarize total_tokens=sum(total_tokens) by process_id
298+
| summarize
299+
p50 = percentile(total_tokens, 50),
300+
p90 = percentile(total_tokens, 90),
301+
p95 = percentile(total_tokens, 95),
302+
p99 = percentile(total_tokens, 99),
303+
Max = max(total_tokens)
304+
305+
// 16. Daily processing volume with token usage
367306
customEvents
368307
| where name == 'LLM_Agent_Token_Usage'
369308
| where timestamp > ago(30d)
370309
| extend process_id = tostring(customDimensions['process_id'])
310+
| extend agent = tostring(customDimensions['agent_name'])
371311
| extend total_tokens = toint(customDimensions['total_tokens'])
312+
| summarize total_tokens=max(total_tokens), timestamp=min(timestamp) by agent, process_id
372313
| summarize total_tokens=sum(total_tokens), timestamp=min(timestamp) by process_id
373314
| summarize
374315
DocumentsProcessed = count(),

0 commit comments

Comments
 (0)