22// KQL Queries for LLM Token Usage Monitoring
33// Content Processing Solution Accelerator
44// Run these in Application Insights > Logs
5+ //
6+ // IMPORTANT: Token queries over LLM_Agent_Token_Usage use a deduplication pattern:
7+ // max() by (agent, process_id) first, then sum().
8+ // This handles duplicate custom events that can occur when
9+ // both ContentProcessor and ContentProcessorWorkflow emit
10+ // telemetry through the same Application Insights instance. Queries over LLM_Model_Token_Usage (e.g. query 7) sum raw events without this step.
511// ============================================================
612
7- // 1. Overall token usage summary (last 7 days)
13+ // 1. Overall token usage summary
14+ // Dedup: max by (agent, process_id) → sum by process_id → aggregate
815customEvents
916| where name == 'LLM_Agent_Token_Usage'
1017| where timestamp > ago (7 d )
1118| extend process_id = tostring (customDimensions['process_id' ])
19+ | extend agent = tostring (customDimensions['agent_name' ])
1220| extend input_tokens = toint (customDimensions['input_tokens' ])
1321| extend output_tokens = toint (customDimensions['output_tokens' ])
1422| extend total_tokens = toint (customDimensions['total_tokens' ])
23+ | summarize input_tokens=max (input_tokens), output_tokens=max (output_tokens), total_tokens=max (total_tokens) by agent, process_id
1524| summarize input_tokens=sum (input_tokens), output_tokens=sum (output_tokens), total_tokens=sum (total_tokens) by process_id
1625| summarize
1726 TotalDocuments = count (),
@@ -21,42 +30,61 @@ customEvents
2130 AvgTokensPerDocument = round (avg (total_tokens), 0 )
2231
// 2. Token usage by pipeline step (agent)
// Dedup: keep one row per (agent, process_id) using max(), then total per agent.
// After the dedup step each row is one document handled by one agent, so
// Invocations counts (agent, process_id) pairs rather than raw events.
customEvents
| where name == 'LLM_Agent_Token_Usage' and timestamp > ago(7d)
| extend
    agent = tostring(customDimensions.agent_name),
    process_id = tostring(customDimensions.process_id),
    input_tokens = toint(customDimensions.input_tokens),
    output_tokens = toint(customDimensions.output_tokens),
    total_tokens = toint(customDimensions.total_tokens)
| summarize
    input_tokens = max(input_tokens),
    output_tokens = max(output_tokens),
    total_tokens = max(total_tokens)
    by agent, process_id
| summarize
    InputTokens = sum(input_tokens),
    OutputTokens = sum(output_tokens),
    TotalTokens = sum(total_tokens),
    Invocations = count()
    by Step = agent
| project Step, InputTokens, OutputTokens, TotalTokens, Invocations
| sort by TotalTokens desc
3851
// 3. Token usage over time (hourly)
// Dedup: max() per (agent, process_id), then sum() per process_id.
// min(timestamp) is carried through both steps, so each document's tokens
// land in the hourly bin of its earliest event.
customEvents
| where name == 'LLM_Agent_Token_Usage' and timestamp > ago(7d)
| extend
    process_id = tostring(customDimensions.process_id),
    agent = tostring(customDimensions.agent_name),
    input_tokens = toint(customDimensions.input_tokens),
    output_tokens = toint(customDimensions.output_tokens)
| summarize
    input_tokens = max(input_tokens),
    output_tokens = max(output_tokens),
    timestamp = min(timestamp)
    by agent, process_id
| summarize
    input_tokens = sum(input_tokens),
    output_tokens = sum(output_tokens),
    timestamp = min(timestamp)
    by process_id
| summarize
    InputTokens = sum(input_tokens),
    OutputTokens = sum(output_tokens)
    by bin(timestamp, 1h)
| sort by timestamp asc
| render areachart
5065
51- // 4. Estimated cost (GPT-4o pricing: $2.50/1M input, $10.00/1M output)
// 4. Token distribution by agent (pie chart)
// Dedup: max() per (agent, process_id), then sum() per agent.
customEvents
| where name == 'LLM_Agent_Token_Usage' and timestamp > ago(7d)
| extend
    agent = tostring(customDimensions.agent_name),
    process_id = tostring(customDimensions.process_id),
    total_tokens = toint(customDimensions.total_tokens)
| summarize total_tokens = max(total_tokens) by agent, process_id
| summarize TotalTokens = sum(total_tokens) by agent
| render piechart
76+
77+ // 5. Estimated daily cost (GPT-4o pricing: $2.50/1M input, $10.00/1M output)
5278let input_price_per_million = 2.50 ;
5379let output_price_per_million = 10.00 ;
5480customEvents
5581| where name == 'LLM_Agent_Token_Usage'
5682| where timestamp > ago (30 d )
5783| extend process_id = tostring (customDimensions['process_id' ])
84+ | extend agent = tostring (customDimensions['agent_name' ])
5885| extend input_tokens = toint (customDimensions['input_tokens' ])
5986| extend output_tokens = toint (customDimensions['output_tokens' ])
87+ | summarize input_tokens=max (input_tokens), output_tokens=max (output_tokens), timestamp=min (timestamp) by agent, process_id
6088| summarize input_tokens=sum (input_tokens), output_tokens=sum (output_tokens), timestamp=min (timestamp) by process_id
6189| summarize TotalInput = sum (input_tokens), TotalOutput = sum (output_tokens) by bin (timestamp, 1 d )
6290| extend InputCost = round (TotalInput * input_price_per_million / 1000000.0 , 4 )
@@ -65,109 +93,7 @@ customEvents
6593| project Day = timestamp, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost
6694| order by Day desc
6795
68- // 5. Top token consumers by document
69- customEvents
70- | where name == 'LLM_Agent_Token_Usage'
71- | where timestamp > ago (7 d )
72- | extend process_id = tostring (customDimensions['process_id' ])
73- | extend total_tokens = toint (customDimensions['total_tokens' ])
74- | summarize TotalTokens = sum (total_tokens) by process_id
75- | join kind =leftouter (
76- customEvents
77- | where name == 'LLM_Token_Usage_Summary'
78- | where timestamp > ago (7 d )
79- | extend process_id = tostring (customDimensions['process_id' ])
80- | extend file_name = tostring (customDimensions['file_name' ])
81- | summarize file_name=take_any(file_name) by process_id
82- ) on process_id
83- | project process_id, file_name, TotalTokens
84- | order by TotalTokens desc
85- | take 20
86-
87- // 6. Pipeline step token distribution (pie chart)
88- customEvents
89- | where name == 'LLM_Agent_Token_Usage'
90- | where timestamp > ago (7 d )
91- | extend agent = tostring (customDimensions['agent_name' ])
92- | extend total_tokens = toint (customDimensions['total_tokens' ])
93- | summarize TotalTokens = sum (total_tokens) by agent
94- | render piechart
95-
96- // 7. Token usage percentiles per document
97- customEvents
98- | where name == 'LLM_Agent_Token_Usage'
99- | where timestamp > ago (7 d )
100- | extend process_id = tostring (customDimensions['process_id' ])
101- | extend total_tokens = toint (customDimensions['total_tokens' ])
102- | summarize total_tokens=sum (total_tokens) by process_id
103- | summarize
104- p50 = percentile(total_tokens, 50 ),
105- p90 = percentile(total_tokens, 90 ),
106- p95 = percentile(total_tokens, 95 ),
107- p99 = percentile(total_tokens, 99 ),
108- Max = max (total_tokens)
109-
110- // 8. Token usage by step grouping (Extraction vs Analysis vs Safety)
111- let StepGroupMapping = datatable (agent:string , StepGroup:string ) [
112- "MapHandler" , "Extraction" ,
113- "RAI" , "Safety" ,
114- "Summarize" , "Analysis" ,
115- "GapAnalysis" , "Analysis"
116- ];
117- customEvents
118- | where name == 'LLM_Agent_Token_Usage'
119- | where timestamp > ago (7 d )
120- | extend agent = tostring (customDimensions['agent_name' ])
121- | extend input_tokens = toint (customDimensions['input_tokens' ])
122- | extend output_tokens = toint (customDimensions['output_tokens' ])
123- | extend total_tokens = toint (customDimensions['total_tokens' ])
124- | lookup kind =leftouter StepGroupMapping on agent
125- | extend StepGroup = iff (isempty (StepGroup), "Unknown" , StepGroup)
126- | summarize
127- TotalRequests = count (),
128- TotalInputTokens = sum (input_tokens),
129- TotalOutputTokens = sum (output_tokens),
130- TotalTokens = sum (total_tokens),
131- AvgTokensPerRequest = round (avg (total_tokens), 0 )
132- by StepGroup
133- | order by TotalTokens desc
134-
135- // 9. Token usage by model deployment
136- customEvents
137- | where name == 'LLM_Model_Token_Usage'
138- | where timestamp > ago (7 d )
139- | extend model = tostring (customDimensions['model_deployment_name' ])
140- | extend input_tokens = toint (customDimensions['input_tokens' ])
141- | extend output_tokens = toint (customDimensions['output_tokens' ])
142- | extend total_tokens = toint (customDimensions['total_tokens' ])
143- | summarize
144- InputTokens = sum (input_tokens),
145- OutputTokens = sum (output_tokens),
146- TotalTokens = sum (total_tokens),
147- Invocations = count ()
148- by Model = model
149- | order by TotalTokens desc
150-
151- // 10. Token usage by model over time (hourly)
152- customEvents
153- | where name == 'LLM_Model_Token_Usage'
154- | where timestamp > ago (7 d )
155- | extend model = tostring (customDimensions['model_deployment_name' ])
156- | extend total_tokens = toint (customDimensions['total_tokens' ])
157- | summarize TotalTokens = sum (total_tokens) by bin (timestamp, 1 h ), model
158- | order by timestamp asc
159- | render areachart
160-
161- // 11. Model token distribution (pie chart)
162- customEvents
163- | where name == 'LLM_Model_Token_Usage'
164- | where timestamp > ago (7 d )
165- | extend model = tostring (customDimensions['model_deployment_name' ])
166- | extend total_tokens = toint (customDimensions['total_tokens' ])
167- | summarize TotalTokens = sum (total_tokens) by model
168- | render piechart
169-
170- // 12. Estimated cost by model (adjust pricing per model)
96+ // 6. Estimated cost by model (adjust pricing per model)
17197let gpt4o_input = 2.50 ;
17298let gpt4o_output = 10.00 ;
17399let gpt4o_mini_input = 0.15 ;
@@ -191,11 +117,10 @@ customEvents
191117| project Model = model, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost
192118| order by TotalCost desc
193119
194- // 13. Step-to-model mapping with token usage
120+ // 7. Token usage by model deployment
195121customEvents
196- | where name == 'LLM_Agent_Token_Usage '
122+ | where name == 'LLM_Model_Token_Usage '
197123| where timestamp > ago (7 d )
198- | extend agent = tostring (customDimensions['agent_name' ])
199124| extend model = tostring (customDimensions['model_deployment_name' ])
200125| extend input_tokens = toint (customDimensions['input_tokens' ])
201126| extend output_tokens = toint (customDimensions['output_tokens' ])
@@ -205,53 +130,59 @@ customEvents
205130 OutputTokens = sum (output_tokens),
206131 TotalTokens = sum (total_tokens),
207132 Invocations = count ()
208- by Step = agent, Model = model
133+ by Model = model
209134| order by TotalTokens desc
210135
211- // 14. RAI agent specific token usage
// 8. Step-to-model mapping with token usage
// Dedup: max() per (agent, model, process_id), then sum() per (agent, model).
// The dedup step leaves exactly one row per (agent, model, process_id), so
// count() in the final summarize is the exact number of distinct documents
// for each step/model pair. dcount() would return an estimate of that same
// number, and query 2 already uses count() for its Invocations column.
customEvents
| where name == 'LLM_Agent_Token_Usage'
| where timestamp > ago(7d)
| extend agent = tostring(customDimensions['agent_name'])
| extend model = tostring(customDimensions['model_deployment_name'])
| extend process_id = tostring(customDimensions['process_id'])
| extend input_tokens = toint(customDimensions['input_tokens'])
| extend output_tokens = toint(customDimensions['output_tokens'])
| extend total_tokens = toint(customDimensions['total_tokens'])
| summarize input_tokens=max(input_tokens), output_tokens=max(output_tokens), total_tokens=max(total_tokens) by agent, model, process_id
| summarize
    InputTokens = sum(input_tokens),
    OutputTokens = sum(output_tokens),
    TotalTokens = sum(total_tokens),
    Invocations = count()
    by Step = agent, Model = model
| order by TotalTokens desc
227154
228- // 15. OpenTelemetry auto-instrumented OpenAI calls (if available)
229- dependencies
230- | where name has "openai" or target has "openai"
// 9. Top 20 token consumers by document
// Dedup: max() per (agent, process_id), then sum() per process_id.
// The file name is taken from the LLM_Token_Usage_Summary event for the same
// process_id; with a left outer join, documents that have no summary event
// are still listed, with an empty file_name.
customEvents
| where name == 'LLM_Agent_Token_Usage' and timestamp > ago(7d)
| extend
    process_id = tostring(customDimensions.process_id),
    agent = tostring(customDimensions.agent_name),
    total_tokens = toint(customDimensions.total_tokens)
| summarize total_tokens = max(total_tokens) by agent, process_id
| summarize TotalTokens = sum(total_tokens) by process_id
| join kind=leftouter (
    customEvents
    | where name == 'LLM_Token_Usage_Summary' and timestamp > ago(7d)
    | extend
        process_id = tostring(customDimensions.process_id),
        file_name = tostring(customDimensions.file_name)
    | summarize file_name = take_any(file_name) by process_id
) on process_id
| project process_id, file_name, TotalTokens
| top 20 by TotalTokens desc
246175
247- // 16 . Token usage by file type (PDF, DOCX, image, etc.)
176+ // 10 . Token usage by file type (PDF, DOCX, image, etc.)
248177customEvents
249178| where name == 'LLM_Agent_Token_Usage'
250179| where timestamp > ago (7 d )
251180| extend process_id = tostring (customDimensions['process_id' ])
181+ | extend agent = tostring (customDimensions['agent_name' ])
252182| extend input_tokens = toint (customDimensions['input_tokens' ])
253183| extend output_tokens = toint (customDimensions['output_tokens' ])
254184| extend total_tokens = toint (customDimensions['total_tokens' ])
185+ | summarize input_tokens=max (input_tokens), output_tokens=max (output_tokens), total_tokens=max (total_tokens) by agent, process_id
255186| summarize input_tokens=sum (input_tokens), output_tokens=sum (output_tokens), total_tokens=sum (total_tokens) by process_id
256187| join kind =leftouter (
257188 customEvents
@@ -277,23 +208,11 @@ customEvents
277208 by FileType = file_type
278209| order by TotalTokens desc
279210
280- // 17. Per-document token breakdown by step
281- customEvents
282- | where name == 'LLM_Agent_Token_Usage'
283- | where timestamp > ago (7 d )
284- | extend agent = tostring (customDimensions['agent_name' ])
285- | extend process_id = tostring (customDimensions['process_id' ])
286- | extend input_tokens = toint (customDimensions['input_tokens' ])
287- | extend output_tokens = toint (customDimensions['output_tokens' ])
288- | extend total_tokens = toint (customDimensions['total_tokens' ])
289- | summarize
290- InputTokens = sum (input_tokens),
291- OutputTokens = sum (output_tokens),
292- TotalTokens = sum (total_tokens)
293- by process_id, Step = agent
294- | order by process_id, TotalTokens desc
211+ // ============================================================
212+ // Processing Time Queries
213+ // ============================================================
295214
296- // 18 . Step completion time (seconds from document start to step completion)
215+ // 11 . Step completion time (seconds from document start to step completion)
297216customEvents
298217| where name == 'LLM_Agent_Token_Usage'
299218| where timestamp > ago (7 d )
@@ -316,7 +235,7 @@ customEvents
316235 by Step = agent
317236| order by AvgCompletionTime desc
318237
319- // 19 . OpenAI API call durations from dependencies table
238+ // 12 . OpenAI API call durations from dependencies table
320239dependencies
321240| where timestamp > ago (7 d )
322241| where target has "openai" or name has "chat" or type == "HTTP" or name has "openai"
@@ -332,7 +251,7 @@ dependencies
332251| order by TotalCalls desc
333252| take 10
334253
335- // 20 . Per-document step timeline
254+ // 13 . Per-document step timeline
336255customEvents
337256| where name == 'LLM_Agent_Token_Usage'
338257| where timestamp > ago (7 d )
@@ -349,7 +268,7 @@ customEvents
349268| project timestamp, process_id, Step=agent, StepCompletedAtSeconds=StepCompletedAt
350269| order by process_id, timestamp asc
351270
352- // 21 . Total document processing time (first to last step)
271+ // 14 . Total document processing time (first to last step)
353272customEvents
354273| where name == 'LLM_Agent_Token_Usage'
355274| where timestamp > ago (7 d )
@@ -363,12 +282,34 @@ customEvents
363282 P90Seconds = round (percentile(TotalSeconds, 90 ), 2 ),
364283 MaxSeconds = round (max (TotalSeconds), 2 )
365284
366- // 21. Daily processing volume with token costs
285+ // ============================================================
286+ // Percentiles & Trends
287+ // ============================================================
288+
// 15. Token usage percentiles per document
// Dedup: max() per (agent, process_id), then sum() per process_id, so the
// percentiles are computed over one total per document.
customEvents
| where name == 'LLM_Agent_Token_Usage' and timestamp > ago(7d)
| extend
    process_id = tostring(customDimensions.process_id),
    agent = tostring(customDimensions.agent_name),
    total_tokens = toint(customDimensions.total_tokens)
| summarize total_tokens = max(total_tokens) by agent, process_id
| summarize total_tokens = sum(total_tokens) by process_id
| summarize
    p50 = percentile(total_tokens, 50),
    p90 = percentile(total_tokens, 90),
    p95 = percentile(total_tokens, 95),
    p99 = percentile(total_tokens, 99),
    Max = max(total_tokens)
304+
305+ // 16. Daily processing volume with token usage
367306customEvents
368307| where name == 'LLM_Agent_Token_Usage'
369308| where timestamp > ago (30 d )
370309| extend process_id = tostring (customDimensions['process_id' ])
310+ | extend agent = tostring (customDimensions['agent_name' ])
371311| extend total_tokens = toint (customDimensions['total_tokens' ])
312+ | summarize total_tokens=max (total_tokens), timestamp=min (timestamp) by agent, process_id
372313| summarize total_tokens=sum (total_tokens), timestamp=min (timestamp) by process_id
373314| summarize
374315 DocumentsProcessed = count (),
0 commit comments