|
| 1 | +// ============================================================ |
| 2 | +// KQL Queries for LLM Token Usage Monitoring |
| 3 | +// Content Processing Solution Accelerator |
| 4 | +// Run these in Application Insights > Logs |
| 5 | +// |
| 6 | +// DEDUPLICATION: Each event includes a deterministic event_id |
| 7 | +// (SHA-256 of event_name|process_id|agent_name|model). Queries |
| 8 | +// use take_any() by event_id to naturally eliminate duplicates |
| 9 | +// without accelerator-specific logic. |
| 10 | +// ============================================================ |
| 11 | + |
| 12 | +// 1. Overall token usage summary (LLM_Agent_Token_Usage events, last 7 days) |
| 13 | +// Dedup: take_any() per event_id keeps one row per event; tokens are then summed per process_id before the final roll-up |
| 14 | +customEvents |
| 15 | +| where name == 'LLM_Agent_Token_Usage' |
| 16 | +| where timestamp > ago(7d) |
| 17 | +| extend event_id = tostring(customDimensions['event_id']) |
| 18 | +| extend process_id = tostring(customDimensions['process_id']) |
| 19 | +| extend input_tokens = toint(customDimensions['input_tokens']) |
| 20 | +| extend output_tokens = toint(customDimensions['output_tokens']) |
| 21 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 22 | +| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), process_id=take_any(process_id) by event_id // one row per event_id |
| 23 | +| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), total_tokens=sum(total_tokens) by process_id // one row per process_id |
| 24 | +| summarize |
| 25 | + TotalDocuments = count(), // rows are per process_id here, so this counts process_ids |
| 26 | + TotalInputTokens = sum(input_tokens), |
| 27 | + TotalOutputTokens = sum(output_tokens), |
| 28 | + TotalTokens = sum(total_tokens), |
| 29 | + AvgTokensPerDocument = round(avg(total_tokens), 0) // average of the per-process_id totals |
| 30 | + |
| 31 | +// 2. Token usage by pipeline step (agent), LLM_Agent_Token_Usage events, last 7 days |
| 32 | +// Dedup: take_any() per event_id keeps one row per event, then tokens are summed per agent_name |
| 33 | +customEvents |
| 34 | +| where name == 'LLM_Agent_Token_Usage' |
| 35 | +| where timestamp > ago(7d) |
| 36 | +| extend event_id = tostring(customDimensions['event_id']) |
| 37 | +| extend agent = tostring(customDimensions['agent_name']) |
| 38 | +| extend input_tokens = toint(customDimensions['input_tokens']) |
| 39 | +| extend output_tokens = toint(customDimensions['output_tokens']) |
| 40 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 41 | +| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), agent=take_any(agent) by event_id // one row per event_id |
| 42 | +| summarize |
| 43 | + InputTokens = sum(input_tokens), |
| 44 | + OutputTokens = sum(output_tokens), |
| 45 | + TotalTokens = sum(total_tokens), |
| 46 | + Invocations = count() // number of deduplicated events for the agent |
| 47 | + by Step = agent |
| 48 | +| project Step, InputTokens, OutputTokens, TotalTokens, Invocations |
| 49 | +| order by TotalTokens desc |
| 50 | + |
| 51 | +// 3. Token usage over time (hourly area chart, last 7 days); each process_id's tokens are attributed to the hour of its earliest event |
| 52 | +customEvents |
| 53 | +| where name == 'LLM_Agent_Token_Usage' |
| 54 | +| where timestamp > ago(7d) |
| 55 | +| extend event_id = tostring(customDimensions['event_id']) |
| 56 | +| extend process_id = tostring(customDimensions['process_id']) |
| 57 | +| extend input_tokens = toint(customDimensions['input_tokens']) |
| 58 | +| extend output_tokens = toint(customDimensions['output_tokens']) |
| 59 | +| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), timestamp=min(timestamp), process_id=take_any(process_id) by event_id // dedup: one row per event_id, earliest timestamp kept |
| 60 | +| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), timestamp=min(timestamp) by process_id // one row per process_id, stamped with its earliest event |
| 61 | +| summarize InputTokens = sum(input_tokens), OutputTokens = sum(output_tokens) by bin(timestamp, 1h) |
| 62 | +| order by timestamp asc |
| 63 | +| render areachart |
| 64 | + |
| 65 | +// 4. Token distribution by agent (pie chart of total_tokens per agent_name, last 7 days) |
| 66 | +customEvents |
| 67 | +| where name == 'LLM_Agent_Token_Usage' |
| 68 | +| where timestamp > ago(7d) |
| 69 | +| extend event_id = tostring(customDimensions['event_id']) |
| 70 | +| extend agent = tostring(customDimensions['agent_name']) |
| 71 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 72 | +| summarize total_tokens=take_any(total_tokens), agent=take_any(agent) by event_id // dedup: one row per event_id |
| 73 | +| summarize TotalTokens = sum(total_tokens) by agent |
| 74 | +| render piechart |
| 75 | + |
| 76 | +// 5. Estimated daily cost over the last 30 days (GPT-4o pricing: $2.50/1M input, $10.00/1M output) |
| 77 | +let input_price_per_million = 2.50; // price per 1M input tokens; edit to match the deployed model |
| 78 | +let output_price_per_million = 10.00; // price per 1M output tokens; edit to match the deployed model |
| 79 | +customEvents |
| 80 | +| where name == 'LLM_Agent_Token_Usage' |
| 81 | +| where timestamp > ago(30d) |
| 82 | +| extend event_id = tostring(customDimensions['event_id']) |
| 83 | +| extend process_id = tostring(customDimensions['process_id']) |
| 84 | +| extend input_tokens = toint(customDimensions['input_tokens']) |
| 85 | +| extend output_tokens = toint(customDimensions['output_tokens']) |
| 86 | +| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), timestamp=min(timestamp), process_id=take_any(process_id) by event_id // dedup: one row per event_id |
| 87 | +| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), timestamp=min(timestamp) by process_id // one row per process_id, stamped with its earliest event |
| 88 | +| summarize TotalInput = sum(input_tokens), TotalOutput = sum(output_tokens) by bin(timestamp, 1d) // a process_id's tokens all land on the day of its earliest event |
| 89 | +| extend InputCost = round(TotalInput * input_price_per_million / 1000000.0, 4) |
| 90 | +| extend OutputCost = round(TotalOutput * output_price_per_million / 1000000.0, 4) |
| 91 | +| extend TotalCost = InputCost + OutputCost |
| 92 | +| project Day = timestamp, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost |
| 93 | +| order by Day desc |
| 94 | + |
| 95 | +// 6. Estimated cost by model over the last 30 days, from LLM_Model_Token_Usage events (adjust pricing per model; prices are per 1M tokens) |
| 96 | +let gpt4o_input = 2.50; |
| 97 | +let gpt4o_output = 10.00; |
| 98 | +let gpt4o_mini_input = 0.15; |
| 99 | +let gpt4o_mini_output = 0.60; |
| 100 | +customEvents |
| 101 | +| where name == 'LLM_Model_Token_Usage' |
| 102 | +| where timestamp > ago(30d) |
| 103 | +| extend event_id = tostring(customDimensions['event_id']) |
| 104 | +| extend model = tostring(customDimensions['model_deployment_name']) |
| 105 | +| extend input_tokens = toint(customDimensions['input_tokens']) |
| 106 | +| extend output_tokens = toint(customDimensions['output_tokens']) |
| 107 | +| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), model=take_any(model) by event_id // dedup: one row per event_id |
| 108 | +| summarize TotalInput = sum(input_tokens), TotalOutput = sum(output_tokens) by model |
| 109 | +| extend InputPrice = case( |
| 110 | + model has "mini", gpt4o_mini_input, |
| 111 | + gpt4o_input) // default branch: every deployment without the term "mini" is priced with gpt4o_input |
| 112 | +| extend OutputPrice = case( |
| 113 | + model has "mini", gpt4o_mini_output, |
| 114 | + gpt4o_output) // default branch: every deployment without the term "mini" is priced with gpt4o_output |
| 115 | +| extend InputCost = round(TotalInput * InputPrice / 1000000.0, 4) |
| 116 | +| extend OutputCost = round(TotalOutput * OutputPrice / 1000000.0, 4) |
| 117 | +| extend TotalCost = InputCost + OutputCost |
| 118 | +| project Model = model, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost |
| 119 | +| order by TotalCost desc |
| 120 | + |
| 121 | +// 7. Token usage by model deployment (LLM_Model_Token_Usage events, last 7 days) |
| 122 | +customEvents |
| 123 | +| where name == 'LLM_Model_Token_Usage' |
| 124 | +| where timestamp > ago(7d) |
| 125 | +| extend event_id = tostring(customDimensions['event_id']) |
| 126 | +| extend model = tostring(customDimensions['model_deployment_name']) |
| 127 | +| extend process_id = tostring(customDimensions['process_id']) |
| 128 | +| extend input_tokens = toint(customDimensions['input_tokens']) |
| 129 | +| extend output_tokens = toint(customDimensions['output_tokens']) |
| 130 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 131 | +| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), model=take_any(model), process_id=take_any(process_id) by event_id // dedup: one row per event_id |
| 132 | +| summarize |
| 133 | + InputTokens = sum(input_tokens), |
| 134 | + OutputTokens = sum(output_tokens), |
| 135 | + TotalTokens = sum(total_tokens), |
| 136 | + Invocations = dcount(process_id) // estimated count of distinct process_ids per model. NOTE(review): query 2 uses count() for the same column name; confirm which is intended |
| 137 | + by Model = model |
| 138 | +| order by TotalTokens desc |
| 139 | + |
| 140 | +// 8. Step-to-model mapping with token usage (one output row per agent_name / model_deployment_name pair, last 7 days) |
| 141 | +customEvents |
| 142 | +| where name == 'LLM_Agent_Token_Usage' |
| 143 | +| where timestamp > ago(7d) |
| 144 | +| extend event_id = tostring(customDimensions['event_id']) |
| 145 | +| extend agent = tostring(customDimensions['agent_name']) |
| 146 | +| extend model = tostring(customDimensions['model_deployment_name']) |
| 147 | +| extend process_id = tostring(customDimensions['process_id']) |
| 148 | +| extend input_tokens = toint(customDimensions['input_tokens']) |
| 149 | +| extend output_tokens = toint(customDimensions['output_tokens']) |
| 150 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 151 | +| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), agent=take_any(agent), model=take_any(model), process_id=take_any(process_id) by event_id // dedup: one row per event_id |
| 152 | +| summarize |
| 153 | + InputTokens = sum(input_tokens), |
| 154 | + OutputTokens = sum(output_tokens), |
| 155 | + TotalTokens = sum(total_tokens), |
| 156 | + Invocations = dcount(process_id) // estimated count of distinct process_ids per step/model pair. NOTE(review): query 2 uses count(); confirm which is intended |
| 157 | + by Step = agent, Model = model |
| 158 | +| order by TotalTokens desc |
| 159 | + |
| 160 | +// 9. Top 20 token consumers by document (process_id), last 7 days; file_name is looked up from LLM_Token_Usage_Summary events |
| 161 | +customEvents |
| 162 | +| where name == 'LLM_Agent_Token_Usage' |
| 163 | +| where timestamp > ago(7d) |
| 164 | +| extend event_id = tostring(customDimensions['event_id']) |
| 165 | +| extend process_id = tostring(customDimensions['process_id']) |
| 166 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 167 | +| summarize total_tokens=take_any(total_tokens), process_id=take_any(process_id) by event_id // dedup: one row per event_id |
| 168 | +| summarize TotalTokens = sum(total_tokens) by process_id |
| 169 | +| join kind=leftouter ( // leftouter: keep process_ids that have no summary event in the window (file_name stays empty) |
| 170 | + customEvents |
| 171 | + | where name == 'LLM_Token_Usage_Summary' |
| 172 | + | where timestamp > ago(7d) |
| 173 | + | extend process_id = tostring(customDimensions['process_id']) |
| 174 | + | extend file_name = tostring(customDimensions['file_name']) |
| 175 | + | summarize file_name=take_any(file_name) by process_id // one file_name per process_id so the join cannot multiply rows |
| 176 | +) on process_id |
| 177 | +| project process_id, file_name, TotalTokens |
| 178 | +| order by TotalTokens desc |
| 179 | +| take 20 |
| 180 | + |
| 181 | +// 10. Token usage by file type (PDF, DOCX, image, etc.), last 7 days; MIME type is looked up from LLM_Token_Usage_Summary events and unmatched documents fall into "Other" |
| 182 | +customEvents |
| 183 | +| where name == 'LLM_Agent_Token_Usage' |
| 184 | +| where timestamp > ago(7d) |
| 185 | +| extend event_id = tostring(customDimensions['event_id']) |
| 186 | +| extend process_id = tostring(customDimensions['process_id']) |
| 187 | +| extend input_tokens = toint(customDimensions['input_tokens']) |
| 188 | +| extend output_tokens = toint(customDimensions['output_tokens']) |
| 189 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 190 | +| summarize input_tokens=take_any(input_tokens), output_tokens=take_any(output_tokens), total_tokens=take_any(total_tokens), process_id=take_any(process_id) by event_id // dedup: one row per event_id |
| 191 | +| summarize input_tokens=sum(input_tokens), output_tokens=sum(output_tokens), total_tokens=sum(total_tokens) by process_id // one row per process_id |
| 192 | +| join kind=leftouter ( // leftouter: keep process_ids that have no summary event in the window |
| 193 | + customEvents |
| 194 | + | where name == 'LLM_Token_Usage_Summary' |
| 195 | + | where timestamp > ago(7d) |
| 196 | + | extend process_id = tostring(customDimensions['process_id']) |
| 197 | + | extend mime_type = tostring(customDimensions['file_mime_type']) |
| 198 | + | summarize mime_type=take_any(mime_type) by process_id // one mime_type per process_id so the join cannot multiply rows |
| 199 | +) on process_id |
| 200 | +| extend file_type = case( |
| 201 | + mime_type has "pdf", "PDF", |
| 202 | + mime_type has "image", "Image", |
| 203 | + mime_type contains "word" or mime_type contains "docx", "Word", // contains, not has: "word" is only a substring of the terms "msword" and "wordprocessingml" |
| 204 | + mime_type contains "excel" or mime_type contains "xlsx" or mime_type contains "spreadsheet", "Excel", // contains "spreadsheet" covers the xlsx MIME type (...spreadsheetml.sheet) |
| 205 | + mime_type has "text", "Text", |
| 206 | + "Other") |
| 207 | +| summarize |
| 208 | + Documents = count(), // rows are per process_id here, so this counts process_ids |
| 209 | + TotalInputTokens = sum(input_tokens), |
| 210 | + TotalOutputTokens = sum(output_tokens), |
| 211 | + TotalTokens = sum(total_tokens), |
| 212 | + AvgTokensPerDoc = round(avg(total_tokens), 0) |
| 213 | + by FileType = file_type |
| 214 | +| order by TotalTokens desc |
| 215 | + |
| 216 | +// ============================================================ |
| 217 | +// Processing Time Queries |
| 218 | +// ============================================================ |
| 219 | + |
| 220 | +// 11. Step completion time (seconds from document start to step completion); document start = earliest LLM_Agent_Token_Usage event of the same process_id |
| 221 | +customEvents |
| 222 | +| where name == 'LLM_Agent_Token_Usage' |
| 223 | +| where timestamp > ago(7d) |
| 224 | +| extend agent = tostring(customDimensions['agent_name']) |
| 225 | +| extend process_id = tostring(customDimensions['process_id']) |
| 226 | +| join kind=inner ( |
| 227 | + customEvents |
| 228 | + | where name == 'LLM_Agent_Token_Usage' |
| 229 | + | where timestamp > ago(7d) |
| 230 | + | extend process_id = tostring(customDimensions['process_id']) |
| 231 | + | summarize DocStartTime = min(timestamp) by process_id // the earliest event of a process_id therefore measures 0 seconds |
| 232 | +) on process_id |
| 233 | +| extend StepDurationSeconds = round(datetime_diff('millisecond', timestamp, DocStartTime) / 1000.0, 2) // milliseconds -> seconds |
| 234 | +| summarize |
| 235 | + AvgCompletionTime = round(avg(StepDurationSeconds), 2), |
| 236 | + P50CompletionTime = round(percentile(StepDurationSeconds, 50), 2), |
| 237 | + P90CompletionTime = round(percentile(StepDurationSeconds, 90), 2), |
| 238 | + MaxCompletionTime = round(max(StepDurationSeconds), 2), |
| 239 | + Invocations = count() // raw event count: this query does not deduplicate by event_id |
| 240 | + by Step = agent |
| 241 | +| order by AvgCompletionTime desc |
| 242 | + |
| 243 | +// 12. OpenAI API call durations from dependencies table (successful calls, last 7 days, top 10 operation names by call count) |
| 244 | +dependencies |
| 245 | +| where timestamp > ago(7d) |
| 246 | +| where target has "openai" or name has "chat" or type == "HTTP" or name has "openai" // NOTE(review): type == "HTTP" admits every HTTP dependency, not only OpenAI calls; confirm this is intended |
| 247 | +| where success == true |
| 248 | +| extend durationSeconds = round(duration / 1000.0, 2) // assumes duration is in milliseconds; TODO confirm against the dependencies schema |
| 249 | +| summarize |
| 250 | + TotalCalls = count(), |
| 251 | + AvgSeconds = round(avg(durationSeconds), 2), |
| 252 | + P50Seconds = round(percentile(durationSeconds, 50), 2), |
| 253 | + P90Seconds = round(percentile(durationSeconds, 90), 2), |
| 254 | + MaxSeconds = round(max(durationSeconds), 2) |
| 255 | + by OperationName = name |
| 256 | +| order by TotalCalls desc |
| 257 | +| take 10 |
| 258 | + |
| 259 | +// 13. Per-document step timeline: one row per LLM_Agent_Token_Usage event with its offset in seconds from the process_id's earliest event |
| 260 | +customEvents |
| 261 | +| where name == 'LLM_Agent_Token_Usage' |
| 262 | +| where timestamp > ago(7d) |
| 263 | +| extend agent = tostring(customDimensions['agent_name']) |
| 264 | +| extend process_id = tostring(customDimensions['process_id']) |
| 265 | +| join kind=inner ( |
| 266 | + customEvents |
| 267 | + | where name == 'LLM_Agent_Token_Usage' |
| 268 | + | where timestamp > ago(7d) |
| 269 | + | extend process_id = tostring(customDimensions['process_id']) |
| 270 | + | summarize DocStartTime = min(timestamp) by process_id // the earliest event of a process_id therefore shows 0 seconds |
| 271 | +) on process_id |
| 272 | +| extend StepCompletedAt = round(datetime_diff('millisecond', timestamp, DocStartTime) / 1000.0, 2) // milliseconds -> seconds |
| 273 | +| project timestamp, process_id, Step=agent, StepCompletedAtSeconds=StepCompletedAt |
| 274 | +| order by process_id, timestamp asc |
| 275 | + |
| 276 | +// 14. Total document processing time (first to last LLM_Agent_Token_Usage event per process_id, last 7 days) |
| 277 | +customEvents |
| 278 | +| where name == 'LLM_Agent_Token_Usage' |
| 279 | +| where timestamp > ago(7d) |
| 280 | +| extend process_id = tostring(customDimensions['process_id']) |
| 281 | +| summarize StartTime = min(timestamp), EndTime = max(timestamp) by process_id // a process_id with a single event yields StartTime == EndTime |
| 282 | +| extend TotalSeconds = round(datetime_diff('millisecond', EndTime, StartTime) / 1000.0, 2) // milliseconds -> seconds |
| 283 | +| summarize |
| 284 | + DocumentsProcessed = count(), // rows are per process_id here, so this counts process_ids |
| 285 | + AvgSeconds = round(avg(TotalSeconds), 2), |
| 286 | + P50Seconds = round(percentile(TotalSeconds, 50), 2), |
| 287 | + P90Seconds = round(percentile(TotalSeconds, 90), 2), |
| 288 | + MaxSeconds = round(max(TotalSeconds), 2) |
| 289 | + |
| 290 | +// ============================================================ |
| 291 | +// Percentiles & Trends |
| 292 | +// ============================================================ |
| 293 | + |
| 294 | +// 15. Token usage percentiles per document (process_id), last 7 days; dedup here is max(total_tokens) per agent and process_id, not take_any() per event_id |
| 295 | +customEvents |
| 296 | +| where name == 'LLM_Agent_Token_Usage' |
| 297 | +| where timestamp > ago(7d) |
| 298 | +| extend process_id = tostring(customDimensions['process_id']) |
| 299 | +| extend agent = tostring(customDimensions['agent_name']) |
| 300 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 301 | +| summarize total_tokens=max(total_tokens) by agent, process_id // one row per agent and process_id, keeping the largest total_tokens |
| 302 | +| summarize total_tokens=sum(total_tokens) by process_id // per-document total across agents |
| 303 | +| summarize |
| 304 | + p50 = percentile(total_tokens, 50), |
| 305 | + p90 = percentile(total_tokens, 90), |
| 306 | + p95 = percentile(total_tokens, 95), |
| 307 | + p99 = percentile(total_tokens, 99), |
| 308 | + Max = max(total_tokens) |
| 309 | + |
| 310 | +// 16. Daily processing volume with token usage (last 30 days); each process_id is counted on the day of its earliest event |
| 311 | +customEvents |
| 312 | +| where name == 'LLM_Agent_Token_Usage' |
| 313 | +| where timestamp > ago(30d) |
| 314 | +| extend process_id = tostring(customDimensions['process_id']) |
| 315 | +| extend agent = tostring(customDimensions['agent_name']) |
| 316 | +| extend total_tokens = toint(customDimensions['total_tokens']) |
| 317 | +| summarize total_tokens=max(total_tokens), timestamp=min(timestamp) by agent, process_id // one row per agent and process_id, keeping the largest total_tokens |
| 318 | +| summarize total_tokens=sum(total_tokens), timestamp=min(timestamp) by process_id // per-document total, stamped with its earliest event |
| 319 | +| summarize |
| 320 | + DocumentsProcessed = count(), // rows are per process_id here, so this counts process_ids |
| 321 | + TotalTokens = sum(total_tokens), |
| 322 | + AvgTokensPerDoc = round(avg(total_tokens), 0), |
| 323 | + MaxTokensPerDoc = max(total_tokens) |
| 324 | + by Day = bin(timestamp, 1d) |
| 325 | +| order by Day desc |
0 commit comments