Skip to content

Commit 35f43cd

Browse files
committed
update query
1 parent 30745ec commit 35f43cd

10 files changed

+37
-80
lines changed
Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
1-
// Query 1 — Registration Failure Stage Breakdown per Manager (28 days)
2-
// For each manager that failed, shows WHICH stage in the registration flow broke.
3-
// failureStage is hierarchical: "getPipenv:nativeFinderRefresh", "constructCondaSourcingStatus", etc.
1+
// Query 1 — Registration Failure Stage Breakdown per Manager
2+
// Only events that carry a failureStage are counted; shows which stage values occur per manager and extension version.
43
// High counts at a specific stage → that code path is the priority fix target.
5-
let endDate = startofday(now()-1d);
64
RawEventsVSCodeExt
7-
| where ServerTimestamp > endDate-28d and ServerTimestamp < endDate
5+
| where ServerTimestamp > ago(7d)
86
| where EventName == "ms-python.vscode-python-envs/manager_registration.failed"
97
| extend ExtVersion = tostring(Properties["common.extversion"])
10-
| extend _minor = toint(extract("^1\\.(\\d+)", 1, ExtVersion)), _patch = tolong(extract("^1\\.\\d+\\.(\\d+)", 1, ExtVersion))
8+
| extend _minor = toint(extract("^1\\.(\\d+)", 1, ExtVersion))
9+
| extend _patch = tolong(extract("^1\\.\\d+\\.(\\d+)", 1, ExtVersion))
1110
| where _minor > 23 or (_minor == 23 and _patch >= 10781012)
12-
| extend ManagerName = tostring(Properties.managername)
1311
| extend FailureStage = tostring(Properties.failurestage)
14-
| summarize
15-
FailureCount = count(),
16-
AffectedMachines = dcount(VSCodeMachineId)
17-
by ManagerName, FailureStage
18-
| order by ManagerName asc, AffectedMachines desc
12+
| where isnotempty(FailureStage)
13+
| summarize EventCount = count(), Machines = dcount(VSCodeMachineId)
14+
by ManagerName = tostring(Properties.managername), FailureStage, ExtVersion
15+
| order by Machines desc

analysis/kusto/02-error-type-x-failure-stage.kql

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
1-
// Query 2 — Error Type × Failure Stage Matrix (28 days)
1+
// Query 2 — Error Type × Failure Stage Matrix
22
// Cross-tabulates errorType (what kind of error) with failureStage (where it happened).
33
// This is the key diagnostic view: e.g., "connection_error at nativeFinderRefresh" means
44
// PET process dies during native finder, while "tool_not_found at pathLookup" means the
55
// tool binary wasn't on PATH. Prioritize cells with the highest AffectedMachines.
6-
let endDate = startofday(now()-1d);
76
RawEventsVSCodeExt
8-
| where ServerTimestamp > endDate-28d and ServerTimestamp < endDate
7+
| where ServerTimestamp > ago(7d)
98
| where EventName == "ms-python.vscode-python-envs/manager_registration.failed"
109
| extend ExtVersion = tostring(Properties["common.extversion"])
11-
| extend _minor = toint(extract("^1\\.(\\d+)", 1, ExtVersion)), _patch = tolong(extract("^1\\.\\d+\\.(\\d+)", 1, ExtVersion))
10+
| extend _minor = toint(extract("^1\\.(\\d+)", 1, ExtVersion))
11+
| extend _patch = tolong(extract("^1\\.\\d+\\.(\\d+)", 1, ExtVersion))
1212
| where _minor > 23 or (_minor == 23 and _patch >= 10781012)
1313
| extend ManagerName = tostring(Properties.managername)
1414
| extend ErrorType = tostring(Properties.errortype)
1515
| extend FailureStage = tostring(Properties.failurestage)
16+
| where isnotempty(FailureStage)
1617
| summarize
1718
FailureCount = count(),
1819
AffectedMachines = dcount(VSCodeMachineId)

analysis/kusto/03-failure-stage-daily-trend.kql

Lines changed: 0 additions & 19 deletions
This file was deleted.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

analysis/kusto/dashboard.ipynb

Lines changed: 23 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,9 @@
156156
"metadata": {},
157157
"source": [
158158
"---\n",
159-
"## 3. Daily Registration Failures by Manager & Stage (14 days)\n",
159+
"## 3. Overall Setup Success Rate (28 days)\n",
160160
"\n",
161-
"Day-by-day failure counts per manager + stage. A spike in one stage on a specific day = regression in that code path."
161+
"Top-level health metric. If this drops, something is broken."
162162
]
163163
},
164164
{
@@ -168,7 +168,7 @@
168168
"metadata": {},
169169
"outputs": [],
170170
"source": [
171-
"df = run_kql_file(client, \"03-failure-stage-daily-trend.kql\")\n",
171+
"df = run_kql_file(client, \"03-overall-setup-success-rate.kql\")\n",
172172
"display(df)"
173173
]
174174
},
@@ -178,41 +178,19 @@
178178
"metadata": {},
179179
"source": [
180180
"---\n",
181-
"## 4. Overall Setup Success Rate (28 days)\n",
182-
"\n",
183-
"Top-level health metric. If this drops, something is broken."
184-
]
185-
},
186-
{
187-
"cell_type": "code",
188-
"execution_count": null,
189-
"id": "13",
190-
"metadata": {},
191-
"outputs": [],
192-
"source": [
193-
"df = run_kql_file(client, \"04-overall-setup-success-rate.kql\")\n",
194-
"display(df)"
195-
]
196-
},
197-
{
198-
"cell_type": "markdown",
199-
"id": "14",
200-
"metadata": {},
201-
"source": [
202-
"---\n",
203-
"## 5. Manager Availability\n",
181+
"## 4. Manager Availability\n",
204182
"\n",
205183
"What tools do users actually have installed? Shows registered vs skipped vs failed per manager."
206184
]
207185
},
208186
{
209187
"cell_type": "code",
210188
"execution_count": null,
211-
"id": "15",
189+
"id": "13",
212190
"metadata": {},
213191
"outputs": [],
214192
"source": [
215-
"df = run_kql_file(client, \"05-manager-availability.kql\")\n",
193+
"df = run_kql_file(client, \"04-manager-availability.kql\")\n",
216194
"display(df)\n",
217195
"\n",
218196
"if not df.empty:\n",
@@ -241,23 +219,23 @@
241219
},
242220
{
243221
"cell_type": "markdown",
244-
"id": "16",
222+
"id": "14",
245223
"metadata": {},
246224
"source": [
247225
"---\n",
248-
"## 6. Daily Trend (14 days)\n",
226+
"## 5. Daily Trend (14 days)\n",
249227
"\n",
250228
"Day-by-day trend of setup success rate. Check after shipping a new version."
251229
]
252230
},
253231
{
254232
"cell_type": "code",
255233
"execution_count": null,
256-
"id": "17",
234+
"id": "15",
257235
"metadata": {},
258236
"outputs": [],
259237
"source": [
260-
"df = run_kql_file(client, \"06-daily-trend.kql\")\n",
238+
"df = run_kql_file(client, \"05-daily-trend.kql\")\n",
261239
"display(df)\n",
262240
"\n",
263241
"if not df.empty:\n",
@@ -276,23 +254,23 @@
276254
},
277255
{
278256
"cell_type": "markdown",
279-
"id": "18",
257+
"id": "16",
280258
"metadata": {},
281259
"source": [
282260
"---\n",
283-
"## 7. Error Type Distribution\n",
261+
"## 6. Error Type Distribution\n",
284262
"\n",
285263
"Groups all failures by error type across setup and individual managers."
286264
]
287265
},
288266
{
289267
"cell_type": "code",
290268
"execution_count": null,
291-
"id": "19",
269+
"id": "17",
292270
"metadata": {},
293271
"outputs": [],
294272
"source": [
295-
"df = run_kql_file(client, \"07-error-type-distribution.kql\")\n",
273+
"df = run_kql_file(client, \"06-error-type-distribution.kql\")\n",
296274
"display(df)\n",
297275
"\n",
298276
"if not df.empty:\n",
@@ -312,52 +290,52 @@
312290
},
313291
{
314292
"cell_type": "markdown",
315-
"id": "20",
293+
"id": "18",
316294
"metadata": {},
317295
"source": [
318296
"---\n",
319-
"## 8. Hang ↔ Failure Correlation\n",
297+
"## 7. Hang ↔ Failure Correlation\n",
320298
"\n",
321299
"Do hangs always cause failures, or do some self-recover?"
322300
]
323301
},
324302
{
325303
"cell_type": "code",
326304
"execution_count": null,
327-
"id": "21",
305+
"id": "19",
328306
"metadata": {},
329307
"outputs": [],
330308
"source": [
331-
"df = run_kql_file(client, \"08-hang-failure-correlation.kql\")\n",
309+
"df = run_kql_file(client, \"07-hang-failure-correlation.kql\")\n",
332310
"display(df)"
333311
]
334312
},
335313
{
336314
"cell_type": "markdown",
337-
"id": "22",
315+
"id": "20",
338316
"metadata": {},
339317
"source": [
340318
"---\n",
341-
"## 9. Weekly Health Summary\n",
319+
"## 8. Weekly Health Summary\n",
342320
"\n",
343321
"One-stop query for weekly check. Returns all key numbers in a single row."
344322
]
345323
},
346324
{
347325
"cell_type": "code",
348326
"execution_count": null,
349-
"id": "23",
327+
"id": "21",
350328
"metadata": {},
351329
"outputs": [],
352330
"source": [
353-
"df = run_kql_file(client, \"09-weekly-health-summary.kql\")\n",
331+
"df = run_kql_file(client, \"08-weekly-health-summary.kql\")\n",
354332
"display(df)"
355333
]
356334
}
357335
],
358336
"metadata": {
359337
"kernelspec": {
360-
"display_name": ".venv (3.13.12)",
338+
"display_name": ".venv",
361339
"language": "python",
362340
"name": "python3"
363341
},

0 commit comments

Comments
 (0)