microsoft · tmeschter · Jun 22, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 16, 2026
diff --git a/.github/workflows/test-all-integration.yml b/.github/workflows/test-all-integration.yml
@@ -397,3 +397,16 @@ jobs:
           BRANCH: ${{ github.ref_name }}
           RUN_ID: ${{ github.run_id }}
         run: npm run upload:token-usage
+
+      # Upload per-run tool usage (one row per tool call) to the Azure Table that powers the dashboard's
+      # per-run tool review. Runs for both scheduled and manual runs, even after failed steps (always()), when REPORT_STORAGE_ACCOUNT is set.
+      # Note: The managed identity must have Storage Table Data Contributor on the storage account.
+      - name: Upload tool usage to table
+        if: always() && vars.REPORT_STORAGE_ACCOUNT != ''
+        env:
+          TOOL_USAGE_STORAGE_ACCOUNT: ${{ vars.REPORT_STORAGE_ACCOUNT }}
+          TOOL_USAGE_TABLE_NAME: ${{ vars.TOOL_USAGE_TABLE || 'integrationtoolusage' }}
+          SKILL: ${{ matrix.skill }}
+          BRANCH: ${{ github.ref_name }}
+          RUN_ID: ${{ github.run_id }}
+        run: npm run upload:tool-usage
diff --git a/.github/workflows/test-azure-deploy.yml b/.github/workflows/test-azure-deploy.yml
@@ -271,3 +271,16 @@ jobs:
           BRANCH: ${{ github.ref_name }}
           RUN_ID: ${{ github.run_id }}
         run: npm run upload:token-usage
+
+      # Upload per-run tool usage (one row per tool call) to the Azure Table that powers the dashboard's
+      # per-run tool review. Runs for both scheduled and manual runs, even after failed steps (always()), when REPORT_STORAGE_ACCOUNT is set.
+      # Note: The managed identity must have Storage Table Data Contributor on the storage account.
+      - name: Upload tool usage to table
+        if: always() && vars.REPORT_STORAGE_ACCOUNT != ''
+        env:
+          TOOL_USAGE_STORAGE_ACCOUNT: ${{ vars.REPORT_STORAGE_ACCOUNT }}
+          TOOL_USAGE_TABLE_NAME: ${{ vars.TOOL_USAGE_TABLE || 'integrationtoolusage' }}
+          SKILL: azure-deploy
+          BRANCH: ${{ github.ref_name }}
+          RUN_ID: ${{ github.run_id }}
+        run: npm run upload:tool-usage
diff --git a/dashboard/api/src/functions/getData.ts b/dashboard/api/src/functions/getData.ts
@@ -15,11 +15,13 @@ import { logRequestIdentity } from "../requestIdentity";
  * 5. ${DATE}/${RUN_ID}/{skill-name}/{arbitrary-test-case-name}/agent-metadata-{datetime}{optional-dedupe-suffix}.md
  * 6. ${DATE}/${RUN_ID}/{skill-name}/{arbitrary-test-case-name}/agent-metadata.json
  * 7. ${DATE}/${RUN_ID}/{skill-name}/{arbitrary-test-case-name}/token-usage.json
+ * 8. ${DATE}/${RUN_ID}/{skill-name}/{arbitrary-test-case-name}/tool-usage-{datetime}{optional-dedupe-suffix}.json
  * 
  * The test-run-{datetime}-{skill-name}-SKILL-REPORT.md is unique per skill. It is a summarized version of the result of all test runs in its job.
  * The test-consolidated-report.md is unique per test case. It is a summarized version of the result of all agent runs for its test case.
  * The agent-metadata-{datetime}{optional-dedupe-suffix}.md captures the details of each agent run for its test case.
- * token-usage.json and agent-metadata.json should not be exposed for now.
+ * The tool-usage-{datetime}{optional-dedupe-suffix}.json captures the ordered tool calls of each agent run, named to match its agent-metadata-*.md report.
+ * token-usage.json, agent-metadata.json, and tool-usage-*.json should not be exposed for now.
  * 
  * For azure-deploy skill:
  * 1. ${DATE}/${RUN_ID}/{skill-name}/{test-group}/test-run-{datetime}-{skill-name}-SKILL-REPORT.md
@@ -29,6 +31,7 @@ import { logRequestIdentity } from "../requestIdentity";
  * 5. ${DATE}/${RUN_ID}/{skill-name}/{test-group}/{arbitrary-test-case-name}/agent-metadata-{datetime}{optional-dedupe-suffix}.md
  * 6. ${DATE}/${RUN_ID}/{skill-name}/{test-group}/{arbitrary-test-case-name}/agent-metadata.json
  * 7. ${DATE}/${RUN_ID}/{skill-name}/{test-group}/{arbitrary-test-case-name}/token-usage.json
+ * 8. ${DATE}/${RUN_ID}/{skill-name}/{test-group}/{arbitrary-test-case-name}/tool-usage-{datetime}{optional-dedupe-suffix}.json
  * 
  * All ${DATE} are in the format of yyyy-mm-dd.
  */

diff --git a/dashboard/api/src/functions/getToolUsage.ts b/dashboard/api/src/functions/getToolUsage.ts
@@ -0,0 +1,132 @@
+import { app, HttpRequest, HttpResponseInit, InvocationContext } from "@azure/functions";
+import { TableClient } from "@azure/data-tables";
+import { AzureCliCredential, ManagedIdentityCredential } from "@azure/identity";
+import { logRequestIdentity } from "../requestIdentity";
+
+const STORAGE_ACCOUNT_NAME = process.env.STORAGE_ACCOUNT_NAME;
+const TOOL_USAGE_TABLE_NAME = process.env.TOOL_USAGE_TABLE_NAME;
+/** Builds a TableClient for the tool-usage table; throws an Error when either environment variable above is unset. */
+function getToolUsageTableClient(): TableClient {
+    if (!STORAGE_ACCOUNT_NAME) {
+        throw new Error("STORAGE_ACCOUNT_NAME environment variable is not set");
+    }
+    if (!TOOL_USAGE_TABLE_NAME) {
+        throw new Error("TOOL_USAGE_TABLE_NAME environment variable is not set");
+    }
+    const clientId = process.env.AZURE_CLIENT_ID; // NOTE(review): asserted non-null below; confirm it is always set outside Development
+    const isDevEnvironment = process.env.AZURE_FUNCTIONS_ENVIRONMENT === "Development"; // Development uses the Azure CLI login, everything else the managed identity
+    const credential = isDevEnvironment ? new AzureCliCredential() : new ManagedIdentityCredential(clientId!);
+    return new TableClient(
+        `https://${STORAGE_ACCOUNT_NAME}.table.core.windows.net`,
+        TOOL_USAGE_TABLE_NAME,
+        credential
+    );
+}
+
+/** Escapes a value for use inside a single-quoted OData string literal by doubling every single quote; callers add the enclosing quotes. */
+function odataLiteral(value: string): string {
+    return value.replace(/'/g, "''");
+}
+
+/**
+ * Build the OData filter for tool-usage queries: one `<column> eq '<escaped value>'` clause per non-empty
+ * filter, joined with " and ". `test` maps to the `testName` column; empty strings are skipped. Returns undefined when no filter is set.
+ */
+export function buildToolUsageFilter(filters: {
+    skill?: string;
+    test?: string;
+    branch?: string;
+    runId?: string;
+    runToken?: string;
+    runDate?: string;
+}): string | undefined {
+    const clauses: string[] = [];
+    if (filters.skill) clauses.push(`skill eq '${odataLiteral(filters.skill)}'`);
+    if (filters.test) clauses.push(`testName eq '${odataLiteral(filters.test)}'`);
+    if (filters.branch) clauses.push(`branch eq '${odataLiteral(filters.branch)}'`);
+    if (filters.runId) clauses.push(`runId eq '${odataLiteral(filters.runId)}'`);
+    if (filters.runToken) clauses.push(`runToken eq '${odataLiteral(filters.runToken)}'`);
+    if (filters.runDate) clauses.push(`runDate eq '${odataLiteral(filters.runDate)}'`);
+    return clauses.length > 0 ? clauses.join(" and ") : undefined;
+}
+
+/**
+ * Returns integration-test tool usage rows from the table.
+ * GET /api/tool-usage
+ * Query params (exact-match filters, at least one required): skill, test, branch,
+ *               runId, runToken, runDate
+ * Responses: 200 with a JSON array of rows, 400 when no filter is supplied, 500 when the query fails.
+ * Each row represents a single tool call in one run. Full tool arguments are not
+ * stored here — they live in the per-run blob and are fetched on demand.
+ */
+async function getToolUsage(request: HttpRequest, context: InvocationContext): Promise<HttpResponseInit> {
+    logRequestIdentity(request, context, "getToolUsage");
+
+    const filter = buildToolUsageFilter({
+        skill: request.query.get("skill") || undefined,
+        test: request.query.get("test") || undefined,
+        branch: request.query.get("branch") || undefined,
+        runId: request.query.get("runId") || undefined,
+        runToken: request.query.get("runToken") || undefined,
+        runDate: request.query.get("runDate") || undefined,
+    });
+
+    // Require at least one filter. An unfiltered scan of the one-row-per-tool-call
+    // table can be very large and risks timeouts / excessive storage reads.
+    if (!filter) {
+        return {
+            status: 400,
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({
+                error: "At least one filter is required: skill, test, branch, runId, runToken, or runDate.",
+            }),
+        };
+    }
+
+    try {
+        const tableClient = getToolUsageTableClient(); // inside the try so missing configuration also yields the 500 response
+        const listOptions = { queryOptions: { filter } };
+        const entities: Record<string, unknown>[] = [];
+
+        for await (const entity of tableClient.listEntities(listOptions)) { // only the fields listed below are returned to the caller
+            entities.push({
+                skill: entity.skill,
+                testName: entity.testName,
+                branch: entity.branch,
+                runId: entity.runId,
+                runDate: entity.runDate,
+                runTimestamp: entity.runTimestamp,
+                runToken: entity.runToken,
+                reportFile: entity.reportFile,
+                sessionId: entity.sessionId,
+                model: entity.model,
+                order: entity.order,
+                toolName: entity.toolName,
+                toolCallId: entity.toolCallId,
+                successState: entity.successState,
+                durationMs: entity.durationMs,
+                outputBytes: entity.outputBytes,
+            });
+        }
+
+        return {
+            status: 200,
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify(entities),
+        };
+    } catch (err: unknown) { // unknown, not any: the caught value must be narrowed before its message is read
+        context.error("Error querying tool usage:", err instanceof Error ? err.message : err);
+        return {
+            status: 500,
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({ error: "Failed to query tool usage" }),
+        };
+    }
+}
+
+app.http("getToolUsage", { // registers getToolUsage as the HTTP handler for the tool-usage route
+    methods: ["GET"],
+    authLevel: "anonymous",
+    route: "tool-usage", // documented on the handler as GET /api/tool-usage
+    handler: getToolUsage,
+});
diff --git a/dashboard/infra/main.bicep b/dashboard/infra/main.bicep
@@ -21,6 +21,9 @@ param msbenchReportsContainerName string = 'msbench-reports'
 @description('Name of the Azure Table that stores integration-test token usage history.')
 param tokenUsageTableName string = 'integrationtokenusage'
 
+@description('Name of the Azure Table that stores integration-test per-run tool usage history.')
+param toolUsageTableName string = 'integrationtoolusage'
+
 @description('Principal (object) ID of the user-assigned managed identity used by the integration test pipeline to write token usage rows (skillcitestidentity).')
 param ciTestIdentityPrincipalId string = '531282f7-49cb-4149-af74-6c84a5270e87'
 
@@ -76,6 +79,7 @@ module storage './modules/storage.bicep' = {
     environmentName: environmentName
     principalId: identity.outputs.identityPrincipalId
     tokenUsageTableName: tokenUsageTableName
+    toolUsageTableName: toolUsageTableName
     ciTestIdentityPrincipalId: ciTestIdentityPrincipalId
   }
 }
@@ -99,6 +103,7 @@ module functionApp './modules/function-app.bicep' = {
     userAssignedIdentityClientId: identity.outputs.identityClientId
     storageAccountName: storage.outputs.storageAccountName
     tokenUsageTableName: storage.outputs.tokenUsageTableName
+    toolUsageTableName: storage.outputs.toolUsageTableName
     msbenchStorageAccountName: msbenchStorageAccountName
     msbenchEvalTableName: msbenchEvalTableName
     msbenchReportsContainerName: msbenchReportsContainerName

diff --git a/dashboard/infra/modules/function-app.bicep b/dashboard/infra/modules/function-app.bicep
@@ -21,6 +21,9 @@ param storageAccountName string
 @description('Name of the Azure Table that stores integration-test token usage history.')
 param tokenUsageTableName string
 
+@description('Name of the Azure Table that stores integration-test per-run tool usage history.')
+param toolUsageTableName string
+
 @description('Application Insights connection string for monitoring.')
 param appInsightsConnectionString string
 
@@ -118,6 +121,7 @@ resource functionApp 'Microsoft.Web/sites@2024-04-01' = {
         { name: 'AZURE_CLIENT_ID', value: userAssignedIdentityClientId }
         { name: 'STORAGE_ACCOUNT_NAME', value: storageAccountName }
         { name: 'TOKEN_USAGE_TABLE_NAME', value: tokenUsageTableName }
+        { name: 'TOOL_USAGE_TABLE_NAME', value: toolUsageTableName }
         { name: 'MSBENCH_STORAGE_ACCOUNT', value: msbenchStorageAccountName }
         { name: 'MSBENCH_REPORTS_CONTAINER', value: msbenchReportsContainerName }
         { name: 'MSBENCH_EVAL_TABLE_NAME', value: msbenchEvalTableName }

diff --git a/dashboard/infra/modules/storage.bicep b/dashboard/infra/modules/storage.bicep
@@ -15,6 +15,9 @@ param principalId string
 @description('Name of the Azure Table that stores integration-test token usage history.')
 param tokenUsageTableName string = 'integrationtokenusage'
 
+@description('Name of the Azure Table that stores integration-test per-run tool usage history.')
+param toolUsageTableName string = 'integrationtoolusage'
+
 @description('Principal (object) ID of the user-assigned managed identity used by the integration test pipeline to write token usage rows (skillcitestidentity in the skillcitest resource group, GithubCopilotForAzure-Testing subscription).')
 param ciTestIdentityPrincipalId string = '531282f7-49cb-4149-af74-6c84a5270e87'
 
@@ -97,6 +100,11 @@ resource tokenUsageTable 'Microsoft.Storage/storageAccounts/tableServices/tables
   name: tokenUsageTableName
 }
 
+resource toolUsageTable 'Microsoft.Storage/storageAccounts/tableServices/tables@2023-05-01' = {
+  parent: tableServices
+  name: toolUsageTableName
+}
+
 resource storageBlobDataReaderRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
   name: guid(storageAccount.id, principalId, storageBlobDataReaderRoleId)
   scope: storageAccount
@@ -107,7 +115,7 @@ resource storageBlobDataReaderRole 'Microsoft.Authorization/roleAssignments@2022
   }
 }
 
-// Allows the dashboard Function App identity to read token-usage entities from the table.
+// Allows the dashboard Function App identity to read token-usage and tool-usage entities from the tables.
 resource storageTableDataReaderRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
   name: guid(storageAccount.id, principalId, storageTableDataReaderRoleId)
   scope: storageAccount
@@ -118,7 +126,7 @@ resource storageTableDataReaderRole 'Microsoft.Authorization/roleAssignments@202
   }
 }
 
-// Allows the integration test pipeline identity to write token-usage entities to the table.
+// Allows the integration test pipeline identity to write token-usage and tool-usage entities to the tables.
 resource storageTableDataContributorRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
   name: guid(storageAccount.id, ciTestIdentityPrincipalId, storageTableDataContributorRoleId)
   scope: storageAccount
@@ -131,3 +139,4 @@ resource storageTableDataContributorRole 'Microsoft.Authorization/roleAssignment
 
 output storageAccountName string = storageAccount.name
 output tokenUsageTableName string = tokenUsageTable.name
+output toolUsageTableName string = toolUsageTable.name