Skip to content

Commit 164231b

Browse files
feat(events): add count, timeseries, and field-value-discovery tools
Adds three new MCP tools so that end-to-end runtime-event investigations can be done in a few tool calls instead of paginating event bodies: - count_runtime_events: returns a 16-category × 8-severity histogram for any filter and time window in a single call. No pagination, no truncation. Backed by GET /api/v1/secureEvents/count. - runtime_events_timeseries: buckets event counts over time, grouped by a categorical field (default "severity"). Server picks the coarsest bucket size that fits the rows cap; minimum bucket is 1 minute. Lets the model find when a burst started/ended in two calls (coarse pass + zoom). Backed by GET /api/v1/secureEvents/timeseriesBy. - discover_runtime_event_field_values: enumerates the distinct values of a runtime-events field present in a window, split into "suggested" (active in window) and "other" (known but inactive). Lets the model learn real cluster/rule/image names before writing a filter instead of guessing. Backed by GET /secure/events/v2/eventFields/{field}. Also: - Extracts the runtime-events baseline filter ("not originator in (benchmarks, compliance, cloudsec, scanning, hostscanning)") into a shared helper used by all four runtime-event tools. - Shares the filter-expression DSL documentation across the four tools so the LLM applies identical filter intuition everywhere. - Fixes two filter-DSL examples in list_runtime_events whose syntax was rejected by the live API: 'host.hostName startsWith "web-"' is not accepted (correct form: 'host.hostName starts with "web-"'), and 'container.imageName' is not a valid field (correct forms: 'container.image.repo' and 'container.image.tag'). All three new tools require policy-events.read, the same permission as list_runtime_events and get_event_info. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent d7aec54 commit 164231b

14 files changed

Lines changed: 1108 additions & 25 deletions

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,21 @@ The server dynamically filters the available tools based on the permissions asso
150150
- **Required Permission**: `policy-events.read`
151151
- **Sample Prompt**: "Get the process tree for event ID abc123"
152152

153+
- **`count_runtime_events`**
154+
- **Description**: Count runtime security events matching a filter expression in the last N hours, without paginating event bodies. Returns a histogram across 16 event categories, each bucketed by severity codes "0"-"7".
155+
- **Required Permission**: `policy-events.read`
156+
- **Sample Prompt**: "How many high-severity runtime events fired in cluster 'prod-gke' in the last 24 hours?"
157+
158+
- **`runtime_events_timeseries`**
159+
- **Description**: Bucket runtime security event counts over time, grouped by a categorical field (default `severity`). Use to find when a burst started or ended without paginating; minimum bucket width is 1 minute.
160+
- **Required Permission**: `policy-events.read`
161+
- **Sample Prompt**: "When did the spike in Suspicious Outbound Connection events on cluster 'prod-gke' start and stop?"
162+
163+
- **`discover_runtime_event_field_values`**
164+
- **Description**: Discover the distinct values of a runtime-events field present in a time window. Returns `suggested` (values active in the window) and `other` (values known to the tenant but inactive). Use BEFORE writing filters to avoid guessing cluster, rule, or image names.
165+
- **Required Permission**: `policy-events.read`
166+
- **Sample Prompt**: "Which clusters produced any runtime events in the last hour?" or "What rule names are firing right now?"
167+
153168
- **`run_sysql`**
154169
- **Description**: Execute a pre-written SysQL query directly (use only when user provides explicit query).
155170
- **Required Permission**: `sage.exec`, `risks.read`

cmd/server/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
116116
tools.NewToolListRuntimeEvents(sysdigClient, systemClock),
117117
tools.NewToolGetEventInfo(sysdigClient),
118118
tools.NewToolGetEventProcessTree(sysdigClient),
119+
tools.NewToolCountRuntimeEvents(sysdigClient, systemClock),
120+
tools.NewToolRuntimeEventsTimeseries(sysdigClient, systemClock),
121+
tools.NewToolDiscoverRuntimeEventFieldValues(sysdigClient, systemClock),
119122
tools.NewToolRunSysql(sysdigClient),
120123
tools.NewToolGenerateSysql(sysdigClient),
121124

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package tools
2+
3+
// secureEventsBaseFilter is the baseline clause placed in front of every
// runtime-events filter built by composeSecureEventsFilter. It excludes the
// benchmarks, compliance, cloudsec, scanning and hostscanning originators,
// which are noise during an investigation, so that caller-supplied filters
// operate on the runtime signal only.
const secureEventsBaseFilter = `not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`

// composeSecureEventsFilter returns the filter expression to send to the
// backend: the baseline alone when userFilter is empty, otherwise the baseline
// joined to userFilter with " and ".
//
// NOTE(review): userFilter is appended verbatim. If the filter language gives
// "and" higher precedence than "or", a user filter with a top-level "or" would
// escape the baseline for its right-hand side — confirm the DSL's precedence
// and whether wrapping userFilter in parentheses is accepted.
func composeSecureEventsFilter(userFilter string) string {
	composed := secureEventsBaseFilter
	if userFilter != "" {
		composed += " and " + userFilter
	}
	return composed
}
17+
18+
// secureEventsFilterDSL is LLM-facing prose describing the runtime-events
19+
// filter language: supported operators, commonly used attributes, and
20+
// ready-made filters for ML detections and severity tiers. It is passed to
21+
// mcp.Description for the filter_expr parameter of count_runtime_events.
22+
// Keeping the prose in one constant lets every tool that references it present
// identical filter guidance; presumably list_runtime_events and
// runtime_events_timeseries reference it as well (not visible here).
// NOTE(review): discover_runtime_event_field_values gives its filter_expr a
// shorter inline description instead of this constant — confirm that is intended.
const secureEventsFilterDSL = `Logical filter expression to select runtime security events.
23+
Supports operators: =, !=, in, contains, starts with, exists.
24+
Combine with and/or/not.
25+
Key attributes include: severity (codes "0"-"7"), originator, sourceType, ruleName, rawEventCategory, engine, source, category, kubernetes.cluster.name, host.hostName, container.image.repo, container.image.tag, aws.accountId, azure.subscriptionId, gcp.projectId, policyId, trigger.
26+
27+
To find machine learning (ML) detections (e.g. crypto mining, anomalous logins), use engine or source filters:
28+
- All ML events: 'engine = "machineLearning"'
29+
- AWS ML detections: 'source = "agentless-aws-ml"'
30+
- Okta ML detections: 'source = "agentless-okta-ml"'
31+
- By category: 'category = "machine-learning"'
32+
33+
You can specify the severity of the events based on the following cases:
34+
- high-severity: 'severity in ("0","1","2","3")'
35+
- medium: 'severity in ("4","5")'
36+
- low: 'severity in ("6")'
37+
- info: 'severity in ("7")'
38+
`
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package tools
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/mark3labs/mcp-go/mcp"
8+
"github.com/mark3labs/mcp-go/server"
9+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock"
10+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
11+
)
12+
13+
type ToolCountRuntimeEvents struct {
14+
sysdigClient sysdig.ExtendedClientWithResponsesInterface
15+
clock clock.Clock
16+
}
17+
18+
func NewToolCountRuntimeEvents(client sysdig.ExtendedClientWithResponsesInterface, clock clock.Clock) *ToolCountRuntimeEvents {
19+
return &ToolCountRuntimeEvents{
20+
sysdigClient: client,
21+
clock: clock,
22+
}
23+
}
24+
25+
func (h *ToolCountRuntimeEvents) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
26+
params := toolRequestToCountParams(request, h.clock)
27+
28+
response, err := h.sysdigClient.GetSecureEventsCountWithResponse(ctx, params)
29+
if err != nil {
30+
return mcp.NewToolResultErrorFromErr("error triggering request", err), nil
31+
}
32+
if response.StatusCode() >= 400 {
33+
return mcp.NewToolResultErrorf("error counting events, status code: %d, response: %s", response.StatusCode(), response.Body), nil
34+
}
35+
36+
return mcp.NewToolResultJSON(response.JSON200)
37+
}
38+
39+
func toolRequestToCountParams(request mcp.CallToolRequest, clock clock.Clock) *sysdig.GetSecureEventsCountParams {
40+
scopeHours := request.GetInt("scope_hours", 1)
41+
to := clock.Now()
42+
from := to.Add(-time.Duration(scopeHours) * time.Hour)
43+
44+
filter := composeSecureEventsFilter(request.GetString("filter_expr", ""))
45+
46+
return &sysdig.GetSecureEventsCountParams{
47+
From: from.UnixNano(),
48+
To: to.UnixNano(),
49+
Filter: &filter,
50+
}
51+
}
52+
53+
func (h *ToolCountRuntimeEvents) RegisterInServer(s *server.MCPServer) {
54+
tool := mcp.NewTool("count_runtime_events",
55+
mcp.WithDescription("Count runtime security events matching a filter expression in the last N hours, without paginating event bodies. Returns a histogram across 16 event categories (policyEvents, scanningEvents, cloudTrailEvents, mlCloudEvents, oktaEvents, githubEvents, gcpEvents, falcoCloudEvents, admissionControllerEvents, profilingDetectionEvents, awsMlConsoleLoginEvents, hostScanningEvents, benchmarkEvents, complianceEvents, cloudsecEvents, statefulDetectionEvents) where each category carries a `countBySeverity` map keyed \"0\" (highest) through \"7\" (info). Use this when the question is \"how many\" rather than \"which ones\" — it is one call regardless of result size."),
56+
mcp.WithNumber("scope_hours",
57+
mcp.Description("Number of hours back from now to count events over. Maximum 336 (14 days) — the backend rejects wider windows. Default 1."),
58+
mcp.DefaultNumber(1),
59+
),
60+
mcp.WithString("filter_expr",
61+
mcp.Description(secureEventsFilterDSL),
62+
Examples(
63+
`severity in ("0","1","2","3")`,
64+
`ruleName = "Malware Detection"`,
65+
`kubernetes.cluster.name = "cluster1" and severity in ("0","1","2","3")`,
66+
`engine = "machineLearning"`,
67+
`aws.accountId = "123456789012"`,
68+
),
69+
),
70+
mcp.WithOutputSchema[map[string]any](),
71+
mcp.WithReadOnlyHintAnnotation(true),
72+
mcp.WithDestructiveHintAnnotation(false),
73+
WithRequiredPermissions("policy-events.read"),
74+
)
75+
76+
s.AddTool(tool, h.handle)
77+
}
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
package tools_test
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
"time"
8+
9+
"github.com/mark3labs/mcp-go/client"
10+
"github.com/mark3labs/mcp-go/mcp"
11+
. "github.com/onsi/ginkgo/v2"
12+
. "github.com/onsi/gomega"
13+
"go.uber.org/mock/gomock"
14+
15+
mocks_clock "github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock/mocks"
16+
inframcp "github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp"
17+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
18+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
19+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
20+
)
21+
22+
var _ = Describe("ToolCountRuntimeEvents", func() {
23+
var (
24+
mockClient *mocks.MockExtendedClientWithResponsesInterface
25+
mockClock *mocks_clock.MockClock
26+
tool *tools.ToolCountRuntimeEvents
27+
ctrl *gomock.Controller
28+
handler *inframcp.Handler
29+
mcpClient *client.Client
30+
)
31+
32+
BeforeEach(func() {
33+
ctrl = gomock.NewController(GinkgoT())
34+
mockClient = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
35+
mockClient.EXPECT().GetMyPermissionsWithResponse(gomock.Any(), gomock.Any()).Return(&sysdig.GetMyPermissionsResponse{
36+
HTTPResponse: &http.Response{StatusCode: 200},
37+
JSON200: &sysdig.UserPermissions{
38+
Permissions: []string{"policy-events.read"},
39+
},
40+
}, nil).AnyTimes()
41+
mockClock = mocks_clock.NewMockClock(ctrl)
42+
mockClock.EXPECT().Now().AnyTimes().Return(time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC))
43+
tool = tools.NewToolCountRuntimeEvents(mockClient, mockClock)
44+
handler = inframcp.NewHandler("dev", mockClient)
45+
handler.RegisterTools(tool)
46+
47+
var err error
48+
mcpClient, err = handler.ServeInProcessClient()
49+
Expect(err).NotTo(HaveOccurred())
50+
51+
_, err = mcpClient.Initialize(context.Background(), mcp.InitializeRequest{})
52+
Expect(err).NotTo(HaveOccurred())
53+
})
54+
55+
AfterEach(func() {
56+
ctrl.Finish()
57+
})
58+
59+
It("converts a request into count params with baseline filter prepended", func(ctx SpecContext) {
60+
mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).DoAndReturn(
61+
func(_ context.Context, params *sysdig.GetSecureEventsCountParams, _ ...sysdig.RequestEditorFn) (*sysdig.GetSecureEventsCountResponse, error) {
62+
Expect(params.From).To(Equal(int64(946677600000000000))) // 2000-01-01 minus 2h
63+
Expect(params.To).To(Equal(int64(946684800000000000))) // 2000-01-01 00:00:00 UTC
64+
Expect(*params.Filter).To(ContainSubstring(`not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`))
65+
Expect(*params.Filter).To(ContainSubstring(`severity = 4`))
66+
67+
body := map[string]any{
68+
"policyEvents": map[string]any{"countBySeverity": map[string]any{"0": 1.0}},
69+
}
70+
return &sysdig.GetSecureEventsCountResponse{
71+
HTTPResponse: &http.Response{StatusCode: 200},
72+
JSON200: &body,
73+
}, nil
74+
})
75+
76+
result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
77+
Params: mcp.CallToolParams{
78+
Name: "count_runtime_events",
79+
Arguments: map[string]any{
80+
"scope_hours": 2,
81+
"filter_expr": "severity = 4",
82+
},
83+
},
84+
})
85+
86+
Expect(err).NotTo(HaveOccurred())
87+
Expect(result.IsError).To(BeFalse())
88+
})
89+
90+
It("uses defaults (1h window, baseline filter only) when no args provided", func(ctx SpecContext) {
91+
mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).DoAndReturn(
92+
func(_ context.Context, params *sysdig.GetSecureEventsCountParams, _ ...sysdig.RequestEditorFn) (*sysdig.GetSecureEventsCountResponse, error) {
93+
Expect(params.From).To(Equal(int64(946681200000000000))) // 2000-01-01 minus 1h
94+
Expect(params.To).To(Equal(int64(946684800000000000)))
95+
Expect(*params.Filter).To(Equal(`not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`))
96+
97+
body := map[string]any{}
98+
return &sysdig.GetSecureEventsCountResponse{
99+
HTTPResponse: &http.Response{StatusCode: 200},
100+
JSON200: &body,
101+
}, nil
102+
})
103+
104+
result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
105+
Params: mcp.CallToolParams{
106+
Name: "count_runtime_events",
107+
Arguments: map[string]any{},
108+
},
109+
})
110+
111+
Expect(err).NotTo(HaveOccurred())
112+
Expect(result.IsError).To(BeFalse())
113+
})
114+
115+
It("surfaces a client error as a tool error", func(ctx SpecContext) {
116+
mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("client error"))
117+
118+
result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
119+
Params: mcp.CallToolParams{
120+
Name: "count_runtime_events",
121+
Arguments: map[string]any{},
122+
},
123+
})
124+
125+
Expect(err).NotTo(HaveOccurred())
126+
Expect(result.IsError).To(BeTrue())
127+
})
128+
129+
It("surfaces a non-2xx HTTP response as a tool error", func(ctx SpecContext) {
130+
mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).Return(&sysdig.GetSecureEventsCountResponse{
131+
HTTPResponse: &http.Response{StatusCode: 401},
132+
Body: []byte("Unauthorized"),
133+
}, nil)
134+
135+
result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
136+
Params: mcp.CallToolParams{
137+
Name: "count_runtime_events",
138+
Arguments: map[string]any{},
139+
},
140+
})
141+
142+
Expect(err).NotTo(HaveOccurred())
143+
Expect(result.IsError).To(BeTrue())
144+
})
145+
})
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package tools
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/mark3labs/mcp-go/mcp"
8+
"github.com/mark3labs/mcp-go/server"
9+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock"
10+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
11+
)
12+
13+
type ToolDiscoverRuntimeEventFieldValues struct {
14+
sysdigClient sysdig.ExtendedClientWithResponsesInterface
15+
clock clock.Clock
16+
}
17+
18+
func NewToolDiscoverRuntimeEventFieldValues(client sysdig.ExtendedClientWithResponsesInterface, clock clock.Clock) *ToolDiscoverRuntimeEventFieldValues {
19+
return &ToolDiscoverRuntimeEventFieldValues{
20+
sysdigClient: client,
21+
clock: clock,
22+
}
23+
}
24+
25+
func (h *ToolDiscoverRuntimeEventFieldValues) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
26+
field := request.GetString("field", "")
27+
if field == "" {
28+
return mcp.NewToolResultErrorf("field is required"), nil
29+
}
30+
31+
scopeHours := request.GetInt("scope_hours", 1)
32+
to := h.clock.Now()
33+
from := to.Add(-time.Duration(scopeHours) * time.Hour)
34+
35+
params := &sysdig.GetEventFieldValuesParams{
36+
Field: field,
37+
From: from.UnixNano(),
38+
To: to.UnixNano(),
39+
}
40+
if filterExpr := request.GetString("filter_expr", ""); filterExpr != "" {
41+
params.Filter = &filterExpr
42+
}
43+
44+
response, err := h.sysdigClient.GetEventFieldValuesWithResponse(ctx, params)
45+
if err != nil {
46+
return mcp.NewToolResultErrorFromErr("error triggering request", err), nil
47+
}
48+
if response.StatusCode() >= 400 {
49+
return mcp.NewToolResultErrorf("error discovering field values, status code: %d, response: %s", response.StatusCode(), response.Body), nil
50+
}
51+
52+
return mcp.NewToolResultJSON(response.JSON200)
53+
}
54+
55+
func (h *ToolDiscoverRuntimeEventFieldValues) RegisterInServer(s *server.MCPServer) {
56+
tool := mcp.NewTool("discover_runtime_event_field_values",
57+
mcp.WithDescription("Discover the distinct values of a runtime-events field present in a time window. Returns two buckets: `suggested` = values producing events in the window (fire order — what's actually happening); `other` = values known to the tenant but inactive in the window (catalog — what's possible). Use BEFORE writing a filter to learn which cluster / rule / image / namespace names are real, instead of guessing and getting empty results. Common fields to discover: kubernetes.cluster.name, kubernetes.namespace.name, ruleName, container.image.repo, host.hostName, aws.accountId, source, engine."),
58+
mcp.WithString("field",
59+
mcp.Description("Field whose distinct values to enumerate. Examples: kubernetes.cluster.name, ruleName, container.image.repo, host.hostName, severity, source, engine."),
60+
mcp.Required(),
61+
Examples(
62+
"kubernetes.cluster.name",
63+
"ruleName",
64+
"container.image.repo",
65+
"host.hostName",
66+
"severity",
67+
"source",
68+
"engine",
69+
"aws.accountId",
70+
),
71+
),
72+
mcp.WithNumber("scope_hours",
73+
mcp.Description("Number of hours back from now to scan. Maximum 336 (14 days). Default 1."),
74+
mcp.DefaultNumber(1),
75+
),
76+
mcp.WithString("filter_expr",
77+
mcp.Description("Optional filter expression to scope the search before enumerating values. Same DSL as other runtime-event tools. Without a filter, the enumeration spans all categories of events in the window."),
78+
Examples(
79+
`kubernetes.cluster.name = "production-gke"`,
80+
`engine = "machineLearning"`,
81+
`severity in ("0","1","2","3")`,
82+
),
83+
),
84+
mcp.WithOutputSchema[map[string]any](),
85+
mcp.WithReadOnlyHintAnnotation(true),
86+
mcp.WithDestructiveHintAnnotation(false),
87+
WithRequiredPermissions("policy-events.read"),
88+
)
89+
90+
s.AddTool(tool, h.handle)
91+
}

0 commit comments

Comments
 (0)