sysdiglabs
diff --git a/‎README.md‎
Lines changed: 15 additions & 0 deletions b/‎README.md‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎cmd/server/main.go‎
Lines changed: 3 additions & 0 deletions b/‎cmd/server/main.go‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎internal/infra/mcp/tools/secure_events_common.go‎
Lines changed: 38 additions & 0 deletions b/‎internal/infra/mcp/tools/secure_events_common.go‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎internal/infra/mcp/tools/tool_count_runtime_events.go‎
Lines changed: 77 additions & 0 deletions b/‎internal/infra/mcp/tools/tool_count_runtime_events.go‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎internal/infra/mcp/tools/tool_count_runtime_events_test.go‎
Lines changed: 145 additions & 0 deletions b/‎internal/infra/mcp/tools/tool_count_runtime_events_test.go‎
Lines changed: 145 additions & 0 deletions
diff --git a/‎internal/infra/mcp/tools/tool_discover_runtime_event_field_values.go‎
Lines changed: 91 additions & 0 deletions b/‎internal/infra/mcp/tools/tool_discover_runtime_event_field_values.go‎
Lines changed: 91 additions & 0 deletions
@@ -150,6 +150,21 @@ The server dynamically filters the available tools based on the permissions asso
   - **Required Permission**: `policy-events.read`
   - **Sample Prompt**: "Get the process tree for event ID abc123"
 
+- **`count_runtime_events`**
+  - **Description**: Count runtime security events matching a filter expression in the last N hours, without paginating event bodies. Returns a histogram across 16 event categories, each bucketed by severity codes "0"-"7".
+  - **Required Permission**: `policy-events.read`
+  - **Sample Prompt**: "How many high-severity runtime events fired in cluster 'prod-gke' in the last 24 hours?"
+
+- **`runtime_events_timeseries`**
+  - **Description**: Bucket runtime security event counts over time, grouped by a categorical field (default `severity`). Use it to find when a burst started or ended without paginating; minimum bucket width is 1 minute.
+  - **Required Permission**: `policy-events.read`
+  - **Sample Prompt**: "When did the spike in Suspicious Outbound Connection events on cluster 'prod-gke' start and stop?"
+
+- **`discover_runtime_event_field_values`**
+  - **Description**: Discover the distinct values of a runtime-events field present in a time window. Returns `suggested` (values active in the window) and `other` (values known to the tenant but inactive). Use BEFORE writing filters to avoid guessing cluster, rule, or image names.
+  - **Required Permission**: `policy-events.read`
+  - **Sample Prompt**: "Which clusters produced any runtime events in the last hour?" or "What rule names are firing right now?"
+
 - **`run_sysql`**
   - **Description**: Execute a pre-written SysQL query directly (use only when user provides explicit query).
   - **Required Permission**: `sage.exec`, `risks.read`
 
@@ -116,6 +116,9 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
 		tools.NewToolListRuntimeEvents(sysdigClient, systemClock),
 		tools.NewToolGetEventInfo(sysdigClient),
 		tools.NewToolGetEventProcessTree(sysdigClient),
+		tools.NewToolCountRuntimeEvents(sysdigClient, systemClock),
+		tools.NewToolRuntimeEventsTimeseries(sysdigClient, systemClock),
+		tools.NewToolDiscoverRuntimeEventFieldValues(sysdigClient, systemClock),
 		tools.NewToolRunSysql(sysdigClient),
 		tools.NewToolGenerateSysql(sysdigClient),
 
 
@@ -0,0 +1,38 @@
+package tools
+
+// secureEventsBaseFilter is the baseline that composeSecureEventsFilter prepends
+// to runtime-events queries made on behalf of an LLM. It hides originators that
+// are noisy at investigation time (benchmarks, compliance, cloudsec, scanning,
+// hostscanning) so that user-supplied filters target the runtime signal.
+const secureEventsBaseFilter = `not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`
+
+// composeSecureEventsFilter merges the user-supplied filter expression with
+// the baseline. An empty userFilter returns the baseline unchanged.
+//
+// The user expression is wrapped in parentheses so that a top-level "or" in it
+// cannot escape the baseline. Assuming "and" binds tighter than "or" (the
+// conventional precedence), `base and a or b` would otherwise also match every
+// event satisfying b alone, including the originators the baseline hides.
+func composeSecureEventsFilter(userFilter string) string {
+	if userFilter == "" {
+		return secureEventsBaseFilter
+	}
+	return secureEventsBaseFilter + " and (" + userFilter + ")"
+}
+
+// secureEventsFilterDSL is the shared filter_expr description for the
+// runtime-event tools (list_runtime_events, count_runtime_events,
+// runtime_events_timeseries); discover_runtime_event_field_values refers to the
+// same DSL from its own shorter text. One copy keeps filter intuition identical.
+const secureEventsFilterDSL = `Logical filter expression to select runtime security events.
+Supports operators: =, !=, in, contains, starts with, exists.
+Combine with and/or/not.
+Key attributes include: severity (codes "0"-"7"), originator, sourceType, ruleName, rawEventCategory, engine, source, category, kubernetes.cluster.name, host.hostName, container.image.repo, container.image.tag, aws.accountId, azure.subscriptionId, gcp.projectId, policyId, trigger.
+
+To find machine learning (ML) detections (e.g. crypto mining, anomalous logins), use engine or source filters:
+- All ML events: 'engine = "machineLearning"'
+- AWS ML detections: 'source = "agentless-aws-ml"'
+- Okta ML detections: 'source = "agentless-okta-ml"'
+- By category: 'category = "machine-learning"'
+
+You can specify the severity of the events based on the following cases:
+- high-severity: 'severity in ("0","1","2","3")'
+- medium: 'severity in ("4","5")'
+- low: 'severity in ("6")'
+- info: 'severity in ("7")'
+`
@@ -0,0 +1,77 @@
+package tools
+
+import (
+	"context"
+	"time"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+	"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock"
+	"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
+)
+
+type ToolCountRuntimeEvents struct { // MCP tool handler registered as "count_runtime_events"
+	sysdigClient sysdig.ExtendedClientWithResponsesInterface // client used for GetSecureEventsCountWithResponse
+	clock        clock.Clock                                  // supplies "now" when computing the query window
+}
+
+// NewToolCountRuntimeEvents builds the count_runtime_events tool around the
+// given Sysdig client and clock.
+func NewToolCountRuntimeEvents(client sysdig.ExtendedClientWithResponsesInterface, clock clock.Clock) *ToolCountRuntimeEvents {
+	tool := new(ToolCountRuntimeEvents)
+	tool.sysdigClient = client
+	tool.clock = clock
+	return tool
+}
+
+// handle serves a count_runtime_events call: it converts the request into
+// count params, queries the Sysdig events-count endpoint and returns the
+// decoded body as JSON. Transport errors, HTTP statuses >= 400 and responses
+// without a decoded body are reported as tool errors (with a nil Go error).
+func (h *ToolCountRuntimeEvents) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+	params := toolRequestToCountParams(request, h.clock)
+
+	response, err := h.sysdigClient.GetSecureEventsCountWithResponse(ctx, params)
+	if err != nil {
+		return mcp.NewToolResultErrorFromErr("error triggering request", err), nil
+	}
+	if response.StatusCode() >= 400 {
+		return mcp.NewToolResultErrorf("error counting events, status code: %d, response: %s", response.StatusCode(), response.Body), nil
+	}
+	// A status below 400 without a decoded body (e.g. 204 or a redirect) would
+	// otherwise be serialised as a bare JSON null and look like a valid result.
+	if response.JSON200 == nil {
+		return mcp.NewToolResultErrorf("error counting events, unexpected status code: %d, response: %s", response.StatusCode(), response.Body), nil
+	}
+
+	return mcp.NewToolResultJSON(response.JSON200)
+}
+
+// toolRequestToCountParams translates an MCP tool request into the parameters
+// of the events-count call. The window ends at clock.Now() and starts
+// scope_hours (default 1) earlier, both sent as Unix nanoseconds; filter_expr
+// (default empty) is combined with the baseline via composeSecureEventsFilter.
+func toolRequestToCountParams(request mcp.CallToolRequest, clock clock.Clock) *sysdig.GetSecureEventsCountParams {
+	window := time.Duration(request.GetInt("scope_hours", 1)) * time.Hour
+	end := clock.Now()
+	start := end.Add(-window)
+
+	composed := composeSecureEventsFilter(request.GetString("filter_expr", ""))
+
+	params := new(sysdig.GetSecureEventsCountParams)
+	params.From = start.UnixNano()
+	params.To = end.UnixNano()
+	params.Filter = &composed
+	return params
+}
+
+// RegisterInServer declares the count_runtime_events tool (its description,
+// the scope_hours and filter_expr arguments, output schema, read-only hints
+// and the policy-events.read permission) and binds it to h.handle on s.
+func (h *ToolCountRuntimeEvents) RegisterInServer(s *server.MCPServer) {
+	description := mcp.WithDescription("Count runtime security events matching a filter expression in the last N hours, without paginating event bodies. Returns a histogram across 16 event categories (policyEvents, scanningEvents, cloudTrailEvents, mlCloudEvents, oktaEvents, githubEvents, gcpEvents, falcoCloudEvents, admissionControllerEvents, profilingDetectionEvents, awsMlConsoleLoginEvents, hostScanningEvents, benchmarkEvents, complianceEvents, cloudsecEvents, statefulDetectionEvents) where each category carries a `countBySeverity` map keyed \"0\" (highest) through \"7\" (info). Use this when the question is \"how many\" rather than \"which ones\" — it is one call regardless of result size.")
+
+	scopeHours := mcp.WithNumber("scope_hours",
+		mcp.Description("Number of hours back from now to count events over. Maximum 336 (14 days) — the backend rejects wider windows. Default 1."),
+		mcp.DefaultNumber(1),
+	)
+
+	filterExpr := mcp.WithString("filter_expr",
+		mcp.Description(secureEventsFilterDSL),
+		Examples(
+			`severity in ("0","1","2","3")`,
+			`ruleName = "Malware Detection"`,
+			`kubernetes.cluster.name = "cluster1" and severity in ("0","1","2","3")`,
+			`engine = "machineLearning"`,
+			`aws.accountId = "123456789012"`,
+		),
+	)
+
+	s.AddTool(
+		mcp.NewTool("count_runtime_events",
+			description,
+			scopeHours,
+			filterExpr,
+			mcp.WithOutputSchema[map[string]any](),
+			mcp.WithReadOnlyHintAnnotation(true),
+			mcp.WithDestructiveHintAnnotation(false),
+			WithRequiredPermissions("policy-events.read"),
+		),
+		h.handle,
+	)
+}
@@ -0,0 +1,145 @@
+package tools_test
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/mark3labs/mcp-go/client"
+	"github.com/mark3labs/mcp-go/mcp"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"go.uber.org/mock/gomock"
+
+	mocks_clock "github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock/mocks"
+	inframcp "github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp"
+	"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
+	"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
+	"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
+)
+
+var _ = Describe("ToolCountRuntimeEvents", func() { // specs call the tool through an in-process MCP client
+	var (
+		mockClient *mocks.MockExtendedClientWithResponsesInterface
+		mockClock  *mocks_clock.MockClock
+		tool       *tools.ToolCountRuntimeEvents
+		ctrl       *gomock.Controller
+		handler    *inframcp.Handler
+		mcpClient  *client.Client
+	)
+
+	BeforeEach(func() {
+		ctrl = gomock.NewController(GinkgoT())
+		mockClient = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
+		mockClient.EXPECT().GetMyPermissionsWithResponse(gomock.Any(), gomock.Any()).Return(&sysdig.GetMyPermissionsResponse{ // permissions mock: caller holds policy-events.read
+			HTTPResponse: &http.Response{StatusCode: 200},
+			JSON200: &sysdig.UserPermissions{
+				Permissions: []string{"policy-events.read"},
+			},
+		}, nil).AnyTimes()
+		mockClock = mocks_clock.NewMockClock(ctrl)
+		mockClock.EXPECT().Now().AnyTimes().Return(time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC)) // fixed "now" keeps the expected From/To deterministic
+		tool = tools.NewToolCountRuntimeEvents(mockClient, mockClock)
+		handler = inframcp.NewHandler("dev", mockClient)
+		handler.RegisterTools(tool)
+
+		var err error
+		mcpClient, err = handler.ServeInProcessClient()
+		Expect(err).NotTo(HaveOccurred())
+
+		_, err = mcpClient.Initialize(context.Background(), mcp.InitializeRequest{})
+		Expect(err).NotTo(HaveOccurred())
+	})
+
+	AfterEach(func() {
+		ctrl.Finish()
+	})
+
+	It("converts a request into count params with baseline filter prepended", func(ctx SpecContext) {
+		mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).DoAndReturn(
+			func(_ context.Context, params *sysdig.GetSecureEventsCountParams, _ ...sysdig.RequestEditorFn) (*sysdig.GetSecureEventsCountResponse, error) {
+				Expect(params.From).To(Equal(int64(946677600000000000))) // 2000-01-01 minus 2h
+				Expect(params.To).To(Equal(int64(946684800000000000)))   // 2000-01-01 00:00:00 UTC
+				Expect(*params.Filter).To(ContainSubstring(`not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`))
+				Expect(*params.Filter).To(ContainSubstring(`severity = 4`)) // the user filter must survive composition
+
+				body := map[string]any{
+					"policyEvents": map[string]any{"countBySeverity": map[string]any{"0": 1.0}},
+				}
+				return &sysdig.GetSecureEventsCountResponse{
+					HTTPResponse: &http.Response{StatusCode: 200},
+					JSON200:      &body,
+				}, nil
+			})
+
+		result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
+			Params: mcp.CallToolParams{
+				Name: "count_runtime_events",
+				Arguments: map[string]any{
+					"scope_hours": 2,
+					"filter_expr": "severity = 4",
+				},
+			},
+		})
+
+		Expect(err).NotTo(HaveOccurred())
+		Expect(result.IsError).To(BeFalse())
+	})
+
+	It("uses defaults (1h window, baseline filter only) when no args provided", func(ctx SpecContext) {
+		mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).DoAndReturn(
+			func(_ context.Context, params *sysdig.GetSecureEventsCountParams, _ ...sysdig.RequestEditorFn) (*sysdig.GetSecureEventsCountResponse, error) {
+				Expect(params.From).To(Equal(int64(946681200000000000))) // 2000-01-01 minus 1h
+				Expect(params.To).To(Equal(int64(946684800000000000))) // 2000-01-01 00:00:00 UTC
+				Expect(*params.Filter).To(Equal(`not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`))
+
+				body := map[string]any{}
+				return &sysdig.GetSecureEventsCountResponse{
+					HTTPResponse: &http.Response{StatusCode: 200},
+					JSON200:      &body,
+				}, nil
+			})
+
+		result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
+			Params: mcp.CallToolParams{
+				Name:      "count_runtime_events",
+				Arguments: map[string]any{},
+			},
+		})
+
+		Expect(err).NotTo(HaveOccurred())
+		Expect(result.IsError).To(BeFalse())
+	})
+
+	It("surfaces a client error as a tool error", func(ctx SpecContext) {
+		mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("client error"))
+
+		result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
+			Params: mcp.CallToolParams{
+				Name:      "count_runtime_events",
+				Arguments: map[string]any{},
+			},
+		})
+
+		Expect(err).NotTo(HaveOccurred()) // the failure travels in the result, not as a Go error
+		Expect(result.IsError).To(BeTrue())
+	})
+
+	It("surfaces a non-2xx HTTP response as a tool error", func(ctx SpecContext) {
+		mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).Return(&sysdig.GetSecureEventsCountResponse{
+			HTTPResponse: &http.Response{StatusCode: 401},
+			Body:         []byte("Unauthorized"),
+		}, nil)
+
+		result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
+			Params: mcp.CallToolParams{
+				Name:      "count_runtime_events",
+				Arguments: map[string]any{},
+			},
+		})
+
+		Expect(err).NotTo(HaveOccurred())
+		Expect(result.IsError).To(BeTrue())
+	})
+})
@@ -0,0 +1,91 @@
+package tools
+
+import (
+	"context"
+	"time"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+	"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock"
+	"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
+)
+
+type ToolDiscoverRuntimeEventFieldValues struct { // MCP tool handler registered as "discover_runtime_event_field_values"
+	sysdigClient sysdig.ExtendedClientWithResponsesInterface // client used for GetEventFieldValuesWithResponse
+	clock        clock.Clock                                  // supplies "now" when computing the scan window
+}
+
+// NewToolDiscoverRuntimeEventFieldValues builds the
+// discover_runtime_event_field_values tool around the given Sysdig client and
+// clock.
+func NewToolDiscoverRuntimeEventFieldValues(client sysdig.ExtendedClientWithResponsesInterface, clock clock.Clock) *ToolDiscoverRuntimeEventFieldValues {
+	tool := new(ToolDiscoverRuntimeEventFieldValues)
+	tool.sysdigClient = client
+	tool.clock = clock
+	return tool
+}
+
+// handle serves a discover_runtime_event_field_values call. It requires a
+// non-empty "field", scans the window from now minus scope_hours (default 1)
+// up to now, and forwards "filter_expr" only when one was supplied; no
+// baseline filter is added here. Transport errors and HTTP statuses >= 400
+// come back as tool errors.
+func (h *ToolDiscoverRuntimeEventFieldValues) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+	field := request.GetString("field", "")
+	if len(field) == 0 {
+		return mcp.NewToolResultErrorf("field is required"), nil
+	}
+
+	window := time.Duration(request.GetInt("scope_hours", 1)) * time.Hour
+	end := h.clock.Now()
+	start := end.Add(-window)
+
+	params := new(sysdig.GetEventFieldValuesParams)
+	params.Field = field
+	params.From = start.UnixNano()
+	params.To = end.UnixNano()
+	// Leave Filter nil when the caller gave none, so no filter is sent at all.
+	if userFilter := request.GetString("filter_expr", ""); userFilter != "" {
+		params.Filter = &userFilter
+	}
+
+	response, err := h.sysdigClient.GetEventFieldValuesWithResponse(ctx, params)
+	switch {
+	case err != nil:
+		return mcp.NewToolResultErrorFromErr("error triggering request", err), nil
+	case response.StatusCode() >= 400:
+		return mcp.NewToolResultErrorf("error discovering field values, status code: %d, response: %s", response.StatusCode(), response.Body), nil
+	}
+
+	return mcp.NewToolResultJSON(response.JSON200)
+}
+
+// RegisterInServer declares the discover_runtime_event_field_values tool (its
+// description, the required field argument, the optional scope_hours and
+// filter_expr arguments, output schema, read-only hints and the
+// policy-events.read permission) and binds it to h.handle on s.
+func (h *ToolDiscoverRuntimeEventFieldValues) RegisterInServer(s *server.MCPServer) {
+	description := mcp.WithDescription("Discover the distinct values of a runtime-events field present in a time window. Returns two buckets: `suggested` = values producing events in the window (fire order — what's actually happening); `other` = values known to the tenant but inactive in the window (catalog — what's possible). Use BEFORE writing a filter to learn which cluster / rule / image / namespace names are real, instead of guessing and getting empty results. Common fields to discover: kubernetes.cluster.name, kubernetes.namespace.name, ruleName, container.image.repo, host.hostName, aws.accountId, source, engine.")
+
+	fieldArg := mcp.WithString("field",
+		mcp.Description("Field whose distinct values to enumerate. Examples: kubernetes.cluster.name, ruleName, container.image.repo, host.hostName, severity, source, engine."),
+		mcp.Required(),
+		Examples(
+			"kubernetes.cluster.name",
+			"ruleName",
+			"container.image.repo",
+			"host.hostName",
+			"severity",
+			"source",
+			"engine",
+			"aws.accountId",
+		),
+	)
+
+	scopeHours := mcp.WithNumber("scope_hours",
+		mcp.Description("Number of hours back from now to scan. Maximum 336 (14 days). Default 1."),
+		mcp.DefaultNumber(1),
+	)
+
+	filterExpr := mcp.WithString("filter_expr",
+		mcp.Description("Optional filter expression to scope the search before enumerating values. Same DSL as other runtime-event tools. Without a filter, the enumeration spans all categories of events in the window."),
+		Examples(
+			`kubernetes.cluster.name = "production-gke"`,
+			`engine = "machineLearning"`,
+			`severity in ("0","1","2","3")`,
+		),
+	)
+
+	s.AddTool(
+		mcp.NewTool("discover_runtime_event_field_values",
+			description,
+			fieldArg,
+			scopeHours,
+			filterExpr,
+			mcp.WithOutputSchema[map[string]any](),
+			mcp.WithReadOnlyHintAnnotation(true),
+			mcp.WithDestructiveHintAnnotation(false),
+			WithRequiredPermissions("policy-events.read"),
+		),
+		h.handle,
+	)
+}