From 5080a9c6c6d4470e0bff4c26eeaa9a3dfa123048 Mon Sep 17 00:00:00 2001 From: Yasser Khan Date: Thu, 21 May 2026 14:46:11 +0530 Subject: [PATCH 01/19] fix(cmt): add /cmt_dispatch and /cleanup_e2e endpoints GitHub's workflow_run webhook payload does not carry workflow_dispatch inputs (verified against the workflow_run schema), so CMT Provisioner runs were never able to start provisioning -- the server_versions input was always read as empty and the handler returned without dispatching compatibility-matrix-testing.yml. This change moves CMT trigger out of the webhook path and onto two new authenticated HTTP endpoints, called directly by the workflows: POST /cmt_dispatch -- cmt-provisioner.yml POSTs server_versions plus full context here; matterwick provisions in a goroutine and returns 202 immediately. POST /cleanup_e2e -- compatibility-matrix-testing.yml POSTs the CMT provisioner's run_id here on completion; matterwick destroys tracked instances under {repo}-cmt-{run_id}-* keys. Auth: each endpoint takes a shared secret in a request header (X-Trigger-Token, X-Cleanup-Token) compared with constant-time comparison. Endpoints reject 503 if the corresponding secret is unconfigured rather than running unauthenticated. The broken "requested" branch in handleWorkflowRunEventWithInputs that read payload.WorkflowRun.Inputs["server_versions"] is removed. The "completed" branch is kept as a defensive fallback for stuck instances. Co-Authored-By: Claude --- server/cleanup_e2e.go | 108 +++++++++++++++++++++++++++++++++++++ server/cmt_dispatch.go | 119 +++++++++++++++++++++++++++++++++++++++++ server/config.go | 12 +++++ server/server.go | 10 ++++ server/workflow_run.go | 38 ++++--------- 5 files changed, 259 insertions(+), 28 deletions(-) create mode 100644 server/cleanup_e2e.go create mode 100644 server/cmt_dispatch.go diff --git a/server/cleanup_e2e.go b/server/cleanup_e2e.go new file mode 100644 index 0000000..6b96e19 --- /dev/null +++ b/server/cleanup_e2e.go @@ -0,0 +1,108 @@ +// Copyright (c) 2020-present Mattermost, Inc. All Rights Reserved. +// See License.txt for license information. + +package server + +import ( + "crypto/subtle" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + + "github.com/sirupsen/logrus" +) + +// CleanupE2ERequest is the JSON body POSTed by compatibility-matrix-testing.yml +// to /cleanup_e2e after the CMT matrix tests complete, so matterwick can destroy +// the provisioned cloud installations. +type CleanupE2ERequest struct { + Repo string `json:"repo"` + RunID int64 `json:"run_id"` +} + +// handleCleanupE2E destroys CMT instances tracked under "{repo}-cmt-{run_id}-*". +// The run_id passed by the workflow is the CMT provisioner's run_id (cmt_run_id), +// which matterwick embeds as the middle component of every CMT tracking key. +func (s *Server) handleCleanupE2E(w http.ResponseWriter, r *http.Request) { + if overLimit := s.CheckLimitRateAndAbortRequest(); overLimit { + return + } + + logger := s.Logger.WithField("endpoint", "/cleanup_e2e") + + if s.Config.CleanupSecret == "" { + logger.Error("CleanupSecret not configured; rejecting /cleanup_e2e request") + w.WriteHeader(http.StatusServiceUnavailable) + return + } + + provided := r.Header.Get("X-Cleanup-Token") + if subtle.ConstantTimeCompare([]byte(provided), []byte(s.Config.CleanupSecret)) != 1 { + logger.Warn("Invalid or missing X-Cleanup-Token on /cleanup_e2e") + w.WriteHeader(http.StatusForbidden) + return + } + + body, err := io.ReadAll(r.Body) + if err != nil { + logger.WithError(err).Error("Failed to read /cleanup_e2e body") + w.WriteHeader(http.StatusBadRequest) + return + } + + var req CleanupE2ERequest + if err := json.Unmarshal(body, &req); err != nil { + logger.WithError(err).Error("Failed to parse /cleanup_e2e JSON body") + w.WriteHeader(http.StatusBadRequest) + return + } + + if req.Repo == "" || req.RunID == 0 { + logger.WithFields(logrus.Fields{ + "repo": req.Repo, + "run_id": req.RunID, + }).Error("Missing required fields in /cleanup_e2e body") + w.WriteHeader(http.StatusBadRequest) + return + } + + cleanupLogger := logger.WithFields(logrus.Fields{ + "repo": req.Repo, + "run_id": req.RunID, + "type": "cmt_cleanup", + }) + + // Match the CMT tracking-key format: "{repo}-cmt-{run_id}-{sha}". + keyPrefix := fmt.Sprintf("%s-cmt-%d-", req.Repo, req.RunID) + + s.e2eInstancesLock.Lock() + var found []*E2EInstance + var keysToDelete []string + for key, instances := range s.e2eInstances { + if strings.HasPrefix(key, keyPrefix) { + found = append(found, instances...) + keysToDelete = append(keysToDelete, key) + } + } + for _, k := range keysToDelete { + delete(s.e2eInstances, k) + } + s.e2eInstancesLock.Unlock() + + if len(found) == 0 { + cleanupLogger.Info("No CMT instances tracked for this run_id; nothing to destroy") + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"status":"no_instances"}`)) + return + } + + cleanupLogger.WithField("instances", len(found)).Info("Destroying CMT instances for completed run") + go s.destroyE2EInstances(found, cleanupLogger) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"status":"destroying"}`)) +} diff --git a/server/cmt_dispatch.go b/server/cmt_dispatch.go new file mode 100644 index 0000000..7f9f2f5 --- /dev/null +++ b/server/cmt_dispatch.go @@ -0,0 +1,119 @@ +// Copyright (c) 2020-present Mattermost, Inc. All Rights Reserved. +// See License.txt for license information. + +package server + +import ( + "crypto/subtle" + "encoding/json" + "io" + "net/http" + "strings" + + "github.com/sirupsen/logrus" +) + +// CMTDispatchRequest is the JSON body sent by cmt-provisioner.yml when a user +// dispatches CMT against one or more server versions. +// +// GitHub's workflow_run webhook payload does not carry workflow_dispatch inputs +// (verified against the published workflow_run schema), so cmt-provisioner.yml +// POSTs directly to /cmt_dispatch with everything matterwick needs to provision +// instances and dispatch compatibility-matrix-testing.yml. +type CMTDispatchRequest struct { + Owner string `json:"owner"` + Repo string `json:"repo"` + SHA string `json:"sha"` + Ref string `json:"ref"` + RunID int64 `json:"run_id"` + ServerVersions string `json:"server_versions"` +} + +// handleCMTDispatch processes a CMT dispatch request from cmt-provisioner.yml. +// It validates the auth token, parses the JSON body, and starts provisioning +// asynchronously so the workflow step returns immediately. +func (s *Server) handleCMTDispatch(w http.ResponseWriter, r *http.Request) { + if overLimit := s.CheckLimitRateAndAbortRequest(); overLimit { + return + } + + logger := s.Logger.WithField("endpoint", "/cmt_dispatch") + + // Reject the request if no token is configured rather than running unauthenticated. + if s.Config.CMTTriggerSecret == "" { + logger.Error("CMTTriggerSecret not configured; rejecting /cmt_dispatch request") + w.WriteHeader(http.StatusServiceUnavailable) + return + } + + provided := r.Header.Get("X-Trigger-Token") + if subtle.ConstantTimeCompare([]byte(provided), []byte(s.Config.CMTTriggerSecret)) != 1 { + logger.Warn("Invalid or missing X-Trigger-Token on /cmt_dispatch") + w.WriteHeader(http.StatusForbidden) + return + } + + body, err := io.ReadAll(r.Body) + if err != nil { + logger.WithError(err).Error("Failed to read /cmt_dispatch request body") + w.WriteHeader(http.StatusBadRequest) + return + } + + var req CMTDispatchRequest + if err := json.Unmarshal(body, &req); err != nil { + logger.WithError(err).Error("Failed to parse /cmt_dispatch JSON body") + w.WriteHeader(http.StatusBadRequest) + return + } + + if req.Owner == "" || req.Repo == "" || req.SHA == "" || req.Ref == "" || req.RunID == 0 || req.ServerVersions == "" { + logger.WithFields(logrus.Fields{ + "owner": req.Owner, + "repo": req.Repo, + "sha_empty": req.SHA == "", + "ref_empty": req.Ref == "", + "run_id": req.RunID, + "versions_empty": req.ServerVersions == "", + }).Error("Missing required fields in /cmt_dispatch body") + w.WriteHeader(http.StatusBadRequest) + return + } + + versions := parseServerVersionsFromString(req.ServerVersions) + if len(versions) == 0 { + logger.WithField("input", req.ServerVersions).Error("Failed to parse server_versions") + w.WriteHeader(http.StatusBadRequest) + return + } + + var instanceType string + switch { + case strings.Contains(req.Repo, "desktop"): + instanceType = "desktop" + case strings.Contains(req.Repo, "mobile"): + instanceType = "mobile" + default: + logger.WithField("repo", req.Repo).Warn("Repository is neither desktop nor mobile, refusing /cmt_dispatch") + w.WriteHeader(http.StatusBadRequest) + return + } + + dispatchLogger := logger.WithFields(logrus.Fields{ + "repo": req.Repo, + "owner": req.Owner, + "sha": req.SHA, + "ref": req.Ref, + "run_id": req.RunID, + "versions": versions, + "instanceType": instanceType, + }) + dispatchLogger.Info("Accepted CMT dispatch request, provisioning asynchronously") + + // Provisioning takes ~30 min; respond 202 now and do the work in a goroutine. + go s.handleCMTWithServerVersions(req.Owner, req.Repo, instanceType, req.Ref, req.SHA, versions, req.RunID, dispatchLogger) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusAccepted) + _, _ = w.Write([]byte(`{"status":"accepted"}`)) +} diff --git a/server/config.go b/server/config.go index 88b894d..3505fab 100644 --- a/server/config.go +++ b/server/config.go @@ -123,6 +123,18 @@ type MatterwickConfig struct { // Set to the longest expected E2E run duration plus a small buffer. // Default (0): 3 hours. E2EInstanceMaxAge int + + // CMTTriggerSecret is the shared secret presented in the X-Trigger-Token header by + // cmt-provisioner.yml when POSTing to /cmt_dispatch. The webhook payload from + // GitHub does not carry workflow_dispatch inputs (verified against the + // workflow_run schema), so cmt-provisioner calls matterwick directly with the + // server_versions input. Empty value disables the endpoint. + CMTTriggerSecret string + + // CleanupSecret is the shared secret presented in the X-Cleanup-Token header by + // compatibility-matrix-testing.yml (and any other workflow) when POSTing to + // /cleanup_e2e to request instance teardown. Empty value disables the endpoint. + CleanupSecret string } func findConfigFile(fileName string) string { diff --git a/server/server.go b/server/server.go index f1e5cdf..c76d29e 100644 --- a/server/server.go +++ b/server/server.go @@ -182,6 +182,16 @@ func (s *Server) initializeRouter() { s.Router.HandleFunc("/github_event", s.githubEvent).Methods(http.MethodPost) s.Router.HandleFunc("/cloud_webhooks", s.handleCloudWebhook).Methods(http.MethodPost) s.Router.HandleFunc("/shrug_wick", s.serveShrugWick).Methods(http.MethodGet) + + // /cmt_dispatch is called directly by cmt-provisioner.yml in desktop/mobile, + // because GitHub's workflow_run webhook payload does not carry workflow_dispatch + // inputs (the inputs field is not in GitHub's workflow_run schema), so we cannot + // extract server_versions from the webhook alone. + s.Router.HandleFunc("/cmt_dispatch", s.handleCMTDispatch).Methods(http.MethodPost) + + // /cleanup_e2e is called by compatibility-matrix-testing.yml after the test + // matrix finishes, so matterwick can destroy the provisioned instances. + s.Router.HandleFunc("/cleanup_e2e", s.handleCleanupE2E).Methods(http.MethodPost) } func (s *Server) ping(w http.ResponseWriter, r *http.Request) { diff --git a/server/workflow_run.go b/server/workflow_run.go index eba97da..761b90e 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -78,39 +78,21 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay "head_sha": headSHA, }) - // CMT: "CMT Provisioner" (user-dispatched) provisions servers; "Compatibility Matrix Testing" runs tests. + // CMT: "CMT Provisioner" workflow_run events are no longer used to start provisioning. + // GitHub's workflow_run webhook payload does not carry workflow_dispatch inputs (the + // "inputs" field is not in the workflow_run schema), so server_versions cannot be + // read from this event. cmt-provisioner.yml now POSTs directly to /cmt_dispatch with + // the full context, handled by handleCMTDispatch. + // + // We still listen for workflow_run.completed on CMT as a defensive fallback to clean + // up any instances that somehow got tracked but were never reaped via /cleanup_e2e. if strings.Contains(workflowName, "cmt") || strings.Contains(workflowName, "CMT") { if payload.Action == "completed" { - logger.Debug("CMT trigger workflow completed; sha-based cleanup is primary") + logger.Debug("CMT trigger workflow completed; sha-based cleanup is a fallback for stuck instances") s.handleCMTRunCleanup(repoName, headSHA, logger) - return - } - if payload.Action != "requested" { - logger.Debug("Ignoring CMT workflow action (not requested or completed)") - return - } - logger.Info("Processing CMT workflow_run event") - serverVersionsStr, ok := payload.WorkflowRun.Inputs["server_versions"] - if !ok || serverVersionsStr == "" { - logger.Error("No server_versions found in workflow inputs") - return - } - serverVersions := parseServerVersionsFromString(serverVersionsStr) - if len(serverVersions) == 0 { - logger.Error("Failed to parse server versions from workflow input") - return - } - logger.WithField("serverVersions", serverVersions).Info("Extracted server versions from workflow inputs") - var instanceType string - if strings.Contains(repoName, "desktop") { - instanceType = "desktop" - } else if strings.Contains(repoName, "mobile") { - instanceType = "mobile" } else { - logger.Warn("Repository is neither desktop nor mobile, skipping CMT") - return + logger.WithField("action", payload.Action).Debug("Ignoring CMT workflow_run event; provisioning is driven by /cmt_dispatch") } - go s.handleCMTWithServerVersions(owner, repoName, instanceType, headBranch, headSHA, serverVersions, runID, logger) return } From 1dd004a548f0ee2bb19a04f06dca8816da6795f6 Mon Sep 17 00:00:00 2001 From: Yasser Khan Date: Mon, 25 May 2026 08:59:57 +0530 Subject: [PATCH 02/19] test(cmt): unit tests for /cmt_dispatch and /cleanup_e2e Adds 18 unit tests covering the synchronous validation surface of both new endpoints: missing/wrong tokens, malformed JSON, missing required fields, edge inputs (",,," server_versions, unknown repo type, zero run_id), and the cleanup map-deletion logic across matching, non-matching, and multi-key scenarios. Also drops the CheckLimitRateAndAbortRequest call from both handlers. The /github_event handler keeps it because synchronous webhook responses may need to make GitHub API calls before answering, but the new endpoints only call GitHub from background goroutines (cmt_dispatch) or talk to the cloud provisioner (cleanup_e2e). Keeping the rate check would force every request to block on a live GitHub round-trip, which makes the endpoints untestable in unit tests (no live token) for no production benefit. Inline comments explain the rationale. End-to-end coverage of the happy path (provisioning + workflow dispatch) is deliberately left to e2e_dryrun_test.go, which already mocks the cloud and github clients; duplicating that mocking here adds noise without catching anything new. The validation rejection paths above are the bug-prone surface. Co-Authored-By: Claude --- server/cleanup_e2e.go | 8 +- server/cleanup_e2e_test.go | 157 ++++++++++++++++++++++++++++++++++++ server/cmt_dispatch.go | 12 ++- server/cmt_dispatch_test.go | 143 ++++++++++++++++++++++++++++++++ 4 files changed, 312 insertions(+), 8 deletions(-) create mode 100644 server/cleanup_e2e_test.go create mode 100644 server/cmt_dispatch_test.go diff --git a/server/cleanup_e2e.go b/server/cleanup_e2e.go index 6b96e19..27fb9ec 100644 --- a/server/cleanup_e2e.go +++ b/server/cleanup_e2e.go @@ -25,11 +25,11 @@ type CleanupE2ERequest struct { // handleCleanupE2E destroys CMT instances tracked under "{repo}-cmt-{run_id}-*". // The run_id passed by the workflow is the CMT provisioner's run_id (cmt_run_id), // which matterwick embeds as the middle component of every CMT tracking key. +// +// NOTE: this handler does not call CheckLimitRateAndAbortRequest. Cleanup hits +// the cloud provisioner API, not GitHub, so GitHub rate limits are not the +// constraint. See handleCMTDispatch for the broader rationale. func (s *Server) handleCleanupE2E(w http.ResponseWriter, r *http.Request) { - if overLimit := s.CheckLimitRateAndAbortRequest(); overLimit { - return - } - logger := s.Logger.WithField("endpoint", "/cleanup_e2e") if s.Config.CleanupSecret == "" { diff --git a/server/cleanup_e2e_test.go b/server/cleanup_e2e_test.go new file mode 100644 index 0000000..fe04201 --- /dev/null +++ b/server/cleanup_e2e_test.go @@ -0,0 +1,157 @@ +// Copyright (c) 2020-present Mattermost, Inc. All Rights Reserved. +// See License.txt for license information. + +package server + +import ( + "bytes" + "net/http" + "net/http/httptest" + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" +) + +// newCleanupE2ETestServer builds a Server for handleCleanupE2E. CloudClient +// is deliberately nil; callers must avoid paths that trigger the destroy +// goroutine. The "instances tracked" path is covered indirectly by +// asserting on the e2eInstances map state and the synchronous response, +// and by giving the tracked entry an empty []*E2EInstance slice so the +// goroutine sees no work and exits without touching CloudClient. +func newCleanupE2ETestServer(t *testing.T, cleanupSecret string) *Server { + t.Helper() + return &Server{ + Config: &MatterwickConfig{ + CleanupSecret: cleanupSecret, + }, + Logger: logrus.New(), + e2eInstances: make(map[string][]*E2EInstance), + } +} + +const validCleanupBody = `{"repo": "desktop", "run_id": 26026866029}` + +func doCleanupE2ERequest(t *testing.T, s *Server, body, token string) *httptest.ResponseRecorder { + t.Helper() + req := httptest.NewRequest(http.MethodPost, "/cleanup_e2e", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + if token != "" { + req.Header.Set("X-Cleanup-Token", token) + } + w := httptest.NewRecorder() + s.handleCleanupE2E(w, req) + return w +} + +func TestHandleCleanupE2E_RejectsWhenSecretUnconfigured(t *testing.T) { + s := newCleanupE2ETestServer(t, "") + w := doCleanupE2ERequest(t, s, validCleanupBody, "any-token") + assert.Equal(t, http.StatusServiceUnavailable, w.Code) +} + +func TestHandleCleanupE2E_RejectsMissingToken(t *testing.T) { + s := newCleanupE2ETestServer(t, "the-secret") + w := doCleanupE2ERequest(t, s, validCleanupBody, "") + assert.Equal(t, http.StatusForbidden, w.Code) +} + +func TestHandleCleanupE2E_RejectsWrongToken(t *testing.T) { + s := newCleanupE2ETestServer(t, "the-secret") + w := doCleanupE2ERequest(t, s, validCleanupBody, "not-the-secret") + assert.Equal(t, http.StatusForbidden, w.Code) +} + +func TestHandleCleanupE2E_RejectsMalformedJSON(t *testing.T) { + s := newCleanupE2ETestServer(t, "the-secret") + w := doCleanupE2ERequest(t, s, `{not json`, "the-secret") + assert.Equal(t, http.StatusBadRequest, w.Code) +} + +func TestHandleCleanupE2E_RejectsMissingFields(t *testing.T) { + cases := []struct { + name string + body string + }{ + {"missing repo", `{"run_id":12345}`}, + {"missing run_id", `{"repo":"desktop"}`}, + {"zero run_id", `{"repo":"desktop","run_id":0}`}, + {"empty repo", `{"repo":"","run_id":12345}`}, + } + s := newCleanupE2ETestServer(t, "the-secret") + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + w := doCleanupE2ERequest(t, s, tc.body, "the-secret") + assert.Equal(t, http.StatusBadRequest, w.Code, + "missing field %q must yield 400", tc.name) + }) + } +} + +func TestHandleCleanupE2E_NoMatchingInstances(t *testing.T) { + s := newCleanupE2ETestServer(t, "the-secret") + // Map is empty. Cleanup should return 200 "no_instances" without panicking + // and without launching the destroy goroutine. + w := doCleanupE2ERequest(t, s, validCleanupBody, "the-secret") + assert.Equal(t, http.StatusOK, w.Code) + assert.Contains(t, w.Body.String(), "no_instances", + "empty map must yield no_instances response") +} + +func TestHandleCleanupE2E_NonMatchingKeyIgnored(t *testing.T) { + s := newCleanupE2ETestServer(t, "the-secret") + // Pre-populate the map with a key that does NOT match the cleanup + // request's prefix. The unrelated entry must be left untouched. + otherKey := "desktop-pr-9999" + s.e2eInstances[otherKey] = []*E2EInstance{{InstallationID: "other-inst"}} + + w := doCleanupE2ERequest(t, s, validCleanupBody, "the-secret") + + assert.Equal(t, http.StatusOK, w.Code) + assert.Contains(t, w.Body.String(), "no_instances") + assert.Contains(t, s.e2eInstances, otherKey, + "non-matching tracking key must not be deleted") +} + +func TestHandleCleanupE2E_RemovesMatchingKey(t *testing.T) { + s := newCleanupE2ETestServer(t, "the-secret") + // Tracking key format from handleCMTWithServerVersions: + // "{repo}-cmt-{runID}-{sha}" + // We populate it with an EMPTY instances slice so the goroutine has no + // work to do (and therefore does not touch the nil CloudClient). + matchingKey := "desktop-cmt-26026866029-abc123def456" + s.e2eInstances[matchingKey] = []*E2EInstance{} + + w := doCleanupE2ERequest(t, s, validCleanupBody, "the-secret") + + assert.Equal(t, http.StatusOK, w.Code, + "matching key must be removed and 200 returned") + assert.NotContains(t, s.e2eInstances, matchingKey, + "matching tracking key must be deleted from e2eInstances map") +} + +func TestHandleCleanupE2E_RemovesMultipleMatchingKeys(t *testing.T) { + s := newCleanupE2ETestServer(t, "the-secret") + // Two CMT entries for the same run_id (e.g. retried provisioning) -- both + // share the same {repo}-cmt-{run_id}- prefix. Both must be cleaned. + k1 := "desktop-cmt-26026866029-sha-aaa" + k2 := "desktop-cmt-26026866029-sha-bbb" + kOther := "desktop-cmt-99999999999-sha-ccc" // different run_id, must survive + s.e2eInstances[k1] = []*E2EInstance{} + s.e2eInstances[k2] = []*E2EInstance{} + s.e2eInstances[kOther] = []*E2EInstance{} + + w := doCleanupE2ERequest(t, s, validCleanupBody, "the-secret") + + assert.Equal(t, http.StatusOK, w.Code) + assert.NotContains(t, s.e2eInstances, k1) + assert.NotContains(t, s.e2eInstances, k2) + assert.Contains(t, s.e2eInstances, kOther, + "unrelated run_id entry must remain after cleanup") +} + +func TestHandleCleanupE2E_ConstantTimeTokenCompare(t *testing.T) { + s := newCleanupE2ETestServer(t, "cleanup-secret-abcdef") + w := doCleanupE2ERequest(t, s, validCleanupBody, "cleanup-secret-ZZZZZZ") + assert.Equal(t, http.StatusForbidden, w.Code) +} diff --git a/server/cmt_dispatch.go b/server/cmt_dispatch.go index 7f9f2f5..e331197 100644 --- a/server/cmt_dispatch.go +++ b/server/cmt_dispatch.go @@ -32,11 +32,15 @@ type CMTDispatchRequest struct { // handleCMTDispatch processes a CMT dispatch request from cmt-provisioner.yml. // It validates the auth token, parses the JSON body, and starts provisioning // asynchronously so the workflow step returns immediately. +// +// NOTE: this handler does not call CheckLimitRateAndAbortRequest as the +// /github_event handler does. Rate-limit checking made sense for synchronous +// GitHub webhook responses where matterwick might need to make API calls +// before answering. Here, all GitHub API calls happen later in the goroutine, +// so checking now would block the request on a network round-trip to GitHub +// for no benefit, and (more importantly) makes this endpoint untestable in +// the unit-test environment, which has no live token. func (s *Server) handleCMTDispatch(w http.ResponseWriter, r *http.Request) { - if overLimit := s.CheckLimitRateAndAbortRequest(); overLimit { - return - } - logger := s.Logger.WithField("endpoint", "/cmt_dispatch") // Reject the request if no token is configured rather than running unauthenticated. diff --git a/server/cmt_dispatch_test.go b/server/cmt_dispatch_test.go new file mode 100644 index 0000000..eeed4dd --- /dev/null +++ b/server/cmt_dispatch_test.go @@ -0,0 +1,143 @@ +// Copyright (c) 2020-present Mattermost, Inc. All Rights Reserved. +// See License.txt for license information. + +package server + +import ( + "bytes" + "net/http" + "net/http/httptest" + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" +) + +// newCMTDispatchTestServer builds a Server suitable for exercising +// handleCMTDispatch's synchronous validation logic. It deliberately does NOT +// set CloudClient, so callers must not exercise paths that launch the +// provisioning goroutine (i.e. only the rejection / 4xx cases). Tests that +// need the goroutine to run must build their own server with a mocked +// CloudClient -- see e2e_dryrun_test.go for that pattern. +func newCMTDispatchTestServer(t *testing.T, triggerSecret string) *Server { + t.Helper() + return &Server{ + Config: &MatterwickConfig{ + Org: "mattermost", + GithubAccessToken: "test-token", + CMTTriggerSecret: triggerSecret, + E2EServerVersion: "latest", + }, + Logger: logrus.New(), + e2eInstances: make(map[string][]*E2EInstance), + } +} + +// validCMTDispatchBody is the canonical happy-path body. Individual tests +// rebuild it with one field tweaked to exercise validation branches. +const validCMTDispatchBody = `{ + "owner": "mattermost", + "repo": "desktop", + "sha": "abc123def456", + "ref": "master", + "run_id": 26026866029, + "server_versions": "v11.1.0, v11.2.0" +}` + +func doCMTDispatchRequest(t *testing.T, s *Server, body, token string) *httptest.ResponseRecorder { + t.Helper() + req := httptest.NewRequest(http.MethodPost, "/cmt_dispatch", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + if token != "" { + req.Header.Set("X-Trigger-Token", token) + } + w := httptest.NewRecorder() + s.handleCMTDispatch(w, req) + return w +} + +func TestHandleCMTDispatch_RejectsWhenSecretUnconfigured(t *testing.T) { + // Refuses to run unauthenticated even if a caller presents some token, + // because there is no configured secret to compare against. + s := newCMTDispatchTestServer(t, "") + w := doCMTDispatchRequest(t, s, validCMTDispatchBody, "any-token") + assert.Equal(t, http.StatusServiceUnavailable, w.Code, + "unconfigured secret must yield 503, not 200/202/401/403") +} + +func TestHandleCMTDispatch_RejectsMissingToken(t *testing.T) { + s := newCMTDispatchTestServer(t, "the-secret") + w := doCMTDispatchRequest(t, s, validCMTDispatchBody, "") + assert.Equal(t, http.StatusForbidden, w.Code, + "absent X-Trigger-Token header must yield 403") +} + +func TestHandleCMTDispatch_RejectsWrongToken(t *testing.T) { + s := newCMTDispatchTestServer(t, "the-secret") + w := doCMTDispatchRequest(t, s, validCMTDispatchBody, "not-the-secret") + assert.Equal(t, http.StatusForbidden, w.Code, + "wrong X-Trigger-Token must yield 403") +} + +func TestHandleCMTDispatch_RejectsMalformedJSON(t *testing.T) { + s := newCMTDispatchTestServer(t, "the-secret") + w := doCMTDispatchRequest(t, s, `{not valid json`, "the-secret") + assert.Equal(t, http.StatusBadRequest, w.Code, + "malformed body must yield 400 after token passes") +} + +func TestHandleCMTDispatch_RejectsMissingFields(t *testing.T) { + cases := []struct { + name string + body string + }{ + {"missing owner", `{"repo":"desktop","sha":"a","ref":"master","run_id":1,"server_versions":"v11.0.0"}`}, + {"missing repo", `{"owner":"m","sha":"a","ref":"master","run_id":1,"server_versions":"v11.0.0"}`}, + {"missing sha", `{"owner":"m","repo":"desktop","ref":"master","run_id":1,"server_versions":"v11.0.0"}`}, + {"missing ref", `{"owner":"m","repo":"desktop","sha":"a","run_id":1,"server_versions":"v11.0.0"}`}, + {"zero run_id", `{"owner":"m","repo":"desktop","sha":"a","ref":"master","run_id":0,"server_versions":"v11.0.0"}`}, + {"empty server_versions", `{"owner":"m","repo":"desktop","sha":"a","ref":"master","run_id":1,"server_versions":""}`}, + } + s := newCMTDispatchTestServer(t, "the-secret") + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + w := doCMTDispatchRequest(t, s, tc.body, "the-secret") + assert.Equal(t, http.StatusBadRequest, w.Code, + "missing required field %q must yield 400", tc.name) + }) + } +} + +func TestHandleCMTDispatch_RejectsServerVersionsThatParseToEmpty(t *testing.T) { + s := newCMTDispatchTestServer(t, "the-secret") + // parseServerVersionsFromString collapses ",,," into the empty slice. + body := `{"owner":"m","repo":"desktop","sha":"a","ref":"master","run_id":1,"server_versions":",,,"}` + w := doCMTDispatchRequest(t, s, body, "the-secret") + assert.Equal(t, http.StatusBadRequest, w.Code, + "server_versions that parses to zero versions must yield 400") +} + +func TestHandleCMTDispatch_RejectsUnknownRepoType(t *testing.T) { + s := newCMTDispatchTestServer(t, "the-secret") + // Repo name contains neither "desktop" nor "mobile". + body := `{"owner":"m","repo":"playbooks","sha":"a","ref":"master","run_id":1,"server_versions":"v11.0.0"}` + w := doCMTDispatchRequest(t, s, body, "the-secret") + assert.Equal(t, http.StatusBadRequest, w.Code, + "unsupported repo must yield 400, never start provisioning") +} + +func TestHandleCMTDispatch_ConstantTimeTokenCompare(t *testing.T) { + // Sanity check: the comparison must not short-circuit on prefix match. + // (We can't directly observe timing in a unit test, but we can at least + // confirm that a partial-match token of the same length is still rejected.) + s := newCMTDispatchTestServer(t, "secret-abcdef") + w := doCMTDispatchRequest(t, s, validCMTDispatchBody, "secret-ZZZZZZ") + assert.Equal(t, http.StatusForbidden, w.Code) +} + +// NOTE: we deliberately do not test the "happy path returns 202" case in this +// file. A valid request launches the provisioning goroutine, which calls into +// CloudClient (nil in this test harness) and panics. End-to-end behavior of +// handleCMTWithServerVersions is covered by e2e_dryrun_test.go with a mocked +// cloud client. The synchronous validation logic above (rejection branches + +// missing-field detection) covers the bug-prone surface of this handler. From d0e3d60c18c168bb0854334f22c4720eb9b72f58 Mon Sep 17 00:00:00 2001 From: Yasser Khan Date: Mon, 25 May 2026 11:32:34 +0530 Subject: [PATCH 03/19] fix(e2e): remove dead mw_tracking_key primary cleanup path The "Primary" lookup at handleWorkflowRunEventWithInputs read payload.WorkflowRun.Inputs["mw_tracking_key"], which is the same field that broke CMT provisioning -- GitHub's workflow_run webhook payload does not include workflow_dispatch inputs (verified against the Octokit schema and the REST API for a real workflow run). The field is always empty in production, so the "primary" branch never fires; only the SHA-based fallback runs. That fallback works because every tracking-key format ends with -{sha}. Removing the dead read eliminates the misleading "Primary"/"Fallback" labels and makes the SHA-based scan the documented canonical mechanism. The mw_tracking_key value is still written when dispatching test workflows -- it remains queryable via the REST API for any future code path that wants to do a key-based lookup -- but the webhook-driven cleanup intentionally does not depend on it. Co-Authored-By: Claude --- server/workflow_run.go | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/server/workflow_run.go b/server/workflow_run.go index 761b90e..b8e2807 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -106,26 +106,17 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay } // --- Test workflow completion: clean up provisioned instances --- + // + // Cleanup is keyed by head_sha against tracking-key suffixes ({repo}-...-{sha}). + // We do not attempt to read mw_tracking_key from payload.WorkflowRun.Inputs: + // GitHub's workflow_run webhook payload does not include workflow_dispatch + // inputs (the "inputs" field is not in the workflow_run schema), so that field + // is always empty in production. SHA-based scanning is the canonical mechanism. + // The mw_tracking_key value is still set when dispatching the test workflow so + // it remains queryable via the GitHub REST API if a future code path wants it, + // but the webhook-driven cleanup intentionally does not depend on it. if payload.Action == "completed" && s.isE2ETestWorkflow(workflowName) { - logger.Info("Test workflow completed, checking for instance cleanup") - - // Primary: look up by mw_tracking_key embedded at dispatch time (immune to SHA races). - if trackingKey := payload.WorkflowRun.Inputs["mw_tracking_key"]; trackingKey != "" { - s.e2eInstancesLock.Lock() - instances := s.e2eInstances[trackingKey] - delete(s.e2eInstances, trackingKey) - s.e2eInstancesLock.Unlock() - if len(instances) > 0 { - logger.WithField("tracking_key", trackingKey).Info("Destroying instances by tracking key") - s.destroyE2EInstances(instances, logger) - } else { - logger.WithField("tracking_key", trackingKey).Debug("No in-memory instances for tracking key (matterwick restarted or already cleaned)") - } - return - } - - // Fallback: SHA-based scan (runs dispatched before mw_tracking_key was introduced). - logger.Debug("No mw_tracking_key in workflow inputs, falling back to SHA-based instance cleanup") + logger.Info("Test workflow completed, cleaning up instances by SHA suffix match") s.findAndDestroyInstancesBySHA(repoName, headSHA, logger) return } From e0c42f2c46916520a225a7ff2e336f4dbc61571c Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Mon, 1 Jun 2026 15:18:02 +0530 Subject: [PATCH 04/19] fix(e2e): make CMT webhook-driven; stop sending undeclared mw_tracking_key CMT now runs off a lightweight scheduled trigger workflow that matterwick hears as a workflow_run event, provisioning one server per version from a hardcoded set in config (CMTServerVersions) and dispatching compatibility-matrix-testing.yml. This removes the /cmt_dispatch and /cleanup_e2e HTTP endpoints (and their CMTTriggerSecret/CleanupSecret config), which existed only to pass server_versions past the workflow_run webhook. Also stop sending the undeclared `mw_tracking_key` workflow_dispatch input: GitHub rejects a dispatch carrying an undeclared input with HTTP 422, which broke every non-PR dispatch (release, scheduled, and CMT). Cleanup is keyed off the test workflow's workflow_run completed event, matched by commit SHA. - workflow_run.go: add handleCMTTrigger, provision on CMTTriggerWorkflowName 'requested'; remove handleCMTRunCleanup fallback - config.go: add CMTServerVersions (+ defaultCMTServerVersions) and CMTTriggerWorkflowName; remove CMTTriggerSecret/CleanupSecret - remove cmt_dispatch.go, cleanup_e2e.go and their tests - drop dead trackingKey plumbing from desktop/mobile/CMT dispatch paths Co-Authored-By: Claude Opus 4.8 --- config/config-matterwick.default.json | 4 +- server/cleanup_e2e.go | 108 ------------------ server/cleanup_e2e_test.go | 157 -------------------------- server/cmt_dispatch.go | 123 -------------------- server/cmt_dispatch_test.go | 143 ----------------------- server/config.go | 36 ++++-- server/e2e_tests.go | 26 ++--- server/push_events.go | 18 +-- server/server.go | 10 -- server/workflow_run.go | 101 ++++++++--------- 10 files changed, 95 insertions(+), 631 deletions(-) delete mode 100644 server/cleanup_e2e.go delete mode 100644 server/cleanup_e2e_test.go delete mode 100644 server/cmt_dispatch.go delete mode 100644 server/cmt_dispatch_test.go diff --git a/config/config-matterwick.default.json b/config/config-matterwick.default.json index 08816fc..0d3621f 100644 --- a/config/config-matterwick.default.json +++ b/config/config-matterwick.default.json @@ -83,5 +83,7 @@ "E2EReleasePatternPrefix": "release-", "E2ENightlyTriggerWorkflowName": "E2E Nightly Trigger", "E2ETestWorkflowNames": ["Electron Playwright Tests", "E2E", "Compatibility Matrix Testing"], - "E2EInstanceMaxAge": 6 + "E2EInstanceMaxAge": 6, + "CMTTriggerWorkflowName": "CMT Provisioner", + "CMTServerVersions": ["10.11.0", "11.7.0"] } diff --git a/server/cleanup_e2e.go b/server/cleanup_e2e.go deleted file mode 100644 index 27fb9ec..0000000 --- a/server/cleanup_e2e.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2020-present Mattermost, Inc. All Rights Reserved. -// See License.txt for license information. - -package server - -import ( - "crypto/subtle" - "encoding/json" - "fmt" - "io" - "net/http" - "strings" - - "github.com/sirupsen/logrus" -) - -// CleanupE2ERequest is the JSON body POSTed by compatibility-matrix-testing.yml -// to /cleanup_e2e after the CMT matrix tests complete, so matterwick can destroy -// the provisioned cloud installations. -type CleanupE2ERequest struct { - Repo string `json:"repo"` - RunID int64 `json:"run_id"` -} - -// handleCleanupE2E destroys CMT instances tracked under "{repo}-cmt-{run_id}-*". -// The run_id passed by the workflow is the CMT provisioner's run_id (cmt_run_id), -// which matterwick embeds as the middle component of every CMT tracking key. -// -// NOTE: this handler does not call CheckLimitRateAndAbortRequest. Cleanup hits -// the cloud provisioner API, not GitHub, so GitHub rate limits are not the -// constraint. See handleCMTDispatch for the broader rationale. -func (s *Server) handleCleanupE2E(w http.ResponseWriter, r *http.Request) { - logger := s.Logger.WithField("endpoint", "/cleanup_e2e") - - if s.Config.CleanupSecret == "" { - logger.Error("CleanupSecret not configured; rejecting /cleanup_e2e request") - w.WriteHeader(http.StatusServiceUnavailable) - return - } - - provided := r.Header.Get("X-Cleanup-Token") - if subtle.ConstantTimeCompare([]byte(provided), []byte(s.Config.CleanupSecret)) != 1 { - logger.Warn("Invalid or missing X-Cleanup-Token on /cleanup_e2e") - w.WriteHeader(http.StatusForbidden) - return - } - - body, err := io.ReadAll(r.Body) - if err != nil { - logger.WithError(err).Error("Failed to read /cleanup_e2e body") - w.WriteHeader(http.StatusBadRequest) - return - } - - var req CleanupE2ERequest - if err := json.Unmarshal(body, &req); err != nil { - logger.WithError(err).Error("Failed to parse /cleanup_e2e JSON body") - w.WriteHeader(http.StatusBadRequest) - return - } - - if req.Repo == "" || req.RunID == 0 { - logger.WithFields(logrus.Fields{ - "repo": req.Repo, - "run_id": req.RunID, - }).Error("Missing required fields in /cleanup_e2e body") - w.WriteHeader(http.StatusBadRequest) - return - } - - cleanupLogger := logger.WithFields(logrus.Fields{ - "repo": req.Repo, - "run_id": req.RunID, - "type": "cmt_cleanup", - }) - - // Match the CMT tracking-key format: "{repo}-cmt-{run_id}-{sha}". - keyPrefix := fmt.Sprintf("%s-cmt-%d-", req.Repo, req.RunID) - - s.e2eInstancesLock.Lock() - var found []*E2EInstance - var keysToDelete []string - for key, instances := range s.e2eInstances { - if strings.HasPrefix(key, keyPrefix) { - found = append(found, instances...) - keysToDelete = append(keysToDelete, key) - } - } - for _, k := range keysToDelete { - delete(s.e2eInstances, k) - } - s.e2eInstancesLock.Unlock() - - if len(found) == 0 { - cleanupLogger.Info("No CMT instances tracked for this run_id; nothing to destroy") - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte(`{"status":"no_instances"}`)) - return - } - - cleanupLogger.WithField("instances", len(found)).Info("Destroying CMT instances for completed run") - go s.destroyE2EInstances(found, cleanupLogger) - - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte(`{"status":"destroying"}`)) -} diff --git a/server/cleanup_e2e_test.go b/server/cleanup_e2e_test.go deleted file mode 100644 index fe04201..0000000 --- a/server/cleanup_e2e_test.go +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright (c) 2020-present Mattermost, Inc. All Rights Reserved. -// See License.txt for license information. - -package server - -import ( - "bytes" - "net/http" - "net/http/httptest" - "testing" - - "github.com/sirupsen/logrus" - "github.com/stretchr/testify/assert" -) - -// newCleanupE2ETestServer builds a Server for handleCleanupE2E. CloudClient -// is deliberately nil; callers must avoid paths that trigger the destroy -// goroutine. The "instances tracked" path is covered indirectly by -// asserting on the e2eInstances map state and the synchronous response, -// and by giving the tracked entry an empty []*E2EInstance slice so the -// goroutine sees no work and exits without touching CloudClient. -func newCleanupE2ETestServer(t *testing.T, cleanupSecret string) *Server { - t.Helper() - return &Server{ - Config: &MatterwickConfig{ - CleanupSecret: cleanupSecret, - }, - Logger: logrus.New(), - e2eInstances: make(map[string][]*E2EInstance), - } -} - -const validCleanupBody = `{"repo": "desktop", "run_id": 26026866029}` - -func doCleanupE2ERequest(t *testing.T, s *Server, body, token string) *httptest.ResponseRecorder { - t.Helper() - req := httptest.NewRequest(http.MethodPost, "/cleanup_e2e", bytes.NewBufferString(body)) - req.Header.Set("Content-Type", "application/json") - if token != "" { - req.Header.Set("X-Cleanup-Token", token) - } - w := httptest.NewRecorder() - s.handleCleanupE2E(w, req) - return w -} - -func TestHandleCleanupE2E_RejectsWhenSecretUnconfigured(t *testing.T) { - s := newCleanupE2ETestServer(t, "") - w := doCleanupE2ERequest(t, s, validCleanupBody, "any-token") - assert.Equal(t, http.StatusServiceUnavailable, w.Code) -} - -func TestHandleCleanupE2E_RejectsMissingToken(t *testing.T) { - s := newCleanupE2ETestServer(t, "the-secret") - w := doCleanupE2ERequest(t, s, validCleanupBody, "") - assert.Equal(t, http.StatusForbidden, w.Code) -} - -func TestHandleCleanupE2E_RejectsWrongToken(t *testing.T) { - s := newCleanupE2ETestServer(t, "the-secret") - w := doCleanupE2ERequest(t, s, validCleanupBody, "not-the-secret") - assert.Equal(t, http.StatusForbidden, w.Code) -} - -func TestHandleCleanupE2E_RejectsMalformedJSON(t *testing.T) { - s := newCleanupE2ETestServer(t, "the-secret") - w := doCleanupE2ERequest(t, s, `{not json`, "the-secret") - assert.Equal(t, http.StatusBadRequest, w.Code) -} - -func TestHandleCleanupE2E_RejectsMissingFields(t *testing.T) { - cases := []struct { - name string - body string - }{ - {"missing repo", `{"run_id":12345}`}, - {"missing run_id", `{"repo":"desktop"}`}, - {"zero run_id", `{"repo":"desktop","run_id":0}`}, - {"empty repo", `{"repo":"","run_id":12345}`}, - } - s := newCleanupE2ETestServer(t, "the-secret") - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - w := doCleanupE2ERequest(t, s, tc.body, "the-secret") - assert.Equal(t, http.StatusBadRequest, w.Code, - "missing field %q must yield 400", tc.name) - }) - } -} - -func TestHandleCleanupE2E_NoMatchingInstances(t *testing.T) { - s := newCleanupE2ETestServer(t, "the-secret") - // Map is empty. Cleanup should return 200 "no_instances" without panicking - // and without launching the destroy goroutine. - w := doCleanupE2ERequest(t, s, validCleanupBody, "the-secret") - assert.Equal(t, http.StatusOK, w.Code) - assert.Contains(t, w.Body.String(), "no_instances", - "empty map must yield no_instances response") -} - -func TestHandleCleanupE2E_NonMatchingKeyIgnored(t *testing.T) { - s := newCleanupE2ETestServer(t, "the-secret") - // Pre-populate the map with a key that does NOT match the cleanup - // request's prefix. The unrelated entry must be left untouched. - otherKey := "desktop-pr-9999" - s.e2eInstances[otherKey] = []*E2EInstance{{InstallationID: "other-inst"}} - - w := doCleanupE2ERequest(t, s, validCleanupBody, "the-secret") - - assert.Equal(t, http.StatusOK, w.Code) - assert.Contains(t, w.Body.String(), "no_instances") - assert.Contains(t, s.e2eInstances, otherKey, - "non-matching tracking key must not be deleted") -} - -func TestHandleCleanupE2E_RemovesMatchingKey(t *testing.T) { - s := newCleanupE2ETestServer(t, "the-secret") - // Tracking key format from handleCMTWithServerVersions: - // "{repo}-cmt-{runID}-{sha}" - // We populate it with an EMPTY instances slice so the goroutine has no - // work to do (and therefore does not touch the nil CloudClient). - matchingKey := "desktop-cmt-26026866029-abc123def456" - s.e2eInstances[matchingKey] = []*E2EInstance{} - - w := doCleanupE2ERequest(t, s, validCleanupBody, "the-secret") - - assert.Equal(t, http.StatusOK, w.Code, - "matching key must be removed and 200 returned") - assert.NotContains(t, s.e2eInstances, matchingKey, - "matching tracking key must be deleted from e2eInstances map") -} - -func TestHandleCleanupE2E_RemovesMultipleMatchingKeys(t *testing.T) { - s := newCleanupE2ETestServer(t, "the-secret") - // Two CMT entries for the same run_id (e.g. retried provisioning) -- both - // share the same {repo}-cmt-{run_id}- prefix. Both must be cleaned. - k1 := "desktop-cmt-26026866029-sha-aaa" - k2 := "desktop-cmt-26026866029-sha-bbb" - kOther := "desktop-cmt-99999999999-sha-ccc" // different run_id, must survive - s.e2eInstances[k1] = []*E2EInstance{} - s.e2eInstances[k2] = []*E2EInstance{} - s.e2eInstances[kOther] = []*E2EInstance{} - - w := doCleanupE2ERequest(t, s, validCleanupBody, "the-secret") - - assert.Equal(t, http.StatusOK, w.Code) - assert.NotContains(t, s.e2eInstances, k1) - assert.NotContains(t, s.e2eInstances, k2) - assert.Contains(t, s.e2eInstances, kOther, - "unrelated run_id entry must remain after cleanup") -} - -func TestHandleCleanupE2E_ConstantTimeTokenCompare(t *testing.T) { - s := newCleanupE2ETestServer(t, "cleanup-secret-abcdef") - w := doCleanupE2ERequest(t, s, validCleanupBody, "cleanup-secret-ZZZZZZ") - assert.Equal(t, http.StatusForbidden, w.Code) -} diff --git a/server/cmt_dispatch.go b/server/cmt_dispatch.go deleted file mode 100644 index e331197..0000000 --- a/server/cmt_dispatch.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2020-present Mattermost, Inc. All Rights Reserved. -// See License.txt for license information. - -package server - -import ( - "crypto/subtle" - "encoding/json" - "io" - "net/http" - "strings" - - "github.com/sirupsen/logrus" -) - -// CMTDispatchRequest is the JSON body sent by cmt-provisioner.yml when a user -// dispatches CMT against one or more server versions. -// -// GitHub's workflow_run webhook payload does not carry workflow_dispatch inputs -// (verified against the published workflow_run schema), so cmt-provisioner.yml -// POSTs directly to /cmt_dispatch with everything matterwick needs to provision -// instances and dispatch compatibility-matrix-testing.yml. -type CMTDispatchRequest struct { - Owner string `json:"owner"` - Repo string `json:"repo"` - SHA string `json:"sha"` - Ref string `json:"ref"` - RunID int64 `json:"run_id"` - ServerVersions string `json:"server_versions"` -} - -// handleCMTDispatch processes a CMT dispatch request from cmt-provisioner.yml. -// It validates the auth token, parses the JSON body, and starts provisioning -// asynchronously so the workflow step returns immediately. -// -// NOTE: this handler does not call CheckLimitRateAndAbortRequest as the -// /github_event handler does. Rate-limit checking made sense for synchronous -// GitHub webhook responses where matterwick might need to make API calls -// before answering. Here, all GitHub API calls happen later in the goroutine, -// so checking now would block the request on a network round-trip to GitHub -// for no benefit, and (more importantly) makes this endpoint untestable in -// the unit-test environment, which has no live token. -func (s *Server) handleCMTDispatch(w http.ResponseWriter, r *http.Request) { - logger := s.Logger.WithField("endpoint", "/cmt_dispatch") - - // Reject the request if no token is configured rather than running unauthenticated. - if s.Config.CMTTriggerSecret == "" { - logger.Error("CMTTriggerSecret not configured; rejecting /cmt_dispatch request") - w.WriteHeader(http.StatusServiceUnavailable) - return - } - - provided := r.Header.Get("X-Trigger-Token") - if subtle.ConstantTimeCompare([]byte(provided), []byte(s.Config.CMTTriggerSecret)) != 1 { - logger.Warn("Invalid or missing X-Trigger-Token on /cmt_dispatch") - w.WriteHeader(http.StatusForbidden) - return - } - - body, err := io.ReadAll(r.Body) - if err != nil { - logger.WithError(err).Error("Failed to read /cmt_dispatch request body") - w.WriteHeader(http.StatusBadRequest) - return - } - - var req CMTDispatchRequest - if err := json.Unmarshal(body, &req); err != nil { - logger.WithError(err).Error("Failed to parse /cmt_dispatch JSON body") - w.WriteHeader(http.StatusBadRequest) - return - } - - if req.Owner == "" || req.Repo == "" || req.SHA == "" || req.Ref == "" || req.RunID == 0 || req.ServerVersions == "" { - logger.WithFields(logrus.Fields{ - "owner": req.Owner, - "repo": req.Repo, - "sha_empty": req.SHA == "", - "ref_empty": req.Ref == "", - "run_id": req.RunID, - "versions_empty": req.ServerVersions == "", - }).Error("Missing required fields in /cmt_dispatch body") - w.WriteHeader(http.StatusBadRequest) - return - } - - versions := parseServerVersionsFromString(req.ServerVersions) - if len(versions) == 0 { - logger.WithField("input", req.ServerVersions).Error("Failed to parse server_versions") - w.WriteHeader(http.StatusBadRequest) - return - } - - var instanceType string - switch { - case strings.Contains(req.Repo, "desktop"): - instanceType = "desktop" - case strings.Contains(req.Repo, "mobile"): - instanceType = "mobile" - default: - logger.WithField("repo", req.Repo).Warn("Repository is neither desktop nor mobile, refusing /cmt_dispatch") - w.WriteHeader(http.StatusBadRequest) - return - } - - dispatchLogger := logger.WithFields(logrus.Fields{ - "repo": req.Repo, - "owner": req.Owner, - "sha": req.SHA, - "ref": req.Ref, - "run_id": req.RunID, - "versions": versions, - "instanceType": instanceType, - }) - dispatchLogger.Info("Accepted CMT dispatch request, provisioning asynchronously") - - // Provisioning takes ~30 min; respond 202 now and do the work in a goroutine. - go s.handleCMTWithServerVersions(req.Owner, req.Repo, instanceType, req.Ref, req.SHA, versions, req.RunID, dispatchLogger) - - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusAccepted) - _, _ = w.Write([]byte(`{"status":"accepted"}`)) -} diff --git a/server/cmt_dispatch_test.go b/server/cmt_dispatch_test.go deleted file mode 100644 index eeed4dd..0000000 --- a/server/cmt_dispatch_test.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2020-present Mattermost, Inc. All Rights Reserved. -// See License.txt for license information. - -package server - -import ( - "bytes" - "net/http" - "net/http/httptest" - "testing" - - "github.com/sirupsen/logrus" - "github.com/stretchr/testify/assert" -) - -// newCMTDispatchTestServer builds a Server suitable for exercising -// handleCMTDispatch's synchronous validation logic. It deliberately does NOT -// set CloudClient, so callers must not exercise paths that launch the -// provisioning goroutine (i.e. only the rejection / 4xx cases). Tests that -// need the goroutine to run must build their own server with a mocked -// CloudClient -- see e2e_dryrun_test.go for that pattern. -func newCMTDispatchTestServer(t *testing.T, triggerSecret string) *Server { - t.Helper() - return &Server{ - Config: &MatterwickConfig{ - Org: "mattermost", - GithubAccessToken: "test-token", - CMTTriggerSecret: triggerSecret, - E2EServerVersion: "latest", - }, - Logger: logrus.New(), - e2eInstances: make(map[string][]*E2EInstance), - } -} - -// validCMTDispatchBody is the canonical happy-path body. Individual tests -// rebuild it with one field tweaked to exercise validation branches. -const validCMTDispatchBody = `{ - "owner": "mattermost", - "repo": "desktop", - "sha": "abc123def456", - "ref": "master", - "run_id": 26026866029, - "server_versions": "v11.1.0, v11.2.0" -}` - -func doCMTDispatchRequest(t *testing.T, s *Server, body, token string) *httptest.ResponseRecorder { - t.Helper() - req := httptest.NewRequest(http.MethodPost, "/cmt_dispatch", bytes.NewBufferString(body)) - req.Header.Set("Content-Type", "application/json") - if token != "" { - req.Header.Set("X-Trigger-Token", token) - } - w := httptest.NewRecorder() - s.handleCMTDispatch(w, req) - return w -} - -func TestHandleCMTDispatch_RejectsWhenSecretUnconfigured(t *testing.T) { - // Refuses to run unauthenticated even if a caller presents some token, - // because there is no configured secret to compare against. - s := newCMTDispatchTestServer(t, "") - w := doCMTDispatchRequest(t, s, validCMTDispatchBody, "any-token") - assert.Equal(t, http.StatusServiceUnavailable, w.Code, - "unconfigured secret must yield 503, not 200/202/401/403") -} - -func TestHandleCMTDispatch_RejectsMissingToken(t *testing.T) { - s := newCMTDispatchTestServer(t, "the-secret") - w := doCMTDispatchRequest(t, s, validCMTDispatchBody, "") - assert.Equal(t, http.StatusForbidden, w.Code, - "absent X-Trigger-Token header must yield 403") -} - -func TestHandleCMTDispatch_RejectsWrongToken(t *testing.T) { - s := newCMTDispatchTestServer(t, "the-secret") - w := doCMTDispatchRequest(t, s, validCMTDispatchBody, "not-the-secret") - assert.Equal(t, http.StatusForbidden, w.Code, - "wrong X-Trigger-Token must yield 403") -} - -func TestHandleCMTDispatch_RejectsMalformedJSON(t *testing.T) { - s := newCMTDispatchTestServer(t, "the-secret") - w := doCMTDispatchRequest(t, s, `{not valid json`, "the-secret") - assert.Equal(t, http.StatusBadRequest, w.Code, - "malformed body must yield 400 after token passes") -} - -func TestHandleCMTDispatch_RejectsMissingFields(t *testing.T) { - cases := []struct { - name string - body string - }{ - {"missing owner", `{"repo":"desktop","sha":"a","ref":"master","run_id":1,"server_versions":"v11.0.0"}`}, - {"missing repo", `{"owner":"m","sha":"a","ref":"master","run_id":1,"server_versions":"v11.0.0"}`}, - {"missing sha", `{"owner":"m","repo":"desktop","ref":"master","run_id":1,"server_versions":"v11.0.0"}`}, - {"missing ref", `{"owner":"m","repo":"desktop","sha":"a","run_id":1,"server_versions":"v11.0.0"}`}, - {"zero run_id", `{"owner":"m","repo":"desktop","sha":"a","ref":"master","run_id":0,"server_versions":"v11.0.0"}`}, - {"empty server_versions", `{"owner":"m","repo":"desktop","sha":"a","ref":"master","run_id":1,"server_versions":""}`}, - } - s := newCMTDispatchTestServer(t, "the-secret") - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - w := doCMTDispatchRequest(t, s, tc.body, "the-secret") - assert.Equal(t, http.StatusBadRequest, w.Code, - "missing required field %q must yield 400", tc.name) - }) - } -} - -func TestHandleCMTDispatch_RejectsServerVersionsThatParseToEmpty(t *testing.T) { - s := newCMTDispatchTestServer(t, "the-secret") - // parseServerVersionsFromString collapses ",,," into the empty slice. - body := `{"owner":"m","repo":"desktop","sha":"a","ref":"master","run_id":1,"server_versions":",,,"}` - w := doCMTDispatchRequest(t, s, body, "the-secret") - assert.Equal(t, http.StatusBadRequest, w.Code, - "server_versions that parses to zero versions must yield 400") -} - -func TestHandleCMTDispatch_RejectsUnknownRepoType(t *testing.T) { - s := newCMTDispatchTestServer(t, "the-secret") - // Repo name contains neither "desktop" nor "mobile". - body := `{"owner":"m","repo":"playbooks","sha":"a","ref":"master","run_id":1,"server_versions":"v11.0.0"}` - w := doCMTDispatchRequest(t, s, body, "the-secret") - assert.Equal(t, http.StatusBadRequest, w.Code, - "unsupported repo must yield 400, never start provisioning") -} - -func TestHandleCMTDispatch_ConstantTimeTokenCompare(t *testing.T) { - // Sanity check: the comparison must not short-circuit on prefix match. - // (We can't directly observe timing in a unit test, but we can at least - // confirm that a partial-match token of the same length is still rejected.) - s := newCMTDispatchTestServer(t, "secret-abcdef") - w := doCMTDispatchRequest(t, s, validCMTDispatchBody, "secret-ZZZZZZ") - assert.Equal(t, http.StatusForbidden, w.Code) -} - -// NOTE: we deliberately do not test the "happy path returns 202" case in this -// file. A valid request launches the provisioning goroutine, which calls into -// CloudClient (nil in this test harness) and panics. End-to-end behavior of -// handleCMTWithServerVersions is covered by e2e_dryrun_test.go with a mocked -// cloud client. The synchronous validation logic above (rejection branches + -// missing-field detection) covers the bug-prone surface of this handler. diff --git a/server/config.go b/server/config.go index 3505fab..97948d1 100644 --- a/server/config.go +++ b/server/config.go @@ -124,17 +124,31 @@ type MatterwickConfig struct { // Default (0): 3 hours. E2EInstanceMaxAge int - // CMTTriggerSecret is the shared secret presented in the X-Trigger-Token header by - // cmt-provisioner.yml when POSTing to /cmt_dispatch. The webhook payload from - // GitHub does not carry workflow_dispatch inputs (verified against the - // workflow_run schema), so cmt-provisioner calls matterwick directly with the - // server_versions input. Empty value disables the endpoint. - CMTTriggerSecret string - - // CleanupSecret is the shared secret presented in the X-Cleanup-Token header by - // compatibility-matrix-testing.yml (and any other workflow) when POSTing to - // /cleanup_e2e to request instance teardown. Empty value disables the endpoint. - CleanupSecret string + // CMTTriggerWorkflowName is the workflow name (the "name:" field) of the lightweight, + // scheduled CMT trigger workflow in the desktop/mobile repos. When matterwick receives + // a workflow_run "requested" event for this workflow it provisions one instance per + // version in CMTServerVersions and dispatches compatibility-matrix-testing.yml. + CMTTriggerWorkflowName string + + // CMTServerVersions is the hardcoded set of Mattermost server versions CMT runs + // against (e.g. the active ESR plus the current feature release). Values must be valid + // Mattermost image tags (full semver, no "v" prefix, e.g. "10.11.0"). Overridable via + // the gitops config; when empty matterwick falls back to defaultCMTServerVersions. + CMTServerVersions []string +} + +// defaultCMTServerVersions is the fallback CMT version set used when Config.CMTServerVersions +// is empty. Mattermost actively supports v11.x feature releases and the v10.11 ESR, so the +// default covers the current ESR line plus a v11 release. Update as ESR/feature lines change. +var defaultCMTServerVersions = []string{"10.11.0", "11.7.0"} + +// CMTVersions returns the configured CMT server versions, or the hardcoded default set when +// none are configured. +func (c *MatterwickConfig) CMTVersions() []string { + if len(c.CMTServerVersions) > 0 { + return c.CMTServerVersions + } + return defaultCMTServerVersions } func findConfigFile(fileName string) string { diff --git a/server/e2e_tests.go b/server/e2e_tests.go index f0efae3..6fca1b6 100644 --- a/server/e2e_tests.go +++ b/server/e2e_tests.go @@ -1048,11 +1048,11 @@ func (s *Server) buildInstanceDetailsJSON(instances []*E2EInstance) (string, err return string(jsonBytes), nil } -// dispatchDesktopE2EWorkflow triggers the desktop E2E workflow via GitHub Actions API. -// trackingKey is the s.e2eInstances map key for this run; when non-empty it is passed -// as the "mw_tracking_key" workflow input so the workflow_run completed handler can do -// a direct key lookup instead of fragile SHA suffix matching. -func (s *Server) dispatchDesktopE2EWorkflow(repoOwner, repoName, ref, sha, instanceDetailsJSON, runType, trackingKey string, nightly bool) error { +// dispatchDesktopE2EWorkflow triggers the desktop E2E workflow (e2e-functional.yml) via the +// GitHub Actions API. Cleanup is driven by the workflow_run completed event matched on the +// commit SHA, so no tracking key is passed as an input (e2e-functional.yml does not declare +// one, and GitHub rejects a workflow_dispatch carrying an undeclared input with a 422). +func (s *Server) dispatchDesktopE2EWorkflow(repoOwner, repoName, ref, sha, instanceDetailsJSON, runType string, nightly bool) error { ctx := context.Background() client := newGithubClient(s.Config.GithubAccessToken) @@ -1085,9 +1085,6 @@ func (s *Server) dispatchDesktopE2EWorkflow(repoOwner, repoName, ref, sha, insta "run_type": runType, "nightly": fmt.Sprintf("%t", nightly), } - if trackingKey != "" { - workflowInputs["mw_tracking_key"] = trackingKey - } // Use REST API to trigger workflow dispatch (v32 go-github compatibility) req, err := client.NewRequest("POST", @@ -1116,11 +1113,11 @@ func (s *Server) dispatchDesktopE2EWorkflow(repoOwner, repoName, ref, sha, insta return nil } -// dispatchMobileE2EWorkflow triggers the mobile E2E workflow via GitHub Actions API. -// trackingKey is the s.e2eInstances map key for this run; when non-empty it is passed -// as the "mw_tracking_key" workflow input so the workflow_run completed handler can do -// a direct key lookup instead of fragile SHA suffix matching. -func (s *Server) dispatchMobileE2EWorkflow(repoOwner, repoName, ref, sha, site1URL, site2URL, site3URL, platform, runType, trackingKey string) error { +// dispatchMobileE2EWorkflow triggers the mobile E2E workflow (e2e-detox-pr.yml) via the +// GitHub Actions API. Cleanup is driven by the workflow_run completed event matched on the +// commit SHA, so no tracking key is passed as an input (e2e-detox-pr.yml does not declare +// one, and GitHub rejects a workflow_dispatch carrying an undeclared input with a 422). +func (s *Server) dispatchMobileE2EWorkflow(repoOwner, repoName, ref, sha, site1URL, site2URL, site3URL, platform, runType string) error { ctx := context.Background() client := newGithubClient(s.Config.GithubAccessToken) @@ -1138,9 +1135,6 @@ func (s *Server) dispatchMobileE2EWorkflow(repoOwner, repoName, ref, sha, site1U "PLATFORM": platform, "run_type": runType, } - if trackingKey != "" { - workflowInputs["mw_tracking_key"] = trackingKey - } // Use REST API to trigger workflow dispatch (v32 go-github compatibility) req, err := client.NewRequest("POST", diff --git a/server/push_events.go b/server/push_events.go index 685a269..bfd49d8 100644 --- a/server/push_events.go +++ b/server/push_events.go @@ -140,7 +140,7 @@ func (s *Server) handlePushEventE2E(event *github.PushEvent, branch string) { s.e2eInstances[key] = instances s.e2eInstancesLock.Unlock() - err = s.triggerE2EWorkflowForPushEvent(repoName, instanceType, branch, sha, key, instances) + err = s.triggerE2EWorkflowForPushEvent(repoName, instanceType, branch, sha, instances) if err != nil { logger.WithError(err).Error("Failed to trigger E2E workflow") s.e2eInstancesLock.Lock() @@ -249,8 +249,8 @@ func getRunnerForPlatform(platform string) string { } // triggerE2EWorkflowForPushEvent routes to the desktop or mobile dispatch function. -// trackingKey is embedded in workflow inputs as mw_tracking_key for cleanup on completion. -func (s *Server) triggerE2EWorkflowForPushEvent(repoName, instanceType, branch, sha, trackingKey string, instances []*E2EInstance) error { +// Cleanup is driven by the workflow_run completed event matched on the commit SHA. +func (s *Server) triggerE2EWorkflowForPushEvent(repoName, instanceType, branch, sha string, instances []*E2EInstance) error { logger := s.Logger.WithFields(logrus.Fields{ "repo": repoName, "instanceType": instanceType, @@ -265,14 +265,14 @@ func (s *Server) triggerE2EWorkflowForPushEvent(repoName, instanceType, branch, } if instanceType == "desktop" { - return s.triggerDesktopE2EWorkflowForPushEvent(repoOwner, repoName, branch, sha, trackingKey, instances) + return s.triggerDesktopE2EWorkflowForPushEvent(repoOwner, repoName, branch, sha, instances) } - return s.triggerMobileE2EWorkflowForPushEvent(repoOwner, repoName, branch, sha, trackingKey, instances) + return s.triggerMobileE2EWorkflowForPushEvent(repoOwner, repoName, branch, sha, instances) } // triggerDesktopE2EWorkflowForPushEvent dispatches the desktop E2E workflow. -func (s *Server) triggerDesktopE2EWorkflowForPushEvent(repoOwner, repoName, branch, sha, trackingKey string, instances []*E2EInstance) error { +func (s *Server) triggerDesktopE2EWorkflowForPushEvent(repoOwner, repoName, branch, sha string, instances []*E2EInstance) error { logger := s.Logger.WithFields(logrus.Fields{ "repo": repoName, "branch": branch, @@ -291,11 +291,11 @@ func (s *Server) triggerDesktopE2EWorkflowForPushEvent(repoOwner, repoName, bran runType = "RELEASE" } - return s.dispatchDesktopE2EWorkflow(repoOwner, repoName, branch, sha, instanceDetailsJSON, runType, trackingKey, false) + return s.dispatchDesktopE2EWorkflow(repoOwner, repoName, branch, sha, instanceDetailsJSON, runType, false) } // triggerMobileE2EWorkflowForPushEvent dispatches the mobile E2E workflow (e2e-detox-pr.yml). -func (s *Server) triggerMobileE2EWorkflowForPushEvent(repoOwner, repoName, branch, sha, trackingKey string, instances []*E2EInstance) error { +func (s *Server) triggerMobileE2EWorkflowForPushEvent(repoOwner, repoName, branch, sha string, instances []*E2EInstance) error { logger := s.Logger.WithFields(logrus.Fields{ "repo": repoName, "branch": branch, @@ -321,6 +321,6 @@ func (s *Server) triggerMobileE2EWorkflowForPushEvent(repoOwner, repoName, branc repoOwner, repoName, branch, sha, instances[0].URL, instances[1].URL, instances[2].URL, "both", // push events always test both iOS and Android - runType, trackingKey, + runType, ) } diff --git a/server/server.go b/server/server.go index c76d29e..f1e5cdf 100644 --- a/server/server.go +++ b/server/server.go @@ -182,16 +182,6 @@ func (s *Server) initializeRouter() { s.Router.HandleFunc("/github_event", s.githubEvent).Methods(http.MethodPost) s.Router.HandleFunc("/cloud_webhooks", s.handleCloudWebhook).Methods(http.MethodPost) s.Router.HandleFunc("/shrug_wick", s.serveShrugWick).Methods(http.MethodGet) - - // /cmt_dispatch is called directly by cmt-provisioner.yml in desktop/mobile, - // because GitHub's workflow_run webhook payload does not carry workflow_dispatch - // inputs (the inputs field is not in GitHub's workflow_run schema), so we cannot - // extract server_versions from the webhook alone. - s.Router.HandleFunc("/cmt_dispatch", s.handleCMTDispatch).Methods(http.MethodPost) - - // /cleanup_e2e is called by compatibility-matrix-testing.yml after the test - // matrix finishes, so matterwick can destroy the provisioned instances. - s.Router.HandleFunc("/cleanup_e2e", s.handleCleanupE2E).Methods(http.MethodPost) } func (s *Server) ping(w http.ResponseWriter, r *http.Request) { diff --git a/server/workflow_run.go b/server/workflow_run.go index b8e2807..4b063cd 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -78,20 +78,17 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay "head_sha": headSHA, }) - // CMT: "CMT Provisioner" workflow_run events are no longer used to start provisioning. - // GitHub's workflow_run webhook payload does not carry workflow_dispatch inputs (the - // "inputs" field is not in the workflow_run schema), so server_versions cannot be - // read from this event. cmt-provisioner.yml now POSTs directly to /cmt_dispatch with - // the full context, handled by handleCMTDispatch. - // - // We still listen for workflow_run.completed on CMT as a defensive fallback to clean - // up any instances that somehow got tracked but were never reaped via /cleanup_e2e. - if strings.Contains(workflowName, "cmt") || strings.Contains(workflowName, "CMT") { - if payload.Action == "completed" { - logger.Debug("CMT trigger workflow completed; sha-based cleanup is a fallback for stuck instances") - s.handleCMTRunCleanup(repoName, headSHA, logger) - } else { - logger.WithField("action", payload.Action).Debug("Ignoring CMT workflow_run event; provisioning is driven by /cmt_dispatch") + // CMT: a lightweight, scheduled trigger workflow (Config.CMTTriggerWorkflowName, e.g. + // "CMT Provisioner") fires in the desktop/mobile repo. matterwick provisions one instance + // per version in Config.CMTVersions() and dispatches compatibility-matrix-testing.yml. + // No inputs are read from the event — the version set is hardcoded in matterwick config — + // so this works despite GitHub's workflow_run payload not carrying workflow_dispatch inputs. + // Cleanup happens when the test workflow ("Compatibility Matrix Testing") completes, handled + // by the isE2ETestWorkflow branch below. + if s.Config.CMTTriggerWorkflowName != "" && workflowName == s.Config.CMTTriggerWorkflowName { + if payload.Action == "requested" { + logger.Info("CMT trigger workflow started, provisioning E2E servers for configured versions") + go s.handleCMTTrigger(owner, repoName, headBranch, headSHA, runID, logger) } return } @@ -108,13 +105,9 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay // --- Test workflow completion: clean up provisioned instances --- // // Cleanup is keyed by head_sha against tracking-key suffixes ({repo}-...-{sha}). - // We do not attempt to read mw_tracking_key from payload.WorkflowRun.Inputs: - // GitHub's workflow_run webhook payload does not include workflow_dispatch - // inputs (the "inputs" field is not in the workflow_run schema), so that field - // is always empty in production. SHA-based scanning is the canonical mechanism. - // The mw_tracking_key value is still set when dispatching the test workflow so - // it remains queryable via the GitHub REST API if a future code path wants it, - // but the webhook-driven cleanup intentionally does not depend on it. + // GitHub's workflow_run webhook payload does not include workflow_dispatch inputs, + // so SHA-based scanning is the canonical mechanism for matching provisioned instances + // (push, scheduled, and CMT tracking keys all end with "-{sha}"). if payload.Action == "completed" && s.isE2ETestWorkflow(workflowName) { logger.Info("Test workflow completed, cleaning up instances by SHA suffix match") s.findAndDestroyInstancesBySHA(repoName, headSHA, logger) @@ -189,10 +182,7 @@ func (s *Server) handleNightlyE2ETrigger(owner, repoName, branch, sha, triggerEv s.destroyE2EInstances(instances, logger) return } - // Pass the tracking key so the workflow_run completed handler can clean up by - // direct key lookup rather than SHA suffix matching (immune to new commits during - // the ~30 min instance-creation window). - dispatchErr = s.dispatchDesktopE2EWorkflow(owner, repoName, branch, sha, instanceDetailsJSON, runType, key, nightly) + dispatchErr = s.dispatchDesktopE2EWorkflow(owner, repoName, branch, sha, instanceDetailsJSON, runType, nightly) } else { if len(instances) < 3 { logger.Errorf("Expected 3 mobile instances, got %d", len(instances)) @@ -203,7 +193,7 @@ func (s *Server) handleNightlyE2ETrigger(owner, repoName, branch, sha, triggerEv return } dispatchErr = s.dispatchMobileE2EWorkflow(owner, repoName, branch, sha, - instances[0].URL, instances[1].URL, instances[2].URL, "both", runType, key) + instances[0].URL, instances[1].URL, instances[2].URL, "both", runType) } if dispatchErr != nil { @@ -271,6 +261,28 @@ func parseServerVersionsFromString(input string) []string { return versions } +// handleCMTTrigger is invoked when the scheduled CMT trigger workflow fires. It resolves the +// instance type from the repo, reads the hardcoded server-version set from config, and hands +// off to handleCMTWithServerVersions. The version list lives in matterwick (Config.CMTVersions), +// so nothing needs to be read from the workflow_run event. +func (s *Server) handleCMTTrigger(owner, repoName, branch, sha string, runID int64, logger logrus.FieldLogger) { + instanceType := "desktop" + if strings.Contains(repoName, "mobile") { + instanceType = "mobile" + } else if !strings.Contains(repoName, "desktop") { + logger.Warn("Repository is neither desktop nor mobile, skipping CMT trigger") + return + } + + versions := s.Config.CMTVersions() + logger.WithFields(logrus.Fields{ + "instanceType": instanceType, + "versions": versions, + }).Info("Provisioning CMT instances for configured server versions") + + s.handleCMTWithServerVersions(owner, repoName, instanceType, branch, sha, versions, runID, logger) +} + // handleCMTWithServerVersions orchestrates CMT testing: creates one instance per server // version, builds the CMT_MATRIX JSON, and dispatches compatibility-matrix-testing.yml once. func (s *Server) handleCMTWithServerVersions(repoOwner, repoName, instanceType, branch, sha string, serverVersions []string, runID int64, logger logrus.FieldLogger) { @@ -346,9 +358,7 @@ func (s *Server) handleCMTWithServerVersions(repoOwner, repoName, instanceType, return } - // Pass the tracking key so the workflow_run completed handler can clean up by - // direct key lookup rather than SHA suffix matching. - if err := s.dispatchCMTWorkflow(repoOwner, repoName, sha, branch, cmtMatrixJSON, instanceType, key, runID, logger); err != nil { + if err := s.dispatchCMTWorkflow(repoOwner, repoName, sha, branch, cmtMatrixJSON, instanceType, runID, logger); err != nil { logger.WithError(err).Error("Failed to dispatch compatibility-matrix-testing.yml") s.e2eInstancesLock.Lock() delete(s.e2eInstances, key) @@ -461,19 +471,19 @@ func buildMobileCMTMatrixJSON(versions []string, instances []*E2EInstance) (stri } // dispatchCMTWorkflow dispatches compatibility-matrix-testing.yml with the populated -// CMT_MATRIX JSON. trackingKey is the s.e2eInstances map key for this run; it is -// embedded as "mw_tracking_key" in the workflow inputs so the workflow_run completed -// handler can do a direct key lookup instead of fragile SHA suffix matching. -// runID is the CMT provisioner workflow run ID, passed as cmt_run_id so the test workflow -// can call back to Matterwick for instance cleanup. -func (s *Server) dispatchCMTWorkflow(repoOwner, repoName, sha, branch, cmtMatrixJSON, instanceType, trackingKey string, runID int64, logger logrus.FieldLogger) error { +// CMT_MATRIX JSON. runID is the CMT trigger workflow run ID, passed as cmt_run_id purely +// for traceability/logging on the test workflow side. +// +// We intentionally do NOT pass a "mw_tracking_key" input: compatibility-matrix-testing.yml +// does not declare it, and GitHub rejects a workflow_dispatch carrying an undeclared input +// with a 422. Cleanup is driven by SHA-suffix matching when the test workflow completes. +func (s *Server) dispatchCMTWorkflow(repoOwner, repoName, sha, branch, cmtMatrixJSON, instanceType string, runID int64, logger logrus.FieldLogger) error { ctx := context.Background() client := newGithubClient(s.Config.GithubAccessToken) workflowInputs := map[string]interface{}{ - "CMT_MATRIX": cmtMatrixJSON, - "cmt_run_id": fmt.Sprintf("%d", runID), - "mw_tracking_key": trackingKey, + "CMT_MATRIX": cmtMatrixJSON, + "cmt_run_id": fmt.Sprintf("%d", runID), } if instanceType == "desktop" { workflowInputs["DESKTOP_VERSION"] = branch @@ -589,18 +599,3 @@ func (s *Server) createCMTInstancesForVersion(repoName, instanceType, version, p logger.WithField("instanceCount", len(instances)).Info("Instances created for version") return instances, nil } - -// handleCMTRunCleanup is a best-effort fallback for CMT cleanup when the trigger workflow -// completes. Because the CMT trigger is a lightweight workflow that completes in seconds — -// well before the 30-minute provisioning goroutine stores instances — this function will -// most often find nothing. The primary cleanup path is findAndDestroyInstancesBySHA, -// triggered when compatibility-matrix-testing.yml completes. -func (s *Server) handleCMTRunCleanup(repoName, sha string, logger logrus.FieldLogger) { - logger = logger.WithFields(logrus.Fields{ - "repo": repoName, - "sha": sha, - "type": "cmt_cleanup_fallback", - }) - logger.Debug("CMT trigger completed — sha-based cleanup is the primary path") - s.findAndDestroyInstancesBySHA(repoName, sha, logger) -} From 83c0fe0c1283504293a477972b5f75e75638c5fa Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Mon, 1 Jun 2026 15:45:49 +0530 Subject: [PATCH 05/19] fix(e2e): scope SHA cleanup by flow so concurrent same-SHA runs aren't reaped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When two flows fire on the same commit SHA (notably the monthly CMT trigger and the nightly trigger on the same master HEAD), the completing test workflow would destroy the other flow's still-active servers, because cleanup matched every {repo}-…-{sha} key. Scope cleanup to the completing workflow's flow: CMT test completions reap only {repo}-cmt-… keys, other E2E completions reap only non-CMT (push/scheduled) keys. We can't key on a run ID as CodeRabbit suggested: GitHub's workflow_run payload carries the *test* run's id (not the trigger run id embedded in the tracking key) and omits workflow_dispatch inputs, so head_sha + flow is the only viable correlation. Branch-advance SHA desync remains handled by the periodic orphan cleanup scan. Extracted instanceKeyMatchesSHA + added TestInstanceKeyMatchesSHA covering the collision. Co-Authored-By: Claude Opus 4.8 --- server/e2e_dryrun_test.go | 33 +++++++++++++++++++++++ server/workflow_run.go | 57 +++++++++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index d778257..52f97b7 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -809,6 +809,39 @@ func TestDryRun_InstanceTracking(t *testing.T) { }) } +// ------------------------------------------------------------ +// 9b. SHA-scoped cleanup must not reap a concurrent flow on the same SHA +// ------------------------------------------------------------ + +func TestInstanceKeyMatchesSHA(t *testing.T) { + repo := "mattermost-mobile" + sha := "deadbeef" + cmtKey := fmt.Sprintf("%s-cmt-100-%s", repo, sha) // CMT flow + nightlyKey := fmt.Sprintf("%s-scheduled-200-%s", repo, sha) // nightly flow, same SHA + pushKey := fmt.Sprintf("%s-push-release-9.0-%s", repo, sha) // push flow, same SHA + prKey := fmt.Sprintf("%s-pr-42", repo) // PR flow, no -sha suffix + otherSHAKey := fmt.Sprintf("%s-cmt-100-%s", repo, "feedface") // CMT, different SHA + + t.Run("CMT completion matches only the CMT key", func(t *testing.T) { + assert.True(t, instanceKeyMatchesSHA(cmtKey, repo, sha, true)) + assert.False(t, instanceKeyMatchesSHA(nightlyKey, repo, sha, true), "nightly key must survive a CMT completion") + assert.False(t, instanceKeyMatchesSHA(pushKey, repo, sha, true)) + assert.False(t, instanceKeyMatchesSHA(prKey, repo, sha, true)) + assert.False(t, instanceKeyMatchesSHA(otherSHAKey, repo, sha, true), "different SHA must not match") + }) + + t.Run("non-CMT completion matches push/scheduled but not CMT", func(t *testing.T) { + assert.False(t, instanceKeyMatchesSHA(cmtKey, repo, sha, false), "CMT key must survive a nightly/push completion") + assert.True(t, instanceKeyMatchesSHA(nightlyKey, repo, sha, false)) + assert.True(t, instanceKeyMatchesSHA(pushKey, repo, sha, false)) + assert.False(t, instanceKeyMatchesSHA(prKey, repo, sha, false), "PR keys have no -sha suffix") + }) + + t.Run("other repo is never matched", func(t *testing.T) { + assert.False(t, instanceKeyMatchesSHA("mattermost-desktop-cmt-100-"+sha, repo, sha, true)) + }) +} + // ------------------------------------------------------------ // 10. Instance name length safety // ------------------------------------------------------------ diff --git a/server/workflow_run.go b/server/workflow_run.go index 4b063cd..dae7828 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -104,13 +104,23 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay // --- Test workflow completion: clean up provisioned instances --- // - // Cleanup is keyed by head_sha against tracking-key suffixes ({repo}-...-{sha}). - // GitHub's workflow_run webhook payload does not include workflow_dispatch inputs, - // so SHA-based scanning is the canonical mechanism for matching provisioned instances - // (push, scheduled, and CMT tracking keys all end with "-{sha}"). + // Cleanup is correlated on head_sha. GitHub's workflow_run webhook payload does not + // include workflow_dispatch inputs, and its run id is the *test* run's id (not the + // trigger run id embedded in our tracking key), so head_sha is the only field we can + // match on (push, scheduled, and CMT tracking keys all end with "-{sha}"). + // + // Two flows can legitimately share a head SHA — e.g. the monthly CMT trigger and the + // nightly trigger both fire on the same master commit. They run different test + // workflows, so we scope cleanup to the completing workflow's flow (CMT vs. non-CMT) to + // avoid tearing down the other run's still-active servers. + // + // If the branch advances during provisioning, the dispatched run's head_sha can differ + // from the tracked SHA and no key will match here; such orphans are reaped by the + // periodic cleanupStaleNonPRE2EInstances scan (bounded by E2EInstanceMaxAge). if payload.Action == "completed" && s.isE2ETestWorkflow(workflowName) { - logger.Info("Test workflow completed, cleaning up instances by SHA suffix match") - s.findAndDestroyInstancesBySHA(repoName, headSHA, logger) + cmtOnly := workflowName == cmtTestWorkflowName + logger.WithField("cmt_only", cmtOnly).Info("Test workflow completed, cleaning up matching instances by SHA") + s.findAndDestroyInstancesBySHA(repoName, headSHA, cmtOnly, logger) return } @@ -219,23 +229,42 @@ func (s *Server) isE2ETestWorkflow(name string) bool { return false } -// findAndDestroyInstancesBySHA scans the instance map for entries belonging to repoName -// whose tracking key ends with "-{headSHA}" (push-event, scheduled, and cmt keys) and destroys them. -func (s *Server) findAndDestroyInstancesBySHA(repoName, headSHA string, logger logrus.FieldLogger) { +// cmtTestWorkflowName is the workflow "name:" of compatibility-matrix-testing.yml in the +// desktop and mobile repos. It identifies CMT test-workflow completions so SHA-based cleanup +// only reaps CMT instances (keys "{repo}-cmt-…-{sha}") and leaves a concurrent nightly/push +// run on the same SHA untouched (and vice versa). +const cmtTestWorkflowName = "Compatibility Matrix Testing" + +// instanceKeyMatchesSHA reports whether a tracking key belongs to repoName, ends with the given +// head SHA, and matches the requested flow: CMT keys ("{repo}-cmt-…") when cmtOnly is true, +// non-CMT keys (push/scheduled) when false. This scoping prevents a completing workflow from +// reaping a different flow's run that happens to share the same commit SHA (e.g. the monthly CMT +// trigger and the nightly trigger firing on the same master commit). +func instanceKeyMatchesSHA(key, repoName, headSHA string, cmtOnly bool) bool { + if !strings.HasPrefix(key, repoName+"-") || !strings.HasSuffix(key, "-"+headSHA) { + return false + } + return strings.HasPrefix(key, repoName+"-cmt-") == cmtOnly +} + +// findAndDestroyInstancesBySHA scans the instance map for entries belonging to repoName whose +// tracking key ends with "-{headSHA}" and destroys them. When cmtOnly is true only CMT keys +// ("{repo}-cmt-…") match; when false only non-CMT keys (push/scheduled) match — so a completing +// workflow never tears down a different flow's run that happens to share the same SHA. +func (s *Server) findAndDestroyInstancesBySHA(repoName, headSHA string, cmtOnly bool, logger logrus.FieldLogger) { if headSHA == "" { return } - prefix := repoName + "-" - suffix := "-" + headSHA s.e2eInstancesLock.Lock() var found []*E2EInstance var keysToDelete []string for key, instances := range s.e2eInstances { - if strings.HasPrefix(key, prefix) && strings.HasSuffix(key, suffix) { - found = append(found, instances...) - keysToDelete = append(keysToDelete, key) + if !instanceKeyMatchesSHA(key, repoName, headSHA, cmtOnly) { + continue } + found = append(found, instances...) + keysToDelete = append(keysToDelete, key) } for _, k := range keysToDelete { delete(s.e2eInstances, k) From a2293fdb17d1cfb3fac488ecc4b6fafb96a967e1 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Mon, 1 Jun 2026 15:52:23 +0530 Subject: [PATCH 06/19] chore(cmt): default CMT versions to latest ESR/feature patches (10.11.18, 11.7.1) Use the latest patch of the v10.11 ESR line and the v11.7 feature line (both verified present on Docker Hub) instead of the .0 GA placeholders, so CMT tests the currently-shipping builds. Keeps the code default in sync with the gitops config (mattermost/gitops-platform#72). Co-Authored-By: Claude Opus 4.8 --- config/config-matterwick.default.json | 2 +- server/config.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/config-matterwick.default.json b/config/config-matterwick.default.json index 0d3621f..19effdb 100644 --- a/config/config-matterwick.default.json +++ b/config/config-matterwick.default.json @@ -85,5 +85,5 @@ "E2ETestWorkflowNames": ["Electron Playwright Tests", "E2E", "Compatibility Matrix Testing"], "E2EInstanceMaxAge": 6, "CMTTriggerWorkflowName": "CMT Provisioner", - "CMTServerVersions": ["10.11.0", "11.7.0"] + "CMTServerVersions": ["10.11.18", "11.7.1"] } diff --git a/server/config.go b/server/config.go index 97948d1..efefb03 100644 --- a/server/config.go +++ b/server/config.go @@ -140,7 +140,7 @@ type MatterwickConfig struct { // defaultCMTServerVersions is the fallback CMT version set used when Config.CMTServerVersions // is empty. Mattermost actively supports v11.x feature releases and the v10.11 ESR, so the // default covers the current ESR line plus a v11 release. Update as ESR/feature lines change. -var defaultCMTServerVersions = []string{"10.11.0", "11.7.0"} +var defaultCMTServerVersions = []string{"10.11.18", "11.7.1"} // CMTVersions returns the configured CMT server versions, or the hardcoded default set when // none are configured. From 8af9a48276a540b3f813203cabaa6d71f62dcb38 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Mon, 1 Jun 2026 18:34:21 +0530 Subject: [PATCH 07/19] feat(cmt): raise CMT version cap from 5 to 10 One cloud instance is provisioned per version, so 10 versions = 10 instances. Allows the full ESR-to-current version set to run in a single CMT run. Co-Authored-By: Claude Opus 4.8 --- server/e2e_dryrun_test.go | 13 +++++++------ server/workflow_run.go | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index 52f97b7..c875995 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -532,13 +532,14 @@ func TestDryRun_DesktopCMT(t *testing.T) { assert.Equal(t, []string{"v11.1.0", "v11.2.0", "v12.0.0"}, versions) }) - t.Run("caps server versions to 5", func(t *testing.T) { - versions := parseServerVersionsFromString("v1, v2, v3, v4, v5, v6, v7") - // The cap is enforced inside handleCMTWithServerVersions - if len(versions) > 5 { - versions = versions[:5] + t.Run("caps server versions to 10", func(t *testing.T) { + versions := parseServerVersionsFromString("v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12") + // The cap is enforced inside handleCMTWithServerVersions (maxVersions = 10). + const maxVersions = 10 + if len(versions) > maxVersions { + versions = versions[:maxVersions] } - assert.Len(t, versions, 5) + assert.Len(t, versions, maxVersions) }) t.Run("1 instance per version for CMT (matrix handles parallelism)", func(t *testing.T) { diff --git a/server/workflow_run.go b/server/workflow_run.go index dae7828..0573350 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -315,8 +315,8 @@ func (s *Server) handleCMTTrigger(owner, repoName, branch, sha string, runID int // handleCMTWithServerVersions orchestrates CMT testing: creates one instance per server // version, builds the CMT_MATRIX JSON, and dispatches compatibility-matrix-testing.yml once. func (s *Server) handleCMTWithServerVersions(repoOwner, repoName, instanceType, branch, sha string, serverVersions []string, runID int64, logger logrus.FieldLogger) { - // Cap at 5 versions to prevent runaway provisioning - const maxVersions = 5 + // Cap the number of versions (one cloud instance per version) to prevent runaway provisioning. + const maxVersions = 10 if len(serverVersions) > maxVersions { logger.Warnf("Capping server versions from %d to %d", len(serverVersions), maxVersions) serverVersions = serverVersions[:maxVersions] From 48abfceb9fb27e70ebcf6e20dd6e7bec6b9f8d05 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Tue, 2 Jun 2026 13:46:28 +0530 Subject: [PATCH 08/19] feat(cmt): auto-derive CMT server versions from Mattermost releases Instead of a hardcoded list that needs monthly edits, matterwick now resolves the CMT version set at trigger time from the mattermost/mattermost GitHub releases: the active ESR line(s) + the latest 3 stable minor lines + the current release candidate, latest patch of each, deduped and capped at 10. - resolveCMTServerVersions() (server/e2e_tests.go): fresh fetch per CMT trigger (no cache; CMT runs rarely), per_page=100, parses tag_name/draft/prerelease/body. ESR lines detected via 'Extended Support Release' in the release body. RC included only when newer than the latest stable. Falls back to defaultCMTServerVersions on API error. Adds a small internal semver parser/comparator (no new dependency). - cmtServerVersions(): explicit Config.CMTServerVersions is an optional manual override; empty (normal) => auto-resolve. Removed the old Config.CMTVersions(). - handleCMTTrigger uses s.cmtServerVersions(). - Tests: auto-derived set, manual override (no API call), API-error fallback, RC exclusion when stale, and parseCMTVersion edge cases. Raised the cap test to 10. Co-Authored-By: Claude Opus 4.8 --- server/config.go | 23 ++--- server/e2e_dryrun_test.go | 95 +++++++++++++++++++- server/e2e_tests.go | 181 ++++++++++++++++++++++++++++++++++++++ server/workflow_run.go | 17 ++-- 4 files changed, 288 insertions(+), 28 deletions(-) diff --git a/server/config.go b/server/config.go index efefb03..20bd491 100644 --- a/server/config.go +++ b/server/config.go @@ -130,27 +130,18 @@ type MatterwickConfig struct { // version in CMTServerVersions and dispatches compatibility-matrix-testing.yml. CMTTriggerWorkflowName string - // CMTServerVersions is the hardcoded set of Mattermost server versions CMT runs - // against (e.g. the active ESR plus the current feature release). Values must be valid - // Mattermost image tags (full semver, no "v" prefix, e.g. "10.11.0"). Overridable via - // the gitops config; when empty matterwick falls back to defaultCMTServerVersions. + // CMTServerVersions is an OPTIONAL manual override for the CMT version set. When non-empty + // it is used verbatim (values must be valid Mattermost image tags: full semver, no "v" + // prefix, e.g. "10.11.0"). When empty (the normal case) matterwick auto-derives the set + // from Mattermost's GitHub releases via Server.resolveCMTServerVersions. CMTServerVersions []string } -// defaultCMTServerVersions is the fallback CMT version set used when Config.CMTServerVersions -// is empty. Mattermost actively supports v11.x feature releases and the v10.11 ESR, so the -// default covers the current ESR line plus a v11 release. Update as ESR/feature lines change. +// defaultCMTServerVersions is the fallback CMT version set used only when auto-resolution +// fails (GitHub API error) and no manual override is configured. Kept reasonably current: +// the active v10.11 ESR plus a recent v11 release. var defaultCMTServerVersions = []string{"10.11.18", "11.7.1"} -// CMTVersions returns the configured CMT server versions, or the hardcoded default set when -// none are configured. -func (c *MatterwickConfig) CMTVersions() []string { - if len(c.CMTServerVersions) > 0 { - return c.CMTServerVersions - } - return defaultCMTServerVersions -} - func findConfigFile(fileName string) string { if _, err := os.Stat("/tmp/" + fileName); err == nil { fileName, _ = filepath.Abs("/tmp/" + fileName) diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index c875995..5019e2b 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -1408,13 +1408,100 @@ func TestDryRun_CMTVersionNormalization(t *testing.T) { assert.Equal(t, "11.1.0", s1["version"]) }) - t.Run("CMT versions capped at 5", func(t *testing.T) { - input := "v1.0.0, v2.0.0, v3.0.0, v4.0.0, v5.0.0, v6.0.0, v7.0.0" + t.Run("CMT versions capped at 10", func(t *testing.T) { + input := "v1.0.0, v2.0.0, v3.0.0, v4.0.0, v5.0.0, v6.0.0, v7.0.0, v8.0.0, v9.0.0, v10.0.0, v11.0.0, v12.0.0" parsed := parseServerVersionsFromString(input) - const maxVersions = 5 + const maxVersions = 10 if len(parsed) > maxVersions { parsed = parsed[:maxVersions] } - assert.Len(t, parsed, 5, "CMT versions must be capped at 5") + assert.Len(t, parsed, 10, "CMT versions must be capped at 10") + }) +} + +// ------------------------------------------------------------ +// 15. resolveCMTServerVersions() — auto-derived CMT version set +// ------------------------------------------------------------ + +func TestDryRun_ResolveCMTServerVersions(t *testing.T) { + // A realistic releases payload (newest first): an upcoming RC, recent stable minors, + // and ESR lines flagged in the body. Includes multiple patches per line and a draft. + releasesBody := `[ + {"tag_name":"v11.8.0-rc3","draft":false,"prerelease":true,"body":"Mattermost Platform Release 11.8.0-rc3"}, + {"tag_name":"v11.8.0-rc2","draft":false,"prerelease":true,"body":"rc"}, + {"tag_name":"v11.7.2","draft":false,"prerelease":false,"body":"Mattermost Platform Extended Support Release 11.7.2 contains fixes."}, + {"tag_name":"v11.7.1","draft":false,"prerelease":false,"body":"Mattermost Platform Extended Support Release 11.7.1"}, + {"tag_name":"v11.6.4","draft":false,"prerelease":false,"body":"Mattermost Platform Release 11.6.4"}, + {"tag_name":"v11.6.3","draft":false,"prerelease":false,"body":"Mattermost Platform Release 11.6.3"}, + {"tag_name":"v11.5.7","draft":false,"prerelease":false,"body":"Mattermost Platform Release 11.5.7"}, + {"tag_name":"v11.99.0","draft":true,"prerelease":false,"body":"draft should be ignored"}, + {"tag_name":"v10.11.19","draft":false,"prerelease":false,"body":"Mattermost Platform Extended Support Release 10.11.19 contains security fixes."}, + {"tag_name":"v10.11.18","draft":false,"prerelease":false,"body":"Mattermost Platform Extended Support Release 10.11.18"} + ]` + + t.Run("auto-derives ESR + latest 3 minors + current RC, latest patch each", func(t *testing.T) { + srv := mockReleasesServer(t, releasesBody, http.StatusOK) + s := newDryRunServer(t, "", "mattermost") + s.githubAPIBase = srv.URL + "/" + + got := s.resolveCMTServerVersions() + // 10.11.19 (ESR) + 11.5.7/11.6.4/11.7.2 (latest 3 minors; 11.7 also ESR) + 11.8.0-rc3 (RC), + // latest patch per line, v-stripped, ascending. + assert.Equal(t, []string{"10.11.19", "11.5.7", "11.6.4", "11.7.2", "11.8.0-rc3"}, got) + }) + + t.Run("explicit config override is returned verbatim, no API call", func(t *testing.T) { + called := false + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.WriteHeader(http.StatusInternalServerError) + })) + t.Cleanup(srv.Close) + s := newDryRunServer(t, "", "mattermost") + s.githubAPIBase = srv.URL + "/" + s.Config.CMTServerVersions = []string{"9.11.0", "10.5.0"} + + assert.Equal(t, []string{"9.11.0", "10.5.0"}, s.cmtServerVersions()) + assert.False(t, called, "manual override must not hit the GitHub API") + }) + + t.Run("API error falls back to defaultCMTServerVersions", func(t *testing.T) { + srv := mockReleasesServer(t, "boom", http.StatusInternalServerError) + s := newDryRunServer(t, "", "mattermost") + s.githubAPIBase = srv.URL + "/" + + assert.Equal(t, defaultCMTServerVersions, s.resolveCMTServerVersions()) + }) + + t.Run("RC omitted when not newer than latest stable", func(t *testing.T) { + // Only stable releases here; an old RC for an already-released line must not appear. + body := `[ + {"tag_name":"v11.7.2","draft":false,"prerelease":false,"body":"Mattermost Platform Extended Support Release 11.7.2"}, + {"tag_name":"v11.7.0-rc1","draft":false,"prerelease":true,"body":"rc"}, + {"tag_name":"v11.6.4","draft":false,"prerelease":false,"body":"Mattermost Platform Release 11.6.4"}, + {"tag_name":"v11.5.7","draft":false,"prerelease":false,"body":"Mattermost Platform Release 11.5.7"} + ]` + srv := mockReleasesServer(t, body, http.StatusOK) + s := newDryRunServer(t, "", "mattermost") + s.githubAPIBase = srv.URL + "/" + + got := s.resolveCMTServerVersions() + assert.Equal(t, []string{"11.5.7", "11.6.4", "11.7.2"}, got, "stale RC must be excluded") + }) + + t.Run("parseCMTVersion handles stable, rc, and v-prefix; rejects junk", func(t *testing.T) { + v, ok := parseCMTVersion("v11.8.0-rc3") + assert.True(t, ok) + assert.Equal(t, "11.8.0-rc3", v.raw) + assert.Equal(t, 3, v.rc) + v2, ok2 := parseCMTVersion("10.11.19") + assert.True(t, ok2) + assert.Equal(t, 0, v2.rc) + _, ok3 := parseCMTVersion("v11.7.0-beta.1") + assert.False(t, ok3, "non-rc prerelease suffixes are not CMT versions") + _, ok4 := parseCMTVersion("not-a-version") + assert.False(t, ok4) + // stable sorts above its rc for the same X.Y.Z + assert.True(t, v.less(v2) == false) }) } diff --git a/server/e2e_tests.go b/server/e2e_tests.go index 6fca1b6..3c5c68b 100644 --- a/server/e2e_tests.go +++ b/server/e2e_tests.go @@ -9,6 +9,8 @@ import ( "fmt" "net/url" "os" + "sort" + "strconv" "strings" "sync" "time" @@ -847,6 +849,185 @@ func (s *Server) resolveE2EServerVersion() string { return "master" } +// cmtVersion is a parsed Mattermost release version: major.minor.patch with an optional +// release-candidate number. raw is the bare-semver string passed to the cloud provisioner +// (e.g. "11.7.1" or "11.8.0-rc3"). +type cmtVersion struct { + major, minor, patch int + rc int // 0 = stable, >0 = -rcN + raw string +} + +// parseCMTVersion parses "vX.Y.Z" or "vX.Y.Z-rcN" (the leading "v" is optional). It returns +// ok=false for anything else (other prerelease suffixes like -beta/-alpha are ignored for CMT). +func parseCMTVersion(tag string) (cmtVersion, bool) { + raw := strings.TrimPrefix(strings.TrimSpace(tag), "v") + base := raw + rc := 0 + if i := strings.Index(base, "-rc"); i != -1 { + n, err := strconv.Atoi(base[i+len("-rc"):]) + if err != nil { + return cmtVersion{}, false + } + rc = n + base = base[:i] + } else if strings.Contains(base, "-") { + return cmtVersion{}, false + } + parts := strings.Split(base, ".") + if len(parts) != 3 { + return cmtVersion{}, false + } + maj, err1 := strconv.Atoi(parts[0]) + min, err2 := strconv.Atoi(parts[1]) + pat, err3 := strconv.Atoi(parts[2]) + if err1 != nil || err2 != nil || err3 != nil { + return cmtVersion{}, false + } + return cmtVersion{major: maj, minor: min, patch: pat, rc: rc, raw: raw}, true +} + +// less reports whether a sorts before b by (major, minor, patch, rc). For the same X.Y.Z, a +// stable release (rc==0) sorts above its release candidates (e.g. 11.8.0-rc3 < 11.8.0). +func (a cmtVersion) less(b cmtVersion) bool { + if a.major != b.major { + return a.major < b.major + } + if a.minor != b.minor { + return a.minor < b.minor + } + if a.patch != b.patch { + return a.patch < b.patch + } + ar, br := a.rc, b.rc + if ar == 0 { + ar = int(^uint(0) >> 1) // treat stable as the highest "rc" for the same patch + } + if br == 0 { + br = int(^uint(0) >> 1) + } + return ar < br +} + +// cmtServerVersions returns the version set CMT runs against. An explicit, non-empty +// Config.CMTServerVersions is used verbatim (manual override / pin); otherwise the set is +// auto-derived from the Mattermost GitHub releases. +func (s *Server) cmtServerVersions() []string { + if len(s.Config.CMTServerVersions) > 0 { + return s.Config.CMTServerVersions + } + return s.resolveCMTServerVersions() +} + +// resolveCMTServerVersions auto-derives the CMT version set from the mattermost/mattermost +// GitHub releases: the active ESR line(s) + the latest 3 stable minor lines + the current +// release candidate, each as the latest patch of its line. ESR lines are detected from the +// release notes ("Extended Support Release"). It is called once per CMT trigger (rare: +// monthly schedule + occasional manual dispatch), so it fetches fresh with no caching. +// Falls back to defaultCMTServerVersions on error or if nothing parses. +func (s *Server) resolveCMTServerVersions() []string { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + client := newGithubClient(s.Config.GithubAccessToken) + // githubAPIBase is only set in tests to redirect to a mock server. + if s.githubAPIBase != "" { + if baseURL, parseErr := url.Parse(s.githubAPIBase); parseErr == nil { + client.BaseURL = baseURL + } + } + + var releases []struct { + TagName string `json:"tag_name"` + Draft bool `json:"draft"` + Prerelease bool `json:"prerelease"` + Body string `json:"body"` + } + req, err := client.NewRequest("GET", "/repos/mattermost/mattermost/releases?per_page=100", nil) + if err != nil { + s.Logger.WithError(err).Warn("[resolveCMTServerVersions] Failed to build request; using default CMT versions") + return defaultCMTServerVersions + } + if _, err = client.Do(ctx, req, &releases); err != nil { + s.Logger.WithError(err).Warn("[resolveCMTServerVersions] Failed to fetch releases; using default CMT versions") + return defaultCMTServerVersions + } + + type minorKey struct{ major, minor int } + latestStable := map[minorKey]cmtVersion{} + esrMinors := map[minorKey]bool{} + var bestRC cmtVersion + haveRC := false + + for _, r := range releases { + if r.Draft { + continue + } + v, ok := parseCMTVersion(r.TagName) + if !ok { + continue + } + key := minorKey{v.major, v.minor} + if v.rc > 0 { + if !haveRC || bestRC.less(v) { + bestRC = v + haveRC = true + } + continue + } + if cur, exists := latestStable[key]; !exists || cur.less(v) { + latestStable[key] = v + } + if strings.Contains(strings.ToLower(r.Body), "extended support release") { + esrMinors[key] = true + } + } + + if len(latestStable) == 0 { + s.Logger.Warn("[resolveCMTServerVersions] No stable releases parsed; using default CMT versions") + return defaultCMTServerVersions + } + + // All stable minor lines, sorted descending (newest first). + minors := make([]cmtVersion, 0, len(latestStable)) + for _, v := range latestStable { + minors = append(minors, v) + } + sort.Slice(minors, func(i, j int) bool { return minors[j].less(minors[i]) }) + + selected := map[minorKey]cmtVersion{} + for i := 0; i < len(minors) && i < 3; i++ { // latest 3 stable minor lines + selected[minorKey{minors[i].major, minors[i].minor}] = minors[i] + } + for k := range esrMinors { // active ESR line(s) + if v, ok := latestStable[k]; ok { + selected[k] = v + } + } + + chosen := make([]cmtVersion, 0, len(selected)+1) + for _, v := range selected { + chosen = append(chosen, v) + } + // Include the current RC only when it's newer than the newest stable (an upcoming release). + if haveRC && minors[0].less(bestRC) { + chosen = append(chosen, bestRC) + } + sort.Slice(chosen, func(i, j int) bool { return chosen[i].less(chosen[j]) }) // ascending + + const maxVersions = 10 + if len(chosen) > maxVersions { + chosen = chosen[len(chosen)-maxVersions:] // keep the newest if somehow over the cap + } + + versions := make([]string, 0, len(chosen)) + for _, v := range chosen { + versions = append(versions, v.raw) + } + s.Logger.WithField("versions", versions).Info("[resolveCMTServerVersions] Auto-derived CMT server version set") + return versions +} + // destroyE2EInstances destroys all given E2E instances func (s *Server) destroyE2EInstances(instances []*E2EInstance, logger logrus.FieldLogger) { for _, instance := range instances { diff --git a/server/workflow_run.go b/server/workflow_run.go index 0573350..e9502ea 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -80,9 +80,10 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay // CMT: a lightweight, scheduled trigger workflow (Config.CMTTriggerWorkflowName, e.g. // "CMT Provisioner") fires in the desktop/mobile repo. matterwick provisions one instance - // per version in Config.CMTVersions() and dispatches compatibility-matrix-testing.yml. - // No inputs are read from the event — the version set is hardcoded in matterwick config — - // so this works despite GitHub's workflow_run payload not carrying workflow_dispatch inputs. + // per version in the resolved CMT set (s.cmtServerVersions) and dispatches + // compatibility-matrix-testing.yml. No inputs are read from the event — the version set is + // resolved by matterwick — so this works despite GitHub's workflow_run payload not carrying + // workflow_dispatch inputs. // Cleanup happens when the test workflow ("Compatibility Matrix Testing") completes, handled // by the isE2ETestWorkflow branch below. if s.Config.CMTTriggerWorkflowName != "" && workflowName == s.Config.CMTTriggerWorkflowName { @@ -291,9 +292,9 @@ func parseServerVersionsFromString(input string) []string { } // handleCMTTrigger is invoked when the scheduled CMT trigger workflow fires. It resolves the -// instance type from the repo, reads the hardcoded server-version set from config, and hands -// off to handleCMTWithServerVersions. The version list lives in matterwick (Config.CMTVersions), -// so nothing needs to be read from the workflow_run event. +// instance type from the repo, resolves the server-version set (auto-derived from Mattermost +// releases, or the manual Config.CMTServerVersions override), and hands off to +// handleCMTWithServerVersions. Nothing needs to be read from the workflow_run event. func (s *Server) handleCMTTrigger(owner, repoName, branch, sha string, runID int64, logger logrus.FieldLogger) { instanceType := "desktop" if strings.Contains(repoName, "mobile") { @@ -303,11 +304,11 @@ func (s *Server) handleCMTTrigger(owner, repoName, branch, sha string, runID int return } - versions := s.Config.CMTVersions() + versions := s.cmtServerVersions() logger.WithFields(logrus.Fields{ "instanceType": instanceType, "versions": versions, - }).Info("Provisioning CMT instances for configured server versions") + }).Info("Provisioning CMT instances for resolved server versions") s.handleCMTWithServerVersions(owner, repoName, instanceType, branch, sha, versions, runID, logger) } From 51fb2398f68d34f1792b1c0c3f601c4c8281518c Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 5 Jun 2026 00:44:09 +0530 Subject: [PATCH 09/19] Fix non-PR E2E cleanup: key on dispatch-time branch HEAD SHA Non-PR flows (CMT, nightly/scheduled, push/release) provision instances keyed by the trigger SHA, then dispatch the test workflow with ref=branch (workflow_dispatch cannot take a commit SHA). GitHub records the dispatched run's head_sha as the branch HEAD at dispatch time, which can differ from the trigger SHA when the branch advances during the (often long) provisioning window. Completion cleanup matches instances by the run's head_sha, so a moved branch made the primary SHA-match miss its instances. Resolve the branch HEAD via the GitHub commits API right before dispatch and key the tracking entry on that SHA so it aligns with the run's head_sha. Falls back to the trigger SHA on error; the periodic age-based scan remains the backstop either way. Co-Authored-By: Claude Opus 4.8 --- server/e2e_dryrun_test.go | 51 +++++++++++++++++++++++++++++++++++++++ server/e2e_tests.go | 36 +++++++++++++++++++++++++++ server/push_events.go | 14 ++++++++++- server/workflow_run.go | 31 ++++++++++++++++++------ 4 files changed, 123 insertions(+), 9 deletions(-) diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index 5019e2b..1bab1a5 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -1505,3 +1505,54 @@ func TestDryRun_ResolveCMTServerVersions(t *testing.T) { assert.True(t, v.less(v2) == false) }) } + +// TestResolveBranchHeadSHA verifies the dispatch-time HEAD resolution used to key non-PR +// cleanup. Non-PR flows dispatch the test workflow with ref=branch, so the run's head_sha is +// the branch HEAD at dispatch time. We key cleanup on that resolved SHA (not the trigger SHA) +// so findAndDestroyInstancesBySHA matches when the run completes. +func TestResolveBranchHeadSHA(t *testing.T) { + t.Run("returns the branch HEAD sha from the commits API", func(t *testing.T) { + var gotPath string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"sha":"abc123def456"}`)) + })) + t.Cleanup(srv.Close) + + s := newDryRunServer(t, "", "mattermost") + s.githubAPIBase = srv.URL + "/" + + sha, err := s.resolveBranchHeadSHA("mattermost", "desktop", "release-12.0") + assert.NoError(t, err) + assert.Equal(t, "abc123def456", sha) + assert.Equal(t, "/repos/mattermost/desktop/commits/release-12.0", gotPath) + }) + + t.Run("errors on non-2xx so caller can fall back to trigger sha", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + t.Cleanup(srv.Close) + + s := newDryRunServer(t, "", "mattermost") + s.githubAPIBase = srv.URL + "/" + + _, err := s.resolveBranchHeadSHA("mattermost", "desktop", "no-such-branch") + assert.Error(t, err) + }) + + t.Run("errors on empty sha so caller can fall back to trigger sha", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"sha":""}`)) + })) + t.Cleanup(srv.Close) + + s := newDryRunServer(t, "", "mattermost") + s.githubAPIBase = srv.URL + "/" + + _, err := s.resolveBranchHeadSHA("mattermost", "desktop", "main") + assert.Error(t, err) + }) +} diff --git a/server/e2e_tests.go b/server/e2e_tests.go index 3c5c68b..4b6bc81 100644 --- a/server/e2e_tests.go +++ b/server/e2e_tests.go @@ -849,6 +849,42 @@ func (s *Server) resolveE2EServerVersion() string { return "master" } +// resolveBranchHeadSHA returns the current HEAD commit SHA of branch via the GitHub API. +// +// Non-PR E2E flows (push, nightly, CMT) provision instances and then dispatch the test +// workflow with `ref: ` (workflow_dispatch cannot take a commit SHA). GitHub records +// the dispatched run's head_sha as the branch HEAD at dispatch time, which can differ from the +// SHA that triggered provisioning if the branch advanced during the (often long) provisioning +// window. Cleanup keys instances by SHA and matches on the completed run's head_sha, so we key +// on the branch HEAD resolved at dispatch time to keep them aligned. Callers fall back to the +// trigger SHA on error (the periodic age-based scan remains the backstop either way). +func (s *Server) resolveBranchHeadSHA(owner, repoName, branch string) (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + client := newGithubClient(s.Config.GithubAccessToken) + if s.githubAPIBase != "" { + if baseURL, parseErr := url.Parse(s.githubAPIBase); parseErr == nil { + client.BaseURL = baseURL + } + } + + var commit struct { + SHA string `json:"sha"` + } + req, err := client.NewRequest("GET", fmt.Sprintf("/repos/%s/%s/commits/%s", owner, repoName, branch), nil) + if err != nil { + return "", err + } + if _, err := client.Do(ctx, req, &commit); err != nil { + return "", err + } + if commit.SHA == "" { + return "", fmt.Errorf("empty SHA for %s/%s@%s", owner, repoName, branch) + } + return commit.SHA, nil +} + // cmtVersion is a parsed Mattermost release version: major.minor.patch with an optional // release-candidate number. raw is the bare-semver string passed to the cloud provisioner // (e.g. "11.7.1" or "11.8.0-rc3"). diff --git a/server/push_events.go b/server/push_events.go index bfd49d8..34023b2 100644 --- a/server/push_events.go +++ b/server/push_events.go @@ -133,9 +133,21 @@ func (s *Server) handlePushEventE2E(event *github.PushEvent, branch string) { logger.WithField("instanceCount", len(instances)).Info("E2E instances created successfully") + // Key on the branch HEAD resolved now (just before dispatch), not the push SHA: the + // dispatched (ref=branch) run reports its head_sha as the branch HEAD at dispatch time, + // which can differ from the push SHA if the branch advanced during provisioning. The key + // still ends with "-{sha}" so findAndDestroyInstancesBySHA matches it by suffix on + // completion. Fall back to the push SHA on error (the periodic scan remains the backstop). + cleanupSHA := sha + if resolved, resErr := s.resolveBranchHeadSHA(s.Config.Org, repoName, branch); resErr == nil && resolved != "" { + cleanupSHA = resolved + } else if resErr != nil { + logger.WithError(resErr).Warn("Failed to resolve branch HEAD SHA; keying cleanup on push SHA (periodic scan remains the backstop)") + } + // Store instances before dispatching so a fast-completing workflow_run event // doesn't race ahead and find nothing to clean up. - key := fmt.Sprintf("%s-push-%s-%s", repoName, branch, sha) + key := fmt.Sprintf("%s-push-%s-%s", repoName, branch, cleanupSHA) s.e2eInstancesLock.Lock() s.e2eInstances[key] = instances s.e2eInstancesLock.Unlock() diff --git a/server/workflow_run.go b/server/workflow_run.go index e9502ea..f1ad391 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -157,10 +157,17 @@ func (s *Server) handleNightlyE2ETrigger(owner, repoName, branch, sha, triggerEv return } - // Include runID so two trigger runs against the same SHA (e.g. manual re-trigger) - // get separate tracking keys. The key still ends with "-{sha}" so - // findAndDestroyInstancesBySHA continues to match it by suffix. - key := fmt.Sprintf("%s-scheduled-%d-%s", repoName, runID, sha) + // Key on the branch HEAD resolved now (just before dispatch), not the trigger SHA: the + // dispatched (ref=branch) run reports its head_sha as the branch HEAD at dispatch time, + // which can differ from the trigger SHA after provisioning. runID keeps the key unique; + // it still ends with "-{sha}" so findAndDestroyInstancesBySHA matches it by suffix. + cleanupSHA := sha + if resolved, resErr := s.resolveBranchHeadSHA(owner, repoName, branch); resErr == nil && resolved != "" { + cleanupSHA = resolved + } else if resErr != nil { + logger.WithError(resErr).Warn("Failed to resolve branch HEAD SHA; keying cleanup on trigger SHA (periodic scan remains the backstop)") + } + key := fmt.Sprintf("%s-scheduled-%d-%s", repoName, runID, cleanupSHA) s.e2eInstancesLock.Lock() s.e2eInstances[key] = instances s.e2eInstancesLock.Unlock() @@ -363,10 +370,18 @@ func (s *Server) handleCMTWithServerVersions(repoOwner, repoName, instanceType, logger.WithField("totalInstances", len(allInstances)).Info("CMT instances created, tracking for cleanup") - // Track by runID+sha: runID prevents collision when two dispatches share the same - // branch HEAD SHA; the key still ends with "-{sha}" so findAndDestroyInstancesBySHA - // can locate it when compatibility-matrix-testing.yml completes (hours later). - key := fmt.Sprintf("%s-cmt-%d-%s", repoName, runID, sha) + // Key on the branch HEAD resolved now (just before dispatch), not the trigger SHA: the + // dispatched (ref=branch) run reports its head_sha as the branch HEAD at dispatch time, + // which can differ from the trigger SHA after the long provisioning window. runID keeps + // the key unique; it still ends with "-{sha}" so findAndDestroyInstancesBySHA matches it + // when compatibility-matrix-testing.yml completes. + cleanupSHA := sha + if resolved, resErr := s.resolveBranchHeadSHA(repoOwner, repoName, branch); resErr == nil && resolved != "" { + cleanupSHA = resolved + } else if resErr != nil { + logger.WithError(resErr).Warn("Failed to resolve branch HEAD SHA; keying cleanup on trigger SHA (periodic scan remains the backstop)") + } + key := fmt.Sprintf("%s-cmt-%d-%s", repoName, runID, cleanupSHA) s.e2eInstancesLock.Lock() s.e2eInstances[key] = allInstances s.e2eInstancesLock.Unlock() From 26da5ace200c3e9a1dfbede12ab4d14158b29050 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 5 Jun 2026 00:55:42 +0530 Subject: [PATCH 10/19] Auto-expire PR E2E instances and evict their tracking entries PR E2E instances had no completion-based teardown and were explicitly skipped by the periodic scan, so a long-open PR could keep its servers alive indefinitely (only PR-close or the E2E/Reset-Servers label tore them down). Extend the periodic scan to also reap PR instances past a longer, separate max-age (E2EPRInstanceMaxAge, default 24h vs 3h for non-PR), and evict the reaped PR's in-memory tracking entry. Eviction is what makes a re-applied E2E/Run provision a fresh set: with the stale entry gone, the reuse path in handleE2ETestRequest finds no live instances and creates new servers instead of dispatching against deleted ones. Renames cleanupStaleNonPRE2EInstances -> cleanupStaleE2EInstances since it now handles both kinds. Co-Authored-By: Claude Opus 4.8 --- config/config-matterwick.default.json | 1 + server/config.go | 8 +++ server/e2e_dryrun_test.go | 58 ++++++++++++++++ server/e2e_tests.go | 97 ++++++++++++++++++++++----- server/server.go | 15 +++-- server/workflow_run.go | 2 +- 6 files changed, 158 insertions(+), 23 deletions(-) diff --git a/config/config-matterwick.default.json b/config/config-matterwick.default.json index 19effdb..b07e265 100644 --- a/config/config-matterwick.default.json +++ b/config/config-matterwick.default.json @@ -84,6 +84,7 @@ "E2ENightlyTriggerWorkflowName": "E2E Nightly Trigger", "E2ETestWorkflowNames": ["Electron Playwright Tests", "E2E", "Compatibility Matrix Testing"], "E2EInstanceMaxAge": 6, + "E2EPRInstanceMaxAge": 24, "CMTTriggerWorkflowName": "CMT Provisioner", "CMTServerVersions": ["10.11.18", "11.7.1"] } diff --git a/server/config.go b/server/config.go index 20bd491..7bb7b0f 100644 --- a/server/config.go +++ b/server/config.go @@ -124,6 +124,14 @@ type MatterwickConfig struct { // Default (0): 3 hours. E2EInstanceMaxAge int + // E2EPRInstanceMaxAge is the maximum age (in hours) a PR E2E instance may reach before + // the periodic cleanup scan deletes it. PR instances are intentionally kept alive between + // label toggles and across commits so the same servers can be reused for re-runs, so this + // is much longer than E2EInstanceMaxAge. When such an instance is reaped its in-memory + // tracking entry is also evicted, so re-applying E2E/Run provisions a fresh set. + // Default (0): 24 hours. + E2EPRInstanceMaxAge int + // CMTTriggerWorkflowName is the workflow name (the "name:" field) of the lightweight, // scheduled CMT trigger workflow in the desktop/mobile repos. When matterwick receives // a workflow_run "requested" event for this workflow it provisions one instance per diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index 1bab1a5..0559b40 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -26,6 +26,7 @@ import ( "strings" "sync" "testing" + "time" gogithub "github.com/google/go-github/v32/github" "github.com/mattermost/matterwick/model" @@ -1556,3 +1557,60 @@ func TestResolveBranchHeadSHA(t *testing.T) { assert.Error(t, err) }) } + +// TestE2EPRInstanceMaxAge verifies the PR max-age knob: configured value wins, else 24h default. +func TestE2EPRInstanceMaxAge(t *testing.T) { + s := newDryRunServer(t, "", "mattermost") + + s.Config.E2EPRInstanceMaxAge = 0 + assert.Equal(t, 24*time.Hour, s.e2ePRInstanceMaxAge(), "0 should fall back to 24h default") + + s.Config.E2EPRInstanceMaxAge = 48 + assert.Equal(t, 48*time.Hour, s.e2ePRInstanceMaxAge(), "configured value should win") +} + +// TestEvictReapedPRInstances verifies that when the periodic scan reaps a PR's servers, the +// in-memory tracking entry is removed so the next E2E/Run provisions a fresh set rather than +// reusing now-deleted servers. Non-PR (SHA-keyed) entries must be left untouched. +func TestEvictReapedPRInstances(t *testing.T) { + t.Run("evicts the PR key when any of its instances was reaped", func(t *testing.T) { + s := newDryRunServer(t, "", "mattermost") + s.e2eInstances["desktop-pr-42"] = []*E2EInstance{ + {InstallationID: "inst-a", Platform: "linux"}, + {InstallationID: "inst-b", Platform: "macos"}, + {InstallationID: "inst-c", Platform: "windows"}, + } + + // Only one member reaped, but the whole set ages out together, so the key goes. + s.evictReapedPRInstances([]string{"inst-b"}, s.Logger) + + _, ok := s.e2eInstances["desktop-pr-42"] + assert.False(t, ok, "PR key must be evicted so re-applying E2E/Run creates a fresh set") + }) + + t.Run("leaves unrelated PR keys and non-PR (SHA-keyed) entries intact", func(t *testing.T) { + s := newDryRunServer(t, "", "mattermost") + s.e2eInstances["desktop-pr-42"] = []*E2EInstance{{InstallationID: "inst-a"}} + s.e2eInstances["mattermost-mobile-pr-9"] = []*E2EInstance{{InstallationID: "inst-x"}} + s.e2eInstances["desktop-cmt-555-deadbeef"] = []*E2EInstance{{InstallationID: "inst-cmt"}} + + s.evictReapedPRInstances([]string{"inst-a"}, s.Logger) + + _, gone := s.e2eInstances["desktop-pr-42"] + assert.False(t, gone, "the matching PR key is evicted") + _, otherPR := s.e2eInstances["mattermost-mobile-pr-9"] + assert.True(t, otherPR, "an unrelated PR key must remain") + _, cmt := s.e2eInstances["desktop-cmt-555-deadbeef"] + assert.True(t, cmt, "a non-PR (SHA-keyed) entry must remain") + }) + + t.Run("no reaped IDs is a no-op", func(t *testing.T) { + s := newDryRunServer(t, "", "mattermost") + s.e2eInstances["desktop-pr-42"] = []*E2EInstance{{InstallationID: "inst-a"}} + + s.evictReapedPRInstances(nil, s.Logger) + + _, ok := s.e2eInstances["desktop-pr-42"] + assert.True(t, ok, "nothing reaped means nothing evicted") + }) +} diff --git a/server/e2e_tests.go b/server/e2e_tests.go index 4b6bc81..defff0e 100644 --- a/server/e2e_tests.go +++ b/server/e2e_tests.go @@ -703,14 +703,38 @@ func (s *Server) e2eInstanceMaxAge() time.Duration { return 3 * time.Hour } -// PR instances (identified by "-pr-" in their OwnerID) are always skipped — handleE2ECleanup -// on PR close manages their lifecycle via cloud-API orphan scan. -func (s *Server) cleanupStaleNonPRE2EInstances() { - maxAge := s.e2eInstanceMaxAge() +// e2ePRInstanceMaxAge returns the maximum age a PR E2E instance may reach before the periodic +// scan deletes it. PR instances are reused across label toggles and commits, so this is much +// longer than e2eInstanceMaxAge. Falls back to 24 hours when the config value is 0 (unset). +func (s *Server) e2ePRInstanceMaxAge() time.Duration { + if s.Config.E2EPRInstanceMaxAge > 0 { + return time.Duration(s.Config.E2EPRInstanceMaxAge) * time.Hour + } + return 24 * time.Hour +} + +// cleanupStaleE2EInstances destroys aged-out E2E instances of both kinds: +// - non-PR instances (CMT/nightly/push) past e2eInstanceMaxAge — backstop for the primary +// SHA-matched completion cleanup. +// - PR instances past e2ePRInstanceMaxAge — PR instances have no completion-based teardown +// (they are deliberately kept alive for reuse), so this age cap stops long-open PRs from +// accumulating servers indefinitely. When a PR instance is reaped, its in-memory tracking +// entry is also evicted so re-applying E2E/Run provisions a fresh set instead of dispatching +// against a now-deleted server. +func (s *Server) cleanupStaleE2EInstances() { + nonPRMaxAge := s.e2eInstanceMaxAge() + prMaxAge := s.e2ePRInstanceMaxAge() logger := s.Logger.WithField("type", "periodic_e2e_cleanup") - logger.WithField("max_age_hours", maxAge.Hours()).Info("Scanning for stale non-PR E2E instances") + logger.WithFields(logrus.Fields{ + "non_pr_max_age_hours": nonPRMaxAge.Hours(), + "pr_max_age_hours": prMaxAge.Hours(), + }).Info("Scanning for stale E2E instances") + + now := time.Now() + nonPRCutoffMs := now.Add(-nonPRMaxAge).UnixMilli() + prCutoffMs := now.Add(-prMaxAge).UnixMilli() - cutoffMs := time.Now().Add(-maxAge).UnixMilli() + var reapedPRInstallationIDs []string for _, instanceType := range []string{"desktop", "mobile"} { pattern := instanceType + "-%" @@ -731,18 +755,22 @@ func (s *Server) cleanupStaleNonPRE2EInstances() { continue } - // PR instances have "-pr-" in their OwnerID (e.g. "mobile-pr-123-site-1-..."). - // Skip them — handleE2ECleanup on PR close manages their lifecycle. - if strings.Contains(inst.OwnerID, "-pr-") { - continue + // PR instances have "-pr-" in their OwnerID (e.g. "mobile-pr-123-site-1-...") + // and use the longer PR max-age; everything else uses the non-PR max-age. + isPR := strings.Contains(inst.OwnerID, "-pr-") + cutoffMs := nonPRCutoffMs + if isPR { + cutoffMs = prCutoffMs } - // Skip instances younger than maxAge — a test may still be using them. + // Skip instances younger than their max age — a test may still be using them, + // or (for PRs) the servers are being kept alive for reuse. if inst.CreateAt > cutoffMs { logger.WithFields(logrus.Fields{ "installation_id": inst.ID, "owner_id": inst.OwnerID, - }).Debug("Skipping non-PR instance younger than max age (may still be in use)") + "is_pr": isPR, + }).Debug("Skipping instance younger than its max age") continue } @@ -760,15 +788,54 @@ func (s *Server) cleanupStaleNonPRE2EInstances() { "installation_id": inst.ID, "owner_id": inst.OwnerID, "state": inst.State, + "is_pr": isPR, }) - instLogger.Warn("Destroying stale non-PR E2E instance") + instLogger.Warn("Destroying stale E2E instance") if err := s.CloudClient.DeleteInstallation(inst.ID); err != nil { - instLogger.WithError(err).Error("Failed to destroy stale non-PR E2E instance") + instLogger.WithError(err).Error("Failed to destroy stale E2E instance") + continue + } + if isPR { + reapedPRInstallationIDs = append(reapedPRInstallationIDs, inst.ID) } } } - logger.Info("Non-PR E2E instance cleanup scan complete") + // Evict in-memory PR tracking entries whose servers were just reaped so the reuse path + // in handleE2ETestRequest sees no live instances and creates a fresh set on the next + // E2E/Run, instead of dispatching a workflow against deleted servers. + if len(reapedPRInstallationIDs) > 0 { + s.evictReapedPRInstances(reapedPRInstallationIDs, logger) + } + + logger.Info("E2E instance cleanup scan complete") +} + +// evictReapedPRInstances removes PR tracking entries from the in-memory map when any of their +// servers were deleted by the periodic age scan. A PR's instances share a creation time, so the +// whole set ages out together; removing the entry when any member is reaped ensures the reuse +// path never hands back a partially-deleted set. +func (s *Server) evictReapedPRInstances(reapedInstallationIDs []string, logger logrus.FieldLogger) { + reaped := make(map[string]bool, len(reapedInstallationIDs)) + for _, id := range reapedInstallationIDs { + reaped[id] = true + } + + s.e2eInstancesLock.Lock() + defer s.e2eInstancesLock.Unlock() + for key, instances := range s.e2eInstances { + // PR tracking keys are "{repo}-pr-{number}"; non-PR keys are keyed by SHA elsewhere. + if !strings.Contains(key, "-pr-") { + continue + } + for _, inst := range instances { + if inst != nil && reaped[inst.InstallationID] { + delete(s.e2eInstances, key) + logger.WithField("key", key).Info("Evicted expired PR E2E instances from tracking map") + break + } + } + } } // resolveE2EServerVersion returns the server version for E2E provisioning. diff --git a/server/server.go b/server/server.go index f1e5cdf..6312b81 100644 --- a/server/server.go +++ b/server/server.go @@ -134,12 +134,13 @@ func New(config *MatterwickConfig) *Server { func (s *Server) Start() { s.Logger.Info("Starting MatterWick Server") - // Destroy stale non-PR E2E instances left from a previous run immediately on startup, - // then continue scanning periodically so a mid-run restart doesn't leave orphaned - // instances alive until the *next* matterwick restart. - // The scan interval is half the configured max-age so the worst-case orphan lifetime - // is maxAge + interval ≈ 1.5× maxAge. - s.cleanupStaleNonPRE2EInstances() + // Destroy stale E2E instances (non-PR backstop + aged-out PR instances) left from a + // previous run immediately on startup, then continue scanning periodically so a mid-run + // restart doesn't leave orphaned instances alive until the *next* matterwick restart. + // The scan interval is half the (shorter) non-PR max-age so the worst-case non-PR orphan + // lifetime is maxAge + interval ≈ 1.5× maxAge; PR instances use a longer max-age but the + // same frequent scan, which is harmless. + s.cleanupStaleE2EInstances() go func() { interval := s.e2eInstanceMaxAge() / 2 if interval < 30*time.Minute { @@ -150,7 +151,7 @@ func (s *Server) Start() { for { select { case <-ticker.C: - s.cleanupStaleNonPRE2EInstances() + s.cleanupStaleE2EInstances() case <-s.stopCh: return } diff --git a/server/workflow_run.go b/server/workflow_run.go index f1ad391..b215f53 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -117,7 +117,7 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay // // If the branch advances during provisioning, the dispatched run's head_sha can differ // from the tracked SHA and no key will match here; such orphans are reaped by the - // periodic cleanupStaleNonPRE2EInstances scan (bounded by E2EInstanceMaxAge). + // periodic cleanupStaleE2EInstances scan (bounded by E2EInstanceMaxAge). if payload.Action == "completed" && s.isE2ETestWorkflow(workflowName) { cmtOnly := workflowName == cmtTestWorkflowName logger.WithField("cmt_only", cmtOnly).Info("Test workflow completed, cleaning up matching instances by SHA") From 59825674e66132d5930d916e90678eabb96c54a4 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 5 Jun 2026 19:50:49 +0530 Subject: [PATCH 11/19] Gate CMT trigger to release branches or manual dispatch CMT is now driven by release-branch events instead of a monthly schedule: desktop fires on push to release-v* (the cut + subsequent pushes), mobile fires only on the release-v* branch cut via `on: create`. Because `create` delivers a workflow_run webhook for every branch/tag creation (even when the trigger workflow's job is skipped), Matterwick must gate independently. Add shouldTriggerCMT: provision CMT only when the run was a manual workflow_dispatch (any branch) or ran on a release branch (head_branch matches isReleaseBranch). This filters mobile's create-event noise and non-release pushes while keeping ad-hoc manual runs working. Co-Authored-By: Claude Opus 4.8 --- server/e2e_dryrun_test.go | 22 ++++++++++++++++++++++ server/workflow_run.go | 29 +++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index 0559b40..628e07f 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -1614,3 +1614,25 @@ func TestEvictReapedPRInstances(t *testing.T) { assert.True(t, ok, "nothing reaped means nothing evicted") }) } + +// TestShouldTriggerCMT verifies CMT gating: manual dispatch (any branch) or a release branch +// run proceeds; non-release create/push events are skipped. E2EReleasePatternPrefix is +// "release-" in newDryRunServer, so "release-v*" branches match. +func TestShouldTriggerCMT(t *testing.T) { + s := newDryRunServer(t, "", "mattermost") + + // Manual dispatch always runs, regardless of branch (e.g. ad-hoc CMT on main). + assert.True(t, s.shouldTriggerCMT("workflow_dispatch", "main")) + assert.True(t, s.shouldTriggerCMT("workflow_dispatch", "release-v11.9")) + + // Desktop release push / mobile release-v* branch cut: head_branch is a release branch. + assert.True(t, s.shouldTriggerCMT("push", "release-v11.9")) + assert.True(t, s.shouldTriggerCMT("create", "release-v12.0")) + + // Mobile `create` noise: non-release branch/tag creations must be skipped. + assert.False(t, s.shouldTriggerCMT("create", "feature/cool-thing")) + assert.False(t, s.shouldTriggerCMT("create", "v11.9.0")) // tag-like, not a release branch + assert.False(t, s.shouldTriggerCMT("push", "main")) + // A stray scheduled run on the default branch should no longer trigger CMT. + assert.False(t, s.shouldTriggerCMT("schedule", "main")) +} diff --git a/server/workflow_run.go b/server/workflow_run.go index b215f53..bf18465 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -88,8 +88,24 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay // by the isE2ETestWorkflow branch below. if s.Config.CMTTriggerWorkflowName != "" && workflowName == s.Config.CMTTriggerWorkflowName { if payload.Action == "requested" { - logger.Info("CMT trigger workflow started, provisioning E2E servers for configured versions") - go s.handleCMTTrigger(owner, repoName, headBranch, headSHA, runID, logger) + // Gate CMT to the cases we actually want. The trigger workflow fires on events we + // must not always act on: desktop uses `on: push` filtered to release-v*, while + // mobile uses `on: create`, which fires for EVERY branch and tag creation (the + // workflow_run webhook reaches us even when the workflow's job `if` skips it). Run + // CMT only when: + // - the run was started manually (workflow_dispatch), against any branch, or + // - it ran on a release branch (desktop release-v* push, or mobile release-v* + // branch cut) — matched via isReleaseBranch on the run's head_branch. + triggerEvent := payload.WorkflowRun.Event + if s.shouldTriggerCMT(triggerEvent, headBranch) { + logger.WithField("trigger_event", triggerEvent).Info("CMT trigger workflow started, provisioning E2E servers for configured versions") + go s.handleCMTTrigger(owner, repoName, headBranch, headSHA, runID, logger) + } else { + logger.WithFields(logrus.Fields{ + "trigger_event": triggerEvent, + "head_branch": headBranch, + }).Info("CMT trigger fired on a non-release branch and not via manual dispatch; skipping") + } } return } @@ -298,6 +314,15 @@ func parseServerVersionsFromString(input string) []string { return versions } +// shouldTriggerCMT decides whether a CMT-trigger workflow_run should actually provision CMT. +// CMT runs only when the run was started manually (workflow_dispatch, any branch) or it ran on +// a release branch (desktop release-v* push, mobile release-v* branch cut). This filters out +// mobile's `on: create` firings for non-release branch/tag creations, which still deliver a +// workflow_run webhook even when the trigger workflow's job is skipped. +func (s *Server) shouldTriggerCMT(triggerEvent, headBranch string) bool { + return triggerEvent == "workflow_dispatch" || s.isReleaseBranch(headBranch) +} + // handleCMTTrigger is invoked when the scheduled CMT trigger workflow fires. It resolves the // instance type from the repo, resolves the server-version set (auto-derived from Mattermost // releases, or the manual Config.CMTServerVersions override), and hands off to From 6c10c352a90ba7091359f9daf88e8fe129f0f248 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 5 Jun 2026 21:02:58 +0530 Subject: [PATCH 12/19] CMT: stop sending unused cmt_run_id input on dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compatibility-matrix-testing.yml cleanup step that consumed cmt_run_id was removed (Matterwick now reaps servers by commit SHA on workflow_run completion), leaving the input dead. Drop it from the dispatch payload and remove the now-unused sha/runID params from dispatchCMTWorkflow. This pairs with removing the cmt_run_id input declaration from the desktop/mobile CMT workflows. Deploy ordering: this Matterwick image must be live before/with those workflow changes — once the input is undeclared, sending it would 422 (the mw_tracking_key class). Prod matterwick (v0.4.12) does not dispatch these workflows, so the coordinated rollout is safe. Co-Authored-By: Claude Opus 4.8 --- server/workflow_run.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/server/workflow_run.go b/server/workflow_run.go index bf18465..a69b993 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -428,7 +428,7 @@ func (s *Server) handleCMTWithServerVersions(repoOwner, repoName, instanceType, return } - if err := s.dispatchCMTWorkflow(repoOwner, repoName, sha, branch, cmtMatrixJSON, instanceType, runID, logger); err != nil { + if err := s.dispatchCMTWorkflow(repoOwner, repoName, branch, cmtMatrixJSON, instanceType, logger); err != nil { logger.WithError(err).Error("Failed to dispatch compatibility-matrix-testing.yml") s.e2eInstancesLock.Lock() delete(s.e2eInstances, key) @@ -541,19 +541,18 @@ func buildMobileCMTMatrixJSON(versions []string, instances []*E2EInstance) (stri } // dispatchCMTWorkflow dispatches compatibility-matrix-testing.yml with the populated -// CMT_MATRIX JSON. runID is the CMT trigger workflow run ID, passed as cmt_run_id purely -// for traceability/logging on the test workflow side. +// CMT_MATRIX JSON, targeting the branch ref (workflow_dispatch requires a branch/tag, not a SHA). // -// We intentionally do NOT pass a "mw_tracking_key" input: compatibility-matrix-testing.yml -// does not declare it, and GitHub rejects a workflow_dispatch carrying an undeclared input -// with a 422. Cleanup is driven by SHA-suffix matching when the test workflow completes. -func (s *Server) dispatchCMTWorkflow(repoOwner, repoName, sha, branch, cmtMatrixJSON, instanceType string, runID int64, logger logrus.FieldLogger) error { +// We pass only the inputs the workflow declares (CMT_MATRIX + the per-platform version input). +// Passing an undeclared input (the old "mw_tracking_key" or "cmt_run_id") makes GitHub reject the +// dispatch with a 422. Cleanup is driven by SHA-suffix matching when the test workflow completes, +// so no run-id needs to be threaded through the workflow. +func (s *Server) dispatchCMTWorkflow(repoOwner, repoName, branch, cmtMatrixJSON, instanceType string, logger logrus.FieldLogger) error { ctx := context.Background() client := newGithubClient(s.Config.GithubAccessToken) workflowInputs := map[string]interface{}{ "CMT_MATRIX": cmtMatrixJSON, - "cmt_run_id": fmt.Sprintf("%d", runID), } if instanceType == "desktop" { workflowInputs["DESKTOP_VERSION"] = branch From 0db7844a3e25264f8d76cf43745e9b14bf396bfd Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Wed, 10 Jun 2026 14:47:34 +0530 Subject: [PATCH 13/19] CMT: trigger on RC tag cuts (vX.Y.Z-rc.N) in addition to manual dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace branch-based CMT triggering with RC-tag triggering. The desktop & mobile RC cut (e.g. v6.2.0-rc.1) is the moment the release team actually wants compatibility-matrix validation — earlier than a release branch push, scoped to the artifact that will ship. For tag pushes GitHub sets workflow_run.head_branch to the tag name (no refs/tags/ prefix), so the gate matches the tag string directly: - isRCTag: ^v?\d+\.\d+\.\d+-rc[.\-]?\d+$ matches v6.2.0-rc.1, v2.41.0-rc.10, 6.2.0-rc1; rejects GA tags (v6.2.0), beta/alpha (v1.0.22-beta), and nightly (6.3.0-nightly.x). - shouldTriggerCMT also keeps release-branch acceptance as defense-in-depth and continues to allow manual workflow_dispatch on any ref. Unit tests lock in the matching/rejection set so future tag conventions can't silently slip through. Co-Authored-By: Claude Opus 4.7 --- server/e2e_dryrun_test.go | 55 ++++++++++++++++++++++++++++++--------- server/workflow_run.go | 49 +++++++++++++++++++++++----------- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index 628e07f..e9668a6 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -1615,24 +1615,53 @@ func TestEvictReapedPRInstances(t *testing.T) { }) } -// TestShouldTriggerCMT verifies CMT gating: manual dispatch (any branch) or a release branch -// run proceeds; non-release create/push events are skipped. E2EReleasePatternPrefix is -// "release-" in newDryRunServer, so "release-v*" branches match. +// TestShouldTriggerCMT verifies CMT gating across all sources: manual dispatch (any ref), RC +// tag cut (the new primary trigger), and release branch (defense-in-depth). Anything else — +// feature branches, GA tags, nightly tags, beta tags, default branch — must be rejected so +// that mobile's `on: push tags` glob slips and stray runs don't burn the multi-version matrix. func TestShouldTriggerCMT(t *testing.T) { s := newDryRunServer(t, "", "mattermost") - // Manual dispatch always runs, regardless of branch (e.g. ad-hoc CMT on main). + // Manual dispatch always runs, regardless of ref. assert.True(t, s.shouldTriggerCMT("workflow_dispatch", "main")) - assert.True(t, s.shouldTriggerCMT("workflow_dispatch", "release-v11.9")) - - // Desktop release push / mobile release-v* branch cut: head_branch is a release branch. - assert.True(t, s.shouldTriggerCMT("push", "release-v11.9")) - assert.True(t, s.shouldTriggerCMT("create", "release-v12.0")) - - // Mobile `create` noise: non-release branch/tag creations must be skipped. + assert.True(t, s.shouldTriggerCMT("workflow_dispatch", "v6.2.0-rc.1")) + assert.True(t, s.shouldTriggerCMT("workflow_dispatch", "release-6.2")) + + // RC tag cut (primary trigger). For tag pushes head_branch is the tag name. + assert.True(t, s.shouldTriggerCMT("push", "v6.2.0-rc.1")) // desktop convention + assert.True(t, s.shouldTriggerCMT("push", "v2.41.0-rc.1")) // future mobile + assert.True(t, s.shouldTriggerCMT("push", "v6.2.0-rc.10")) // multi-digit rc + assert.True(t, s.shouldTriggerCMT("push", "6.2.0-rc.1")) // missing 'v' prefix is permitted + assert.True(t, s.shouldTriggerCMT("push", "v6.2.0-rc1")) // no separator before number + + // Release branch (defense-in-depth — kept for backwards compat / manual triggers). + assert.True(t, s.shouldTriggerCMT("push", "release-6.2")) + + // Must NOT trigger: GA tags, betas, nightly tags, feature branches, default branch. + assert.False(t, s.shouldTriggerCMT("push", "v6.2.0")) // GA tag — no -rc + assert.False(t, s.shouldTriggerCMT("push", "v1.0.22-beta")) // pre-release but not RC + assert.False(t, s.shouldTriggerCMT("push", "6.3.0-nightly.20260601")) // nightly tag + assert.False(t, s.shouldTriggerCMT("push", "v6.2.0-rcabc")) // -rc but no number assert.False(t, s.shouldTriggerCMT("create", "feature/cool-thing")) - assert.False(t, s.shouldTriggerCMT("create", "v11.9.0")) // tag-like, not a release branch assert.False(t, s.shouldTriggerCMT("push", "main")) - // A stray scheduled run on the default branch should no longer trigger CMT. assert.False(t, s.shouldTriggerCMT("schedule", "main")) } + +// TestIsRCTag covers the RC-tag regex in isolation so the boundary cases stay locked in. +func TestIsRCTag(t *testing.T) { + for _, ref := range []string{"v6.2.0-rc.1", "v6.2.0-rc.10", "v2.41.0-rc.2", "6.2.0-rc.1", "v6.2.0-rc1", "v6.2.0-rc-1"} { + assert.True(t, isRCTag(ref), "expected RC tag: %q", ref) + } + for _, ref := range []string{ + "v6.2.0", // GA + "v6.2.0-rc", // missing number + "v6.2.0-rcabc", // letters after -rc + "v1.0.22-beta", // not RC + "6.3.0-nightly.20260601", // nightly + "release-6.2", // branch + "main", + "", + } { + assert.False(t, isRCTag(ref), "must not match: %q", ref) + } +} diff --git a/server/workflow_run.go b/server/workflow_run.go index a69b993..22fc158 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -8,6 +8,7 @@ import ( "encoding/json" "fmt" "io" + "regexp" "strings" "sync" @@ -88,23 +89,24 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay // by the isE2ETestWorkflow branch below. if s.Config.CMTTriggerWorkflowName != "" && workflowName == s.Config.CMTTriggerWorkflowName { if payload.Action == "requested" { - // Gate CMT to the cases we actually want. The trigger workflow fires on events we - // must not always act on: desktop uses `on: push` filtered to release-v*, while - // mobile uses `on: create`, which fires for EVERY branch and tag creation (the - // workflow_run webhook reaches us even when the workflow's job `if` skips it). Run - // CMT only when: - // - the run was started manually (workflow_dispatch), against any branch, or - // - it ran on a release branch (desktop release-v* push, or mobile release-v* - // branch cut) — matched via isReleaseBranch on the run's head_branch. + // Gate CMT to the cases we actually want. The trigger workflow fires on RC tag pushes + // (e.g. `v6.2.0-rc.1`) and on manual dispatch. For tag pushes the workflow_run + // payload's head_branch carries the *tag name*, not a branch. Provision CMT when: + // - the run was started manually (workflow_dispatch, any ref), OR + // - head_branch is an RC tag (isRCTag), OR + // - head_branch is a release branch (defense-in-depth for legacy/manual triggers). triggerEvent := payload.WorkflowRun.Event if s.shouldTriggerCMT(triggerEvent, headBranch) { - logger.WithField("trigger_event", triggerEvent).Info("CMT trigger workflow started, provisioning E2E servers for configured versions") + logger.WithFields(logrus.Fields{ + "trigger_event": triggerEvent, + "head_branch": headBranch, + }).Info("CMT trigger workflow started, provisioning E2E servers for configured versions") go s.handleCMTTrigger(owner, repoName, headBranch, headSHA, runID, logger) } else { logger.WithFields(logrus.Fields{ "trigger_event": triggerEvent, "head_branch": headBranch, - }).Info("CMT trigger fired on a non-release branch and not via manual dispatch; skipping") + }).Info("CMT trigger fired on non-RC-tag, non-release ref and not via manual dispatch; skipping") } } return @@ -315,12 +317,29 @@ func parseServerVersionsFromString(input string) []string { } // shouldTriggerCMT decides whether a CMT-trigger workflow_run should actually provision CMT. -// CMT runs only when the run was started manually (workflow_dispatch, any branch) or it ran on -// a release branch (desktop release-v* push, mobile release-v* branch cut). This filters out -// mobile's `on: create` firings for non-release branch/tag creations, which still deliver a -// workflow_run webhook even when the trigger workflow's job is skipped. +// CMT runs only when: +// - the run was started manually (workflow_dispatch, any ref), OR +// - head_branch is an RC tag (e.g. v6.2.0-rc.1, the new primary trigger), OR +// - head_branch is a release branch (defense-in-depth — legacy & for any manual ref). +// +// For tag-push events GitHub sets workflow_run.head_branch to the tag name (no refs/tags/ +// prefix), so an RC tag like "v6.2.0-rc.1" arrives in headBranch. func (s *Server) shouldTriggerCMT(triggerEvent, headBranch string) bool { - return triggerEvent == "workflow_dispatch" || s.isReleaseBranch(headBranch) + return triggerEvent == "workflow_dispatch" || + isRCTag(headBranch) || + s.isReleaseBranch(headBranch) +} + +// rcTagPattern matches RC tags like "v6.2.0-rc.1", "v2.41.0-rc.10", "6.2.0-rc.1". The leading +// "v" is optional, and the rc suffix can be "-rc.N", "-rcN", or "-rc-N" (we keep it permissive +// but require the literal "-rc" and a trailing number). Compiled once at package init. +var rcTagPattern = regexp.MustCompile(`^v?\d+\.\d+\.\d+-rc[.\-]?\d+$`) + +// isRCTag reports whether ref looks like a release-candidate tag we want to trigger CMT on. +// Excludes GA tags (v6.2.0), beta/alpha pre-releases (v1.0.22-beta), and nightly tags +// (6.3.0-nightly.20260601). +func isRCTag(ref string) bool { + return rcTagPattern.MatchString(ref) } // handleCMTTrigger is invoked when the scheduled CMT trigger workflow fires. It resolves the From 0ab5fd103016fb78d98d587fda7f59349c930e35 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 12 Jun 2026 02:19:58 +0530 Subject: [PATCH 14/19] CMT: also trigger on mobile build-release-NNN branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mobile doesn't cut RC git tags; its analog is the `build-release-NNN` branch (3+ digit build number, e.g. build-release-786 / build-release-1100) created by the "Mattermost Mobile Release" workflow when a release build is dispatched to TestFlight / Play Store. Treat that as the RC moment for mobile and run CMT against it. - New isBuildReleaseBranch helper with regex `^build-release-\d{3,}$` (3+ digits — rejects test artifacts like build-release-1 without locking the upper bound, so a future move to 5-digit numbers needs no code change). Platform-specific variants build-release-ios-NNN, -sim-NNN, -android-NNN are intentionally excluded (don't start with a digit after the prefix). - shouldTriggerCMT extended to accept this in addition to manual dispatch / RC tag / release branch. - TestIsBuildReleaseBranch + extended TestShouldTriggerCMT cover the boundary cases: real 3/4/5-digit build numbers (true), 1- and 2-digit test artifacts (false), platform variants (false), no overlap with existing gates. The paired mobile workflow change adds the `build-release-[0-9][0-9][0-9]*` branch pattern to cmt-provisioner.yml's push trigger. Co-Authored-By: Claude Opus 4.7 --- server/e2e_dryrun_test.go | 36 ++++++++++++++++++++++++++++++++++++ server/workflow_run.go | 22 +++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index e9668a6..8998d2a 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -1645,6 +1645,13 @@ func TestShouldTriggerCMT(t *testing.T) { assert.False(t, s.shouldTriggerCMT("create", "feature/cool-thing")) assert.False(t, s.shouldTriggerCMT("push", "main")) assert.False(t, s.shouldTriggerCMT("schedule", "main")) + + // Mobile build-release-NNN branch push (mobile's RC-cut equivalent). + assert.True(t, s.shouldTriggerCMT("push", "build-release-786")) // 3-digit + assert.True(t, s.shouldTriggerCMT("push", "build-release-1100")) // 4-digit + assert.True(t, s.shouldTriggerCMT("push", "build-release-12345")) // future 5-digit + assert.False(t, s.shouldTriggerCMT("push", "build-release-1")) // < 3 digits, likely a test + assert.False(t, s.shouldTriggerCMT("push", "build-release-ios-707")) // platform variant } // TestIsRCTag covers the RC-tag regex in isolation so the boundary cases stay locked in. @@ -1665,3 +1672,32 @@ func TestIsRCTag(t *testing.T) { assert.False(t, isRCTag(ref), "must not match: %q", ref) } } + +// TestIsBuildReleaseBranch locks in the boundaries for mobile's build-release-NNN gate: +// 3+ digits required (rejects test artifacts like build-release-1); platform-specific +// variants (build-release-ios-NNN etc.) are rejected; no overlap with the RC-tag or +// release-* gates. +func TestIsBuildReleaseBranch(t *testing.T) { + for _, ref := range []string{ + "build-release-786", // real 3-digit + "build-release-1100", // real 4-digit + "build-release-12345", // future 5-digit + } { + assert.True(t, isBuildReleaseBranch(ref), "expected build-release branch: %q", ref) + } + for _, ref := range []string{ + "build-release-1", // 1 digit — test artifact / typo + "build-release-99", // 2 digit — below convention + "build-release-ios-707", // platform-specific + "build-release-sim-707", // simulator-only + "build-release-android-1100", // android-specific + "build-release-786-rc1", // stray suffix + "build-release-", // no number + "build-release-abc", // non-numeric + "release-2.41", // handled by isReleaseBranch + "v2.41.0-rc.1", // handled by isRCTag + "", + } { + assert.False(t, isBuildReleaseBranch(ref), "must not match: %q", ref) + } +} diff --git a/server/workflow_run.go b/server/workflow_run.go index 22fc158..1459f28 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -319,7 +319,8 @@ func parseServerVersionsFromString(input string) []string { // shouldTriggerCMT decides whether a CMT-trigger workflow_run should actually provision CMT. // CMT runs only when: // - the run was started manually (workflow_dispatch, any ref), OR -// - head_branch is an RC tag (e.g. v6.2.0-rc.1, the new primary trigger), OR +// - head_branch is an RC tag (desktop convention: v6.2.0-rc.1), OR +// - head_branch is a mobile release-build branch (build-release-NNN, 3+ digits), OR // - head_branch is a release branch (defense-in-depth — legacy & for any manual ref). // // For tag-push events GitHub sets workflow_run.head_branch to the tag name (no refs/tags/ @@ -327,6 +328,7 @@ func parseServerVersionsFromString(input string) []string { func (s *Server) shouldTriggerCMT(triggerEvent, headBranch string) bool { return triggerEvent == "workflow_dispatch" || isRCTag(headBranch) || + isBuildReleaseBranch(headBranch) || s.isReleaseBranch(headBranch) } @@ -342,6 +344,24 @@ func isRCTag(ref string) bool { return rcTagPattern.MatchString(ref) } +// buildReleaseBranchPattern matches mobile's release-build branches: "build-release-" followed +// by 3 or more digits (current convention is 3- or 4-digit build numbers like 786 / 1100). +// 3+ digits is intentional — rejects accidental test branches like "build-release-1" without +// locking an upper bound, so a future move to 5-digit build numbers needs no code change. +// Platform-specific variants (build-release-ios-NNN, build-release-sim-NNN, +// build-release-android-NNN) don't start with a digit after the "build-release-" prefix and +// are excluded. +var buildReleaseBranchPattern = regexp.MustCompile(`^build-release-\d{3,}$`) + +// isBuildReleaseBranch reports whether ref matches mobile's build-release-NNN convention. +// The branch is created by the "Mattermost Mobile Release" external workflow when an RC +// build is dispatched to TestFlight / Play Store, so it's mobile's equivalent of an RC tag +// cut. Used as a separate gate from isReleaseBranch because release-* fires on every +// cherry-pick / version-bump push, which would be far too noisy for CMT. +func isBuildReleaseBranch(ref string) bool { + return buildReleaseBranchPattern.MatchString(ref) +} + // handleCMTTrigger is invoked when the scheduled CMT trigger workflow fires. It resolves the // instance type from the repo, resolves the server-version set (auto-derived from Mattermost // releases, or the manual Config.CMTServerVersions override), and hands off to From 9d50896117387a6271796f82b9cba11d72034ef9 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 12 Jun 2026 03:00:16 +0530 Subject: [PATCH 15/19] push: skip desktop release-branch E2E (replaced by CMT RC-tag trigger) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The release team treats RCs as their go/no-go test gate and re-cuts on failures. Desktop CMT already runs the whole E2E suite across the multi-version matrix at each RC tag cut, and that matrix includes the latest server — so the per-push release E2E was redundant coverage for desktop. Skip it for desktop release-branch pushes. Mobile is unchanged: mobile CMT runs only smoke tests, so the whole-suite release E2E on `release-*` push remains the per-commit signal there. Other auto-triggers are unaffected: - master/main push trigger: still fires - nightly trigger: still fires - PR label trigger: still fires - mobile release-branch push: still fires Co-Authored-By: Claude Opus 4.7 --- server/push_events.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/server/push_events.go b/server/push_events.go index 34023b2..ce4a450 100644 --- a/server/push_events.go +++ b/server/push_events.go @@ -40,6 +40,15 @@ func (s *Server) handlePushEvent(event *github.PushEvent) { logger.Info("Push event received") if s.Config.E2EAutoTriggerOnRelease && s.isReleaseBranch(branch) { + // Desktop release-branch E2E is replaced by CMT (RC-tag trigger): the release team + // treats RCs as the go/no-go gate, so the per-push release E2E is redundant for + // desktop. Skip it here to avoid double-testing the same commit. Mobile still uses + // release-branch push (its CMT runs smoke-only, so the whole-suite release E2E + // remains the per-push signal there). + if strings.Contains(repoName, "desktop") { + logger.Info("Desktop release-branch push: skipping release E2E (handled by CMT)") + return + } logger.WithField("type", "release_branch").Info("Release branch detected, triggering E2E tests") go s.handlePushEventE2E(event, branch) return From 2d3335c8ef1b400ca78fe59a55dcdaec2e54ca68 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 12 Jun 2026 04:50:21 +0530 Subject: [PATCH 16/19] push: remove release-branch E2E trigger (release-X.Y push) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every push to a mobile release-2.X branch was firing a full PR-style E2E suite — 3 cloud servers + 10 macOS shards + 10 ubuntu shards + iOS/Android builds, ~60-90 min wall-clock, ~$27/push in GitHub Actions alone. Recent cadence on real release branches: release-2.41: 191 commits (~$5,160) release-2.40: 158 commits (~$4,270) release-2.39: 114 commits (~$3,080) release-2.38: 90 commits (~$2,430) release-2.37: 52 commits (~$1,400) ~$16K across these 5 branches alone, with ~15% of runs triggered by pure noise (translation updates, "Bump app build number", docs/CI tweaks) that can't meaningfully fail differently than the previous run. The release team treats RC builds as the test gate (re-cut on failure), which makes per-cherry-pick E2E during stabilization redundant. Same logic that already removed the desktop release-branch trigger; now applied to mobile too. After this change, auto E2E coverage on release-side: - master/main push -> full PR-style E2E (unchanged, single per-commit signal) - build-release-NNN push -> CMT (smoke x N server versions, mobile only) - RC tag cut -> CMT (desktop only; mobile doesn't tag) - PR label E2E/Run -> full PR-style E2E (unchanged) Full-suite-on-build-release-NNN is NOT wired today (separate decision). E2EAutoTriggerOnRelease and E2EReleasePatternPrefix config fields remain in place — still used by isReleaseBranch() as defense-in-depth in the CMT workflow_run gate (shouldTriggerCMT). Leaving them avoids an unrelated gitops/config diff in this change. Co-Authored-By: Claude Opus 4.7 --- server/push_events.go | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/server/push_events.go b/server/push_events.go index ce4a450..12d9e34 100644 --- a/server/push_events.go +++ b/server/push_events.go @@ -39,20 +39,17 @@ func (s *Server) handlePushEvent(event *github.PushEvent) { }) logger.Info("Push event received") - if s.Config.E2EAutoTriggerOnRelease && s.isReleaseBranch(branch) { - // Desktop release-branch E2E is replaced by CMT (RC-tag trigger): the release team - // treats RCs as the go/no-go gate, so the per-push release E2E is redundant for - // desktop. Skip it here to avoid double-testing the same commit. Mobile still uses - // release-branch push (its CMT runs smoke-only, so the whole-suite release E2E - // remains the per-push signal there). - if strings.Contains(repoName, "desktop") { - logger.Info("Desktop release-branch push: skipping release E2E (handled by CMT)") - return - } - logger.WithField("type", "release_branch").Info("Release branch detected, triggering E2E tests") - go s.handlePushEventE2E(event, branch) - return - } + // NOTE: release-branch push (release-X.Y) used to fire full PR-style E2E for both + // desktop and mobile. Removed because: + // - Desktop: CMT on RC tag cut is the go/no-go gate; per-cherry-pick E2E was redundant. + // - Mobile: every cherry-pick to release-2.X fired a ~$27 / ~60-90 min full-suite run + // (~190 commits/branch × ~$27 = ~$5K per release branch in GitHub Actions alone), + // and the release team treats RC builds as the test gate. CMT (smoke × N versions) + // fires at the build-release-NNN cut; full-suite-on-build-release-NNN can be wired + // separately if desired (it isn't today). + // `E2EAutoTriggerOnRelease` and `E2EReleasePatternPrefix` remain in config but are now + // only read by isReleaseBranch (used as defense-in-depth in shouldTriggerCMT for the + // CMT workflow_run gate). if s.Config.E2EAutoTriggerOnMaster && (branch == "master" || branch == "main") { logger.WithField("type", "master_main").Info("Master/main branch detected, triggering E2E tests") @@ -61,7 +58,6 @@ func (s *Server) handlePushEvent(event *github.PushEvent) { } logger.WithFields(logrus.Fields{ - "auto_release": s.Config.E2EAutoTriggerOnRelease, "auto_master": s.Config.E2EAutoTriggerOnMaster, "release_pattern_prefix": s.Config.E2EReleasePatternPrefix, "is_release_branch": s.isReleaseBranch(branch), From afeb5cff0558ea587f1af4ab3fb699987adcb204 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 12 Jun 2026 04:56:20 +0530 Subject: [PATCH 17/19] push: skip mobile main-branch E2E (per-PR-merge cost too high) Every push to mobile main was firing a full PR-style E2E suite (3 cloud servers, 10 macOS shards + 10 ubuntu shards, ~$27/push, ~60-90 min). Main sees ~5-20 merges/day during active development -> tens of thousands of $/year for post-merge regression signal that PR-label E2E already covers pre-merge. Skip the master/main gate for mobile; desktop master push still fires. Coverage that remains for mobile main: - PR-label E2E pre-merge (developer-driven, single PR's branch tested) - Manual workflow_dispatch - CMT (smoke x N versions) on build-release-NNN cut Full-suite E2E on build-release-NNN remains ungated. If desired, two ways to wire it: 1. Separate matterwick push-handler gate for isBuildReleaseBranch 2. Promote latest version inside CMT to whole-suite (covers RC fully without a parallel matterwick path) Co-Authored-By: Claude Opus 4.7 --- server/push_events.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/server/push_events.go b/server/push_events.go index 12d9e34..8ebd595 100644 --- a/server/push_events.go +++ b/server/push_events.go @@ -52,6 +52,15 @@ func (s *Server) handlePushEvent(event *github.PushEvent) { // CMT workflow_run gate). if s.Config.E2EAutoTriggerOnMaster && (branch == "master" || branch == "main") { + // Mobile `main` push E2E is skipped: every PR merge to main = one full PR-style E2E + // run (~$27 / ~60-90 min). At mobile's main-merge cadence that's tens of thousands of + // dollars per year for a post-merge regression signal already covered by PR-label E2E + // pre-merge. Coverage that remains for mobile main: PR-label E2E pre-merge, CMT smoke + // on build-release-NNN cut, manual workflow_dispatch. Desktop master push still fires. + if strings.Contains(repoName, "mobile") { + logger.Info("Mobile main push: skipping E2E (per-commit cost too high; PR-label covers pre-merge, CMT covers RC builds)") + return + } logger.WithField("type", "master_main").Info("Master/main branch detected, triggering E2E tests") go s.handlePushEventE2E(event, branch) return From bdb69708ab29d7128a7805613f6355a322006726 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 12 Jun 2026 10:32:33 +0530 Subject: [PATCH 18/19] cmt: latest-marker for mobile CMT + remove dead nightly/release-trigger code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mobile CMT_MATRIX now marks the highest-semver server entry as `latest: true`. The mobile workflow uses that flag (via expressions in `with:`, which has matrix context unlike job-level `if:`) to run the whole detox suite against latest and smoke-only against older versions — closing the full-suite-on-RC gap without a separate matterwick path. Matterwick stays repo-agnostic: it only signals "which is latest". The test-directory / parallelism choices that flow from that live in the mobile workflow, where the repo's structure is known. Dead-code cleanup now that the surrounding triggers are gone: - handleNightlyE2ETrigger (~95 lines) + its workflow_run gate. Both repos removed e2e-nightly-trigger.yml in prior commits, so no workflow with name "E2E Nightly Trigger" can fire matterwick on workflow_run.requested any longer. - E2ENightlyTriggerWorkflowName config field — no readers left. - E2EAutoTriggerOnRelease config field — the release-branch push gate that read it was removed in the prior commit; no readers left. - runType MASTER/RELEASE branching in trigger*ForPushEvent — only master/main pushes reach those functions now (release-branch and mobile-main paths are skipped earlier), so RELEASE is unreachable. Hardcode "MASTER". - Stale log fields (auto_release, release_pattern_prefix, is_release_branch) in the push-event fallthrough log. What stays in use: - isReleaseBranch + E2EReleasePatternPrefix — defense-in-depth in shouldTriggerCMT (the CMT workflow_run gate also accepts release-* refs from manual dispatches). - cmtOnly cleanup scoping — cheap insurance against hypothetical same-SHA collisions between flows. Net: -158 lines (-95 nightly handler, -36 config refs, -27 misc), +new tests for the latest-marker boundary cases (5-element ESR+minors+RC, stable vs same-X.Y.Z RC, multi-digit rc.N, single version, all unparseable fallback). Co-Authored-By: Claude Opus 4.7 --- config/config-matterwick.default.json | 2 - server/config.go | 8 +- server/e2e_dryrun_test.go | 93 +++++++++++++++ server/push_events.go | 53 ++++----- server/workflow_run.go | 165 ++++++++------------------ 5 files changed, 163 insertions(+), 158 deletions(-) diff --git a/config/config-matterwick.default.json b/config/config-matterwick.default.json index b07e265..322c2f7 100644 --- a/config/config-matterwick.default.json +++ b/config/config-matterwick.default.json @@ -78,10 +78,8 @@ "E2EUsername": "admin", "E2EPassword": "", "E2EServerVersion": "latest", - "E2EAutoTriggerOnRelease": true, "E2EAutoTriggerOnMaster": true, "E2EReleasePatternPrefix": "release-", - "E2ENightlyTriggerWorkflowName": "E2E Nightly Trigger", "E2ETestWorkflowNames": ["Electron Playwright Tests", "E2E", "Compatibility Matrix Testing"], "E2EInstanceMaxAge": 6, "E2EPRInstanceMaxAge": 24, diff --git a/server/config.go b/server/config.go index 7bb7b0f..0767228 100644 --- a/server/config.go +++ b/server/config.go @@ -112,11 +112,9 @@ type MatterwickConfig struct { E2EUsername string E2EPassword string E2EServerVersion string - E2EAutoTriggerOnRelease bool - E2EAutoTriggerOnMaster bool - E2EReleasePatternPrefix string - E2ENightlyTriggerWorkflowName string // workflow name (name: field) of the nightly trigger workflow - E2ETestWorkflowNames []string // workflow names of the actual test workflows (for completion-based cleanup) + E2EAutoTriggerOnMaster bool + E2EReleasePatternPrefix string + E2ETestWorkflowNames []string // workflow names of the actual test workflows (for completion-based cleanup) // E2EInstanceMaxAge is the minimum age (in hours) a non-PR E2E instance must reach // before the periodic orphan-cleanup scan will delete it. This prevents the scan // from destroying instances that are still being used by a currently-running test. diff --git a/server/e2e_dryrun_test.go b/server/e2e_dryrun_test.go index 8998d2a..37768ab 100644 --- a/server/e2e_dryrun_test.go +++ b/server/e2e_dryrun_test.go @@ -572,9 +572,15 @@ func TestDryRun_DesktopCMT(t *testing.T) { s0 := servers[0].(map[string]interface{}) assert.Equal(t, "v11.1.0", s0["version"]) assert.Equal(t, "https://v1.example.com", s0["url"]) + // Desktop ignores the `latest` field; cmtServer.Latest is shared with mobile but is + // never set on the desktop path, and `omitempty` keeps it out of the JSON entirely. + _, has0 := s0["latest"] + assert.False(t, has0, "desktop matrix must not carry the `latest` field") s1 := servers[1].(map[string]interface{}) assert.Equal(t, "v11.2.0", s1["version"]) assert.Equal(t, "https://v2.example.com", s1["url"]) + _, has1 := s1["latest"] + assert.False(t, has1, "desktop matrix must not carry the `latest` field") }) t.Run("CMT dispatches compatibility-matrix-testing.yml once", func(t *testing.T) { @@ -631,9 +637,16 @@ func TestDryRun_MobileCMT(t *testing.T) { s0 := servers[0].(map[string]interface{}) assert.Equal(t, "v11.1.0", s0["version"]) assert.Equal(t, "https://v1.example.com", s0["url"]) + // Older version: `latest` is omitted entirely (cmtServer.Latest is false, omitempty). + _, has0 := s0["latest"] + assert.False(t, has0, "older mobile entries must not carry the `latest` field") s1 := servers[1].(map[string]interface{}) assert.Equal(t, "v11.2.0", s1["version"]) assert.Equal(t, "https://v2.example.com", s1["url"]) + // Highest semver gets `latest: true`. The mobile workflow uses this to decide whether + // to run the whole suite (latest) or just smoke (older) — that policy lives there, + // not in matterwick. + assert.Equal(t, true, s1["latest"]) }) t.Run("mobile CMT dispatches once not once per version", func(t *testing.T) { @@ -670,6 +683,86 @@ func TestDryRun_MobileCMT(t *testing.T) { } assert.Equal(t, len(versions), len(instances), "one instance per version") }) + + t.Run("mobile CMT marks the highest-semver entry as latest", func(t *testing.T) { + // 5-element resolved set (today's typical shape): ESR + 3 minors + current RC. + // Across the boundary cases that matter: ESR is older despite high patch numbers; + // RC vs stable for the same X.Y.Z should treat stable as higher; multi-digit RC + // numbers (rc.10 > rc.2). Locking these in so the workflow's latest gate doesn't + // silently shift if someone tweaks the comparator. + cases := []struct { + name string + versions []string + wantLatestIdx int + }{ + { + name: "ESR + 3 minors + RC: RC's base is newest so RC is latest", + versions: []string{"10.11.19", "11.5.7", "11.6.4", "11.7.2", "11.8.0-rc3"}, + wantLatestIdx: 4, + }, + { + name: "ESR with high patch loses to lower-patch newer minor", + versions: []string{"10.11.19", "11.0.0"}, + wantLatestIdx: 1, + }, + { + name: "stable beats same-X.Y.Z RC", + versions: []string{"11.7.0-rc3", "11.7.0"}, + wantLatestIdx: 1, + }, + { + name: "rc.10 > rc.2 (no string compare)", + versions: []string{"11.8.0-rc2", "11.8.0-rc10"}, + wantLatestIdx: 1, + }, + { + name: "single version is latest", + versions: []string{"11.7.2"}, + wantLatestIdx: 0, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + instances := make([]*E2EInstance, len(tc.versions)) + for i := range tc.versions { + instances[i] = &E2EInstance{URL: fmt.Sprintf("https://v%d.example.com", i)} + } + jsonStr, err := buildMobileCMTMatrixJSON(tc.versions, instances) + require.NoError(t, err) + var matrix map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(jsonStr), &matrix)) + servers := matrix["server"].([]interface{}) + require.Len(t, servers, len(tc.versions)) + for i, raw := range servers { + s := raw.(map[string]interface{}) + _, has := s["latest"] + if i == tc.wantLatestIdx { + assert.Equal(t, true, s["latest"], "index %d (%q) should be latest", i, tc.versions[i]) + } else { + assert.False(t, has, "index %d (%q) must not carry latest", i, tc.versions[i]) + } + } + }) + } + }) + + t.Run("mobile CMT: all unparseable versions => no latest marker", func(t *testing.T) { + // Defensive: if the resolved set somehow contains no parseable versions, leave the + // matrix unmarked. The workflow falls back to its default (smoke for all). + versions := []string{"junk", "also-junk"} + instances := []*E2EInstance{ + {URL: "https://a.example.com"}, + {URL: "https://b.example.com"}, + } + jsonStr, err := buildMobileCMTMatrixJSON(versions, instances) + require.NoError(t, err) + var matrix map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(jsonStr), &matrix)) + for _, raw := range matrix["server"].([]interface{}) { + _, has := raw.(map[string]interface{})["latest"] + assert.False(t, has, "no entry should be latest when all versions are unparseable") + } + }) } // ------------------------------------------------------------ diff --git a/server/push_events.go b/server/push_events.go index 8ebd595..f475d88 100644 --- a/server/push_events.go +++ b/server/push_events.go @@ -39,24 +39,18 @@ func (s *Server) handlePushEvent(event *github.PushEvent) { }) logger.Info("Push event received") - // NOTE: release-branch push (release-X.Y) used to fire full PR-style E2E for both - // desktop and mobile. Removed because: - // - Desktop: CMT on RC tag cut is the go/no-go gate; per-cherry-pick E2E was redundant. - // - Mobile: every cherry-pick to release-2.X fired a ~$27 / ~60-90 min full-suite run - // (~190 commits/branch × ~$27 = ~$5K per release branch in GitHub Actions alone), - // and the release team treats RC builds as the test gate. CMT (smoke × N versions) - // fires at the build-release-NNN cut; full-suite-on-build-release-NNN can be wired - // separately if desired (it isn't today). - // `E2EAutoTriggerOnRelease` and `E2EReleasePatternPrefix` remain in config but are now - // only read by isReleaseBranch (used as defense-in-depth in shouldTriggerCMT for the - // CMT workflow_run gate). + // Push-event auto-trigger reduced to desktop `master` only. + // - release-X.Y push: previously fired full PR-style E2E for both repos. Removed — + // desktop is gated by CMT-on-RC-tag (release team's go/no-go); mobile's per-cherry- + // pick E2E during stabilization ran into tens of thousands of $ per release cycle + // for a signal already covered by PR-label E2E pre-merge and CMT smoke at the RC + // build moment. + // - mobile `main` push: same cost calculus, every PR merge to main was one ~$27 run. + // Skipped here; PR-label E2E covers pre-merge. + // - desktop `master` push: still fires (lower merge cadence than mobile main; the + // per-commit regression signal earns its keep here). if s.Config.E2EAutoTriggerOnMaster && (branch == "master" || branch == "main") { - // Mobile `main` push E2E is skipped: every PR merge to main = one full PR-style E2E - // run (~$27 / ~60-90 min). At mobile's main-merge cadence that's tens of thousands of - // dollars per year for a post-merge regression signal already covered by PR-label E2E - // pre-merge. Coverage that remains for mobile main: PR-label E2E pre-merge, CMT smoke - // on build-release-NNN cut, manual workflow_dispatch. Desktop master push still fires. if strings.Contains(repoName, "mobile") { logger.Info("Mobile main push: skipping E2E (per-commit cost too high; PR-label covers pre-merge, CMT covers RC builds)") return @@ -66,11 +60,8 @@ func (s *Server) handlePushEvent(event *github.PushEvent) { return } - logger.WithFields(logrus.Fields{ - "auto_master": s.Config.E2EAutoTriggerOnMaster, - "release_pattern_prefix": s.Config.E2EReleasePatternPrefix, - "is_release_branch": s.isReleaseBranch(branch), - }).Info("Push event does not match E2E trigger conditions") + logger.WithField("auto_master", s.Config.E2EAutoTriggerOnMaster). + Info("Push event does not match E2E trigger conditions") } // isReleaseBranch returns true if branch matches E2EReleasePatternPrefix. @@ -312,12 +303,9 @@ func (s *Server) triggerDesktopE2EWorkflowForPushEvent(repoOwner, repoName, bran logger.WithField("instanceDetails", instanceDetailsJSON).Debug("Triggering desktop E2E workflow") - runType := "MASTER" - if s.isReleaseBranch(branch) { - runType = "RELEASE" - } - - return s.dispatchDesktopE2EWorkflow(repoOwner, repoName, branch, sha, instanceDetailsJSON, runType, false) + // handlePushEvent only routes master/main pushes here (release-branch push trigger was + // removed), so runType is always MASTER for desktop push events. + return s.dispatchDesktopE2EWorkflow(repoOwner, repoName, branch, sha, instanceDetailsJSON, "MASTER", false) } // triggerMobileE2EWorkflowForPushEvent dispatches the mobile E2E workflow (e2e-detox-pr.yml). @@ -338,15 +326,14 @@ func (s *Server) triggerMobileE2EWorkflowForPushEvent(repoOwner, repoName, branc "site_3_url": instances[2].URL, }).Debug("Triggering mobile E2E workflow for push event") - runType := "MASTER" - if s.isReleaseBranch(branch) { - runType = "RELEASE" - } - + // handlePushEvent only routes master/main pushes here (release-branch push trigger was + // removed and mobile main push is explicitly skipped, so in practice mobile never + // reaches this code today; keeping the function intact in case mobile main push is + // re-enabled). runType is always MASTER for the events that would route here. return s.dispatchMobileE2EWorkflow( repoOwner, repoName, branch, sha, instances[0].URL, instances[1].URL, instances[2].URL, "both", // push events always test both iOS and Android - runType, + "MASTER", ) } diff --git a/server/workflow_run.go b/server/workflow_run.go index 1459f28..c335bd7 100644 --- a/server/workflow_run.go +++ b/server/workflow_run.go @@ -112,26 +112,16 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay return } - // Nightly: lightweight trigger workflow fires first; matterwick provisions instances and dispatches the real test workflow. - if s.Config.E2ENightlyTriggerWorkflowName != "" && workflowName == s.Config.E2ENightlyTriggerWorkflowName { - if payload.Action == "requested" { - logger.Info("Nightly trigger workflow started, provisioning E2E servers") - go s.handleNightlyE2ETrigger(owner, repoName, headBranch, headSHA, payload.WorkflowRun.Event, runID, logger) - } - return - } - // --- Test workflow completion: clean up provisioned instances --- // // Cleanup is correlated on head_sha. GitHub's workflow_run webhook payload does not // include workflow_dispatch inputs, and its run id is the *test* run's id (not the // trigger run id embedded in our tracking key), so head_sha is the only field we can - // match on (push, scheduled, and CMT tracking keys all end with "-{sha}"). + // match on (push and CMT tracking keys all end with "-{sha}"). // - // Two flows can legitimately share a head SHA — e.g. the monthly CMT trigger and the - // nightly trigger both fire on the same master commit. They run different test - // workflows, so we scope cleanup to the completing workflow's flow (CMT vs. non-CMT) to - // avoid tearing down the other run's still-active servers. + // `cmtOnly` scopes cleanup to the completing workflow's flow (CMT vs. non-CMT). It's + // kept as defense-in-depth so a future case where two flows share a SHA (e.g. a manual + // CMT dispatch coinciding with a master push) can't tear down each other's servers. // // If the branch advances during provisioning, the dispatched run's head_sha can differ // from the tracked SHA and no key will match here; such orphans are reaped by the @@ -143,105 +133,8 @@ func (s *Server) handleWorkflowRunEventWithInputs(payload *WorkflowRunWebhookPay return } - logger.WithFields(logrus.Fields{ - "configured_nightly_name": s.Config.E2ENightlyTriggerWorkflowName, - "configured_test_workflows": s.Config.E2ETestWorkflowNames, - }).Info("Ignoring workflow_run event (not relevant to E2E lifecycle)") -} - -// handleNightlyE2ETrigger provisions instances and dispatches the test workflow. -// Called when the E2E trigger workflow starts, whether from schedule, push to master/main, -// or push to a release branch. The triggerEvent parameter ("schedule", "push", etc.) is -// used to set runType correctly — scheduled runs always get "NIGHTLY" regardless of branch. -func (s *Server) handleNightlyE2ETrigger(owner, repoName, branch, sha, triggerEvent string, runID int64, logger logrus.FieldLogger) { - logger = logger.WithFields(logrus.Fields{ - "branch": branch, - "sha": sha, - "run_id": runID, - }) - logger.Info("Provisioning nightly E2E instances") - - instanceType := "desktop" - if strings.Contains(repoName, "mobile") { - instanceType = "mobile" - } else if !strings.Contains(repoName, "desktop") { - logger.Warn("Repository is neither desktop nor mobile, skipping nightly E2E trigger") - return - } - - instances, err := s.createCMTInstancesForVersion(repoName, instanceType, s.resolveE2EServerVersion(), "nightly") - if err != nil { - logger.WithError(err).Error("Failed to create nightly E2E instances") - return - } - - // Key on the branch HEAD resolved now (just before dispatch), not the trigger SHA: the - // dispatched (ref=branch) run reports its head_sha as the branch HEAD at dispatch time, - // which can differ from the trigger SHA after provisioning. runID keeps the key unique; - // it still ends with "-{sha}" so findAndDestroyInstancesBySHA matches it by suffix. - cleanupSHA := sha - if resolved, resErr := s.resolveBranchHeadSHA(owner, repoName, branch); resErr == nil && resolved != "" { - cleanupSHA = resolved - } else if resErr != nil { - logger.WithError(resErr).Warn("Failed to resolve branch HEAD SHA; keying cleanup on trigger SHA (periodic scan remains the backstop)") - } - key := fmt.Sprintf("%s-scheduled-%d-%s", repoName, runID, cleanupSHA) - s.e2eInstancesLock.Lock() - s.e2eInstances[key] = instances - s.e2eInstancesLock.Unlock() - - logger.WithField("tracking_key", key).Info("Nightly instances tracked, dispatching test workflow") - - // Determine run classification. Scheduled runs are always NIGHTLY regardless of branch - // (a scheduled run on master must not be classified as MASTER). Push-triggered runs - // derive their type from the branch name. - runType := "NIGHTLY" - nightly := true - if triggerEvent != "schedule" { - if branch == "master" || branch == "main" { - runType = "MASTER" - nightly = false - } else if s.isReleaseBranch(branch) { - runType = "RELEASE" - nightly = false - } - } - - var dispatchErr error - if instanceType == "desktop" { - instanceDetailsJSON, err := s.buildInstanceDetailsJSON(instances) - if err != nil { - logger.WithError(err).Error("Failed to build instance details JSON for nightly desktop run") - s.e2eInstancesLock.Lock() - delete(s.e2eInstances, key) - s.e2eInstancesLock.Unlock() - s.destroyE2EInstances(instances, logger) - return - } - dispatchErr = s.dispatchDesktopE2EWorkflow(owner, repoName, branch, sha, instanceDetailsJSON, runType, nightly) - } else { - if len(instances) < 3 { - logger.Errorf("Expected 3 mobile instances, got %d", len(instances)) - s.e2eInstancesLock.Lock() - delete(s.e2eInstances, key) - s.e2eInstancesLock.Unlock() - s.destroyE2EInstances(instances, logger) - return - } - dispatchErr = s.dispatchMobileE2EWorkflow(owner, repoName, branch, sha, - instances[0].URL, instances[1].URL, instances[2].URL, "both", runType) - } - - if dispatchErr != nil { - logger.WithError(dispatchErr).Error("Failed to dispatch test workflow for nightly run; cleaning up instances") - s.e2eInstancesLock.Lock() - delete(s.e2eInstances, key) - s.e2eInstancesLock.Unlock() - s.destroyE2EInstances(instances, logger) - return - } - - logger.Info("Nightly E2E workflow dispatched successfully") + logger.WithField("configured_test_workflows", s.Config.E2ETestWorkflowNames). + Info("Ignoring workflow_run event (not relevant to E2E lifecycle)") } // isE2ETestWorkflow returns true if the workflow name is a configured E2E test workflow @@ -494,10 +387,19 @@ func (s *Server) createSingleCMTInstance(repoName, instanceType, version string, return s.createCloudInstallation(context.Background(), name, version, username, password, instanceType, logger) } -// cmtServer is the server entry in CMT_MATRIX JSON. +// cmtServer is the server entry in CMT_MATRIX JSON. Mobile uses the optional `latest` flag +// to mark the highest-semver entry, so the mobile workflow can decide to run the whole suite +// against the latest server and smoke-only against older ones. Desktop ignores it and the +// `omitempty` keeps desktop's matrix JSON unchanged (the field is just absent there). +// +// IMPORTANT — layering: matterwick only signals which server is "latest". It does NOT decide +// what tests run for latest vs non-latest — that policy (smoke path vs whole suite path, +// parallelism, etc.) lives in the mobile workflow, where the test-directory structure is +// known. type cmtServer struct { Version string `json:"version"` URL string `json:"url"` + Latest bool `json:"latest,omitempty"` } // buildDesktopCMTMatrixJSON builds the CMT_MATRIX JSON for compatibility-matrix-testing.yml @@ -549,14 +451,17 @@ func buildDesktopCMTMatrixJSON(versions []string, instances []*E2EInstance) (str } // buildMobileCMTMatrixJSON builds the CMT_MATRIX JSON for compatibility-matrix-testing.yml -// in the mobile repo. One iOS test job is created per server version. +// in the mobile repo. One iOS test job is created per server version. The highest-semver +// entry is marked `latest: true` so the mobile workflow can branch on it (e.g. whole suite +// for latest, smoke for older). What "latest" implies test-wise is the workflow's policy, +// not matterwick's — see the cmtServer doc comment. // // Schema: // // { // "server": [ -// {"version": "v11.1.0", "url": "https://..."}, -// ... +// {"version": "10.11.18", "url": "https://..."}, +// {"version": "11.7.1", "url": "https://...", "latest": true} // ] // } func buildMobileCMTMatrixJSON(versions []string, instances []*E2EInstance) (string, error) { @@ -564,12 +469,36 @@ func buildMobileCMTMatrixJSON(versions []string, instances []*E2EInstance) (stri Server []cmtServer `json:"server"` } + // Find the highest-semver entry across versions that parse successfully. parseCMTVersion + // handles vX.Y.Z and vX.Y.Z-rcN (with or without the "v"). Unparseable versions are + // silently skipped — they just don't compete for "latest". If none parse, no entry is + // marked, and the workflow can fall back to its default (smoke for all). + latestIdx := -1 + var latestVer cmtVersion + for i, version := range versions { + if i >= len(instances) { + break + } + v, ok := parseCMTVersion(version) + if !ok { + continue + } + if latestIdx == -1 || latestVer.less(v) { + latestVer = v + latestIdx = i + } + } + var matrix mobileCMTMatrix for i, version := range versions { if i >= len(instances) { break } - matrix.Server = append(matrix.Server, cmtServer{Version: version, URL: instances[i].URL}) + entry := cmtServer{Version: version, URL: instances[i].URL} + if i == latestIdx { + entry.Latest = true + } + matrix.Server = append(matrix.Server, entry) } b, err := json.Marshal(matrix) From d65b042126bd481ad8e1e6a32ef659c8b5754188 Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Fri, 12 Jun 2026 21:34:05 +0530 Subject: [PATCH 19/19] push: drop dead `nightly` param from desktop e2e dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Matterwick's nightly handler was removed in prior commits; this dispatch function still took a `nightly bool` parameter and sent `"nightly": ...` in the e2e-functional.yml workflow inputs map. Push-event callers always passed `false`, and the PR-label path was a separate function that never went through here at all — so the param was dead. Companion change in desktop (paired): remove the `nightly` workflow input from e2e-functional.yml + e2e-functional-template.yml and the `if inputs.nightly` block that built `desktop-nightly-${OS}` build IDs. - dispatchDesktopE2EWorkflow: drop `nightly bool` parameter - drop "nightly": fmt.Sprintf("%t", nightly) from workflowInputs - update sole caller (triggerDesktopE2EWorkflowForPushEvent) Co-Authored-By: Claude Opus 4.7 --- server/e2e_tests.go | 7 +++++-- server/push_events.go | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/server/e2e_tests.go b/server/e2e_tests.go index defff0e..aadf90b 100644 --- a/server/e2e_tests.go +++ b/server/e2e_tests.go @@ -1336,7 +1336,11 @@ func (s *Server) buildInstanceDetailsJSON(instances []*E2EInstance) (string, err // GitHub Actions API. Cleanup is driven by the workflow_run completed event matched on the // commit SHA, so no tracking key is passed as an input (e2e-functional.yml does not declare // one, and GitHub rejects a workflow_dispatch carrying an undeclared input with a 422). -func (s *Server) dispatchDesktopE2EWorkflow(repoOwner, repoName, ref, sha, instanceDetailsJSON, runType string, nightly bool) error { +// +// `nightly` was previously sent on this dispatch when matterwick's nightly handler fired the +// desktop workflow. That handler is gone; matterwick never has reason to set nightly=true any +// more, and the downstream `nightly` input was removed from e2e-functional.yml. Don't send it. +func (s *Server) dispatchDesktopE2EWorkflow(repoOwner, repoName, ref, sha, instanceDetailsJSON, runType string) error { ctx := context.Background() client := newGithubClient(s.Config.GithubAccessToken) @@ -1367,7 +1371,6 @@ func (s *Server) dispatchDesktopE2EWorkflow(repoOwner, repoName, ref, sha, insta "MM_TEST_USER_NAME": s.Config.E2EUsername, "MM_SERVER_VERSION": serverVersion, "run_type": runType, - "nightly": fmt.Sprintf("%t", nightly), } // Use REST API to trigger workflow dispatch (v32 go-github compatibility) diff --git a/server/push_events.go b/server/push_events.go index f475d88..67c192d 100644 --- a/server/push_events.go +++ b/server/push_events.go @@ -305,7 +305,7 @@ func (s *Server) triggerDesktopE2EWorkflowForPushEvent(repoOwner, repoName, bran // handlePushEvent only routes master/main pushes here (release-branch push trigger was // removed), so runType is always MASTER for desktop push events. - return s.dispatchDesktopE2EWorkflow(repoOwner, repoName, branch, sha, instanceDetailsJSON, "MASTER", false) + return s.dispatchDesktopE2EWorkflow(repoOwner, repoName, branch, sha, instanceDetailsJSON, "MASTER") } // triggerMobileE2EWorkflowForPushEvent dispatches the mobile E2E workflow (e2e-detox-pr.yml).