diff --git a/cmd/deploy.go b/cmd/deploy.go index a5a6131a..a324a5cc 100644 --- a/cmd/deploy.go +++ b/cmd/deploy.go @@ -76,14 +76,6 @@ func runDeploy(cmd *cobra.Command, args []string) error { return errors.New("running without a controlling terminal requires --envrc to be set") } - if envrc != "" && portForwarding { - return errors.New("cannot use --envrc with --port-forwarding. The --envrc flag is for non-interactive mode with remote cluster access") - } - - if envrc != "" && exposure == "none" { - return errors.New("cannot use --envrc with --exposure=none. The --envrc flag requires a remotely accessible endpoint (e.g., --exposure=loadbalancer)") - } - portForwardEnabledFinal := portForwarding || exposure == "none" if env.RunningInRoxieContainer { diff --git a/internal/deployer/deploy_via_operator.go b/internal/deployer/deploy_via_operator.go index 2bc1ca73..6365a870 100644 --- a/internal/deployer/deploy_via_operator.go +++ b/internal/deployer/deploy_via_operator.go @@ -607,11 +607,20 @@ func (d *Deployer) configureCentralEndpoint(ctx context.Context, exposure string } } - endpoint, err := d.portForward.Start(d.centralNamespace, serviceName, 443, 8443) - if err != nil { - return fmt.Errorf("failed to start port-forward: %w", err) + if d.envrcFile != "" { + endpoint, pid, err := d.portForward.StartDetached(d.centralNamespace, serviceName, 443, 8443) + if err != nil { + return fmt.Errorf("failed to start detached port-forward: %w", err) + } + d.centralEndpoint = endpoint + d.portForwardPID = pid + } else { + endpoint, err := d.portForward.Start(d.centralNamespace, serviceName, 443, 8443) + if err != nil { + return fmt.Errorf("failed to start port-forward: %w", err) + } + d.centralEndpoint = endpoint } - d.centralEndpoint = endpoint } else if exposure == "loadbalancer" { endpoint, err := d.waitForLoadBalancer(ctx, d.centralNamespace, "central-loadbalancer", 300) if err != nil { diff --git a/internal/deployer/deployer.go b/internal/deployer/deployer.go index 69b77c19..a30372a0 100644 --- a/internal/deployer/deployer.go +++ b/internal/deployer/deployer.go @@ -7,8 +7,10 @@ import ( "fmt" "os" "os/exec" + "strconv" "strings" "sync" + "syscall" "time" "github.com/fatih/color" @@ -64,6 +66,7 @@ type Deployer struct { securedClusterOverrides map[string]interface{} featureFlagOverrides map[string]interface{} envrcFile string + portForwardPID int useOLM bool useKonflux bool shouldDeployOperator bool @@ -434,6 +437,12 @@ func New(log *logger.Logger) (*Deployer, error) { d.roxCACertFile = caCert } + if pidStr := os.Getenv("ROXIE_PORT_FORWARD_PID"); pidStr != "" { + if pid, err := strconv.Atoi(pidStr); err == nil { + d.portForwardPID = pid + } + } + d.kubeContext = env.GetCurrentContext() clusterResourceKinds, err := d.getClusterResourceKinds() @@ -480,6 +489,22 @@ func (d *Deployer) Cleanup() { } } +func (d *Deployer) stopDetachedPortForward() { + if d.portForwardPID == 0 { + return + } + proc, err := os.FindProcess(d.portForwardPID) + if err != nil { + return + } + if err := proc.Signal(syscall.SIGKILL); err != nil { + d.logger.Dimf("Detached port-forward (pid %d) already gone", d.portForwardPID) + return + } + d.logger.Dimf("Stopped detached port-forward (pid %d)", d.portForwardPID) + d.portForwardPID = 0 +} + // Deploy deploys the specified components to the cluster. func (d *Deployer) Deploy(ctx context.Context, components component.Component, resources, exposure string) error { adjustedResources, adjustedExposure, adjustedPortForward := d.clusterDefaults.ApplyConvenienceDefaults( @@ -559,7 +584,6 @@ func (d *Deployer) deployCentral(ctx context.Context, resources, exposure string return err } - // envrc may be used from different processes, so use actual endpoint not port-forward if d.envrcFile != "" { d.logger.Dimf("Writing environment variables to %s", d.envrcFile) if err := d.writeEnvrcFile(ctx, exposure, portForwardWanted); err != nil { @@ -636,6 +660,7 @@ func (d *Deployer) teardownCentral(ctx context.Context) error { } d.portForward.Stop() + d.stopDetachedPortForward() // Add pause-reconcile annotation to not have the operator interfere during resource deletion. if d.doesResourceExist(ctx, "central", "stackrox-central-services", d.centralNamespace) { @@ -998,6 +1023,9 @@ func (d *Deployer) writeEnvrcFile(ctx context.Context, exposure string, portForw fmt.Fprintf(&content, "export ROX_USERNAME=%q\n", AdminUsername) fmt.Fprintf(&content, "export ROX_ADMIN_PASSWORD=%q\n", d.centralPassword) fmt.Fprintf(&content, "export ROX_CA_CERT_FILE=%q\n", d.roxCACertFile) + if d.portForwardPID != 0 { + fmt.Fprintf(&content, "export ROXIE_PORT_FORWARD_PID=%d\n", d.portForwardPID) + } if err := os.WriteFile(d.envrcFile, []byte(content.String()), 0600); err != nil { return fmt.Errorf("failed to write envrc file: %w", err) diff --git a/internal/portforward/portforward.go b/internal/portforward/portforward.go index 4dcbd0cf..b62f69cb 100644 --- a/internal/portforward/portforward.go +++ b/internal/portforward/portforward.go @@ -120,6 +120,51 @@ func (m *Manager) Start(namespace, serviceName string, remotePort, preferredLoca return endpoint, nil } +// StartDetached starts port-forward as a detached process that survives the +// parent process exiting. Returns the endpoint and the PID of the subprocess. +// The caller is responsible for killing the process when done. +func (m *Manager) StartDetached(namespace, serviceName string, remotePort, preferredLocalPort int) (string, int, error) { + localPort, err := m.findFreeLocalPort(preferredLocalPort) + if err != nil { + return "", 0, fmt.Errorf("failed to find free port: %w", err) + } + + cmd := exec.Command( + m.kubectl, + "-n", namespace, + "port-forward", + fmt.Sprintf("svc/%s", serviceName), + fmt.Sprintf("%d:%d", localPort, remotePort), + "--address", "127.0.0.1", + ) + + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setsid: true, + } + + cmd.Stdout = nil + cmd.Stderr = nil + + if err := cmd.Start(); err != nil { + return "", 0, fmt.Errorf("failed to start port-forward: %w", err) + } + + pid := cmd.Process.Pid + + // Release the process so it won't be waited on by this process. + cmd.Process.Release() + + if !m.waitTCPReady("127.0.0.1", localPort, 20.0) { + syscall.Kill(pid, syscall.SIGTERM) + return "", 0, fmt.Errorf("port-forward did not become ready") + } + + endpoint := fmt.Sprintf("127.0.0.1:%d", localPort) + m.logger.Successf("✓ Detached port-forward active at https://%s (pid %d)", endpoint, pid) + + return endpoint, pid, nil +} + // Stop stops the active port-forward if running func (m *Manager) Stop() { if m.proc == nil || m.proc.Process == nil { diff --git a/tests/e2e/basic_test.go b/tests/e2e/basic_test.go index ac91591f..9141c7d5 100644 --- a/tests/e2e/basic_test.go +++ b/tests/e2e/basic_test.go @@ -4,9 +4,16 @@ package e2e import ( + "net" "os" + "strconv" + "strings" + "syscall" "testing" "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // TestDeployBothSimple tests deploying both components together (simplest scenario) @@ -22,7 +29,7 @@ func TestDeployBothSimple(t *testing.T) { envrcFile.Close() t.Log("=== Deploying both components together ===") - args := append([]string{roxieBinary, "deploy", "--early-readiness", "both", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args := append([]string{roxieBinary, "deploy", "--early-readiness", "both", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout*2, nil, args...) // Verify namespaces exist and have managed-by labels @@ -46,3 +53,56 @@ func TestDeployBothSimple(t *testing.T) { verifyCentralNotInstalled(t, "acs-central") verifySecuredClusterNotInstalled(t, "acs-sensor") } + +// TestDetachedPortForwarding tests the detached port-forwarding mode for central. +func TestDetachedPortForwarding(t *testing.T) { + dumpClusterStateOnFailure(t) + + envrcFile, err := os.CreateTemp(t.TempDir(), ".envrc.roxie-test-*") + if err != nil { + t.Fatalf("Failed to create temp envrc: %v", err) + } + envrcPath := envrcFile.Name() + envrcFile.Close() + + t.Log("=== Deploying central without exposure and with port-forwarding and envrc ===") + args := append([]string{roxieBinary, "deploy", "--early-readiness", "central", "--exposure=none", "--port-forwarding", "--envrc", envrcPath}, commonDeployArgs...) + runCommand(t, deployTimeout, nil, args...) + + env, err := loadEnvrcFile(envrcPath) + require.NoError(t, err, "Failed to load envrc file") + pidStr, ok := env["ROXIE_PORT_FORWARD_PID"] + require.True(t, ok, "ROXIE_PORT_FORWARD_PID not set in envrc") + pid, err := strconv.Atoi(pidStr) + require.NoError(t, err, "ROXIE_PORT_FORWARD_PID is not a valid integer: %s", pidStr) + t.Logf("Port-forward PID: %d", pid) + + require.NoError(t, syscall.Kill(pid, 0), "Port-forward process (PID %d) does not exist", pid) + + endpoint, ok := env["API_ENDPOINT"] + require.True(t, ok, "API_ENDPOINT not set in envrc") + require.True(t, strings.HasPrefix(endpoint, "127.0.0.1:"), + "Expected localhost endpoint, got: %s", endpoint) + + caCertFile, ok := env["ROX_CA_CERT_FILE"] + require.True(t, ok, "ROX_CA_CERT_FILE not set in envrc") + + testCentralAPI(t, endpoint, caCertFile) + + t.Log("=== Cleaning up ===") + teardownArgs := []string{roxieBinary, "teardown", "central"} + runCommand(t, teardownTimeout, env, teardownArgs...) + + // Verify port-forward cleanup by checking the port is free. We can't use + // kill(pid, 0) because CI containers often lack a proper init to reap + // zombies, causing the check to pass for dead processes. Binding to the + // port works because even zombies release their file descriptors. + assert.Eventually(t, func() bool { + ln, err := net.Listen("tcp", endpoint) + if err != nil { + return false + } + ln.Close() + return true + }, 10*time.Second, 200*time.Millisecond, "Port-forward port %s should be free after teardown", endpoint) +} diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 425ec7e9..cb42af42 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -50,7 +50,7 @@ func TestDeployBothComponentsTogetherInSingleNamespace(t *testing.T) { envrcFile.Close() t.Log("=== Deploying both components in single namespace ===") - args := append([]string{roxieBinary, "deploy", "both", "--single-namespace", "--early-readiness", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args := append([]string{roxieBinary, "deploy", "both", "--single-namespace", "--early-readiness", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout*2, nil, args...) verifyCentralInstalled(t, "stackrox") diff --git a/tests/e2e/helpers.go b/tests/e2e/helpers.go index 14c20fce..73b7faf2 100644 --- a/tests/e2e/helpers.go +++ b/tests/e2e/helpers.go @@ -5,8 +5,11 @@ package e2e import ( "bytes" "context" + "crypto/tls" + "crypto/x509" "encoding/json" "fmt" + "net/http" "os" "os/exec" "strings" @@ -26,8 +29,7 @@ const ( ) var ( - commonDeployArgs = []string{"--port-forwarding", "--exposure=none", "--resources=small"} - commonDeployArgsNoPortForward = []string{"--exposure=loadbalancer", "--resources=small"} + commonDeployArgs = []string{"--resources=small"} roxieBinary = "roxie" ) @@ -348,6 +350,30 @@ func dumpLogsForFailingPods(namespace string) { } } +func testCentralAPI(t *testing.T, endpoint, caCertFile string) { + t.Helper() + + caCert, err := os.ReadFile(caCertFile) + require.NoError(t, err, "Failed to read CA cert file: %s", caCertFile) + certPool := x509.NewCertPool() + require.True(t, certPool.AppendCertsFromPEM(caCert), "Failed to parse CA certificate") + + client := &http.Client{ + Timeout: 10 * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + RootCAs: certPool, + ServerName: "central.stackrox.svc", + }, + }, + } + resp, err := client.Get(fmt.Sprintf("https://%s/v1/ping", endpoint)) + require.NoError(t, err, "Failed to reach Central via %s", endpoint) + defer resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode, "Central API returned unexpected status") + t.Logf("Central at %s responded with status: %d", endpoint, resp.StatusCode) +} + func verifyAnnotation(t *testing.T, resourceType, resourceName, namespace, annotationKey, expectedValue string) { t.Helper() diff --git a/tests/e2e/olm_switch_test.go b/tests/e2e/olm_switch_test.go index fe9512b0..0c6fa480 100644 --- a/tests/e2e/olm_switch_test.go +++ b/tests/e2e/olm_switch_test.go @@ -81,7 +81,7 @@ func TestOLMToNonOLMSwitch(t *testing.T) { // Step 1: Deploy central with OLM operator t.Log("=== Step 1: Deploy central with OLM operator ===") - args := append([]string{roxieBinary, "deploy", "central", "--olm", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args := append([]string{roxieBinary, "deploy", "central", "--olm", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout, nil, args...) // Verify operator is in OLM mode @@ -94,7 +94,7 @@ func TestOLMToNonOLMSwitch(t *testing.T) { // Step 2: Deploy central again without OLM (should switch modes) t.Log("=== Step 2: Redeploy central without OLM (triggering mode switch) ===") - args = append([]string{roxieBinary, "deploy", "central", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args = append([]string{roxieBinary, "deploy", "central", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout, nil, args...) // Verify operator switched to non-OLM mode @@ -131,7 +131,7 @@ func TestNonOLMToOLMSwitch(t *testing.T) { // Step 1: Deploy central without OLM (non-OLM operator) t.Log("=== Step 1: Deploy central with non-OLM operator ===") - args := append([]string{roxieBinary, "deploy", "central", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args := append([]string{roxieBinary, "deploy", "central", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout, nil, args...) // Verify operator is in non-OLM mode @@ -144,7 +144,7 @@ func TestNonOLMToOLMSwitch(t *testing.T) { // Step 2: Deploy central again with OLM (should switch modes) t.Log("=== Step 2: Redeploy central with OLM (triggering mode switch) ===") - args = append([]string{roxieBinary, "deploy", "central", "--olm", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args = append([]string{roxieBinary, "deploy", "central", "--olm", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout, nil, args...) // Verify operator switched to OLM mode @@ -185,7 +185,7 @@ func TestOLMOperatorVersionUpgrade(t *testing.T) { // Step 1: Deploy central with OLM operator t.Log("=== Step 1: Deploy central with OLM operator ===") - args := append([]string{roxieBinary, "deploy", "central", "--olm", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args := append([]string{roxieBinary, "deploy", "central", "--olm", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout, nil, args...) // Verify operator is in OLM mode @@ -207,7 +207,7 @@ func TestOLMOperatorVersionUpgrade(t *testing.T) { // Step 2: Redeploy with same version (should skip if version matches) t.Log("=== Step 2: Redeploy with same version (should detect correct version) ===") - args = append([]string{roxieBinary, "deploy", "central", "--olm", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args = append([]string{roxieBinary, "deploy", "central", "--olm", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout, nil, args...) // Verify operator is still in OLM mode and deployment exists @@ -245,7 +245,7 @@ func TestSecuredClusterWithOLMSwitch(t *testing.T) { // Step 1: Deploy central with OLM t.Log("=== Step 1: Deploy central with OLM ===") - args := append([]string{roxieBinary, "deploy", "--early-readiness", "central", "--olm", "--envrc", envrcPath}, commonDeployArgsNoPortForward...) + args := append([]string{roxieBinary, "deploy", "--early-readiness", "central", "--olm", "--envrc", envrcPath}, commonDeployArgs...) runCommand(t, deployTimeout, nil, args...) verifyOperatorMode(t, true) @@ -259,7 +259,7 @@ func TestSecuredClusterWithOLMSwitch(t *testing.T) { // Step 2: Deploy secured-cluster (should reuse OLM operator) t.Log("=== Step 2: Deploy secured-cluster (should reuse OLM operator) ===") - args = append([]string{roxieBinary, "deploy", "--early-readiness", "secured-cluster", "--olm"}, commonDeployArgsNoPortForward...) + args = append([]string{roxieBinary, "deploy", "--early-readiness", "secured-cluster", "--olm"}, commonDeployArgs...) runCommand(t, deployTimeout, envrcEnv, args...) // Verify operator is still in OLM mode @@ -268,7 +268,7 @@ func TestSecuredClusterWithOLMSwitch(t *testing.T) { // Step 3: Switch to non-OLM by redeploying secured-cluster without --olm t.Log("=== Step 3: Redeploy secured-cluster without OLM (triggering mode switch) ===") - args = append([]string{roxieBinary, "deploy", "--early-readiness", "secured-cluster"}, commonDeployArgsNoPortForward...) + args = append([]string{roxieBinary, "deploy", "--early-readiness", "secured-cluster"}, commonDeployArgs...) runCommand(t, deployTimeout, envrcEnv, args...) // Verify operator switched to non-OLM mode