diff --git a/apis/bases/instanceha.openstack.org_instancehas.yaml b/apis/bases/instanceha.openstack.org_instancehas.yaml index 2d9bdc61..26180e32 100644 --- a/apis/bases/instanceha.openstack.org_instancehas.yaml +++ b/apis/bases/instanceha.openstack.org_instancehas.yaml @@ -100,6 +100,18 @@ spec: default: 7410 format: int32 type: integer + metricsTLS: + description: MetricsTLS - Parameters related to TLS for the metrics + endpoint + properties: + caBundleSecretName: + description: CaBundleSecretName - holding the CA certs in a pre-created + bundle file + type: string + secretName: + description: SecretName - holding the cert, key for the service + type: string + type: object networkAttachments: description: |- NetworkAttachments is a list of NetworkAttachment resource names to expose diff --git a/apis/instanceha/v1beta1/instanceha_types.go b/apis/instanceha/v1beta1/instanceha_types.go index 4066e2cc..37ebe5d3 100644 --- a/apis/instanceha/v1beta1/instanceha_types.go +++ b/apis/instanceha/v1beta1/instanceha_types.go @@ -115,6 +115,11 @@ type InstanceHaSpec struct { // +kubebuilder:validation:Optional // Auth - Parameters related to authentication Auth AuthSpec `json:"auth,omitempty"` + + // +kubebuilder:validation:Optional + //+operator-sdk:csv:customresourcedefinitions:type=spec + // MetricsTLS - Parameters related to TLS for the metrics endpoint + MetricsTLS tls.SimpleService `json:"metricsTLS,omitempty"` } // InstanceHaStatus defines the observed state of InstanceHa diff --git a/apis/instanceha/v1beta1/zz_generated.deepcopy.go b/apis/instanceha/v1beta1/zz_generated.deepcopy.go index 0c5162e3..44c43d9a 100644 --- a/apis/instanceha/v1beta1/zz_generated.deepcopy.go +++ b/apis/instanceha/v1beta1/zz_generated.deepcopy.go @@ -141,6 +141,7 @@ func (in *InstanceHaSpec) DeepCopyInto(out *InstanceHaSpec) { **out = **in } out.Auth = in.Auth + in.MetricsTLS.DeepCopyInto(&out.MetricsTLS) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new 
InstanceHaSpec. diff --git a/config/crd/bases/instanceha.openstack.org_instancehas.yaml b/config/crd/bases/instanceha.openstack.org_instancehas.yaml index 2d9bdc61..26180e32 100644 --- a/config/crd/bases/instanceha.openstack.org_instancehas.yaml +++ b/config/crd/bases/instanceha.openstack.org_instancehas.yaml @@ -100,6 +100,18 @@ spec: default: 7410 format: int32 type: integer + metricsTLS: + description: MetricsTLS - Parameters related to TLS for the metrics + endpoint + properties: + caBundleSecretName: + description: CaBundleSecretName - holding the CA certs in a pre-created + bundle file + type: string + secretName: + description: SecretName - holding the cert, key for the service + type: string + type: object networkAttachments: description: |- NetworkAttachments is a list of NetworkAttachment resource names to expose diff --git a/docs/instanceha_guide.md b/docs/instanceha_guide.md index 39da81e0..43e88a14 100644 --- a/docs/instanceha_guide.md +++ b/docs/instanceha_guide.md @@ -190,7 +190,9 @@ groups: #### Scraping Configuration -The InstanceHA pod exposes metrics on TCP port 8080. To scrape with Prometheus, create a `PodMonitor` or `ServiceMonitor`: +The InstanceHA pod exposes metrics on TCP port 8080. The infra-operator automatically creates a Kubernetes Service (`<instance-name>-metrics`) with the labels `metrics: enabled` and `service: instanceha`, which the telemetry-operator discovers and scrapes via the COO Prometheus. 
**No manual configuration is needed when the telemetry-operator is deployed.** + +For environments using OpenShift user workload monitoring instead of (or in addition to) the telemetry-operator, create a `PodMonitor`: ```yaml apiVersion: monitoring.coreos.com/v1 diff --git a/docs/instanceha_prometheus.md b/docs/instanceha_prometheus.md index 0cec93ba..72af31bf 100644 --- a/docs/instanceha_prometheus.md +++ b/docs/instanceha_prometheus.md @@ -6,6 +6,8 @@ InstanceHA exposes Prometheus metrics at `:8080/metrics` on the workload pod, co The metrics are served by the `prometheus_client` Python library on the same HTTP server used for liveness and readiness probes. No sidecar or additional container is needed. +When pod-level TLS is enabled, the metrics endpoint serves over **HTTPS**. The openstack-operator creates a cert-manager Certificate producing a TLS secret (`cert-instanceha-metrics`), which the infra-operator mounts into the pod. The Python HTTP server wraps its socket with TLS automatically when the certificate files are present. + --- ## Prerequisites @@ -17,6 +19,34 @@ The metrics are served by the `prometheus_client` Python library on the same HTT --- +## TLS Configuration + +When `OpenStackControlPlane` has pod-level TLS enabled (`spec.tls.podLevel.enabled: true`), the openstack-operator automatically provisions a cert-manager Certificate for the InstanceHA metrics endpoint. This produces a Kubernetes TLS secret (`cert-instanceha-metrics`) containing `tls.crt`, `tls.key`, and `ca.crt`. + +The infra-operator InstanceHA controller **auto-detects** this secret: if the default secret `cert-instanceha-metrics` exists in the namespace, TLS is enabled automatically without any configuration on the InstanceHa CR. The controller: +1. Validates the TLS secret exists and is well-formed +2. Mounts the certificate at `/etc/pki/tls/certs/metrics.crt` and the key at `/etc/pki/tls/private/metrics.key` +3. 
Sets `METRICS_TLS_CERT` and `METRICS_TLS_KEY` environment variables +4. Switches liveness and readiness probes to HTTPS + +The Python process detects these environment variables and wraps the HTTP server socket with TLS. A single wildcard certificate (`*.NAMESPACE.svc`) covers all InstanceHA instances in a namespace. + +To use a custom TLS secret instead of the auto-detected default, set `metricsTLS.secretName` in the InstanceHa CR: + +```yaml +apiVersion: instanceha.openstack.org/v1beta1 +kind: InstanceHa +metadata: + name: instanceha +spec: + metricsTLS: + secretName: my-custom-metrics-cert +``` + +When the telemetry-operator is deployed, its `ScrapeConfig` automatically switches to `scheme: HTTPS` with the appropriate TLS configuration when `PrometheusTLS` is enabled — no manual changes are needed. + +--- + ## Enabling Scraping ### Step 1: Deploy a PodMonitor @@ -95,8 +125,9 @@ curl -sk -H "Authorization: Bearer $TOKEN" \ ```bash # Scrape metrics directly from the pod +# Use https and -k when TLS is enabled oc exec -n openstack deployment/instanceha-instanceha -- \ - curl -s http://localhost:8080/metrics + curl -sk https://localhost:8080/metrics # Query a specific metric in Prometheus # (via Prometheus UI or API) @@ -299,13 +330,17 @@ promtool check rules instanceha-prometheusrule.yaml ### Verify Metrics Endpoint ```bash -# Scrape all metrics from the pod +# Scrape all metrics from the pod (HTTP, when TLS is not enabled) oc exec -n openstack deployment/instanceha-instanceha -- \ curl -s http://localhost:8080/metrics +# When TLS is enabled, use HTTPS with -k to skip certificate verification +oc exec -n openstack deployment/instanceha-instanceha -- \ + curl -sk https://localhost:8080/metrics + # Check a specific metric family oc exec -n openstack deployment/instanceha-instanceha -- \ - curl -s http://localhost:8080/metrics | grep instanceha_poll_cycles_total + curl -sk https://localhost:8080/metrics | grep instanceha_poll_cycles_total ``` Expected output (counters 
start at zero, increment over time): @@ -319,16 +354,16 @@ instanceha_poll_cycles_total{result="error"} 0.0 ### Verify Poll Loop Metrics -After the pod has been running for a few poll cycles: +After the pod has been running for a few poll cycles (use `https` and `-k` when TLS is enabled): ```bash # Should show increasing success count oc exec -n openstack deployment/instanceha-instanceha -- \ - curl -s http://localhost:8080/metrics | grep poll_cycles + curl -sk https://localhost:8080/metrics | grep poll_cycles # Should show 0 consecutive failures (healthy state) oc exec -n openstack deployment/instanceha-instanceha -- \ - curl -s http://localhost:8080/metrics | grep poll_consecutive_failures + curl -sk https://localhost:8080/metrics | grep poll_consecutive_failures ``` ### Simulate a Nova API Failure @@ -353,8 +388,9 @@ Fencing and evacuation counters only increment during actual host failures. To v ```bash # Check that the metric families are registered (even if values are 0) +# Use https and -k when TLS is enabled oc exec -n openstack deployment/instanceha-instanceha -- \ - curl -s http://localhost:8080/metrics | grep "^instanceha_" | grep "# TYPE" + curl -sk https://localhost:8080/metrics | grep "^instanceha_" | grep "# TYPE" ``` Expected output: @@ -482,37 +518,23 @@ When the [telemetry-operator](https://github.com/openstack-k8s-operators/telemet | OpenShift user workload monitoring | `prometheus-user-workload` in `openshift-user-workload-monitoring` | `thanos-querier` route in `openshift-monitoring` | | telemetry-operator (COO) | `prometheus-metric-storage` in `openstack` | `metric-storage-prometheus.openstack.svc:9090` | -The PodMonitor approach described above places InstanceHA metrics in the OpenShift user workload Prometheus. If you want InstanceHA metrics alongside other OpenStack metrics (Ceilometer, RabbitMQ, node-exporter, OVN) in the COO Prometheus, create a `ScrapeConfig` CR instead. 
+### Automatic Discovery (default) -### Creating a ScrapeConfig for COO Prometheus +The telemetry-operator **automatically discovers and scrapes InstanceHA metrics** — no manual configuration is required. The infra-operator creates a Kubernetes Service (`<instance-name>-metrics`) with the labels `metrics: enabled` and `service: instanceha`. The telemetry-operator's `MetricStorage` controller watches for Services with these labels and automatically generates a `ScrapeConfig` CR named `telemetry-instanceha` targeting port 8080. -The COO Prometheus only picks up CRs with the label `service: metricStorage`. Create a `ScrapeConfig` targeting the InstanceHA pod: +This works the same way as the OVN metrics integration. When a `MetricStorage` CR exists in the namespace: -```yaml -apiVersion: monitoring.rhobs/v1alpha1 -kind: ScrapeConfig -metadata: - name: instanceha-metrics - namespace: openstack - labels: - service: metricStorage -spec: - scrapeInterval: 30s - metricsPath: /metrics - staticConfigs: - - targets: - - ":8080" -``` +1. The telemetry-operator discovers the InstanceHA metrics Service via label selectors +2. A `ScrapeConfig` CR is created with the target `<instance-name>-metrics.<namespace>.svc:8080` +3. The COO Prometheus picks up the `ScrapeConfig` and begins scraping +4. If the InstanceHA Service is deleted or recreated, the `ScrapeConfig` is automatically reconciled -To discover the pod IP dynamically: +To verify the automatic scrapeconfig was created: ```bash -POD_IP=$(oc get pod -n openstack -l service=instanceha -o jsonpath='{.items[0].status.podIP}') -echo "Target: ${POD_IP}:8080" +oc get scrapeconfig -n openstack telemetry-instanceha -o yaml ``` -> **Note**: The COO `ScrapeConfig` uses static targets (IP:port), not label-based pod discovery like a `PodMonitor`. If the InstanceHA pod is rescheduled and gets a new IP, the `ScrapeConfig` must be updated. 
For automatic discovery, consider requesting native InstanceHA support in the telemetry-operator — the OVN metrics integration uses a label-based service discovery pattern that could be extended to InstanceHA. - ### Alert Rules for COO Prometheus The alert rules from the [Alert Rules](#alert-rules) section use the `monitoring.coreos.com/v1` API group, which is picked up by OpenShift's built-in Prometheus Operator. To use these alerts with the COO Prometheus instead, change the API group and add the `service: metricStorage` label: @@ -532,7 +554,7 @@ spec: ### Which Approach to Use - **OpenShift user workload monitoring only** (no telemetry-operator): Use the PodMonitor approach from [Enabling Scraping](#enabling-scraping). This is simpler and uses automatic pod discovery. -- **telemetry-operator deployed**: Use the ScrapeConfig approach if you want all OpenStack metrics in a single Prometheus. You can also use both approaches simultaneously — the PodMonitor and ScrapeConfig target different Prometheus instances and do not conflict. +- **telemetry-operator deployed** (default): InstanceHA metrics are automatically scraped by the COO Prometheus alongside other OpenStack metrics (Ceilometer, RabbitMQ, node-exporter, OVN). No manual configuration needed. You can also deploy the PodMonitor simultaneously — it targets the OpenShift user workload Prometheus and does not conflict with the COO scrapeconfig. - **Querying across both**: OpenShift's `thanos-querier` route aggregates the cluster and user workload Prometheus instances. The COO Prometheus is separate and must be queried directly at `metric-storage-prometheus.openstack.svc:9090`. 
--- diff --git a/internal/controller/instanceha/instanceha_controller.go b/internal/controller/instanceha/instanceha_controller.go index 248d2253..99f2a74e 100644 --- a/internal/controller/instanceha/instanceha_controller.go +++ b/internal/controller/instanceha/instanceha_controller.go @@ -55,6 +55,7 @@ import ( commondeployment "github.com/openstack-k8s-operators/lib-common/modules/common/deployment" "github.com/openstack-k8s-operators/lib-common/modules/common/secret" + commonservice "github.com/openstack-k8s-operators/lib-common/modules/common/service" "github.com/openstack-k8s-operators/lib-common/modules/common/util" networkv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" @@ -80,6 +81,7 @@ func (r *Reconciler) GetLogger(ctx context.Context) logr.Logger { // +kubebuilder:rbac:groups=instanceha.openstack.org,resources=instancehas/finalizers,verbs=update;patch // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch; // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch; +// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=k8s.cni.cncf.io,resources=network-attachment-definitions,verbs=get;list;watch // service account, role, rolebinding // +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch;create;update;patch @@ -164,6 +166,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct condition.UnknownCondition(condition.RoleReadyCondition, condition.InitReason, condition.RoleReadyInitMessage), condition.UnknownCondition(condition.RoleBindingReadyCondition, condition.InitReason, condition.RoleBindingReadyInitMessage), condition.UnknownCondition(condition.NetworkAttachmentsReadyCondition, condition.InitReason, condition.NetworkAttachmentsReadyInitMessage), + condition.UnknownCondition(condition.CreateServiceReadyCondition, condition.InitReason, 
condition.CreateServiceReadyInitMessage), ) instance.Status.Conditions.Init(&cl) instance.Status.ObservedGeneration = instance.Generation @@ -369,8 +372,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct ) if err != nil { if k8s_errors.IsNotFound(err) { - // Since the CA cert secret should have been manually created by the user and provided in the spec, - // we treat this as a warning because it means that the service will not be able to start. instance.Status.Conditions.Set(condition.FalseCondition( condition.TLSInputReadyCondition, condition.ErrorReason, @@ -390,6 +391,38 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct configVars[instance.Spec.CaBundleSecretName] = env.SetValue(secretHash) } + metricsTLSExplicit := instance.Spec.MetricsTLS.Enabled() + if !metricsTLSExplicit { + certName := instanceha.DefaultMetricsCertSecret + instance.Spec.MetricsTLS.SecretName = &certName + } + + hash, err := instance.Spec.MetricsTLS.ValidateCertSecret(ctx, helper, instance.Namespace) + if err != nil { + if k8s_errors.IsNotFound(err) { + if metricsTLSExplicit { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.TLSInputReadyCondition, + condition.RequestedReason, + condition.SeverityInfo, + condition.TLSInputReadyWaitingMessage, err.Error())) + return ctrl.Result{}, nil + } + // Auto-detect: default cert not found, proceed without TLS + instance.Spec.MetricsTLS.SecretName = nil + } else { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.TLSInputReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + condition.TLSInputErrorMessage, + err.Error())) + return ctrl.Result{}, err + } + } else { + configVars[tls.TLSHashName+"_metrics"] = env.SetValue(hash) + } + // all cert input checks out so report InputReady instance.Status.Conditions.MarkTrue(condition.TLSInputReadyCondition, condition.InputReadyMessage) @@ -505,6 +538,28 @@ func (r *Reconciler) 
Reconcile(ctx context.Context, req ctrl.Request) (result ct // remove LastAppliedTopology from the .Status instance.Status.LastAppliedTopology = nil } + commonsvc, err := commonservice.NewService(instanceha.MetricsService(instance), time.Duration(5)*time.Second, nil) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.CreateServiceReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + condition.CreateServiceReadyErrorMessage, + err.Error())) + return ctrl.Result{}, err + } + sres, serr := commonsvc.CreateOrPatch(ctx, helper) + if serr != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + condition.CreateServiceReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + condition.CreateServiceReadyErrorMessage, + serr.Error())) + return sres, serr + } + instance.Status.Conditions.MarkTrue(condition.CreateServiceReadyCondition, condition.CreateServiceReadyMessage) + deployment := commondeployment.NewDeployment(instanceha.Deployment(instance, deploymentLabels, serviceAnnotations, cloud, configVarsHash, containerImage, topology, acSecretName), time.Duration(5)*time.Second) sfres, sferr := deployment.CreateOrPatch(ctx, helper) if sferr != nil { @@ -558,6 +613,7 @@ const ( instanceHaConfigMapField = ".spec.instanceHaConfigMap" topologyField = ".spec.topologyRef.Name" acSecretField = ".spec.auth.applicationCredentialSecret" // #nosec G101 + metricsTLSField = ".spec.metricsTLS.secretName" // #nosec G101 ) var allWatchFields = []string{ @@ -568,6 +624,7 @@ var allWatchFields = []string{ instanceHaConfigMapField, topologyField, acSecretField, + metricsTLSField, } // SetupWithManager sets up the controller with the Manager. 
@@ -649,9 +706,21 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { return err } + // index metricsTLSField + if err := mgr.GetFieldIndexer().IndexField(context.Background(), &instancehav1.InstanceHa{}, metricsTLSField, func(rawObj client.Object) []string { + cr := rawObj.(*instancehav1.InstanceHa) + if cr.Spec.MetricsTLS.SecretName == nil { + return []string{instanceha.DefaultMetricsCertSecret} + } + return []string{*cr.Spec.MetricsTLS.SecretName} + }); err != nil { + return err + } + return ctrl.NewControllerManagedBy(mgr). For(&instancehav1.InstanceHa{}). Owns(&appsv1.Deployment{}). + Owns(&corev1.Service{}). Owns(&corev1.ServiceAccount{}). Owns(&rbacv1.Role{}). Owns(&rbacv1.RoleBinding{}). diff --git a/internal/instanceha/const.go b/internal/instanceha/const.go new file mode 100644 index 00000000..28bbfc85 --- /dev/null +++ b/internal/instanceha/const.go @@ -0,0 +1,10 @@ +package instanceha + +const ( + // MetricsCertPath is the path to the metrics certificate file + MetricsCertPath = "/etc/pki/tls/certs/metrics.crt" + // MetricsKeyPath is the path to the metrics private key file + MetricsKeyPath = "/etc/pki/tls/private/metrics.key" + // DefaultMetricsCertSecret is the default secret name for the metrics TLS certificate + DefaultMetricsCertSecret = "cert-instanceha-metrics" //nolint:gosec +) diff --git a/internal/instanceha/funcs.go b/internal/instanceha/funcs.go index d66cf71e..070ac4be 100644 --- a/internal/instanceha/funcs.go +++ b/internal/instanceha/funcs.go @@ -17,6 +17,7 @@ import ( instancehav1 "github.com/openstack-k8s-operators/infra-operator/apis/instanceha/v1beta1" topologyv1 "github.com/openstack-k8s-operators/infra-operator/apis/topology/v1beta1" env "github.com/openstack-k8s-operators/lib-common/modules/common/env" + "github.com/openstack-k8s-operators/lib-common/modules/common/tls" "fmt" appsv1 "k8s.io/api/apps/v1" @@ -103,6 +104,27 @@ func Deployment( volumeMounts = append(volumeMounts, instance.Spec.CreateVolumeMounts(nil)...) 
} + // add metrics TLS cert if defined + if instance.Spec.MetricsTLS.Enabled() { + certSecretName := DefaultMetricsCertSecret + if instance.Spec.MetricsTLS.SecretName != nil && *instance.Spec.MetricsTLS.SecretName != "" { + certSecretName = *instance.Spec.MetricsTLS.SecretName + } + metricsSvc := tls.Service{ + SecretName: certSecretName, + CertMount: ptr.To(MetricsCertPath), + KeyMount: ptr.To(MetricsKeyPath), + } + volumes = append(volumes, metricsSvc.CreateVolume("metrics-certs")) + volumeMounts = append(volumeMounts, metricsSvc.CreateVolumeMounts("metrics-certs")...) + + envVars["METRICS_TLS_CERT"] = env.SetValue(MetricsCertPath) + envVars["METRICS_TLS_KEY"] = env.SetValue(MetricsKeyPath) + + livenessProbe.HTTPGet.Scheme = corev1.URISchemeHTTPS + readinessProbe.HTTPGet.Scheme = corev1.URISchemeHTTPS + } + dep := &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: instance.Name, diff --git a/internal/instanceha/service.go b/internal/instanceha/service.go new file mode 100644 index 00000000..f7f5265a --- /dev/null +++ b/internal/instanceha/service.go @@ -0,0 +1,45 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package instanceha + +import ( + instancehav1 "github.com/openstack-k8s-operators/infra-operator/apis/instanceha/v1beta1" + common "github.com/openstack-k8s-operators/lib-common/modules/common" + labels "github.com/openstack-k8s-operators/lib-common/modules/common/labels" + service "github.com/openstack-k8s-operators/lib-common/modules/common/service" + corev1 "k8s.io/api/core/v1" +) + +// MetricsService exposes the InstanceHA metrics endpoint for Prometheus scraping +func MetricsService(instance *instancehav1.InstanceHa) *corev1.Service { + svcLabels := labels.GetLabels(instance, labels.GetGroupLabel("instanceha"), map[string]string{ + common.AppSelector: "instanceha", + "metrics": "enabled", + }) + + details := &service.GenericServiceDetails{ + Name: instance.GetName() + "-metrics", + Namespace: instance.GetNamespace(), + Labels: svcLabels, + Selector: map[string]string{ + common.AppSelector: "instanceha", + }, + Port: service.GenericServicePort{ + Name: "metrics", + Port: 8080, + Protocol: "TCP", + }, + } + + return service.GenericService(details) +} diff --git a/templates/instanceha/bin/instanceha.py b/templates/instanceha/bin/instanceha.py index 8a3a9a70..20c36ae1 100755 --- a/templates/instanceha/bin/instanceha.py +++ b/templates/instanceha/bin/instanceha.py @@ -1393,6 +1393,14 @@ def do_GET(self): try: server = HTTPServer(('', HEALTH_CHECK_PORT), HealthHandler) + tls_cert = os.getenv('METRICS_TLS_CERT') + tls_key = os.getenv('METRICS_TLS_KEY') + if tls_cert and tls_key: + import ssl + ssl_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + ssl_ctx.load_cert_chain(tls_cert, tls_key) + server.socket = ssl_ctx.wrap_socket(server.socket, server_side=True) + logging.info("Metrics endpoint serving over HTTPS on port %d", HEALTH_CHECK_PORT) server.serve_forever() except OSError as e: logging.error('Health check server failed to bind to port %d: %s', diff --git a/test/functional/base_test.go b/test/functional/base_test.go index 3b404c9f..03e6a81c 100644 --- 
a/test/functional/base_test.go +++ b/test/functional/base_test.go @@ -40,6 +40,7 @@ import ( k8s_networkv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" frrk8sv1 "github.com/metallb/frr-k8s/api/v1beta1" + instancehav1 "github.com/openstack-k8s-operators/infra-operator/apis/instanceha/v1beta1" memcachedv1 "github.com/openstack-k8s-operators/infra-operator/apis/memcached/v1beta1" networkv1 "github.com/openstack-k8s-operators/infra-operator/apis/network/v1beta1" rabbitmqv1 "github.com/openstack-k8s-operators/infra-operator/apis/rabbitmq/v1beta1" @@ -1494,3 +1495,38 @@ func GetSecretHash(name types.NamespacedName) string { Expect(err).ShouldNot(HaveOccurred()) return hash } + +func CreateInstanceHaConfig(namespace string, spec map[string]any) client.Object { + name := "instanceha-" + uuid.New().String()[:25] + + raw := map[string]any{ + "apiVersion": "instanceha.openstack.org/v1beta1", + "kind": "InstanceHa", + "metadata": map[string]any{ + "name": name, + "namespace": namespace, + }, + "spec": spec, + } + + return th.CreateUnstructured(raw) +} + +func GetDefaultInstanceHaSpec() map[string]any { + return map[string]any{ + "containerImage": "test-instanceha-image:latest", + } +} + +func GetInstanceHa(name types.NamespacedName) *instancehav1.InstanceHa { + instance := &instancehav1.InstanceHa{} + Eventually(func(g Gomega) { + g.Expect(k8sClient.Get(ctx, name, instance)).Should(Succeed()) + }, timeout, interval).Should(Succeed()) + return instance +} + +func InstanceHaConditionGetter(name types.NamespacedName) condition.Conditions { + instance := GetInstanceHa(name) + return instance.Status.Conditions +} diff --git a/test/functional/instanceha_controller_test.go b/test/functional/instanceha_controller_test.go new file mode 100644 index 00000000..4e425bc8 --- /dev/null +++ b/test/functional/instanceha_controller_test.go @@ -0,0 +1,326 @@ +/* +Copyright 2024. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package functional_test + +import ( + . "github.com/onsi/ginkgo/v2" //revive:disable:dot-imports + . "github.com/onsi/gomega" //revive:disable:dot-imports + + condition "github.com/openstack-k8s-operators/lib-common/modules/common/condition" + //revive:disable-next-line:dot-imports + instanceha "github.com/openstack-k8s-operators/infra-operator/internal/instanceha" + . "github.com/openstack-k8s-operators/lib-common/modules/common/test/helpers" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" +) + +var _ = Describe("InstanceHa Controller", func() { + var instanceHaName types.NamespacedName + + When("a default InstanceHa gets created", func() { + BeforeEach(func() { + ih := CreateInstanceHaConfig(namespace, GetDefaultInstanceHaSpec()) + instanceHaName.Name = ih.GetName() + instanceHaName.Namespace = ih.GetNamespace() + DeferCleanup(th.DeleteInstance, ih) + }) + + It("should have created an InstanceHa", func() { + Eventually(func(_ Gomega) { + GetInstanceHa(instanceHaName) + }, timeout, interval).Should(Succeed()) + }) + + It("should be waiting for input resources", func() { + th.ExpectCondition( + instanceHaName, + ConditionGetterFunc(InstanceHaConditionGetter), + condition.InputReadyCondition, + corev1.ConditionFalse, + ) + }) + }) + + When("prerequisite resources exist", func() { + BeforeEach(func() { + ih := CreateInstanceHaConfig(namespace, GetDefaultInstanceHaSpec()) + 
instanceHaName.Name = ih.GetName() + instanceHaName.Namespace = ih.GetNamespace() + DeferCleanup(th.DeleteInstance, ih) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateConfigMap(types.NamespacedName{ + Name: "openstack-config", + Namespace: namespace, + }, map[string]any{ + "clouds.yaml": "test-data", + })) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateSecret(types.NamespacedName{ + Name: "openstack-config-secret", + Namespace: namespace, + }, map[string][]byte{ + "secure.yaml": []byte("test-data"), + })) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateSecret(types.NamespacedName{ + Name: "fencing-secret", + Namespace: namespace, + }, map[string][]byte{ + "fencing.yaml": []byte("test-data"), + })) + }) + + It("should create a metrics Service with correct labels and port", func() { + metricsServiceName := types.NamespacedName{ + Name: instanceHaName.Name + "-metrics", + Namespace: instanceHaName.Namespace, + } + + Eventually(func(g Gomega) { + svc := &corev1.Service{} + g.Expect(k8sClient.Get(ctx, metricsServiceName, svc)).Should(Succeed()) + + g.Expect(svc.Labels).To(HaveKeyWithValue("service", "instanceha")) + g.Expect(svc.Labels).To(HaveKeyWithValue("metrics", "enabled")) + + g.Expect(svc.Spec.Selector).To(HaveKeyWithValue("service", "instanceha")) + + g.Expect(svc.Spec.Ports).To(HaveLen(1)) + g.Expect(svc.Spec.Ports[0].Name).To(Equal("metrics")) + g.Expect(svc.Spec.Ports[0].Port).To(Equal(int32(8080))) + g.Expect(svc.Spec.Ports[0].Protocol).To(Equal(corev1.ProtocolTCP)) + }, timeout, interval).Should(Succeed()) + }) + + It("should have the Service owned by the InstanceHa CR", func() { + metricsServiceName := types.NamespacedName{ + Name: instanceHaName.Name + "-metrics", + Namespace: instanceHaName.Namespace, + } + + Eventually(func(g Gomega) { + svc := &corev1.Service{} + g.Expect(k8sClient.Get(ctx, metricsServiceName, svc)).Should(Succeed()) + + ownerRef := svc.GetOwnerReferences() + g.Expect(ownerRef).To(HaveLen(1)) + 
g.Expect(ownerRef[0].Kind).To(Equal("InstanceHa")) + g.Expect(ownerRef[0].Name).To(Equal(instanceHaName.Name)) + }, timeout, interval).Should(Succeed()) + }) + + It("should mark CreateServiceReady condition as True", func() { + th.ExpectCondition( + instanceHaName, + ConditionGetterFunc(InstanceHaConditionGetter), + condition.CreateServiceReadyCondition, + corev1.ConditionTrue, + ) + }) + }) + + When("prerequisite resources exist and deployment is ready", func() { + BeforeEach(func() { + ih := CreateInstanceHaConfig(namespace, GetDefaultInstanceHaSpec()) + instanceHaName.Name = ih.GetName() + instanceHaName.Namespace = ih.GetNamespace() + DeferCleanup(th.DeleteInstance, ih) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateConfigMap(types.NamespacedName{ + Name: "openstack-config", + Namespace: namespace, + }, map[string]any{ + "clouds.yaml": "test-data", + })) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateSecret(types.NamespacedName{ + Name: "openstack-config-secret", + Namespace: namespace, + }, map[string][]byte{ + "secure.yaml": []byte("test-data"), + })) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateSecret(types.NamespacedName{ + Name: "fencing-secret", + Namespace: namespace, + }, map[string][]byte{ + "fencing.yaml": []byte("test-data"), + })) + + th.SimulateDeploymentReplicaReady(instanceHaName) + }) + + It("should mark the InstanceHa as ready", func() { + th.ExpectCondition( + instanceHaName, + ConditionGetterFunc(InstanceHaConditionGetter), + condition.ReadyCondition, + corev1.ConditionTrue, + ) + }) + }) + + When("MetricsTLS is configured without the TLS secret", func() { + BeforeEach(func() { + spec := GetDefaultInstanceHaSpec() + spec["metricsTLS"] = map[string]any{ + "secretName": "cert-instanceha-metrics", + } + ih := CreateInstanceHaConfig(namespace, spec) + instanceHaName.Name = ih.GetName() + instanceHaName.Namespace = ih.GetNamespace() + DeferCleanup(th.DeleteInstance, ih) + + DeferCleanup(k8sClient.Delete, ctx, 
th.CreateConfigMap(types.NamespacedName{ + Name: "openstack-config", + Namespace: namespace, + }, map[string]any{ + "clouds.yaml": "test-data", + })) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateSecret(types.NamespacedName{ + Name: "openstack-config-secret", + Namespace: namespace, + }, map[string][]byte{ + "secure.yaml": []byte("test-data"), + })) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateSecret(types.NamespacedName{ + Name: "fencing-secret", + Namespace: namespace, + }, map[string][]byte{ + "fencing.yaml": []byte("test-data"), + })) + }) + + It("should wait for the metrics TLS secret", func() { + th.ExpectCondition( + instanceHaName, + ConditionGetterFunc(InstanceHaConditionGetter), + condition.TLSInputReadyCondition, + corev1.ConditionFalse, + ) + }) + }) + + When("the default metrics TLS cert secret exists", func() { + BeforeEach(func() { + certSecret := CreateCertSecret(types.NamespacedName{ + Name: "cert-instanceha-metrics", + Namespace: namespace, + }) + DeferCleanup(k8sClient.Delete, ctx, certSecret) + + ih := CreateInstanceHaConfig(namespace, GetDefaultInstanceHaSpec()) + instanceHaName.Name = ih.GetName() + instanceHaName.Namespace = ih.GetNamespace() + DeferCleanup(th.DeleteInstance, ih) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateConfigMap(types.NamespacedName{ + Name: "openstack-config", + Namespace: namespace, + }, map[string]any{ + "clouds.yaml": "test-data", + })) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateSecret(types.NamespacedName{ + Name: "openstack-config-secret", + Namespace: namespace, + }, map[string][]byte{ + "secure.yaml": []byte("test-data"), + })) + + DeferCleanup(k8sClient.Delete, ctx, th.CreateSecret(types.NamespacedName{ + Name: "fencing-secret", + Namespace: namespace, + }, map[string][]byte{ + "fencing.yaml": []byte("test-data"), + })) + }) + + It("should mark TLSInputReady as True", func() { + th.ExpectCondition( + instanceHaName, + ConditionGetterFunc(InstanceHaConditionGetter), + 
condition.TLSInputReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should become fully ready when the deployment is ready", func() { + th.SimulateDeploymentReplicaReady(instanceHaName) + + th.ExpectCondition( + instanceHaName, + ConditionGetterFunc(InstanceHaConditionGetter), + condition.ReadyCondition, + corev1.ConditionTrue, + ) + }) + + It("should mount metrics TLS volumes and set env vars in the deployment", func() { + Eventually(func(g Gomega) { + dep := &appsv1.Deployment{} + g.Expect(k8sClient.Get(ctx, instanceHaName, dep)).Should(Succeed()) + + volumes := dep.Spec.Template.Spec.Volumes + var volumeNames []string + var found bool + for _, v := range volumes { + volumeNames = append(volumeNames, v.Name) + if v.Name == "metrics-certs-tls-certs" { + found = true + g.Expect(v.VolumeSource.Secret).ToNot(BeNil()) + g.Expect(v.VolumeSource.Secret.SecretName).To(Equal("cert-instanceha-metrics")) + break + } + } + g.Expect(found).To(BeTrue(), "metrics-certs-tls-certs volume not found in: %v", volumeNames) + + container := dep.Spec.Template.Spec.Containers[0] + + var certMountFound, keyMountFound bool + for _, vm := range container.VolumeMounts { + if vm.Name == "metrics-certs-tls-certs" && vm.MountPath == instanceha.MetricsCertPath { + certMountFound = true + } + if vm.Name == "metrics-certs-tls-certs" && vm.MountPath == instanceha.MetricsKeyPath { + keyMountFound = true + } + } + g.Expect(certMountFound).To(BeTrue(), "metrics cert volume mount not found") + g.Expect(keyMountFound).To(BeTrue(), "metrics key volume mount not found") + + var certEnvFound, keyEnvFound bool + for _, e := range container.Env { + if e.Name == "METRICS_TLS_CERT" && e.Value == instanceha.MetricsCertPath { + certEnvFound = true + } + if e.Name == "METRICS_TLS_KEY" && e.Value == instanceha.MetricsKeyPath { + keyEnvFound = true + } + } + g.Expect(certEnvFound).To(BeTrue(), "METRICS_TLS_CERT env var not found") + g.Expect(keyEnvFound).To(BeTrue(), "METRICS_TLS_KEY env var not found") + + 
g.Expect(container.LivenessProbe.HTTPGet.Scheme).To(Equal(corev1.URISchemeHTTPS)) + g.Expect(container.ReadinessProbe.HTTPGet.Scheme).To(Equal(corev1.URISchemeHTTPS)) + }, timeout, interval).Should(Succeed()) + }) + }) +}) diff --git a/test/functional/suite_test.go b/test/functional/suite_test.go index af99f482..824ddcaa 100644 --- a/test/functional/suite_test.go +++ b/test/functional/suite_test.go @@ -43,16 +43,19 @@ import ( topologyv1 "github.com/openstack-k8s-operators/infra-operator/apis/topology/v1beta1" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + instancehav1 "github.com/openstack-k8s-operators/infra-operator/apis/instanceha/v1beta1" memcachedv1 "github.com/openstack-k8s-operators/infra-operator/apis/memcached/v1beta1" networkv1 "github.com/openstack-k8s-operators/infra-operator/apis/network/v1beta1" rabbitmqv1 "github.com/openstack-k8s-operators/infra-operator/apis/rabbitmq/v1beta1" redisv1 "github.com/openstack-k8s-operators/infra-operator/apis/redis/v1beta1" + instanceha_ctrl "github.com/openstack-k8s-operators/infra-operator/internal/controller/instanceha" memcached_ctrl "github.com/openstack-k8s-operators/infra-operator/internal/controller/memcached" network_ctrl "github.com/openstack-k8s-operators/infra-operator/internal/controller/network" rabbitmq_ctrl "github.com/openstack-k8s-operators/infra-operator/internal/controller/rabbitmq" redis_ctrl "github.com/openstack-k8s-operators/infra-operator/internal/controller/redis" + webhookinstancehav1beta1 "github.com/openstack-k8s-operators/infra-operator/internal/webhook/instanceha/v1beta1" webhookmemcachedv1beta1 "github.com/openstack-k8s-operators/infra-operator/internal/webhook/memcached/v1beta1" webhooknetworkv1beta1 "github.com/openstack-k8s-operators/infra-operator/internal/webhook/network/v1beta1" webhookrabbitmqv1beta1 "github.com/openstack-k8s-operators/infra-operator/internal/webhook/rabbitmq/v1beta1" @@ -156,6 +159,8 @@ var _ = BeforeSuite(func() { 
Expect(err).NotTo(HaveOccurred()) err = rabbitmqv1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) + err = instancehav1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) err = memcachedv1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) err = redisv1.AddToScheme(scheme.Scheme) @@ -215,6 +220,8 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) err = webhooknetworkv1beta1.SetupDNSMasqWebhookWithManager(k8sManager) Expect(err).NotTo(HaveOccurred()) + err = webhookinstancehav1beta1.SetupInstanceHaWebhookWithManager(k8sManager) + Expect(err).NotTo(HaveOccurred()) err = webhookmemcachedv1beta1.SetupMemcachedWebhookWithManager(k8sManager) Expect(err).NotTo(HaveOccurred()) err = webhookredisv1beta1.SetupRedisWebhookWithManager(k8sManager) @@ -271,6 +278,13 @@ var _ = BeforeSuite(func() { }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) + err = (&instanceha_ctrl.Reconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + Kclient: kclient, + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + err = (&memcached_ctrl.Reconciler{ Client: k8sManager.GetClient(), Scheme: k8sManager.GetScheme(),