diff --git a/.ai/spec/how/project-structure.md b/.ai/spec/how/project-structure.md index 805cf531b..08c691f51 100644 --- a/.ai/spec/how/project-structure.md +++ b/.ai/spec/how/project-structure.md @@ -21,6 +21,9 @@ | `internal/controller/console/reconciler.go` | `ReconcileConsoleUIResources()`, `ReconcileConsoleUIDeploymentAndPlugin()`, `RemoveConsoleUI()` | Console UI Phase 1 + Phase 2 + cleanup | | `internal/controller/console/deployment.go` | `GenerateConsoleUIDeployment()` | Console UI deployment generation | | `internal/controller/console/assets.go` | ConsolePlugin CR generator, nginx config, service, network policy | Console UI resource generation | +| `internal/controller/alertsadapter/reconciler.go` | `ReconcileAlertsAdapterResources()`, `ReconcileAlertsAdapterDeployment()`, `RemoveAlertsAdapter()`, `RestartAlertsAdapter()` | Alerts adapter Phase 1 + Phase 2 + operand teardown (disable/finalizer) + rolling restart | +| `internal/controller/alertsadapter/deployment.go` | `GenerateDeployment()` | Alerts adapter deployment generation | +| `internal/controller/alertsadapter/assets.go` | SA, ClusterRole, ClusterRoleBinding, monitoring RoleBinding, NetworkPolicy generators | Alerts adapter resource generation | | `internal/controller/reconciler/interface.go` | `Reconciler` interface | Dependency injection interface for component packages | | `internal/controller/utils/constants.go` | ~200 constants | Resource names, ports, paths, annotation keys, defaults | | `internal/controller/utils/errors.go` | ~80 error message constants | Structured error messages for all operations | @@ -69,10 +72,14 @@ OLSConfigReconciler.Reconcile() +-- console.ReconcileConsoleUIResources() +-- postgres.ReconcilePostgresResources() +-- appserver.ReconcileAppServerResources() + +-- alertsadapter.ReconcileAlertsAdapterResources() + (opt-in via configMapRef; RemoveAlertsAdapter() when disabled; no ConfigMap validation; + mount at /etc/alerts-adapter when CM exists) 6. 
reconcileDeploymentsAndStatus() -- Phase 2: Deployments, Services, TLS certs, status +-- console.ReconcileConsoleUIDeploymentAndPlugin() +-- postgres.ReconcilePostgresDeployment() +-- appserver.ReconcileAppServerDeployment() + +-- alertsadapter.ReconcileAlertsAdapterDeployment() # when configMapRef set +-- checkDeploymentStatus() per deployment -> build newStatus +-- UpdateStatusCondition() ``` @@ -90,14 +97,14 @@ External secret/configmap changes -> Match against SystemResources list (by name+namespace) -> OR match against WatcherAnnotationKey annotation -> Resolve "ACTIVE_BACKEND" to appserver deployment name - -> Call RestartAppServer() / RestartPostgres() / RestartConsoleUI() + -> Call RestartAppServer() / RestartPostgres() / RestartConsoleUI() / RestartAlertsAdapter() -> Set force-reload annotation with current timestamp ``` ## Key Abstractions ### Image Management -Default images are stored in a `defaultImages` map in `cmd/main.go` keyed by logical name (e.g., `"lightspeed-service"`, `"postgres-image"`, `"console-plugin"`). Default values come from `internal/relatedimages/` which reads `related_images.json` at build time. Command-line flags override individual images. The map is passed to the reconciler via `OLSConfigReconcilerOptions` as individual named fields (e.g., `LightspeedServiceImage`, `ConsoleUIImage`). +Default images are stored in a `defaultImages` map in `cmd/main.go` keyed by logical name (e.g., `"lightspeed-service"`, `"postgres-image"`, `"console-plugin"`, `"alerts-adapter"`). Default values come from `internal/relatedimages/` which reads `related_images.json` at build time. Command-line flags override individual images. The map is passed to the reconciler via `OLSConfigReconcilerOptions` as individual named fields (e.g., `LightspeedServiceImage`, `ConsoleUIImage`, `AlertsAdapterImage`). ### WatcherConfig Declarative configuration for external resource watching. 
Contains: @@ -108,7 +115,7 @@ Declarative configuration for external resource watching. Contains: The special deployment name `"ACTIVE_BACKEND"` resolves to the AppServer deployment name (`lightspeed-app-server`). ### Component Package Pattern -Each component (appserver, postgres, console) follows the same package structure: +Each component (appserver, postgres, console, alertsadapter) follows the same package structure: - `reconciler.go`: Phase 1 (resources) and Phase 2 (deployment) entry points - `deployment.go`: Deployment spec generation and update detection - `assets.go` and/or `config.go`: Resource and config generation @@ -117,17 +124,18 @@ The packages receive `reconciler.Reconciler` interface, never import the control ### Reconciler Interface (`internal/controller/reconciler/interface.go`) Embeds `client.Client` and adds getter methods for: - `GetScheme()`, `GetLogger()`, `GetNamespace()` -- Image getters: `GetAppServerImage()`, `GetPostgresImage()`, `GetConsoleUIImage()`, `GetOpenShiftMCPServerImage()`, `GetDataverseExporterImage()` +- Image getters: `GetAppServerImage()`, `GetPostgresImage()`, `GetConsoleUIImage()`, `GetAlertsAdapterImage()`, `GetOpenShiftMCPServerImage()`, `GetDataverseExporterImage()` - Version getters: `GetOpenShiftMajor()`, `GetOpenshiftMinor()` - Config getters: `IsPrometheusAvailable()`, `GetWatcherConfig()` ### Finalizer Pattern The OLSConfig CR uses finalizer `ols.openshift.io/finalizer` (defined in `utils.OLSConfigFinalizer`). On deletion: 1. Remove Console UI (deactivate plugin, delete ConsolePlugin CR) -2. List all owned resources via owner references -3. Explicitly delete owned resources -4. Wait up to 3 minutes for deletion (poll every 5 seconds) -5. Remove finalizer (proceeds even if cleanup times out) +2. 
Remove alerts adapter operand resources (`alertsadapter.RemoveAlertsAdapter()`: deployment, namespaced RBAC, SA, NetworkPolicy, monitoring RoleBinding; proposals ClusterRole/ClusterRoleBinding when the platform permits delete) +3. List all owned resources via owner references +4. Explicitly delete owned resources +5. Wait up to 3 minutes for deletion (poll every 5 seconds) +6. Remove finalizer (proceeds even if cleanup times out) ## Integration Points diff --git a/.ai/spec/how/reconciliation.md b/.ai/spec/how/reconciliation.md index 088ce7dec..490ee8d7c 100644 --- a/.ai/spec/how/reconciliation.md +++ b/.ai/spec/how/reconciliation.md @@ -18,14 +18,18 @@ Reconcile(ctx, req) -> handleFinalizer() # Add/remove finalizer, run cleanup -> reconcileOperatorResources() # ServiceMonitor, NetworkPolicy (operator-level) -> annotateExternalResources() # Validate secrets, annotate for watching - -> reconcileIndependentResources() # Phase 1: console, postgres, backend resources + -> reconcileIndependentResources() # Phase 1: console, postgres, backend, alerts adapter resources | |-- console.ReconcileConsoleUIResources() | |-- postgres.ReconcilePostgresResources() - | +-- appserver.ReconcileAppServerResources() + | |-- appserver.ReconcileAppServerResources() + | +-- alertsadapter.ReconcileAlertsAdapterResources() + | (opt-in via configMapRef; RemoveAlertsAdapter() when disabled; no ConfigMap validation; + | mount at /etc/alerts-adapter when CM exists) -> reconcileDeploymentsAndStatus() # Phase 2: deployments + status update |-- console.ReconcileConsoleUIDeploymentAndPlugin() |-- postgres.ReconcilePostgresDeployment() |-- appserver.ReconcileAppServerDeployment() + |-- alertsadapter.ReconcileAlertsAdapterDeployment() # when configMapRef set |-- checkDeploymentStatus() for each # Collect diagnostics +-- UpdateStatusCondition() # Single status update ``` @@ -33,7 +37,7 @@ Reconcile(ctx, req) ## Key Abstractions ### Reconciler Interface -The `reconciler.Reconciler` interface breaks the 
circular dependency between the main controller and component packages. Component packages (appserver, postgres, console) receive this interface instead of importing the controller package directly. It embeds `client.Client` and adds getter methods for images, namespace, and OpenShift version. +The `reconciler.Reconciler` interface breaks the circular dependency between the main controller and component packages. Component packages (appserver, postgres, console, alertsadapter) receive this interface instead of importing the controller package directly. It embeds `client.Client` and adds getter methods for images, namespace, and OpenShift version. ### ReconcileSteps Pattern Both phases use a slice of `ReconcileSteps` structs, each containing a Name, reconcile function, and (for Phase 2) a ConditionType and Deployment name. Phase 1 iterates with continue-on-error; Phase 2 iterates but tracks all conditions and diagnostics. @@ -44,7 +48,7 @@ Two ownership models: 2. **External resources**: Watches() with custom predicates. Annotation-based filtering. Secret/ConfigMap handlers compare data and trigger deployment restarts. ### Finalizer Cleanup -The `finalizeOLSConfig()` method uses `listOwnedResources()` which queries every resource type by owner reference UID (not labels). This is more reliable than label-based cleanup. The wait loop polls with a fixed interval and timeout, using `wait.PollUntilContextTimeout`. +The `finalizeOLSConfig()` method removes Console UI, deletes alerts adapter operand resources via `alertsadapter.RemoveAlertsAdapter()` (deployment, namespaced RBAC, SA, NetworkPolicy, cross-namespace monitoring RoleBinding; proposals ClusterRole/ClusterRoleBinding when permitted—may remain on managed OpenShift if admission webhook blocks delete), then uses `listOwnedResources()` which queries every resource type by owner reference UID (not labels). This is more reliable than label-based cleanup. 
The wait loop polls with a fixed interval and timeout, using `wait.PollUntilContextTimeout`. ### Status Update Mechanics `UpdateStatusCondition()` uses `retry.RetryOnConflict` with `client.MergeFrom` patch. It preserves `LastTransitionTime` for conditions whose status hasn't changed. It re-fetches the CR before each update attempt to get the latest ResourceVersion. diff --git a/.ai/spec/what/bundle-composition.md b/.ai/spec/what/bundle-composition.md index e6eabdf34..effe07116 100644 --- a/.ai/spec/what/bundle-composition.md +++ b/.ai/spec/what/bundle-composition.md @@ -36,8 +36,8 @@ The lightspeed-operator OLM bundle installs both the lightspeed-operator control ### Agentic Operand Deployment -16. [PLANNED: OLS-3236] The lightspeed-operator deploys the agentic alerts adapter and the agentic console plugin as fully reconciled operands, with Phase 1/2 reconciliation, status conditions, health monitoring, and finalizer cleanup. The agentic-operator does not deploy these operands. -17. [PLANNED: OLS-3236] Agentic operand images default to `:main` tags until Konflux onboarding provides SHA-pinned productized images. CLI flags (`--alerts-adapter-image`, `--agentic-console-image`) on the lightspeed-operator deployment override the defaults. +16. The lightspeed-operator reconciles the agentic alerts adapter as a fully managed operand (OLS-3348, opt-in via `spec.ols.deployment.alertsAdapter.configMapRef`): Phase 1/2 reconciliation when enabled, `AlertsAdapterReady` status condition (`NotConfigured` when disabled), health monitoring, operand teardown on disable, ConfigMap watcher restarts, and finalizer cleanup via `RemoveAlertsAdapter()`. The agentic console plugin remains [PLANNED: OLS-3236]. +17. Agentic operand images default to `:main` tags until Konflux onboarding provides SHA-pinned productized images. The `--alerts-adapter-image` flag is implemented on the lightspeed-operator binary; wiring it into the CSV deployment spec is [PLANNED: OLS-3236]. 
The `--agentic-console-image` flag is [PLANNED: OLS-3236]. ## Configuration Surface @@ -48,7 +48,7 @@ The lightspeed-operator OLM bundle installs both the lightspeed-operator control | Agentic controller startup flags | CSV deployment spec args | Operand image overrides for the agentic controller | | Agentic controller `--sandbox-mode` | CSV deployment spec args | `bare-pod` (default) or `sandbox-claim` — selects sandbox provisioning strategy | | Agentic controller `--agentic-sandbox-image` | CSV deployment spec args | [PLANNED: OLS-3236] Sandbox container image (default: `:main` tag, overridable) | -| Lightspeed controller `--alerts-adapter-image` | CSV deployment spec args | [PLANNED: OLS-3236] Alerts adapter container image (default: `:main` tag) | +| Lightspeed controller `--alerts-adapter-image` | `cmd/main.go` flag (implemented); CSV deployment spec args [PLANNED: OLS-3236] | Alerts adapter container image (default: Konflux `:main` tag) | | Lightspeed controller `--agentic-console-image` | CSV deployment spec args | [PLANNED: OLS-3236] Agentic console plugin container image (default: `:main` tag) | ## Constraints @@ -61,4 +61,4 @@ The lightspeed-operator OLM bundle installs both the lightspeed-operator control | Ticket | Summary | |---|---| -| OLS-3236 | Migrate agentic console deployment from agentic-operator to lightspeed-operator. Add alerts-adapter as new operand. Add `--alerts-adapter-image` and `--agentic-console-image` flags to lightspeed-operator CSV deployment. Remove `--agentic-console-image` from agentic-operator CSV deployment. | +| OLS-3236 | Migrate agentic console deployment from agentic-operator to lightspeed-operator. Wire `--alerts-adapter-image` and `--agentic-console-image` into lightspeed-operator CSV deployment. Remove `--agentic-console-image` from agentic-operator CSV deployment. 
| diff --git a/.ai/spec/what/crd-api.md b/.ai/spec/what/crd-api.md index 2df275fdc..6c7d8a022 100644 --- a/.ai/spec/what/crd-api.md +++ b/.ai/spec/what/crd-api.md @@ -108,7 +108,9 @@ Field path (relative to `spec.ols.deployment`) | JSON key | Go type | Notes `mcpServer` | `mcpServer` | `ContainerConfig` | MCP server container. Resources only `console` | `console` | `Config` | Console container. Has replicas field but operator forces 1 `database` | `database` | `Config` | Database container. Has replicas field but operator forces 1 -`alertsAdapter` | `alertsAdapter` | `Config` | [PLANNED: OLS-3236] Agentic alerts adapter container. Replicas forced to 1 +`alertsAdapter` | `alertsAdapter` | `AlertsAdapterSpec` | Agentic alerts adapter deployment and user-managed runtime config reference. Replicas forced to 1 + +`AlertsAdapterSpec` embeds `Config` (deployment scheduling/resources) and optional `configMapRef` (`LocalObjectReference`). Setting `configMapRef` **enables** the alerts adapter operand. The referenced ConfigMap name is `configMapRef.name` (commonly `alerts-adapter-config`; see [adapter manifests](https://github.com/openshift/lightspeed-agentic-alerts-adapter/tree/main/manifests)). The operator does not create or validate ConfigMap content. When the ConfigMap exists, it is mounted at `/etc/alerts-adapter`; when absent, no config volume is mounted. The adapter reads `config.yaml` from that path and uses built-in defaults when the file is missing or invalid. `agenticConsole` | `agenticConsole` | `Config` | [PLANNED: OLS-3236] Agentic console plugin container. Replicas forced to 1 20. Replicas are only user-configurable for the API container (`spec.ols.deployment.api.replicas`). For console, database, alerts adapter, and agentic console, the operator always overrides replicas to 1 regardless of spec value. 
@@ -280,7 +282,7 @@ Condition types used by the operator: - `ApiReady` -- API server deployment health - `CacheReady` -- PostgreSQL cache deployment health - `ConsolePluginReady` -- Console UI plugin deployment health -- `AlertsAdapterReady` -- [PLANNED: OLS-3236] Agentic alerts adapter deployment health +- `AlertsAdapterReady` -- Agentic alerts adapter deployment health - `AgenticConsolePluginReady` -- [PLANNED: OLS-3236] Agentic console plugin deployment health - `ResourceReconciliation` -- Overall resource reconciliation status (set directly, not deployment-based) @@ -372,7 +374,8 @@ Path | Type | Default | Required | Validation | Description `spec.ols.deployment.database.nodeSelector` | `map[string]string` | -- | No | -- | Database node selector `spec.ols.deployment.database.affinity` | `*Affinity` | -- | No | -- | Database affinity `spec.ols.deployment.database.topologySpreadConstraints` | `[]TopologySpreadConstraint` | -- | No | -- | Database topology spread -`spec.ols.deployment.alertsAdapter` | `Config` | -- | No | -- | [PLANNED: OLS-3236] Alerts adapter deployment +`spec.ols.deployment.alertsAdapter` | `AlertsAdapterSpec` | -- | No | -- | Alerts adapter deployment and config reference +`spec.ols.deployment.alertsAdapter.configMapRef` | `LocalObjectReference` | (none) | No | -- | Opt-in switch and runtime config reference: ConfigMap name in operator namespace; mounted at `/etc/alerts-adapter` when present (adapter reads `config.yaml`) `spec.ols.deployment.alertsAdapter.replicas` | `*int32` | `1` | No | Min=0 | Alerts adapter replicas (operator forces 1) `spec.ols.deployment.alertsAdapter.resources` | `*ResourceRequirements` | -- | No | -- | Alerts adapter resources `spec.ols.deployment.alertsAdapter.tolerations` | `[]Toleration` | -- | No | -- | Alerts adapter tolerations diff --git a/.ai/spec/what/reconciliation.md b/.ai/spec/what/reconciliation.md index 3531be790..d2a91aa81 100644 --- a/.ai/spec/what/reconciliation.md +++ b/.ai/spec/what/reconciliation.md 
@@ -17,16 +17,17 @@ The operator reconciles the OLSConfig CR into Kubernetes resources through a two 8. Step 6 (Phase 2): Reconcile deployments and dependent resources -- Deployments, Services, TLS certificates, ServiceMonitors, PrometheusRules. After reconciliation, check deployment health and update CR status. ### Phase 1: Independent Resources -9. Five component groups are reconciled in Phase 1: Console UI, PostgreSQL, the application server, the agentic alerts adapter, and the agentic console plugin. +9. Four component groups are reconciled in Phase 1: Console UI, PostgreSQL, the application server, and (when enabled) the agentic alerts adapter. The agentic console plugin is [PLANNED: OLS-3236]. 10. All Phase 1 resource groups are independent and can be reconciled in any order. 11. If any Phase 1 resource fails, the operator continues reconciling the remaining resources, then reports all failures in the CR status with ResourceReconciliation conditions. -11a. Alerts adapter Phase 1 resources: ServiceAccount, ClusterRole (`agentic.openshift.io/proposals`: create, list, get), ClusterRoleBinding, RoleBinding in `openshift-monitoring` (binds SA to `monitoring-alertmanager-view`), NetworkPolicy. -11b. Agentic console Phase 1 resources: ServiceAccount, ConfigMap (nginx.conf), NetworkPolicy. +11a. Alerts adapter (OLS-3348) is **opt-in** via `spec.ols.deployment.alertsAdapter.configMapRef`. When unset, `ReconcileAlertsAdapterResources()` calls `RemoveAlertsAdapter()` to delete operator-managed operand resources (deployment, SA, namespaced RBAC, NetworkPolicy, monitoring RoleBinding; proposals ClusterRole/ClusterRoleBinding when the platform allows delete) and Phase 2 is skipped with `AlertsAdapterReady=True`, `Reason=NotConfigured`. +11b. 
When `configMapRef` is set, Phase 1 reconciles: ServiceAccount, ClusterRole (`agentic.openshift.io/proposals`: create, list, get), ClusterRoleBinding, legacy config Role/RoleBinding cleanup, RoleBinding in `openshift-monitoring` (binds SA to `monitoring-alertmanager-view`), NetworkPolicy. The operator does not create, update, or validate ConfigMap data. When the referenced ConfigMap exists, Phase 2 mounts it at `/etc/alerts-adapter`; when absent, no config volume is mounted. The adapter reads `config.yaml` and uses built-in defaults when the file is missing or invalid. +11c. Agentic console Phase 1 resources [PLANNED: OLS-3236]: ServiceAccount, ConfigMap (nginx.conf), NetworkPolicy. ### Phase 2: Deployments and Status -12. Five deployments are reconciled in Phase 2: Console UI (condition: ConsolePluginReady), PostgreSQL (condition: CacheReady), the active backend (condition: ApiReady), the agentic alerts adapter (condition: AlertsAdapterReady), and the agentic console plugin (condition: AgenticConsolePluginReady). -12a. Alerts adapter Phase 2: Deployment (1 replica, `ALERTMANAGER_URL` env hardcoded to `https://alertmanager-main.openshift-monitoring.svc:9094`). -12b. Agentic console Phase 2: Deployment (1 replica, nginx with TLS via service-ca cert), Service (port 9443, serving-cert annotation), ConsolePlugin CR, Console CR activation. +12. Deployments reconciled in Phase 2: Console UI (condition: `ConsolePluginReady`), PostgreSQL (condition: `CacheReady`), the active backend (condition: `ApiReady`), and (when `configMapRef` set) the agentic alerts adapter (condition: `AlertsAdapterReady`). The agentic console plugin (condition: `AgenticConsolePluginReady`) is [PLANNED: OLS-3236]. +12a. Alerts adapter Phase 2 (OLS-3348): Deployment (1 replica, `ALERTMANAGER_URL` env hardcoded to `https://alertmanager-main.openshift-monitoring.svc:9094`, `POD_NAMESPACE` via downward API). +12b. 
Agentic console Phase 2 [PLANNED: OLS-3236]: Deployment (1 replica, nginx with TLS via service-ca cert), Service (port 9443, serving-cert annotation), ConsolePlugin CR, Console CR activation. 13. After each deployment reconciliation, the operator checks the deployment's health status. 14. Deployment health has three states: Ready (Available condition true), Progressing (not yet available, no terminal failures), Failed (terminal pod failures detected). 15. Terminal pod failures include: CrashLoopBackOff, ImagePullBackOff, ErrImagePull, OOMKilled, and containers terminated with non-zero exit codes after CrashLoopBackOff. @@ -37,12 +38,12 @@ The operator reconciles the OLSConfig CR into Kubernetes resources through a two ### Finalizer Lifecycle 19. On CR creation: add finalizer, return immediately (controller-runtime auto-requeues). 20. On CR deletion: run finalizer cleanup before removing finalizer. -21. Finalizer cleanup sequence: remove Console UI from Console CR, delete ConsolePlugin CR, remove agentic console plugin from Console CR, delete agentic ConsolePlugin CR, delete alerts-adapter RoleBinding in `openshift-monitoring`, delete alerts-adapter ClusterRoleBinding, delete alerts-adapter ClusterRole, list all owned resources by owner reference, explicitly delete them, wait for deletion (polling with timeout). +21. Finalizer cleanup sequence: remove Console UI from Console CR, delete ConsolePlugin CR, remove agentic console plugin from Console CR [PLANNED: OLS-3236], delete agentic ConsolePlugin CR [PLANNED: OLS-3236], delete alerts adapter operand resources via `alertsadapter.RemoveAlertsAdapter()` (deployment, namespaced RBAC, SA, NetworkPolicy, monitoring RoleBinding; proposals ClusterRole/ClusterRoleBinding when permitted), list all owned resources by owner reference, explicitly delete them, wait for deletion (polling with timeout). 22. If cleanup times out, the finalizer is removed anyway to prevent the CR from being stuck in Terminating state. 23. 
Console UI and agentic component removal errors during finalization are logged but do not block finalization. ### Status Conditions -24. The operator sets these condition types: ApiReady, CacheReady, ConsolePluginReady, AlertsAdapterReady, AgenticConsolePluginReady, ResourceReconciliation. +24. The operator sets these condition types: `ApiReady`, `CacheReady`, `ConsolePluginReady`, `AlertsAdapterReady` (`NotConfigured` when `configMapRef` unset; does not block `OverallStatus=Ready`), `AgenticConsolePluginReady` [PLANNED: OLS-3236], `ResourceReconciliation`. 25. OverallStatus is Ready only when all deployment conditions are True. 26. OverallStatus is NotReady if any condition is False. 27. When deployments are not ready, diagnosticInfo is populated with per-pod failure details including container name, reason, message, exit code, and diagnostic type. @@ -67,4 +68,4 @@ Reconciliation behavior is not directly user-configurable. It is driven by the O | Ticket | Summary | |---|---| -| OLS-3236 | [PLANNED] Add alerts-adapter and agentic-console as reconciled operands with Phase 1/2 steps, status conditions (AlertsAdapterReady, AgenticConsolePluginReady), and finalizer cleanup for cross-namespace resources | +| OLS-3236 | [PLANNED] Add agentic-console as a reconciled operand with Phase 1/2 steps, `AgenticConsolePluginReady` status condition, and finalizer cleanup for Console CR / ConsolePlugin CR | diff --git a/.ai/spec/what/resource-lifecycle.md b/.ai/spec/what/resource-lifecycle.md index 734ee91b4..4e91d855e 100644 --- a/.ai/spec/what/resource-lifecycle.md +++ b/.ai/spec/what/resource-lifecycle.md @@ -18,7 +18,7 @@ The operator manages two categories of Kubernetes resources: owned resources (cr 7. System secrets: the telemetry pull secret (`openshift-config/pull-secret`), console UI service cert (`lightspeed-console-plugin-cert`), PostgreSQL certs (`lightspeed-postgres-certs`). 8. 
System configmaps: the OpenShift root CA (`kube-root-ca.crt`), the service CA bundle (`openshift-service-ca.crt`). 9. User-provided secrets: LLM provider credential secrets (`spec.llm.providers[].credentialsSecretRef`), custom TLS secret (`spec.ols.tlsConfig.keyCertSecretRef`), MCP server header secrets (`spec.mcpServers[].headers[].valueFrom.secretRef`). -10. User-provided configmaps: additional CA ConfigMap (`spec.ols.additionalCAConfigMapRef`), proxy CA ConfigMap (`spec.ols.proxyConfig.proxyCACertificate`). +10. User-provided configmaps: additional CA ConfigMap (`spec.ols.additionalCAConfigMapRef`), proxy CA ConfigMap (`spec.ols.proxyConfig.proxyCACertificate`), alerts adapter runtime config (`spec.ols.deployment.alertsAdapter.configMapRef`, when set). ### Annotation-Based Watching @@ -31,7 +31,7 @@ The operator manages two categories of Kubernetes resources: owned resources (cr 14. When a watched secret's `.data` changes (compared via `apiequality.Semantic.DeepEqual`), the `SecretUpdateHandler` triggers restarts of affected deployments directly, without triggering a full reconciliation. 15. When a watched configmap's `.data` or `.binaryData` changes, the `ConfigMapUpdateHandler` triggers restarts of affected deployments directly. 16. Each external resource has a list of affected deployments configured in `WatcherConfig`. The special value `ACTIVE_BACKEND` resolves to the application server deployment name (`lightspeed-app-server`). -17. Restarts are triggered by updating the `ols.openshift.io/force-reload` annotation on the deployment's pod template with the current timestamp (RFC3339Nano), causing a rolling update. +17. Restarts are triggered by updating the `ols.openshift.io/force-reload` annotation on the deployment's pod template with the current timestamp (RFC3339Nano), causing a rolling update. Alerts adapter runtime ConfigMap changes restart `lightspeed-agentic-alerts-adapter` via `RestartAlertsAdapter()`. 18. 
TLS secrets are mapped to affect both `lightspeed-console-plugin` and `ACTIVE_BACKEND` deployments. All other user-provided secrets default to `ACTIVE_BACKEND` only. ### Validation @@ -49,6 +49,7 @@ Resource lifecycle behavior is not directly user-configurable. External resource | `spec.ols.tlsConfig.keyCertSecretRef` | Custom TLS secret | | `spec.ols.additionalCAConfigMapRef` | Additional CA ConfigMap | | `spec.ols.proxyConfig.proxyCACertificate` | Proxy CA ConfigMap | +| `spec.ols.deployment.alertsAdapter.configMapRef` | Alerts adapter runtime ConfigMap (restarts `lightspeed-agentic-alerts-adapter` on data change) | | `spec.mcpServers[].headers[].valueFrom.secretRef` | MCP header secret | ## Constraints diff --git a/AGENTS.md b/AGENTS.md index 5db4639d3..90a1efd8c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -40,11 +40,20 @@ When updating the operator version for a release, you **MUST** update version nu OLSConfigReconciler.Reconcile() → ├── [Operator-level resources: ServiceMonitor, NetworkPolicy] ├── [Finalizer logic: handle CR deletion if DeletionTimestamp set] -├── reconcileLLMSecrets() -├── reconcileConsoleUI() -├── reconcilePostgresServer() -└── reconcileAppServer() (application server via `appserver` package) - └── [12+ sub-tasks via ReconcileTask pattern] +├── annotateExternalResources() (validate LLM/TLS credentials) +├── Phase 1 — reconcileIndependentResources() (continue-on-error) +│ ├── console.ReconcileConsoleUIResources() +│ ├── postgres.ReconcilePostgresResources() +│ ├── appserver.ReconcileAppServerResources() +│ └── alertsadapter.ReconcileAlertsAdapterResources() +│ (opt-in via configMapRef; RemoveAlertsAdapter() when unset; no ConfigMap validation— +│ mount at /etc/alerts-adapter when CM exists, adapter reads config.yaml) +└── Phase 2 — reconcileDeploymentsAndStatus() + ├── console.ReconcileConsoleUIDeploymentAndPlugin() → ConsolePluginReady + ├── postgres.ReconcilePostgresDeployment() → CacheReady + ├── appserver.ReconcileAppServerDeployment() → 
ApiReady + └── alertsadapter.ReconcileAlertsAdapterDeployment() → AlertsAdapterReady + (only when configMapRef set; else AlertsAdapterReady=True, Reason=NotConfigured) ``` ## Code Conventions @@ -76,6 +85,7 @@ make test-e2e # E2E tests (requires cluster) - `internal/controller/appserver/` - App server - `internal/controller/postgres/` - PostgreSQL - `internal/controller/console/` - Console UI +- `internal/controller/alertsadapter/` - Agentic alerts adapter (opt-in via `configMapRef`; mounts user CM at `/etc/alerts-adapter` when present; adapter validates config) - `internal/controller/watchers/` - External resource watching - `internal/controller/utils/` - Shared utilities, constants - `constants.go` - Includes `OLSConfigFinalizer` constant @@ -99,8 +109,8 @@ make test-e2e # E2E tests (requires cluster) ### Adding New Reconciliation Step - **App Server**: Add to `ReconcileTask` slice in `internal/controller/appserver/reconciler.go` -- **Top-Level**: Create package under `internal/controller/<component>/`, add to `olsconfig_controller.go` -- Add error constants to `internal/controller/utils/utils.go` +- **Top-Level**: Create package under `internal/controller/<component>/`, add to `olsconfig_controller.go` Phase 1 (`reconcileIndependentResources`) and Phase 2 (`reconcileDeploymentsAndStatus`) +- Add error constants to `internal/controller/utils/errors.go` - Write unit tests in co-located `*_test.go` files ## AI Assistant Skills diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 1d8564578..51497a5d9 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -4,7 +4,7 @@ This document describes the internal architecture of the OpenShift Lightspeed Op ## Overview -The operator follows a modular, component-based architecture where each major component (application server, PostgreSQL, Console UI) is managed by its own dedicated package with independent reconciliation logic. 
+The operator follows a modular, component-based architecture where each major component (application server, PostgreSQL, Console UI, alerts adapter) is managed by its own dedicated package with independent reconciliation logic. ## Key Design Decisions @@ -84,7 +84,21 @@ Provides clean contract between main controller and component packages: **Purpose:** Manages OpenShift Console plugin for web UI integration -**Entry Points:** `ReconcileConsoleUI()` (setup), `RemoveConsoleUI()` (cleanup when disabled) +**Entry Points:** `ReconcileConsoleUIResources()`, `ReconcileConsoleUIDeploymentAndPlugin()`, `RemoveConsoleUI()` (cleanup on deletion) + +### Alerts Adapter Package (`internal/controller/alertsadapter`) + +**Purpose:** Manages the agentic alerts adapter that polls Alertmanager and creates `Proposal` CRs for firing alerts. + +**Opt-in:** Enabled only when `spec.ols.deployment.alertsAdapter.configMapRef` is set (non-empty name). When unset, Phase 1 calls `RemoveAlertsAdapter()` to tear down operand resources and Phase 2 sets `AlertsAdapterReady=True` with `Reason=NotConfigured`. + +**Entry Points:** `ReconcileAlertsAdapterResources()` (Phase 1), `ReconcileAlertsAdapterDeployment()` (Phase 2), `RemoveAlertsAdapter()` (operand teardown on disable and during finalization), `RestartAlertsAdapter()` (rolling restart on deployment spec or runtime ConfigMap changes). + +**Phase 1 resources (when enabled):** ServiceAccount, ClusterRole/ClusterRoleBinding for `agentic.openshift.io/proposals`, legacy config Role/RoleBinding cleanup (removed from reconcile; deleted if still present), RoleBinding in `openshift-monitoring` to `monitoring-alertmanager-view`, NetworkPolicy. The operator does not create, update, or validate user ConfigMap data. + +**Runtime config:** User creates the ConfigMap (see [adapter manifests](https://github.com/openshift/lightspeed-agentic-alerts-adapter/tree/main/manifests)). 
When the referenced ConfigMap exists, it is mounted read-only at `/etc/alerts-adapter`; when absent, no config volume is mounted. The adapter reads `config.yaml` from that path and uses built-in defaults when the file is missing or invalid. ConfigMap data changes trigger a deployment restart via the external ConfigMap watcher (`RestartAlertsAdapter`). + +**Phase 2 resources:** Deployment (`lightspeed-agentic-alerts-adapter`, 1 replica) with `ALERTMANAGER_URL` and `POD_NAMESPACE` env vars, and conditional ConfigMap volume mount as above. Image from `--alerts-adapter-image` / `GetAlertsAdapterImage()`. ### Utilities Package (`internal/controller/utils`) @@ -121,12 +135,17 @@ High-level reconciliation sequence: 2. Check if CR is being deleted → run finalizer cleanup if needed 3. Add finalizer if not present 4. Validate OLSConfig CR exists -5. Reconcile LLM Secrets (validate credentials) -6. Reconcile Components: - - Console UI (if enabled) - - PostgreSQL (if conversation cache enabled) - - Application server (`appserver` package) -7. Update Status Conditions based on deployment readiness +5. Annotate external resources and validate LLM/TLS credentials +6. Phase 1 — reconcile independent resources (continue-on-error): + - Console UI (ServiceAccount, ConfigMap, NetworkPolicy, …) + - PostgreSQL (ConfigMap, Secrets, NetworkPolicy, ServiceAccount, …) + - Application server (ConfigMap, RBAC, NetworkPolicy, ServiceAccount, …) + - Alerts adapter (when `configMapRef` set: SA, ClusterRole/CRB, legacy config RB cleanup, monitoring RoleBinding, NetworkPolicy; when unset: `RemoveAlertsAdapter()`) +7. 
Phase 2 — reconcile deployments and update status (fail-fast per step): + - Console UI → `ConsolePluginReady` + - PostgreSQL → `CacheReady` + - Application server → `ApiReady` + - Alerts adapter → `AlertsAdapterReady` (set to `True` with `Reason=NotConfigured` when `configMapRef` is unset) ``` ### Finalizer Pattern @@ -135,6 +154,7 @@ The operator uses a finalizer (`ols.openshift.io/finalizer`) to ensure proper cl **Why Needed:** - **Console UI cleanup**: ConsolePlugin is cluster-scoped and not cascade-deleted by owner references +- **Alerts adapter cleanup**: RoleBinding in `openshift-monitoring` is outside the operator namespace; `RemoveAlertsAdapter()` also deletes deployment, namespaced RBAC, SA, and NetworkPolicy when the operand is disabled or during finalization. Proposals ClusterRole/ClusterRoleBinding deletion may be blocked on managed OpenShift clusters (admission webhook); the operator logs and continues. - **PVC cleanup**: PersistentVolumeClaims can block deletion if not properly released - **Race condition prevention**: Ensures complete cleanup before CR can be recreated (important for tests and sequential deployments) @@ -161,8 +181,10 @@ if !olsconfig.DeletionTimestamp.IsZero() { **Cleanup Sequence** (`finalizeOLSConfig`): 1. **Remove Console UI**: Deactivate plugin from Console CR, delete ConsolePlugin CR -2. **Wait for owned resources**: Poll for up to 3 minutes until deployments, services, PVCs are deleted (cascade deletion) -3. **Remove finalizer**: Allows Kubernetes to remove CR from etcd +2. **Remove alerts adapter operand**: Delete deployment, namespaced RBAC, SA, NetworkPolicy, monitoring RoleBinding, and attempt proposals ClusterRoleBinding/ClusterRole deletion (`alertsadapter.RemoveAlertsAdapter()`). ClusterRoleBinding deletion may be blocked on managed OpenShift; remaining cluster RBAC is harmless when the operand is disabled. +3. **List and delete owned resources**: Explicitly delete children tracked by owner references +4.
**Wait for owned resources**: Poll for up to 3 minutes until deployments, services, PVCs are deleted +5. **Remove finalizer**: Allows Kubernetes to remove CR from etcd **Error Handling:** - Cleanup errors are logged but don't block finalizer removal diff --git a/Makefile b/Makefile index 7943debc2..a6322aa63 100644 --- a/Makefile +++ b/Makefile @@ -252,9 +252,9 @@ dev-teardown: uninstall ## Teardown local development environment (removes RBAC, @echo "✅ Development environment cleaned up." .PHONY: run -run: dev-setup manifests generate fmt vet ## Run a controller from your host (auto-setup RBAC if needed). +run: dev-setup manifests generate fmt vet ## Run a controller from your host (auto-setup RBAC if needed). Optional: make run ARGS="--alerts-adapter-image=..." @echo "🔧 Running controller locally - using default images from constants" - LOCAL_DEV_MODE=true go run ./cmd/main.go + LOCAL_DEV_MODE=true go run ./cmd/main.go $(ARGS) # If you wish built the manager image targeting other platforms you can use the --platform flag. # (i.e. docker build --platform linux/arm64 ). However, you must enable docker buildKit for it. diff --git a/api/v1alpha1/olsconfig_types.go b/api/v1alpha1/olsconfig_types.go index 936b27434..55f780737 100644 --- a/api/v1alpha1/olsconfig_types.go +++ b/api/v1alpha1/olsconfig_types.go @@ -78,7 +78,7 @@ type OLSConfigStatus struct { // PodDiagnostic describes a pod-level issue type PodDiagnostic struct { // FailedComponent identifies which component this diagnostic relates to, - // using the same type as the Conditions field (e.g., "ApiReady", "CacheReady") + // using the same type as the Conditions field (e.g., "ApiReady", "CacheReady", "AlertsAdapterReady") // This allows easy correlation between condition status and diagnostic details. FailedComponent string `json:"failedComponent"` @@ -348,12 +348,28 @@ type DeploymentConfig struct { // Database container settings. 
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Database Deployment" DatabaseContainer Config `json:"database,omitempty"` + // Alerts adapter deployment and runtime config reference. + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Alerts Adapter" + AlertsAdapter AlertsAdapterSpec `json:"alertsAdapter,omitempty"` +} + +// AlertsAdapterSpec defines deployment settings and a reference to user-managed adapter runtime config. +type AlertsAdapterSpec struct { + Config `json:",inline"` + // ConfigMapRef enables the alerts adapter when set and references a user-managed ConfigMap + // in the operator namespace. When unset, reconciliation is skipped and managed operand + // resources are removed. The operator does not create or validate ConfigMap data. When the + // referenced ConfigMap exists, it is mounted read-only at /etc/alerts-adapter; when absent, + // no config volume is mounted. The adapter reads config.yaml from that path and uses + // built-in defaults when the file is missing or invalid. + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Alerts Adapter ConfigMap Reference" + ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"` } // Config defines pod configuration using standard Kubernetes types type Config struct { // Defines the number of desired OLS pods. Default: "1" - // Note: Replicas can only be changed for APIContainer. For PostgreSQL and Console containers, + // Note: Replicas can only be changed for APIContainer. For PostgreSQL, Console, and Alerts Adapter containers, // the number of replicas will always be set to 1. // +kubebuilder:default=1 // +kubebuilder:validation:Minimum=0 diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index a49d08d50..cbd2a813a 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -21,11 +21,33 @@ limitations under the License. 
package v1alpha1 import ( + configv1 "github.com/openshift/api/config/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AlertsAdapterSpec) DeepCopyInto(out *AlertsAdapterSpec) { + *out = *in + in.Config.DeepCopyInto(&out.Config) + if in.ConfigMapRef != nil { + in, out := &in.ConfigMapRef, &out.ConfigMapRef + *out = new(corev1.LocalObjectReference) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AlertsAdapterSpec. +func (in *AlertsAdapterSpec) DeepCopy() *AlertsAdapterSpec { + if in == nil { + return nil + } + out := new(AlertsAdapterSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Config) DeepCopyInto(out *Config) { *out = *in @@ -36,7 +58,8 @@ func (in *Config) DeepCopyInto(out *Config) { } if in.Resources != nil { in, out := &in.Resources, &out.Resources - *out = (*in).DeepCopy() + *out = new(corev1.ResourceRequirements) + (*in).DeepCopyInto(*out) } if in.Tolerations != nil { in, out := &in.Tolerations, &out.Tolerations @@ -54,7 +77,8 @@ func (in *Config) DeepCopyInto(out *Config) { } if in.Affinity != nil { in, out := &in.Affinity, &out.Affinity - *out = (*in).DeepCopy() + *out = new(corev1.Affinity) + (*in).DeepCopyInto(*out) } if in.TopologySpreadConstraints != nil { in, out := &in.TopologySpreadConstraints, &out.TopologySpreadConstraints @@ -80,7 +104,8 @@ func (in *ContainerConfig) DeepCopyInto(out *ContainerConfig) { *out = *in if in.Resources != nil { in, out := &in.Resources, &out.Resources - *out = (*in).DeepCopy() + *out = new(corev1.ResourceRequirements) + (*in).DeepCopyInto(*out) } } @@ -118,6 +143,7 @@ func (in *DeploymentConfig) DeepCopyInto(out *DeploymentConfig) { 
in.MCPServerContainer.DeepCopyInto(&out.MCPServerContainer) in.ConsoleContainer.DeepCopyInto(&out.ConsoleContainer) in.DatabaseContainer.DeepCopyInto(&out.DatabaseContainer) + in.AlertsAdapter.DeepCopyInto(&out.AlertsAdapter) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentConfig. @@ -188,7 +214,8 @@ func (in *MCPHeaderValueSource) DeepCopyInto(out *MCPHeaderValueSource) { *out = *in if in.SecretRef != nil { in, out := &in.SecretRef, &out.SecretRef - *out = (*in).DeepCopy() + *out = new(corev1.LocalObjectReference) + **out = **in } } @@ -421,11 +448,13 @@ func (in *OLSSpec) DeepCopyInto(out *OLSSpec) { } if in.AdditionalCAConfigMapRef != nil { in, out := &in.AdditionalCAConfigMapRef, &out.AdditionalCAConfigMapRef - *out = (*in).DeepCopy() + *out = new(corev1.LocalObjectReference) + **out = **in } if in.TLSSecurityProfile != nil { in, out := &in.TLSSecurityProfile, &out.TLSSecurityProfile - *out = (*in).DeepCopy() + *out = new(configv1.TLSSecurityProfile) + (*in).DeepCopyInto(*out) } if in.IntrospectionEnabled != nil { in, out := &in.IntrospectionEnabled, &out.IntrospectionEnabled @@ -460,9 +489,7 @@ func (in *OLSSpec) DeepCopyInto(out *OLSSpec) { if in.ImagePullSecrets != nil { in, out := &in.ImagePullSecrets, &out.ImagePullSecrets *out = make([]corev1.LocalObjectReference, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } + copy(*out, *in) } if in.ToolFilteringConfig != nil { in, out := &in.ToolFilteringConfig, &out.ToolFilteringConfig @@ -525,7 +552,7 @@ func (in *PostgresSpec) DeepCopy() *PostgresSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ProviderSpec) DeepCopyInto(out *ProviderSpec) { *out = *in - in.CredentialsSecretRef.DeepCopyInto(&out.CredentialsSecretRef) + out.CredentialsSecretRef = in.CredentialsSecretRef if in.Models != nil { in, out := &in.Models, &out.Models *out = make([]ModelSpec, len(*in)) @@ -543,7 +570,8 @@ func (in *ProviderSpec) DeepCopyInto(out *ProviderSpec) { } if in.TLSSecurityProfile != nil { in, out := &in.TLSSecurityProfile, &out.TLSSecurityProfile - *out = (*in).DeepCopy() + *out = new(configv1.TLSSecurityProfile) + (*in).DeepCopyInto(*out) } } @@ -560,7 +588,7 @@ func (in *ProviderSpec) DeepCopy() *ProviderSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ProxyCACertConfigMapRef) DeepCopyInto(out *ProxyCACertConfigMapRef) { *out = *in - in.LocalObjectReference.DeepCopyInto(&out.LocalObjectReference) + out.LocalObjectReference = in.LocalObjectReference } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxyCACertConfigMapRef. @@ -662,7 +690,7 @@ func (in *Storage) DeepCopy() *Storage { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TLSConfig) DeepCopyInto(out *TLSConfig) { *out = *in - in.KeyCertSecretRef.DeepCopyInto(&out.KeyCertSecretRef) + out.KeyCertSecretRef = in.KeyCertSecretRef } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TLSConfig. diff --git a/cmd/main.go b/cmd/main.go index 994589c55..43bd1ca55 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -34,6 +34,7 @@ limitations under the License. 
// - secure-metrics-server: Enable mTLS for metrics server // - service-image: Override default lightspeed-service image // - console-image: Override default console plugin image +// - alerts-adapter-image: Override default agentic alerts adapter image // - postgres-image: Override default PostgreSQL image // - openshift-mcp-server-image: Override default MCP server image // - namespace: Operator namespace (defaults to WATCH_NAMESPACE env var or "openshift-lightspeed") @@ -63,6 +64,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" openshiftv1 "github.com/openshift/api/operator/v1" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -95,6 +97,7 @@ var ( "lightspeed-service": utils.OLSAppServerImageDefault, "postgres-image": utils.PostgresServerImageDefault, "console-plugin": utils.ConsoleUIImageDefault, + "alerts-adapter": utils.AlertsAdapterImageDefault, "openshift-mcp-server-image": utils.OpenShiftMCPServerImageDefault, "dataverse-exporter-image": utils.DataverseExporterImageDefault, "ocp-rag-image": utils.OcpRagImageDefault, @@ -115,7 +118,7 @@ func init() { // overrideImages overrides the default images with the images provided by the user. // If an image is not provided, the default is used. 
-func overrideImages(serviceImage string, consoleImage string, postgresImage string, openshiftMCPServerImage string, dataverseExporterImage string, ocpRagImage string) map[string]string { +func overrideImages(serviceImage string, consoleImage string, alertsAdapterImage string, postgresImage string, openshiftMCPServerImage string, dataverseExporterImage string, ocpRagImage string) map[string]string { res := defaultImages if serviceImage != "" { res["lightspeed-service"] = serviceImage @@ -123,6 +126,9 @@ func overrideImages(serviceImage string, consoleImage string, postgresImage stri if consoleImage != "" { res["console-plugin"] = consoleImage } + if alertsAdapterImage != "" { + res["alerts-adapter"] = alertsAdapterImage + } if postgresImage != "" { res["postgres-image"] = postgresImage } @@ -162,6 +168,7 @@ func main() { var metricsClientCA string var serviceImage string var consoleImage string + var alertsAdapterImage string var namespace string var postgresImage string var openshiftMCPServerImage string @@ -179,6 +186,7 @@ func main() { flag.StringVar(&caCertPath, "ca-cert", utils.OperatorCACertPathDefault, "The path to the CA certificate file.") flag.StringVar(&serviceImage, "service-image", utils.OLSAppServerImageDefault, "The image of the lightspeed-service container.") flag.StringVar(&consoleImage, "console-image", utils.ConsoleUIImageDefault, "The image of the console-plugin container.") + flag.StringVar(&alertsAdapterImage, "alerts-adapter-image", utils.AlertsAdapterImageDefault, "The image of the agentic alerts adapter container.") flag.StringVar(&namespace, "namespace", "", "The namespace where the operator is deployed.") flag.StringVar(&postgresImage, "postgres-image", utils.PostgresServerImageDefault, "The image of the PostgreSQL server.") flag.StringVar(&openshiftMCPServerImage, "openshift-mcp-server-image", utils.OpenShiftMCPServerImageDefault, "The image of the OpenShift MCP server container.") @@ -196,7 +204,7 @@ func main() { namespace = 
getWatchNamespace() } - imagesMap := overrideImages(serviceImage, consoleImage, postgresImage, openshiftMCPServerImage, dataverseExporterImage, ocpRagImage) + imagesMap := overrideImages(serviceImage, consoleImage, alertsAdapterImage, postgresImage, openshiftMCPServerImage, dataverseExporterImage, ocpRagImage) setupLog.Info("Images setting loaded", "images", listImages()) setupLog.Info("Starting the operator", "metricsAddr", metricsAddr, "probeAddr", probeAddr, "certDir", certDir, "certName", certName, "keyName", keyName, "namespace", namespace) @@ -289,6 +297,12 @@ func main() { utils.TelemetryPullSecretNamespace: {}, }, }, + &rbacv1.RoleBinding{}: { + Namespaces: map[string]cache.Config{ + namespace: {}, + utils.OpenShiftMonitoringNamespace: {}, + }, + }, }, }, }) @@ -389,6 +403,7 @@ func main() { OpenShiftMajor: major, OpenshiftMinor: minor, ConsoleUIImage: imagesMap["console-plugin"], + AlertsAdapterImage: imagesMap["alerts-adapter"], LightspeedServiceImage: imagesMap["lightspeed-service"], LightspeedServicePostgresImage: imagesMap["postgres-image"], OpenShiftMCPServerImage: imagesMap["openshift-mcp-server-image"], diff --git a/config/crd/bases/ols.openshift.io_olsconfigs.yaml b/config/crd/bases/ols.openshift.io_olsconfigs.yaml index addd1fcf9..3abe3b73e 100644 --- a/config/crd/bases/ols.openshift.io_olsconfigs.yaml +++ b/config/crd/bases/ols.openshift.io_olsconfigs.yaml @@ -477,6 +477,1254 @@ spec: deployment: description: OLS deployment settings properties: + alertsAdapter: + description: Alerts adapter deployment and runtime config + reference. + properties: + affinity: + description: |- + Affinity rules (can be added without API version bump) + Uses standard corev1.Affinity + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. 
+ properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. 
A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. 
If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. 
+ properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. 
+ Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. 
If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. 
+ If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key <topologyKey> matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch.
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. 
+ The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. 
+ properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. 
Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. 
+ type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. 
+ format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. 
If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. 
+ The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + configMapRef: + description: |- + ConfigMapRef enables the alerts adapter when set and references a user-managed ConfigMap + in the operator namespace (data key: config.yaml). When unset, reconciliation is skipped + and managed operand resources are removed. When set but the ConfigMap does not exist, + the adapter uses built-in defaults. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + nodeSelector: + additionalProperties: + type: string + description: Node selector constraints + type: object + replicas: + default: 1 + description: |- + Defines the number of desired OLS pods. Default: "1" + Note: Replicas can only be changed for APIContainer. 
For PostgreSQL, Console, and Alerts Adapter containers, + the number of replicas will always be set to 1. + format: int32 + minimum: 0 + type: integer + resources: + description: |- + Resource requirements (CPU, memory) + Uses standard corev1.ResourceRequirements + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This field depends on the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry + in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. 
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: |- + Tolerations for pod scheduling + Uses standard corev1.Toleration + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. 
+ type: string + type: object + type: array + topologySpreadConstraints: + description: |- + Topology spread constraints (can be added without API version bump) + Uses standard corev1.TopologySpreadConstraint + items: + description: TopologySpreadConstraint specifies how + to spread matching pods among the given topology. + properties: + labelSelector: + description: |- + LabelSelector is used to find matching pods. + Pods that match this label selector are counted to determine the number of pods + in their corresponding topology domain. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that + the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select the pods over which + spreading will be calculated. The keys are used to lookup values from the + incoming pod labels, those key-value labels are ANDed with labelSelector + to select the group of existing pods over which spreading will be calculated + for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. + MatchLabelKeys cannot be set when LabelSelector isn't set. + Keys that don't exist in the incoming pod labels will + be ignored. A null or empty list means only match against labelSelector. + + This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + maxSkew: + description: |- + MaxSkew describes the degree to which pods may be unevenly distributed. + When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference + between the number of matching pods in the target topology and the global minimum. + The global minimum is the minimum number of matching pods in an eligible domain + or zero if the number of eligible domains is less than MinDomains. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 2/2/1: + In this case, the global minimum is 1. + | zone1 | zone2 | zone3 | + | P P | P P | P | + - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; + scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) + violate MaxSkew(1). + - if MaxSkew is 2, incoming pod can be scheduled onto any zone. + When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence + to topologies that satisfy it. + It's a required field. Default value is 1 and 0 is not allowed. 
+ format: int32 + type: integer + minDomains: + description: |- + MinDomains indicates a minimum number of eligible domains. + When the number of eligible domains with matching topology keys is less than minDomains, + Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. + And when the number of eligible domains with matching topology keys equals or greater than minDomains, + this value has no effect on scheduling. + As a result, when the number of eligible domains is less than minDomains, + scheduler won't schedule more than maxSkew Pods to those domains. + If value is nil, the constraint behaves as if MinDomains is equal to 1. + Valid values are integers greater than 0. + When value is not nil, WhenUnsatisfiable must be DoNotSchedule. + + For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same + labelSelector spread as 2/2/2: + | zone1 | zone2 | zone3 | + | P P | P P | P P | + The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0. + In this situation, new pod with the same labelSelector cannot be scheduled, + because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, + it will violate MaxSkew. + format: int32 + type: integer + nodeAffinityPolicy: + description: |- + NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector + when calculating pod topology spread skew. Options are: + - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. + - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. + + If this value is nil, the behavior is equivalent to the Honor policy. + type: string + nodeTaintsPolicy: + description: |- + NodeTaintsPolicy indicates how we will treat node taints when calculating + pod topology spread skew. 
Options are: + - Honor: nodes without taints, along with tainted nodes for which the incoming pod + has a toleration, are included. + - Ignore: node taints are ignored. All nodes are included. + + If this value is nil, the behavior is equivalent to the Ignore policy. + type: string + topologyKey: + description: |- + TopologyKey is the key of node labels. Nodes that have a label with this key + and identical values are considered to be in the same topology. + We consider each as a "bucket", and try to put balanced number + of pods into each bucket. + We define a domain as a particular instance of a topology. + Also, we define an eligible domain as a domain whose nodes meet the requirements of + nodeAffinityPolicy and nodeTaintsPolicy. + e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology. + And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology. + It's a required field. + type: string + whenUnsatisfiable: + description: |- + WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy + the spread constraint. + - DoNotSchedule (default) tells the scheduler not to schedule it. + - ScheduleAnyway tells the scheduler to schedule the pod in any location, + but giving higher precedence to topologies that would help reduce the + skew. + A constraint is considered "Unsatisfiable" for an incoming pod + if and only if every possible node assignment for that pod would violate + "MaxSkew" on some topology. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 3/1/1: + | zone1 | zone2 | zone3 | + | P P P | P | P | + If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled + to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies + MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler + won't make it *more* imbalanced. + It's a required field. 
+ type: string + required: + - maxSkew + - topologyKey + - whenUnsatisfiable + type: object + type: array + type: object api: description: API container settings. properties: @@ -1419,7 +2667,7 @@ spec: default: 1 description: |- Defines the number of desired OLS pods. Default: "1" - Note: Replicas can only be changed for APIContainer. For PostgreSQL and Console containers, + Note: Replicas can only be changed for APIContainer. For PostgreSQL, Console, and Alerts Adapter containers, the number of replicas will always be set to 1. format: int32 minimum: 0 @@ -2648,7 +3896,7 @@ spec: default: 1 description: |- Defines the number of desired OLS pods. Default: "1" - Note: Replicas can only be changed for APIContainer. For PostgreSQL and Console containers, + Note: Replicas can only be changed for APIContainer. For PostgreSQL, Console, and Alerts Adapter containers, the number of replicas will always be set to 1. format: int32 minimum: 0 @@ -3943,7 +5191,7 @@ spec: default: 1 description: |- Defines the number of desired OLS pods. Default: "1" - Note: Replicas can only be changed for APIContainer. For PostgreSQL and Console containers, + Note: Replicas can only be changed for APIContainer. For PostgreSQL, Console, and Alerts Adapter containers, the number of replicas will always be set to 1. format: int32 minimum: 0 @@ -4806,7 +6054,7 @@ spec: failedComponent: description: |- FailedComponent identifies which component this diagnostic relates to, - using the same type as the Conditions field (e.g., "ApiReady", "CacheReady") + using the same type as the Conditions field (e.g., "ApiReady", "CacheReady", "AlertsAdapterReady") This allows easy correlation between condition status and diagnostic details. 
type: string lastUpdated: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 9d7358ef0..ed12086d3 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -206,3 +206,22 @@ rules: - roles verbs: - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: manager-role + namespace: openshift-monitoring +rules: +- apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/internal/controller/alertsadapter/assets.go b/internal/controller/alertsadapter/assets.go new file mode 100644 index 000000000..da35a2d9c --- /dev/null +++ b/internal/controller/alertsadapter/assets.go @@ -0,0 +1,150 @@ +package alertsadapter + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + olsv1alpha1 "github.com/openshift/lightspeed-operator/api/v1alpha1" + "github.com/openshift/lightspeed-operator/internal/controller/reconciler" + "github.com/openshift/lightspeed-operator/internal/controller/utils" +) + +// GenerateServiceAccount generates the alerts adapter ServiceAccount. +func GenerateServiceAccount(r reconciler.Reconciler, cr *olsv1alpha1.OLSConfig) (*corev1.ServiceAccount, error) { + return utils.GenerateServiceAccount(r, cr, utils.AlertsAdapterServiceAccountName) +} + +// GenerateProposalsClusterRole generates the ClusterRole granting proposal create/list/get access. 
+func GenerateProposalsClusterRole(r reconciler.Reconciler, cr *olsv1alpha1.OLSConfig) (*rbacv1.ClusterRole, error) { + role := rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterProposalsClusterRoleName, + Labels: utils.GenerateAlertsAdapterSelectorLabels(), + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{"agentic.openshift.io"}, + Resources: []string{"proposals"}, + Verbs: []string{"create", "list", "get"}, + }, + }, + } + + if err := controllerutil.SetControllerReference(cr, &role, r.GetScheme()); err != nil { + return nil, err + } + + return &role, nil +} + +// GenerateProposalsClusterRoleBinding binds the alerts adapter ServiceAccount to the proposals ClusterRole. +func GenerateProposalsClusterRoleBinding(r reconciler.Reconciler, cr *olsv1alpha1.OLSConfig) (*rbacv1.ClusterRoleBinding, error) { + rb := rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterProposalsClusterRoleBindingName, + Labels: utils.GenerateAlertsAdapterSelectorLabels(), + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: utils.AlertsAdapterServiceAccountName, + Namespace: r.GetNamespace(), + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: utils.AlertsAdapterProposalsClusterRoleName, + }, + } + + if err := controllerutil.SetControllerReference(cr, &rb, r.GetScheme()); err != nil { + return nil, err + } + + return &rb, nil +} + +// GenerateAlertmanagerRoleBinding grants the adapter view access to Alertmanager in openshift-monitoring. 
+func GenerateAlertmanagerRoleBinding(r reconciler.Reconciler, cr *olsv1alpha1.OLSConfig) (*rbacv1.RoleBinding, error) { + rb := rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterAlertmanagerRoleBindingName, + Namespace: utils.OpenShiftMonitoringNamespace, + Labels: utils.GenerateAlertsAdapterSelectorLabels(), + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: utils.AlertsAdapterServiceAccountName, + Namespace: r.GetNamespace(), + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "Role", + Name: utils.MonitoringAlertmanagerViewRoleName, + }, + } + + if err := controllerutil.SetControllerReference(cr, &rb, r.GetScheme()); err != nil { + return nil, err + } + + return &rb, nil +} + +// GenerateNetworkPolicy generates the NetworkPolicy that denies all ingress to adapter pods; egress is not restricted. +func GenerateNetworkPolicy(r reconciler.Reconciler, cr *olsv1alpha1.OLSConfig) (*networkingv1.NetworkPolicy, error) { + np := networkingv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterNetworkPolicyName, + Namespace: r.GetNamespace(), + Labels: utils.GenerateAlertsAdapterSelectorLabels(), + }, + Spec: networkingv1.NetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: utils.GenerateAlertsAdapterSelectorLabels(), + }, + Ingress: []networkingv1.NetworkPolicyIngressRule{}, + Egress: []networkingv1.NetworkPolicyEgressRule{}, + PolicyTypes: []networkingv1.PolicyType{ + networkingv1.PolicyTypeIngress, + }, + }, + } + + if err := controllerutil.SetControllerReference(cr, &np, r.GetScheme()); err != nil { + return nil, err + } + + return &np, nil +} + +// getUserConfigMap loads the referenced ConfigMap when alerts adapter is enabled. +// Returns (nil, nil) when the ConfigMap does not exist.
+func getUserConfigMap(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) (*corev1.ConfigMap, error) { + name, ok := utils.AlertsAdapterConfigMapRef(cr) + if !ok { + return nil, fmt.Errorf("%s: alerts adapter configMapRef is not set", utils.ErrGetAlertsAdapterConfigMap) + } + + cm := &corev1.ConfigMap{} + err := r.Get(ctx, client.ObjectKey{Name: name, Namespace: r.GetNamespace()}, cm) + if err != nil { + if errors.IsNotFound(err) { + return nil, nil + } + return nil, fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterConfigMap, err) + } + + return cm, nil +} diff --git a/internal/controller/alertsadapter/assets_test.go b/internal/controller/alertsadapter/assets_test.go new file mode 100644 index 000000000..08f910390 --- /dev/null +++ b/internal/controller/alertsadapter/assets_test.go @@ -0,0 +1,133 @@ +package alertsadapter + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + olsv1alpha1 "github.com/openshift/lightspeed-operator/api/v1alpha1" + "github.com/openshift/lightspeed-operator/internal/controller/utils" +) + +func crWithAlertsAdapterConfigMapRef() *olsv1alpha1.OLSConfig { + crWithRef := cr.DeepCopy() + crWithRef.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef = &corev1.LocalObjectReference{ + Name: utils.AlertsAdapterConfigMapName, + } + return crWithRef +} + +var _ = Describe("Alerts adapter assets", func() { + It("should generate the service account", func() { + sa, err := GenerateServiceAccount(testReconcilerInstance, cr) + Expect(err).NotTo(HaveOccurred()) + Expect(sa.Name).To(Equal(utils.AlertsAdapterServiceAccountName)) + Expect(sa.Namespace).To(Equal(utils.OLSNamespaceDefault)) + }) + + It("should generate the proposals ClusterRole", func() { + role, err := GenerateProposalsClusterRole(testReconcilerInstance, cr) + Expect(err).NotTo(HaveOccurred()) + 
Expect(role.Name).To(Equal(utils.AlertsAdapterProposalsClusterRoleName)) + Expect(role.Rules).To(ContainElement(rbacv1.PolicyRule{ + APIGroups: []string{"agentic.openshift.io"}, + Resources: []string{"proposals"}, + Verbs: []string{"create", "list", "get"}, + })) + }) + + It("should generate the Alertmanager RoleBinding in openshift-monitoring", func() { + rb, err := GenerateAlertmanagerRoleBinding(testReconcilerInstance, cr) + Expect(err).NotTo(HaveOccurred()) + Expect(rb.Name).To(Equal(utils.AlertsAdapterAlertmanagerRoleBindingName)) + Expect(rb.Namespace).To(Equal(utils.OpenShiftMonitoringNamespace)) + Expect(rb.RoleRef.Name).To(Equal(utils.MonitoringAlertmanagerViewRoleName)) + }) + + It("should generate the deployment without a config volume when configMapRef is unset", func() { + deployment, err := GenerateDeployment(testReconcilerInstance, ctx, cr) + Expect(err).NotTo(HaveOccurred()) + Expect(deployment.Name).To(Equal(utils.AlertsAdapterDeploymentName)) + Expect(deployment.Spec.Template.Spec.ServiceAccountName).To(Equal(utils.AlertsAdapterServiceAccountName)) + Expect(deployment.Spec.Template.Spec.Containers).To(HaveLen(1)) + Expect(deployment.Spec.Template.Spec.Containers[0].Name).To(Equal(utils.AlertsAdapterContainerName)) + Expect(deployment.Spec.Template.Spec.Containers[0].Image).To(Equal(utils.AlertsAdapterImageDefault)) + Expect(deployment.Spec.Template.Spec.Volumes).To(HaveLen(1)) + Expect(deployment.Spec.Template.Spec.Volumes[0].Name).To(Equal(utils.TmpVolumeName)) + }) + + It("should mount the referenced ConfigMap when it exists", func() { + crWithRef := crWithAlertsAdapterConfigMapRef() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterConfigMapName, + Namespace: utils.OLSNamespaceDefault, + }, + Data: map[string]string{ + utils.AlertsAdapterConfigMapDataKey: "pollInterval: 30s\n", + }, + } + Expect(k8sClient.Create(ctx, cm)).To(Succeed()) + defer func() { + Expect(k8sClient.Delete(ctx, cm)).To(Succeed()) + }() 
+ + deployment, err := GenerateDeployment(testReconcilerInstance, ctx, crWithRef) + Expect(err).NotTo(HaveOccurred()) + Expect(deployment.Spec.Template.Spec.Volumes).To(HaveLen(2)) + Expect(deployment.Spec.Template.Spec.Volumes[1].ConfigMap.Name).To(Equal(utils.AlertsAdapterConfigMapName)) + Expect(deployment.Spec.Template.Spec.Containers[0].VolumeMounts).To(ContainElement(corev1.VolumeMount{ + Name: utils.AlertsAdapterConfigVolumeName, + MountPath: utils.AlertsAdapterConfigVolumeMountPath, + ReadOnly: true, + })) + }) + + It("should not mount a config volume when configMapRef is set but the ConfigMap is missing", func() { + const missingConfigName = "missing-alerts-adapter-config" + crWithRef := cr.DeepCopy() + crWithRef.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef = &corev1.LocalObjectReference{ + Name: missingConfigName, + } + + deployment, err := GenerateDeployment(testReconcilerInstance, ctx, crWithRef) + Expect(err).NotTo(HaveOccurred()) + Expect(deployment.Spec.Template.Spec.Volumes).To(HaveLen(1)) + Expect(deployment.Spec.Template.Spec.Volumes[0].Name).To(Equal(utils.TmpVolumeName)) + }) + + It("should mount the referenced ConfigMap even when config.yaml is absent", func() { + crWithRef := crWithAlertsAdapterConfigMapRef() + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterConfigMapName, + Namespace: utils.OLSNamespaceDefault, + }, + Data: map[string]string{ + "other.yaml": "pollInterval: 30s\n", + }, + } + Expect(k8sClient.Create(ctx, cm)).To(Succeed()) + defer func() { + Expect(k8sClient.Delete(ctx, cm)).To(Succeed()) + }() + + deployment, err := GenerateDeployment(testReconcilerInstance, ctx, crWithRef) + Expect(err).NotTo(HaveOccurred()) + Expect(deployment.Spec.Template.Spec.Volumes).To(HaveLen(2)) + Expect(deployment.Spec.Template.Spec.Volumes[1].ConfigMap.Name).To(Equal(utils.AlertsAdapterConfigMapName)) + }) + + It("does not enable the adapter when configMapRef is unset", func() { + _, ok := 
utils.AlertsAdapterConfigMapRef(cr) + Expect(ok).To(BeFalse()) + }) + + It("enables the adapter when configMapRef is set", func() { + name, ok := utils.AlertsAdapterConfigMapRef(crWithAlertsAdapterConfigMapRef()) + Expect(ok).To(BeTrue()) + Expect(name).To(Equal(utils.AlertsAdapterConfigMapName)) + }) +}) diff --git a/internal/controller/alertsadapter/deployment.go b/internal/controller/alertsadapter/deployment.go new file mode 100644 index 000000000..649ccf78e --- /dev/null +++ b/internal/controller/alertsadapter/deployment.go @@ -0,0 +1,137 @@ +package alertsadapter + +import ( + "context" + "fmt" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + olsv1alpha1 "github.com/openshift/lightspeed-operator/api/v1alpha1" + "github.com/openshift/lightspeed-operator/internal/controller/reconciler" + "github.com/openshift/lightspeed-operator/internal/controller/utils" +) + +func getAlertsAdapterResources(cr *olsv1alpha1.OLSConfig) *corev1.ResourceRequirements { + return utils.GetResourcesOrDefault( + cr.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.Resources, + &corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("50Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + ) +} + +// GenerateDeployment generates the alerts adapter Deployment. +// When the referenced user ConfigMap exists, it is mounted read-only at /etc/alerts-adapter. +// If configMapRef is set but the ConfigMap is absent, no config volume is mounted and the adapter uses defaults. 
+func GenerateDeployment(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) (*appsv1.Deployment, error) { + runAsNonRoot := true + resources := getAlertsAdapterResources(cr) + + volumes := []corev1.Volume{ + { + Name: utils.TmpVolumeName, + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + } + volumeMounts := []corev1.VolumeMount{ + { + Name: utils.TmpVolumeName, + MountPath: utils.TmpVolumeMountPath, + }, + } + + if _, ok := utils.AlertsAdapterConfigMapRef(cr); ok { + cm, err := getUserConfigMap(r, ctx, cr) + if err != nil { + return nil, fmt.Errorf("%s: %w", utils.ErrGenerateAlertsAdapterDeployment, err) + } + if cm != nil { + volumes = append(volumes, corev1.Volume{ + Name: utils.AlertsAdapterConfigVolumeName, + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{Name: cm.Name}, + }, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: utils.AlertsAdapterConfigVolumeName, + MountPath: utils.AlertsAdapterConfigVolumeMountPath, + ReadOnly: true, + }) + } + } + + deployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterDeploymentName, + Namespace: r.GetNamespace(), + Labels: utils.GenerateAlertsAdapterSelectorLabels(), + }, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: utils.GenerateAlertsAdapterSelectorLabels(), + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: utils.GenerateAlertsAdapterSelectorLabels(), + }, + Spec: corev1.PodSpec{ + ServiceAccountName: utils.AlertsAdapterServiceAccountName, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &runAsNonRoot, + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + Containers: []corev1.Container{ + { + Name: utils.AlertsAdapterContainerName, + Image: r.GetAlertsAdapterImage(), + 
ImagePullPolicy: corev1.PullAlways, + SecurityContext: utils.RestrictedContainerSecurityContext(), + Resources: *resources, + Env: append(utils.GetProxyEnvVars(), + corev1.EnvVar{ + Name: utils.AlertsAdapterAlertmanagerURLEnvVar, + Value: utils.AlertsAdapterAlertmanagerURL, + }, + corev1.EnvVar{ + Name: "POD_NAMESPACE", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "metadata.namespace", + }, + }, + }, + ), + VolumeMounts: volumeMounts, + }, + }, + Volumes: volumes, + }, + }, + }, + } + + utils.ApplyPodDeploymentConfig(deployment, cr.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.Config, false) + + if err := controllerutil.SetControllerReference(cr, deployment, r.GetScheme()); err != nil { + return nil, fmt.Errorf("%s: %w", utils.ErrSetAlertsAdapterDeploymentOwnerReference, err) + } + + return deployment, nil +} diff --git a/internal/controller/alertsadapter/reconciler.go b/internal/controller/alertsadapter/reconciler.go new file mode 100644 index 000000000..1e667c121 --- /dev/null +++ b/internal/controller/alertsadapter/reconciler.go @@ -0,0 +1,488 @@ +// Package alertsadapter reconciles the agentic alerts adapter operand that polls +// Alertmanager and creates Proposal CRs for firing alerts. +package alertsadapter + +import ( + "context" + "fmt" + "strings" + "time" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" + + olsv1alpha1 "github.com/openshift/lightspeed-operator/api/v1alpha1" + "github.com/openshift/lightspeed-operator/internal/controller/reconciler" + "github.com/openshift/lightspeed-operator/internal/controller/utils" +) + +// ReconcileAlertsAdapterResources reconciles Phase 1 alerts adapter resources. +// When configMapRef is unset, operand resources are removed instead. 
+func ReconcileAlertsAdapterResources(r reconciler.Reconciler, ctx context.Context, olsconfig *olsv1alpha1.OLSConfig) error { + if _, ok := utils.AlertsAdapterConfigMapRef(olsconfig); !ok { + r.GetLogger().Info("alerts adapter disabled; removing operand resources") + return RemoveAlertsAdapter(r, ctx) + } + + r.GetLogger().Info("reconcileAlertsAdapterResources starts") + + tasks := []utils.ReconcileTask{ + {Name: "reconcile alerts adapter ServiceAccount", Task: reconcileServiceAccount}, + {Name: "reconcile alerts adapter proposals ClusterRole", Task: reconcileProposalsClusterRole}, + {Name: "reconcile alerts adapter proposals ClusterRoleBinding", Task: reconcileProposalsClusterRoleBinding}, + {Name: "remove legacy alerts adapter config RoleBinding", Task: removeLegacyConfigRoleBinding}, + {Name: "remove legacy alerts adapter config Role", Task: removeLegacyConfigRole}, + {Name: "reconcile alerts adapter Alertmanager RoleBinding", Task: reconcileAlertmanagerRoleBinding}, + {Name: "reconcile alerts adapter NetworkPolicy", Task: reconcileNetworkPolicy}, + } + + failedTasks := make(map[string]error) + for _, task := range tasks { + if err := task.Task(r, ctx, olsconfig); err != nil { + r.GetLogger().Error(err, "reconcileAlertsAdapterResources error", "task", task.Name) + failedTasks[task.Name] = err + } + } + + if len(failedTasks) > 0 { + taskNames := make([]string, 0, len(failedTasks)) + for taskName := range failedTasks { + taskNames = append(taskNames, taskName) + } + return fmt.Errorf("failed tasks: %v", taskNames) + } + + r.GetLogger().Info("reconcileAlertsAdapterResources completes") + return nil +} + +// ReconcileAlertsAdapterDeployment reconciles the alerts adapter Deployment (Phase 2). 
+func ReconcileAlertsAdapterDeployment(r reconciler.Reconciler, ctx context.Context, olsconfig *olsv1alpha1.OLSConfig) error { + r.GetLogger().Info("reconcileAlertsAdapterDeployment starts") + + if err := reconcileDeployment(r, ctx, olsconfig); err != nil { + r.GetLogger().Error(err, "reconcileAlertsAdapterDeployment error") + return fmt.Errorf("failed to reconcile alerts adapter deployment: %w", err) + } + + r.GetLogger().Info("reconcileAlertsAdapterDeployment completes") + return nil +} + +// RemoveAlertsAdapter deletes all operator-managed alerts adapter resources when the operand +// is disabled (configMapRef unset) or during OLSConfig finalization. +func RemoveAlertsAdapter(r reconciler.Reconciler, ctx context.Context) error { + tasks := []utils.DeleteTask{ + {Name: "delete alerts adapter deployment", Task: deleteDeployment}, + {Name: "delete alerts adapter network policy", Task: deleteNetworkPolicy}, + {Name: "delete alerts adapter config RoleBinding", Task: deleteConfigRoleBinding}, + {Name: "delete alerts adapter config Role", Task: deleteConfigRole}, + {Name: "delete alerts adapter service account", Task: deleteServiceAccount}, + {Name: "delete alerts adapter Alertmanager RoleBinding", Task: deleteAlertmanagerRoleBinding}, + {Name: "delete alerts adapter proposals cluster RBAC", Task: deleteProposalsClusterRBAC}, + } + + for _, task := range tasks { + if err := task.Task(r, ctx); err != nil { + r.GetLogger().Error(err, "RemoveAlertsAdapter error", "task", task.Name) + return fmt.Errorf("failed to %s: %w", task.Name, err) + } + } + + r.GetLogger().Info("RemoveAlertsAdapter completed") + return nil +} + +func reconcileServiceAccount(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + sa, err := GenerateServiceAccount(r, cr) + if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGenerateAlertsAdapterServiceAccount, err) + } + + foundSA := &corev1.ServiceAccount{} + err = r.Get(ctx, client.ObjectKey{Name: 
utils.AlertsAdapterServiceAccountName, Namespace: r.GetNamespace()}, foundSA) + if err != nil && errors.IsNotFound(err) { + r.GetLogger().Info("creating alerts adapter service account", "serviceAccount", sa.Name) + if err := r.Create(ctx, sa); err != nil { + return fmt.Errorf("%s: %w", utils.ErrCreateAlertsAdapterServiceAccount, err) + } + return nil + } else if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterServiceAccount, err) + } + + r.GetLogger().Info("alerts adapter service account reconciled", "serviceAccount", sa.Name) + return nil +} + +func reconcileProposalsClusterRole(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + role, err := GenerateProposalsClusterRole(r, cr) + if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGenerateAlertsAdapterProposalsClusterRole, err) + } + + foundRole := &rbacv1.ClusterRole{} + err = r.Get(ctx, client.ObjectKey{Name: role.Name}, foundRole) + if err != nil && errors.IsNotFound(err) { + r.GetLogger().Info("creating alerts adapter proposals cluster role", "ClusterRole", role.Name) + if err := r.Create(ctx, role); err != nil { + return fmt.Errorf("%s: %w", utils.ErrCreateAlertsAdapterProposalsClusterRole, err) + } + return nil + } else if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterProposalsClusterRole, err) + } + + r.GetLogger().Info("alerts adapter proposals cluster role reconciled", "ClusterRole", role.Name) + return nil +} + +func reconcileProposalsClusterRoleBinding(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + rb, err := GenerateProposalsClusterRoleBinding(r, cr) + if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGenerateAlertsAdapterProposalsClusterRoleBinding, err) + } + + foundRB := &rbacv1.ClusterRoleBinding{} + err = r.Get(ctx, client.ObjectKey{Name: rb.Name}, foundRB) + if err != nil && errors.IsNotFound(err) { + r.GetLogger().Info("creating alerts adapter proposals cluster role binding", 
"ClusterRoleBinding", rb.Name) + if err := r.Create(ctx, rb); err != nil { + return fmt.Errorf("%s: %w", utils.ErrCreateAlertsAdapterProposalsClusterRoleBinding, err) + } + return nil + } else if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterProposalsClusterRoleBinding, err) + } + + r.GetLogger().Info("alerts adapter proposals cluster role binding reconciled", "ClusterRoleBinding", rb.Name) + return nil +} + +func reconcileAlertmanagerRoleBinding(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + rb, err := GenerateAlertmanagerRoleBinding(r, cr) + if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGenerateAlertsAdapterAlertmanagerRoleBinding, err) + } + + foundRB := &rbacv1.RoleBinding{} + err = r.Get(ctx, client.ObjectKey{Name: rb.Name, Namespace: rb.Namespace}, foundRB) + if err != nil && errors.IsNotFound(err) { + r.GetLogger().Info("creating alerts adapter Alertmanager role binding", "RoleBinding", rb.Name, "namespace", rb.Namespace) + if err := r.Create(ctx, rb); err != nil { + return fmt.Errorf("%s: %w", utils.ErrCreateAlertsAdapterAlertmanagerRoleBinding, err) + } + return nil + } else if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterAlertmanagerRoleBinding, err) + } + + r.GetLogger().Info("alerts adapter Alertmanager role binding reconciled", "RoleBinding", rb.Name, "namespace", rb.Namespace) + return nil +} + +func reconcileNetworkPolicy(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + np, err := GenerateNetworkPolicy(r, cr) + if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGenerateAlertsAdapterNetworkPolicy, err) + } + + foundNP := &networkingv1.NetworkPolicy{} + err = r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterNetworkPolicyName, Namespace: r.GetNamespace()}, foundNP) + if err != nil && errors.IsNotFound(err) { + r.GetLogger().Info("creating alerts adapter network policy", "networkpolicy", np.Name) + if err := r.Create(ctx, 
np); err != nil { + return fmt.Errorf("%s: %w", utils.ErrCreateAlertsAdapterNetworkPolicy, err) + } + return nil + } else if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterNetworkPolicy, err) + } + + if utils.NetworkPolicyEqual(np, foundNP) { + r.GetLogger().Info("alerts adapter network policy unchanged, reconciliation skipped", "networkpolicy", np.Name) + return nil + } + + foundNP.Labels = np.Labels + foundNP.Spec = np.Spec + if err := r.Update(ctx, foundNP); err != nil { + return fmt.Errorf("%s: %w", utils.ErrUpdateAlertsAdapterNetworkPolicy, err) + } + + r.GetLogger().Info("alerts adapter network policy reconciled", "networkpolicy", np.Name) + return nil +} + +func reconcileDeployment(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + desiredDeployment, err := GenerateDeployment(r, ctx, cr) + if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGenerateAlertsAdapterDeployment, err) + } + + existingDeployment := &appsv1.Deployment{} + err = r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterDeploymentName, Namespace: r.GetNamespace()}, existingDeployment) + if err != nil && errors.IsNotFound(err) { + r.GetLogger().Info("creating alerts adapter deployment", "deployment", desiredDeployment.Name) + if err := r.Create(ctx, desiredDeployment); err != nil { + return fmt.Errorf("%s: %w", utils.ErrCreateAlertsAdapterDeployment, err) + } + return nil + } else if err != nil { + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterDeployment, err) + } + + utils.SetDefaults_Deployment(desiredDeployment) + if utils.DeploymentSpecEqual(&existingDeployment.Spec, &desiredDeployment.Spec, true) { + r.GetLogger().Info("alerts adapter deployment unchanged, reconciliation skipped", "deployment", desiredDeployment.Name) + return nil + } + + existingDeployment.Spec = desiredDeployment.Spec + r.GetLogger().Info("updating alerts adapter deployment", "deployment", existingDeployment.Name) + if err := RestartAlertsAdapter(r, ctx, 
existingDeployment); err != nil { + return fmt.Errorf("%s: %w", utils.ErrUpdateAlertsAdapterDeployment, err) + } + + r.GetLogger().Info("alerts adapter deployment reconciled", "deployment", desiredDeployment.Name) + return nil +} + +func deleteDeployment(r reconciler.Reconciler, ctx context.Context) error { + dep := &appsv1.Deployment{} + err := r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterDeploymentName, Namespace: r.GetNamespace()}, dep) + if err != nil { + if errors.IsNotFound(err) { + r.GetLogger().Info("alerts adapter deployment not found, skip deletion") + return nil + } + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterDeployment, err) + } + + if err := r.Delete(ctx, dep); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete alerts adapter deployment: %w", err) + } + + r.GetLogger().Info("alerts adapter deployment deleted") + return nil +} + +func deleteNetworkPolicy(r reconciler.Reconciler, ctx context.Context) error { + np := &networkingv1.NetworkPolicy{} + err := r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterNetworkPolicyName, Namespace: r.GetNamespace()}, np) + if err != nil { + if errors.IsNotFound(err) { + r.GetLogger().Info("alerts adapter network policy not found, skip deletion") + return nil + } + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterNetworkPolicy, err) + } + + if err := r.Delete(ctx, np); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete alerts adapter network policy: %w", err) + } + + r.GetLogger().Info("alerts adapter network policy deleted") + return nil +} + +func removeLegacyConfigRoleBinding(r reconciler.Reconciler, ctx context.Context, _ *olsv1alpha1.OLSConfig) error { + return deleteConfigRoleBinding(r, ctx) +} + +func removeLegacyConfigRole(r reconciler.Reconciler, ctx context.Context, _ *olsv1alpha1.OLSConfig) error { + return deleteConfigRole(r, ctx) +} + +func deleteConfigRoleBinding(r reconciler.Reconciler, 
ctx context.Context) error { + rb := &rbacv1.RoleBinding{} + err := r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterConfigRoleBindingName, Namespace: r.GetNamespace()}, rb) + if err != nil { + if errors.IsNotFound(err) { + r.GetLogger().Info("alerts adapter config RoleBinding not found, skip deletion") + return nil + } + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterConfigRoleBinding, err) + } + + if err := r.Delete(ctx, rb); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete alerts adapter config RoleBinding: %w", err) + } + + r.GetLogger().Info("alerts adapter config RoleBinding deleted") + return nil +} + +func deleteConfigRole(r reconciler.Reconciler, ctx context.Context) error { + role := &rbacv1.Role{} + err := r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterConfigRoleName, Namespace: r.GetNamespace()}, role) + if err != nil { + if errors.IsNotFound(err) { + r.GetLogger().Info("alerts adapter config Role not found, skip deletion") + return nil + } + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterConfigRole, err) + } + + if err := r.Delete(ctx, role); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete alerts adapter config Role: %w", err) + } + + r.GetLogger().Info("alerts adapter config Role deleted") + return nil +} + +func deleteServiceAccount(r reconciler.Reconciler, ctx context.Context) error { + sa := &corev1.ServiceAccount{} + err := r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterServiceAccountName, Namespace: r.GetNamespace()}, sa) + if err != nil { + if errors.IsNotFound(err) { + r.GetLogger().Info("alerts adapter service account not found, skip deletion") + return nil + } + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterServiceAccount, err) + } + + if err := r.Delete(ctx, sa); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete alerts adapter service account: %w", err) + } + + 
r.GetLogger().Info("alerts adapter service account deleted") + return nil +} + +func deleteAlertmanagerRoleBinding(r reconciler.Reconciler, ctx context.Context) error { + rb := &rbacv1.RoleBinding{} + err := r.Get(ctx, client.ObjectKey{ + Name: utils.AlertsAdapterAlertmanagerRoleBindingName, + Namespace: utils.OpenShiftMonitoringNamespace, + }, rb) + if err != nil { + if errors.IsNotFound(err) { + r.GetLogger().Info("alerts adapter Alertmanager RoleBinding not found, skip deletion") + return nil + } + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterAlertmanagerRoleBinding, err) + } + + if err := r.Delete(ctx, rb); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete alerts adapter Alertmanager RoleBinding: %w", err) + } + + r.GetLogger().Info("alerts adapter Alertmanager RoleBinding deleted") + return nil +} + +// isOpenShiftManagedCRBDeleteDenied reports whether err is a Forbidden rejection from the OpenShift admission webhook +// that blocks deleting ClusterRoleBindings whose subjects are ServiceAccounts in openshift-* namespaces.
+func isOpenShiftManagedCRBDeleteDenied(err error) bool { + if err == nil || !errors.IsForbidden(err) { + return false + } + msg := err.Error() + return strings.Contains(msg, "clusterrolebindings-validation.managed.openshift.io") || + (strings.Contains(msg, "Deleting ClusterRoleBinding") && strings.Contains(msg, "is not allowed")) +} + +func deleteProposalsClusterRBAC(r reconciler.Reconciler, ctx context.Context) error { + rb := &rbacv1.ClusterRoleBinding{} + err := r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterProposalsClusterRoleBindingName}, rb) + if err != nil { + if errors.IsNotFound(err) { + r.GetLogger().Info("alerts adapter proposals ClusterRoleBinding not found, skip deletion") + return deleteProposalsClusterRole(r, ctx) + } + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterProposalsClusterRoleBinding, err) + } + + if err := r.Delete(ctx, rb); err != nil { + if errors.IsNotFound(err) { + return deleteProposalsClusterRole(r, ctx) + } + if isOpenShiftManagedCRBDeleteDenied(err) { + r.GetLogger().Info( + "alerts adapter proposals ClusterRoleBinding deletion blocked by OpenShift; leaving cluster RBAC in place", + "ClusterRoleBinding", rb.Name, + ) + return nil + } + return fmt.Errorf("failed to delete alerts adapter proposals ClusterRoleBinding: %w", err) + } + + r.GetLogger().Info("alerts adapter proposals ClusterRoleBinding deleted") + return deleteProposalsClusterRole(r, ctx) +} + +func deleteProposalsClusterRole(r reconciler.Reconciler, ctx context.Context) error { + role := &rbacv1.ClusterRole{} + err := r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterProposalsClusterRoleName}, role) + if err != nil { + if errors.IsNotFound(err) { + r.GetLogger().Info("alerts adapter proposals ClusterRole not found, skip deletion") + return nil + } + return fmt.Errorf("%s: %w", utils.ErrGetAlertsAdapterProposalsClusterRole, err) + } + + if err := r.Delete(ctx, role); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed 
to delete alerts adapter proposals ClusterRole: %w", err) + } + + r.GetLogger().Info("alerts adapter proposals ClusterRole deleted") + return nil +} + +// RestartAlertsAdapter triggers a rolling restart of the alerts adapter deployment by updating +// its pod template annotation. Used when the user-managed runtime ConfigMap changes. +func RestartAlertsAdapter(r reconciler.Reconciler, ctx context.Context, deployment ...*appsv1.Deployment) error { + var dep *appsv1.Deployment + var err error + + if len(deployment) > 0 && deployment[0] != nil { + dep = deployment[0] + } else { + dep = &appsv1.Deployment{} + err = r.Get(ctx, client.ObjectKey{Name: utils.AlertsAdapterDeploymentName, Namespace: r.GetNamespace()}, dep) + if err != nil { + return fmt.Errorf("failed to get deployment %s: %w", utils.AlertsAdapterDeploymentName, err) + } + } + + if dep.Spec.Template.Annotations == nil { + dep.Spec.Template.Annotations = make(map[string]string) + } + + dep.Spec.Template.Annotations[utils.ForceReloadAnnotationKey] = time.Now().Format(time.RFC3339Nano) + + r.GetLogger().Info("triggering alerts adapter rolling restart", "deployment", dep.Name) + if err := r.Update(ctx, dep); err != nil { + return fmt.Errorf("failed to update deployment %s: %w", dep.Name, err) + } + + return nil +} diff --git a/internal/controller/alertsadapter/reconciler_test.go b/internal/controller/alertsadapter/reconciler_test.go new file mode 100644 index 000000000..1a85c7c2a --- /dev/null +++ b/internal/controller/alertsadapter/reconciler_test.go @@ -0,0 +1,284 @@ +package alertsadapter + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + olsv1alpha1 "github.com/openshift/lightspeed-operator/api/v1alpha1" + "github.com/openshift/lightspeed-operator/internal/controller/utils" +) + +func expectOwnedByOLSConfig(obj metav1.Object) { + olsConfig := &olsv1alpha1.OLSConfig{} + Expect(k8sClient.Get(ctx, crNamespacedName, olsConfig)).To(Succeed()) + + var ownerRef *metav1.OwnerReference + for i := range obj.GetOwnerReferences() { + ref := &obj.GetOwnerReferences()[i] + if ref.APIVersion == utils.OLSConfigAPIVersion && + ref.Kind == utils.OLSConfigKind && + ref.Name == olsConfig.Name { + ownerRef = ref + break + } + } + Expect(ownerRef).NotTo(BeNil(), "expected %T %s to be owned by OLSConfig", obj, obj.GetName()) + Expect(ownerRef.UID).To(Equal(olsConfig.UID)) +} + +var _ = Describe("Alerts adapter reconciler", Ordered, func() { + It("detects OpenShift managed ClusterRoleBinding delete denial", func() { + webhookErr := apierrors.NewForbidden( + schema.GroupResource{Group: "rbac.authorization.k8s.io", Resource: "clusterrolebindings"}, + "lightspeed-agentic-alerts-adapter-proposals", + fmt.Errorf(`admission webhook "clusterrolebindings-validation.managed.openshift.io" denied the request: Deleting ClusterRoleBinding lightspeed-agentic-alerts-adapter-proposals is not allowed`), + ) + Expect(isOpenShiftManagedCRBDeleteDenied(webhookErr)).To(BeTrue()) + Expect(isOpenShiftManagedCRBDeleteDenied(fmt.Errorf("some other error"))).To(BeFalse()) + }) + + It("does not create resources when configMapRef is unset", func() { + err := ReconcileAlertsAdapterResources(testReconcilerInstance, ctx, cr) + Expect(err).NotTo(HaveOccurred()) + + sa := 
&corev1.ServiceAccount{} + err = k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterServiceAccountName, + Namespace: utils.OLSNamespaceDefault, + }, sa) + Expect(err).To(HaveOccurred()) + }) + + It("RemoveAlertsAdapter is idempotent when operand resources are absent", func() { + err := RemoveAlertsAdapter(testReconcilerInstance, ctx) + Expect(err).NotTo(HaveOccurred()) + }) + + Context("ConfigMap without operator validation", func() { + It("reconciles Phase 1 when the referenced ConfigMap exists without config.yaml", func() { + enabledCR := cr.DeepCopy() + enabledCR.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef = &corev1.LocalObjectReference{ + Name: utils.AlertsAdapterConfigMapName, + } + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterConfigMapName, + Namespace: utils.OLSNamespaceDefault, + }, + Data: map[string]string{"other.yaml": "pollInterval: 30s\n"}, + } + Expect(k8sClient.Create(ctx, cm)).To(Succeed()) + defer func() { + Expect(k8sClient.Delete(ctx, cm)).To(Succeed()) + }() + + err := ReconcileAlertsAdapterResources(testReconcilerInstance, ctx, enabledCR) + Expect(err).NotTo(HaveOccurred()) + }) + + It("reconciles Phase 2 deployment when the referenced ConfigMap has no config.yaml", func() { + enabledCR := cr.DeepCopy() + enabledCR.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef = &corev1.LocalObjectReference{ + Name: utils.AlertsAdapterConfigMapName, + } + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterConfigMapName, + Namespace: utils.OLSNamespaceDefault, + }, + Data: map[string]string{"other.yaml": "pollInterval: 30s\n"}, + } + Expect(k8sClient.Create(ctx, cm)).To(Succeed()) + defer func() { + Expect(k8sClient.Delete(ctx, cm)).To(Succeed()) + }() + + err := ReconcileAlertsAdapterDeployment(testReconcilerInstance, ctx, enabledCR) + Expect(err).NotTo(HaveOccurred()) + + deployment := &appsv1.Deployment{} + err = k8sClient.Get(ctx, 
types.NamespacedName{ + Name: utils.AlertsAdapterDeploymentName, + Namespace: utils.OLSNamespaceDefault, + }, deployment) + Expect(err).NotTo(HaveOccurred()) + Expect(deployment.Spec.Template.Spec.Volumes).To(HaveLen(2)) + }) + }) + + Context("Creation logic", Ordered, func() { + var enabledCR *olsv1alpha1.OLSConfig + + BeforeAll(func() { + enabledCR = cr.DeepCopy() + enabledCR.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef = &corev1.LocalObjectReference{ + Name: utils.AlertsAdapterConfigMapName, + } + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterConfigMapName, + Namespace: utils.OLSNamespaceDefault, + }, + Data: map[string]string{ + utils.AlertsAdapterConfigMapDataKey: "pollInterval: 30s\ninitialDelay: 5m\ncooldownWindow: 1h\n", + }, + } + err := k8sClient.Create(ctx, cm) + Expect(client.IgnoreAlreadyExists(err)).NotTo(HaveOccurred()) + + err = ReconcileAlertsAdapterResources(testReconcilerInstance, ctx, enabledCR) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterAll(func() { + Expect(RemoveAlertsAdapter(testReconcilerInstance, ctx)).To(Succeed()) + + cm := &corev1.ConfigMap{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterConfigMapName, + Namespace: utils.OLSNamespaceDefault, + }, cm) + if err == nil { + Expect(k8sClient.Delete(ctx, cm)).To(Succeed()) + } + }) + + It("should create the service account", func() { + sa := &corev1.ServiceAccount{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterServiceAccountName, + Namespace: utils.OLSNamespaceDefault, + }, sa) + Expect(err).NotTo(HaveOccurred()) + expectOwnedByOLSConfig(sa) + }) + + It("should create the proposals ClusterRole and ClusterRoleBinding", func() { + role := &rbacv1.ClusterRole{} + err := k8sClient.Get(ctx, types.NamespacedName{Name: utils.AlertsAdapterProposalsClusterRoleName}, role) + Expect(err).NotTo(HaveOccurred()) + expectOwnedByOLSConfig(role) + + rb := &rbacv1.ClusterRoleBinding{} + 
err = k8sClient.Get(ctx, types.NamespacedName{Name: utils.AlertsAdapterProposalsClusterRoleBindingName}, rb) + Expect(err).NotTo(HaveOccurred()) + expectOwnedByOLSConfig(rb) + }) + + It("should create the Alertmanager RoleBinding", func() { + rb := &rbacv1.RoleBinding{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterAlertmanagerRoleBindingName, + Namespace: utils.OpenShiftMonitoringNamespace, + }, rb) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should create the network policy", func() { + np := &networkingv1.NetworkPolicy{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterNetworkPolicyName, + Namespace: utils.OLSNamespaceDefault, + }, np) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should not create legacy config Role or RoleBinding", func() { + role := &rbacv1.Role{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterConfigRoleName, + Namespace: utils.OLSNamespaceDefault, + }, role) + Expect(err).To(HaveOccurred()) + + rb := &rbacv1.RoleBinding{} + err = k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterConfigRoleBindingName, + Namespace: utils.OLSNamespaceDefault, + }, rb) + Expect(err).To(HaveOccurred()) + }) + + It("should create the deployment", func() { + err := ReconcileAlertsAdapterDeployment(testReconcilerInstance, ctx, enabledCR) + Expect(err).NotTo(HaveOccurred()) + + deployment := &appsv1.Deployment{} + err = k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterDeploymentName, + Namespace: utils.OLSNamespaceDefault, + }, deployment) + Expect(err).NotTo(HaveOccurred()) + expectOwnedByOLSConfig(deployment) + Expect(deployment.Spec.Template.Spec.Containers[0].Env).To(ContainElement(corev1.EnvVar{ + Name: utils.AlertsAdapterAlertmanagerURLEnvVar, + Value: utils.AlertsAdapterAlertmanagerURL, + })) + Expect(deployment.Spec.Template.Spec.Containers[0].SecurityContext).NotTo(BeNil()) + 
Expect(*deployment.Spec.Template.Spec.Containers[0].SecurityContext.RunAsNonRoot).To(BeTrue()) + Expect(deployment.Spec.Template.Spec.Volumes).To(HaveLen(2)) + Expect(deployment.Spec.Template.Spec.Volumes[1].ConfigMap.Name).To(Equal(utils.AlertsAdapterConfigMapName)) + Expect(deployment.Spec.Template.Spec.Containers[0].VolumeMounts).To(ContainElement(corev1.VolumeMount{ + Name: utils.AlertsAdapterConfigVolumeName, + MountPath: utils.AlertsAdapterConfigVolumeMountPath, + ReadOnly: true, + })) + }) + + It("should trigger a rolling restart via ForceReload annotation", func() { + deployment := &appsv1.Deployment{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterDeploymentName, + Namespace: utils.OLSNamespaceDefault, + }, deployment) + Expect(err).NotTo(HaveOccurred()) + + err = RestartAlertsAdapter(testReconcilerInstance, ctx, deployment) + Expect(err).NotTo(HaveOccurred()) + + updated := &appsv1.Deployment{} + err = k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterDeploymentName, + Namespace: utils.OLSNamespaceDefault, + }, updated) + Expect(err).NotTo(HaveOccurred()) + Expect(updated.Spec.Template.Annotations).To(HaveKey(utils.ForceReloadAnnotationKey)) + }) + + It("should remove all operand resources when configMapRef is unset", func() { + disabledCR := cr.DeepCopy() + err := ReconcileAlertsAdapterResources(testReconcilerInstance, ctx, disabledCR) + Expect(err).NotTo(HaveOccurred()) + + deployment := &appsv1.Deployment{} + err = k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterDeploymentName, + Namespace: utils.OLSNamespaceDefault, + }, deployment) + Expect(err).To(HaveOccurred()) + + sa := &corev1.ServiceAccount{} + err = k8sClient.Get(ctx, types.NamespacedName{ + Name: utils.AlertsAdapterServiceAccountName, + Namespace: utils.OLSNamespaceDefault, + }, sa) + Expect(err).To(HaveOccurred()) + + role := &rbacv1.ClusterRole{} + err = k8sClient.Get(ctx, types.NamespacedName{Name: 
utils.AlertsAdapterProposalsClusterRoleName}, role) + Expect(err).To(HaveOccurred()) + }) + }) +}) diff --git a/internal/controller/alertsadapter/suite_test.go b/internal/controller/alertsadapter/suite_test.go new file mode 100644 index 000000000..580d05165 --- /dev/null +++ b/internal/controller/alertsadapter/suite_test.go @@ -0,0 +1,135 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package alertsadapter + +import ( + "context" + "path/filepath" + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + configv1 "github.com/openshift/api/config/v1" + consolev1 "github.com/openshift/api/console/v1" + openshiftv1 "github.com/openshift/api/operator/v1" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + olsv1alpha1 "github.com/openshift/lightspeed-operator/api/v1alpha1" + "github.com/openshift/lightspeed-operator/internal/controller/reconciler" + "github.com/openshift/lightspeed-operator/internal/controller/utils" +) + +var ( + ctx context.Context + cfg *rest.Config + k8sClient client.Client + testEnv *envtest.Environment + cr *olsv1alpha1.OLSConfig + testReconcilerInstance reconciler.Reconciler + crNamespacedName types.NamespacedName +) + +func TestAlertsAdapter(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Alerts Adapter Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{ + filepath.Join("..", "..", "..", "config", "crd", "bases"), + filepath.Join("..", "..", "..", ".testcrds"), + }, + ErrorIfCRDPathMissing: true, + } + + var err error + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = olsv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = configv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = consolev1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = openshiftv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = 
monv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + ctx = context.Background() + + clusterVersion := &configv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{Name: "version"}, + Spec: configv1.ClusterVersionSpec{ClusterID: "foobar"}, + } + err = k8sClient.Create(ctx, clusterVersion) + Expect(err).NotTo(HaveOccurred()) + clusterVersion.Status = configv1.ClusterVersionStatus{ + Desired: configv1.Release{Version: "4.16.0"}, + } + err = k8sClient.Status().Update(ctx, clusterVersion) + Expect(err).NotTo(HaveOccurred()) + + for _, ns := range []string{utils.OLSNamespaceDefault, utils.OpenShiftMonitoringNamespace, utils.TelemetryPullSecretNamespace} { + err = k8sClient.Create(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns}}) + Expect(err).NotTo(HaveOccurred()) + } + + testReconcilerInstance = utils.NewTestReconciler( + k8sClient, + logf.Log.WithName("controller").WithName("OLSConfig"), + scheme.Scheme, + utils.OLSNamespaceDefault, + ) + + cr = &olsv1alpha1.OLSConfig{} + crNamespacedName = types.NamespacedName{Name: utils.OLSConfigName} + err = k8sClient.Get(ctx, crNamespacedName, cr) + if err != nil && errors.IsNotFound(err) { + cr = utils.GetDefaultOLSConfigCR() + err = k8sClient.Create(ctx, cr) + Expect(err).NotTo(HaveOccurred()) + } else { + Expect(err).NotTo(HaveOccurred()) + } +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/internal/controller/appserver/reconciler.go b/internal/controller/appserver/reconciler.go index 8161f799a..903af75e9 100644 --- a/internal/controller/appserver/reconciler.go +++ b/internal/controller/appserver/reconciler.go @@ -17,6 +17,7 @@ package appserver import ( "context" "fmt" + "os" "time" 
"github.com/openshift/lightspeed-operator/internal/controller/reconciler" @@ -373,6 +374,13 @@ func reconcileService(r reconciler.Reconciler, ctx context.Context, cr *olsv1alp } func reconcileMetricsReaderSecret(r reconciler.Reconciler, ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + // Skip in local development mode (make run sets LOCAL_DEV_MODE=true); same rationale as + // skipping the operator ServiceMonitor in olsconfig_controller.reconcileOperatorResources. + if os.Getenv("LOCAL_DEV_MODE") == "true" { + r.GetLogger().Info("Skipping metrics reader secret reconciliation in LOCAL_DEV_MODE") + return nil + } + secret, err := GenerateMetricsReaderSecret(r, cr) if err != nil { return fmt.Errorf("%s: %w", utils.ErrGenerateMetricsReaderSecret, err) diff --git a/internal/controller/olsconfig_controller.go b/internal/controller/olsconfig_controller.go index d40be7c54..e9f8600c2 100644 --- a/internal/controller/olsconfig_controller.go +++ b/internal/controller/olsconfig_controller.go @@ -77,6 +77,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" olsv1alpha1 "github.com/openshift/lightspeed-operator/api/v1alpha1" + "github.com/openshift/lightspeed-operator/internal/controller/alertsadapter" "github.com/openshift/lightspeed-operator/internal/controller/appserver" "github.com/openshift/lightspeed-operator/internal/controller/console" "github.com/openshift/lightspeed-operator/internal/controller/postgres" @@ -128,6 +129,7 @@ type OLSConfigReconciler struct { // +kubebuilder:rbac:groups=operator.openshift.io,resources=consoles,verbs=watch;list;get;update // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles;clusterrolebindings,verbs=get;list;create;update;patch;watch // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,namespace=openshift-lightspeed,resources=roles;rolebindings,verbs=* +// 
+kubebuilder:rbac:groups=rbac.authorization.k8s.io,namespace=openshift-monitoring,resources=rolebindings,verbs=get;list;watch;create;update;patch;delete // NonResourceURLs for Lightspeed access control and metrics // +kubebuilder:rbac:urls=/ls-access,verbs=get // +kubebuilder:rbac:urls=/ols-metrics-access,verbs=get @@ -306,6 +308,11 @@ func (r *OLSConfigReconciler) reconcileIndependentResources(ctx context.Context, Fn: func(ctx context.Context, cr *olsv1alpha1.OLSConfig) error { return appserver.ReconcileAppServerResources(r, ctx, cr) }, + }, utils.ReconcileSteps{ + Name: "alerts adapter resources", + Fn: func(ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + return alertsadapter.ReconcileAlertsAdapterResources(r, ctx, cr) + }, }) // Reconcile all independent resources (continue on error to reconcile as many as possible) @@ -392,6 +399,26 @@ func (r *OLSConfigReconciler) reconcileDeploymentsAndStatus(ctx context.Context, DiagnosticInfo: []olsv1alpha1.PodDiagnostic{}, } + if _, enabled := utils.AlertsAdapterConfigMapRef(olsconfig); enabled { + deploymentSteps = append(deploymentSteps, utils.ReconcileSteps{ + Name: "alerts adapter deployment", + Fn: func(ctx context.Context, cr *olsv1alpha1.OLSConfig) error { + return alertsadapter.ReconcileAlertsAdapterDeployment(r, ctx, cr) + }, + ConditionType: utils.TypeAlertsAdapterReady, + Deployment: utils.AlertsAdapterDeploymentName, + }) + } else { + newStatus.Conditions = append(newStatus.Conditions, metav1.Condition{ + Type: utils.TypeAlertsAdapterReady, + Status: metav1.ConditionTrue, + ObservedGeneration: olsconfig.Generation, + Reason: "NotConfigured", + Message: "Alerts adapter is disabled; spec.ols.deployment.alertsAdapter.configMapRef is not set", + LastTransitionTime: metav1.Now(), + }) + } + for _, step := range deploymentSteps { err := step.Fn(ctx, olsconfig) if err != nil { @@ -572,6 +599,12 @@ func (r *OLSConfigReconciler) finalizeOLSConfig(ctx context.Context, cr *olsv1al r.Logger.V(1).Info("Proceeding 
with finalization despite Console UI removal error") } + r.Logger.V(1).Info("Removing alerts adapter RBAC during finalization") + if err := alertsadapter.RemoveAlertsAdapter(r, ctx); err != nil { + r.Logger.Error(err, "Failed to remove alerts adapter during finalization") + r.Logger.V(1).Info("Proceeding with finalization despite alerts adapter removal error") + } + // Step 2: List all owned resources once (avoids duplicate API calls) r.Logger.V(1).Info("Listing owned resources for cleanup") resourceGroups, err := r.listOwnedResources(ctx, cr) diff --git a/internal/controller/olsconfig_helpers.go b/internal/controller/olsconfig_helpers.go index 07f50c085..4b3d30cb1 100644 --- a/internal/controller/olsconfig_helpers.go +++ b/internal/controller/olsconfig_helpers.go @@ -49,6 +49,10 @@ func (r *OLSConfigReconciler) GetConsoleUIImage() string { return r.Options.ConsoleUIImage } +func (r *OLSConfigReconciler) GetAlertsAdapterImage() string { + return r.Options.AlertsAdapterImage +} + func (r *OLSConfigReconciler) GetOpenShiftMajor() string { return r.Options.OpenShiftMajor } @@ -406,8 +410,14 @@ func (r *OLSConfigReconciler) annotateExternalResources(ctx context.Context, } // Annotate all external configmaps - // All external ConfigMaps use the default behavior (restart ACTIVE_BACKEND only) err = utils.ForEachExternalConfigMap(cr, func(name string, source string) error { + // Alerts adapter runtime config restarts only the adapter deployment. + // annotateConfigMapIfNeeded no-ops when the CM is absent; the ConfigMap Create + // watcher handles annotation and restart on first creation. 
+ if r.WatcherConfig != nil && source == "alerts-adapter" { + r.WatcherConfig.AnnotatedConfigMapMapping[name] = []string{utils.AlertsAdapterDeploymentName} + } + if err := r.annotateConfigMapIfNeeded(ctx, name, r.Options.Namespace); err != nil { r.Logger.Error(err, "Failed to annotate configmap", "source", source, "configmap", name) errs = append(errs, err) diff --git a/internal/controller/reconciler/interface.go b/internal/controller/reconciler/interface.go index c525b1e84..7ec7649a4 100644 --- a/internal/controller/reconciler/interface.go +++ b/internal/controller/reconciler/interface.go @@ -49,6 +49,9 @@ type Reconciler interface { // GetConsoleUIImage returns the console UI image to use GetConsoleUIImage() string + // GetAlertsAdapterImage returns the alerts adapter image to use + GetAlertsAdapterImage() string + // GetOpenShiftMajor returns the OpenShift major version GetOpenShiftMajor() string diff --git a/internal/controller/utils/constants.go b/internal/controller/utils/constants.go index e388d02a9..8734ec108 100644 --- a/internal/controller/utils/constants.go +++ b/internal/controller/utils/constants.go @@ -265,6 +265,44 @@ ssl_ca_file = '/etc/certs/cm-olspostgresca/service-ca.crt' // PostgresNetworkPolicyName is the name of the network policy for the OLS postgres server PostgresNetworkPolicyName = "lightspeed-postgres-server" + /*** Alerts Adapter Constants ***/ + // AlertsAdapterDeploymentName is the name of the agentic alerts adapter deployment + AlertsAdapterDeploymentName = "lightspeed-agentic-alerts-adapter" + // AlertsAdapterServiceAccountName is the name of the alerts adapter service account + AlertsAdapterServiceAccountName = "lightspeed-agentic-alerts-adapter" + // AlertsAdapterContainerName is the name of the alerts adapter container + AlertsAdapterContainerName = "adapter" + // AlertsAdapterNetworkPolicyName is the name of the network policy for the alerts adapter + AlertsAdapterNetworkPolicyName = "lightspeed-agentic-alerts-adapter" + // 
AlertsAdapterProposalsClusterRoleName is the cluster role granting Proposal create/list/get + AlertsAdapterProposalsClusterRoleName = "lightspeed-agentic-alerts-adapter-proposals" + // AlertsAdapterProposalsClusterRoleBindingName binds the proposals ClusterRole to the alerts adapter SA + AlertsAdapterProposalsClusterRoleBindingName = "lightspeed-agentic-alerts-adapter-proposals" + // AlertsAdapterAlertmanagerRoleBindingName is the RoleBinding in openshift-monitoring for Alertmanager read access + AlertsAdapterAlertmanagerRoleBindingName = "lightspeed-agentic-alerts-adapter-alertmanager" + // AlertsAdapterConfigMapName is the ConfigMap holding runtime adapter settings (poll interval, cooldown, tools) + AlertsAdapterConfigMapName = "alerts-adapter-config" + // AlertsAdapterConfigMapDataKey is the key within AlertsAdapterConfigMapName for adapter YAML config + AlertsAdapterConfigMapDataKey = "config.yaml" + // AlertsAdapterConfigVolumeName is the pod volume name for the mounted runtime config ConfigMap + AlertsAdapterConfigVolumeName = "config" + // AlertsAdapterConfigVolumeMountPath is where the adapter reads mounted config.yaml + AlertsAdapterConfigVolumeMountPath = "/etc/alerts-adapter" + // AlertsAdapterConfigRoleName is the legacy Role name removed after config moved to a volume mount + AlertsAdapterConfigRoleName = "lightspeed-agentic-alerts-adapter-config" + // AlertsAdapterConfigRoleBindingName is the legacy RoleBinding name removed after config moved to a volume mount + AlertsAdapterConfigRoleBindingName = "lightspeed-agentic-alerts-adapter-config" + // MonitoringAlertmanagerViewRoleName is the OpenShift monitoring Role for read-only Alertmanager API access + MonitoringAlertmanagerViewRoleName = "monitoring-alertmanager-view" + // OpenShiftMonitoringNamespace is the namespace for platform monitoring RBAC and services + OpenShiftMonitoringNamespace = "openshift-monitoring" + // AlertsAdapterAlertmanagerURL is the in-cluster Alertmanager API base URL + 
AlertsAdapterAlertmanagerURL = "https://alertmanager-main.openshift-monitoring.svc:9094" + // AlertsAdapterAlertmanagerURLEnvVar is the deployment env var for the Alertmanager URL + AlertsAdapterAlertmanagerURLEnvVar = "ALERTMANAGER_URL" + // AlertsAdapterComponentLabel is the app.kubernetes.io/component label value for alerts adapter resources + AlertsAdapterComponentLabel = "alerts-adapter" + // PostgresPVCName is the name of the PVC for the OLS Postgres server PostgresPVCName = "lightspeed-postgres-pvc" @@ -380,4 +418,6 @@ var ( OpenShiftMCPServerImageDefault = relatedimages.GetDefaultImage("openshift-mcp-server") DataverseExporterImageDefault = relatedimages.GetDefaultImage("lightspeed-to-dataverse-exporter") OcpRagImageDefault = relatedimages.GetDefaultImage("lightspeed-ocp-rag") + // Konflux-built image until a productized openshift-lightspeed image is published in related_images.json. + AlertsAdapterImageDefault = "quay.io/redhat-user-workloads/crt-nshift-lightspeed-tenant/lightspeed-agentic-alerts-adapter:main" ) diff --git a/internal/controller/utils/errors.go b/internal/controller/utils/errors.go index 3d03e677f..3db0e91ed 100644 --- a/internal/controller/utils/errors.go +++ b/internal/controller/utils/errors.go @@ -113,6 +113,34 @@ const ( ErrListOldPostgresSecrets = "failed to list old OLS Postgres secrets" ErrDeleteOldPostgresSecrets = "failed to delete old OLS Postgres secret" + // Alerts adapter errors + ErrCreateAlertsAdapterDeployment = "failed to create alerts adapter deployment" + ErrCreateAlertsAdapterNetworkPolicy = "failed to create alerts adapter network policy" + ErrCreateAlertsAdapterServiceAccount = "failed to create alerts adapter service account" + ErrCreateAlertsAdapterProposalsClusterRole = "failed to create alerts adapter proposals cluster role" + ErrCreateAlertsAdapterProposalsClusterRoleBinding = "failed to create alerts adapter proposals cluster role binding" + ErrCreateAlertsAdapterAlertmanagerRoleBinding = "failed to create 
alerts adapter alertmanager role binding" + ErrCreateAlertsAdapterConfigMap = "failed to create alerts adapter configmap" + ErrGenerateAlertsAdapterDeployment = "failed to generate alerts adapter deployment" + ErrSetAlertsAdapterDeploymentOwnerReference = "failed to set alerts adapter deployment owner reference" + ErrGenerateAlertsAdapterNetworkPolicy = "failed to generate alerts adapter network policy" + ErrGenerateAlertsAdapterServiceAccount = "failed to generate alerts adapter service account" + ErrGenerateAlertsAdapterConfigMap = "failed to generate alerts adapter configmap" + ErrGenerateAlertsAdapterProposalsClusterRole = "failed to generate alerts adapter proposals cluster role" + ErrGenerateAlertsAdapterProposalsClusterRoleBinding = "failed to generate alerts adapter proposals cluster role binding" + ErrGenerateAlertsAdapterAlertmanagerRoleBinding = "failed to generate alerts adapter alertmanager role binding" + ErrGetAlertsAdapterDeployment = "failed to get alerts adapter deployment" + ErrGetAlertsAdapterNetworkPolicy = "failed to get alerts adapter network policy" + ErrGetAlertsAdapterServiceAccount = "failed to get alerts adapter service account" + ErrGetAlertsAdapterProposalsClusterRole = "failed to get alerts adapter proposals cluster role" + ErrGetAlertsAdapterProposalsClusterRoleBinding = "failed to get alerts adapter proposals cluster role binding" + ErrGetAlertsAdapterAlertmanagerRoleBinding = "failed to get alerts adapter alertmanager role binding" + ErrGetAlertsAdapterConfigMap = "failed to get alerts adapter configmap" + ErrGetAlertsAdapterConfigRole = "failed to get alerts adapter config role" + ErrGetAlertsAdapterConfigRoleBinding = "failed to get alerts adapter config role binding" + ErrUpdateAlertsAdapterDeployment = "failed to update alerts adapter deployment" + ErrUpdateAlertsAdapterNetworkPolicy = "failed to update alerts adapter network policy" + // OpenShift MCP server config errors ErrCreateMCPServerConfigMap = "failed to create MCP 
server config configmap" ErrDeleteMCPServerConfigMap = "failed to delete MCP server config configmap" diff --git a/internal/controller/utils/resource_defaults_test.go b/internal/controller/utils/resource_defaults_test.go index 98c0b004d..7bf8a3f0c 100644 --- a/internal/controller/utils/resource_defaults_test.go +++ b/internal/controller/utils/resource_defaults_test.go @@ -56,6 +56,7 @@ var _ = Describe("Resource defaults and test reconciler", func() { Expect(r.GetNamespace()).To(Equal("test-ns")) Expect(r.GetPostgresImage()).To(Equal(PostgresServerImageDefault)) Expect(r.GetConsoleUIImage()).To(Equal(ConsoleUIImageDefault)) + Expect(r.GetAlertsAdapterImage()).To(Equal(AlertsAdapterImageDefault)) Expect(r.GetOpenShiftMajor()).To(Equal("123")) Expect(r.GetOpenshiftMinor()).To(Equal("456")) Expect(r.GetAppServerImage()).To(Equal(OLSAppServerImageDefault)) diff --git a/internal/controller/utils/testing.go b/internal/controller/utils/testing.go index 37136fdb3..a363e08e5 100644 --- a/internal/controller/utils/testing.go +++ b/internal/controller/utils/testing.go @@ -18,6 +18,7 @@ type TestReconciler struct { namespace string PostgresImage string ConsoleImage string + AlertsAdapterImage string AppServerImage string McpServerImage string DataverseExporter string @@ -47,6 +48,10 @@ func (r *TestReconciler) GetConsoleUIImage() string { return r.ConsoleImage } +func (r *TestReconciler) GetAlertsAdapterImage() string { + return r.AlertsAdapterImage +} + func (r *TestReconciler) GetOpenShiftMajor() string { return r.openShiftMajor } @@ -93,6 +98,7 @@ func NewTestReconciler( namespace: namespace, PostgresImage: PostgresServerImageDefault, ConsoleImage: ConsoleUIImageDefault, + AlertsAdapterImage: AlertsAdapterImageDefault, AppServerImage: OLSAppServerImageDefault, McpServerImage: OLSAppServerImageDefault, DataverseExporter: DataverseExporterImageDefault, diff --git a/internal/controller/utils/types.go b/internal/controller/utils/types.go index 791993931..87c560acb 100644 --- 
a/internal/controller/utils/types.go +++ b/internal/controller/utils/types.go @@ -12,6 +12,7 @@ const ( TypeApiReady = "ApiReady" TypeCacheReady = "CacheReady" TypeConsolePluginReady = "ConsolePluginReady" + TypeAlertsAdapterReady = "AlertsAdapterReady" TypeCRReconciled = "Reconciled" ) @@ -21,6 +22,7 @@ type OLSConfigReconcilerOptions struct { LightspeedServiceImage string LightspeedServicePostgresImage string ConsoleUIImage string + AlertsAdapterImage string DataverseExporterImage string OpenShiftMCPServerImage string Namespace string diff --git a/internal/controller/utils/utils.go b/internal/controller/utils/utils.go index 43ed0a08a..a5a38cec6 100644 --- a/internal/controller/utils/utils.go +++ b/internal/controller/utils/utils.go @@ -471,6 +471,17 @@ func GeneratePostgresSelectorLabels() map[string]string { } } +// GenerateAlertsAdapterSelectorLabels returns selector labels for the alerts adapter. +func GenerateAlertsAdapterSelectorLabels() map[string]string { + return map[string]string{ + "app": AlertsAdapterDeploymentName, + "app.kubernetes.io/component": AlertsAdapterComponentLabel, + "app.kubernetes.io/managed-by": "lightspeed-operator", + "app.kubernetes.io/name": AlertsAdapterDeploymentName, + "app.kubernetes.io/part-of": "openshift-lightspeed", + } +} + // GetPostgresCAConfigVolume returns the CA certificate volume for postgres TLS verification. func GetPostgresCAConfigVolume() corev1.Volume { volumeDefaultMode := VolumeDefaultMode @@ -766,6 +777,16 @@ func ForEachExternalSecret(cr *olsv1alpha1.OLSConfig, fn func(name string, sourc return nil } +// AlertsAdapterConfigMapRef returns the referenced ConfigMap name when the alerts adapter +// is enabled (configMapRef set with a non-empty name). The bool is false when disabled. 
+func AlertsAdapterConfigMapRef(cr *olsv1alpha1.OLSConfig) (name string, ok bool) { + ref := cr.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef + if ref == nil || ref.Name == "" { + return "", false + } + return ref.Name, true +} + // ForEachExternalConfigMap calls fn for each external configmap referenced in the OLSConfig CR. // The callback function receives: // - name: the configmap name @@ -799,6 +820,13 @@ func ForEachExternalConfigMap(cr *olsv1alpha1.OLSConfig, fn func(name string, so } } + // 3. Alerts adapter runtime config (opt-in via configMapRef) + if name, ok := AlertsAdapterConfigMapRef(cr); ok { + if err := fn(name, "alerts-adapter"); err != nil { + return err + } + } + return nil } diff --git a/internal/controller/watchers/watchers.go b/internal/controller/watchers/watchers.go index e91455fff..1059c0d1b 100644 --- a/internal/controller/watchers/watchers.go +++ b/internal/controller/watchers/watchers.go @@ -14,6 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" olsv1alpha1 "github.com/openshift/lightspeed-operator/api/v1alpha1" + "github.com/openshift/lightspeed-operator/internal/controller/alertsadapter" "github.com/openshift/lightspeed-operator/internal/controller/appserver" "github.com/openshift/lightspeed-operator/internal/controller/console" "github.com/openshift/lightspeed-operator/internal/controller/postgres" @@ -363,9 +364,10 @@ type RestartFunc func(reconciler.Reconciler, context.Context, ...*appsv1.Deploym // restartFuncs maps deployment names to their restart functions var restartFuncs = map[string]RestartFunc{ - utils.OLSAppServerDeploymentName: appserver.RestartAppServer, - utils.PostgresDeploymentName: postgres.RestartPostgres, - utils.ConsoleUIDeploymentName: console.RestartConsoleUI, + utils.OLSAppServerDeploymentName: appserver.RestartAppServer, + utils.PostgresDeploymentName: postgres.RestartPostgres, + utils.ConsoleUIDeploymentName: console.RestartConsoleUI, + utils.AlertsAdapterDeploymentName: 
alertsadapter.RestartAlertsAdapter, } // restart corresponding deployment diff --git a/internal/controller/watchers/watchers_test.go b/internal/controller/watchers/watchers_test.go index cd940603d..e8a687422 100644 --- a/internal/controller/watchers/watchers_test.go +++ b/internal/controller/watchers/watchers_test.go @@ -96,6 +96,15 @@ var _ = Describe("Watchers", func() { Expect(isConfigMapReferencedInCR(cr, "extra-ca")).To(BeTrue()) Expect(isConfigMapReferencedInCR(cr, "other")).To(BeFalse()) }) + + It("returns true when alerts adapter configMapRef is set", func() { + cr := utils.GetDefaultOLSConfigCR() + cr.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef = &corev1.LocalObjectReference{ + Name: utils.AlertsAdapterConfigMapName, + } + Expect(isConfigMapReferencedInCR(cr, utils.AlertsAdapterConfigMapName)).To(BeTrue()) + Expect(isConfigMapReferencedInCR(cr, "other")).To(BeFalse()) + }) }) Describe("SecretWatcherFilter", func() { @@ -325,6 +334,31 @@ var _ = Describe("Watchers", func() { Expect(r.Get(ctx, client.ObjectKeyFromObject(cm), got)).To(Succeed()) Expect(got.Annotations).To(HaveKey(utils.WatcherAnnotationKey)) }) + + It("annotates alerts adapter runtime config when configMapRef is set", func() { + cr := utils.GetDefaultOLSConfigCR() + cr.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef = &corev1.LocalObjectReference{ + Name: utils.AlertsAdapterConfigMapName, + } + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: utils.OLSNamespaceDefault, + Name: utils.AlertsAdapterConfigMapName, + }, + Data: map[string]string{ + utils.AlertsAdapterConfigMapDataKey: "pollInterval: 30s\n", + }, + } + r := createTestReconciler(cr, cm) + wc := r.GetWatcherConfig().(*utils.WatcherConfig) + wc.AnnotatedConfigMapMapping[utils.AlertsAdapterConfigMapName] = []string{utils.AlertsAdapterDeploymentName} + h := &ConfigMapUpdateHandler{Reconciler: r} + h.Create(ctx, event.CreateEvent{Object: cm}, nil) + + got := &corev1.ConfigMap{} + 
Expect(r.Get(ctx, client.ObjectKeyFromObject(cm), got)).To(Succeed()) + Expect(got.Annotations).To(HaveKey(utils.WatcherAnnotationKey)) + }) }) Describe("restartDeployment with in-cluster restart", func() { @@ -358,5 +392,49 @@ var _ = Describe("Watchers", func() { Expect(r.Get(ctx, client.ObjectKeyFromObject(dep), updated)).To(Succeed()) Expect(updated.Spec.Template.Annotations).To(HaveKey(utils.ForceReloadAnnotationKey)) }) + + It("restarts the alerts adapter deployment for mapped configmap change", func() { + cr := utils.GetDefaultOLSConfigCR() + cr.Spec.OLSConfig.DeploymentConfig.AlertsAdapter.ConfigMapRef = &corev1.LocalObjectReference{ + Name: utils.AlertsAdapterConfigMapName, + } + dep := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.AlertsAdapterDeploymentName, + Namespace: utils.OLSNamespaceDefault, + }, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{MatchLabels: utils.GenerateAlertsAdapterSelectorLabels()}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: utils.GenerateAlertsAdapterSelectorLabels()}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: utils.AlertsAdapterContainerName, + Image: "img", + }}, + }, + }, + }, + } + r := createTestReconciler(cr, dep) + wc := r.GetWatcherConfig().(*utils.WatcherConfig) + wc.AnnotatedConfigMapMapping[utils.AlertsAdapterConfigMapName] = []string{utils.AlertsAdapterDeploymentName} + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: utils.OLSNamespaceDefault, + Name: utils.AlertsAdapterConfigMapName, + Annotations: map[string]string{utils.WatcherAnnotationKey: "1"}, + }, + Data: map[string]string{ + utils.AlertsAdapterConfigMapDataKey: "pollInterval: 30s\n", + }, + } + ConfigMapWatcherFilter(r, ctx, cm, true) + + updated := &appsv1.Deployment{} + Expect(r.Get(ctx, client.ObjectKeyFromObject(dep), updated)).To(Succeed()) + Expect(updated.Spec.Template.Annotations).To(HaveKey(utils.ForceReloadAnnotationKey)) + }) }) 
})