fix: use pipelines that don't write history for CR scheduling requests (#881)

mblos · web-flow · commit 04a47288beef · 2026-05-22T11:28:04.000+02:00
diff --git a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml
@@ -632,4 +632,301 @@ spec:
       description: |
         Excludes hosts that are not ready or are disabled.
   weighers: []
+---
+apiVersion: cortex.cloud/v1alpha1
+kind: Pipeline
+metadata:
+  name: kvm-general-purpose-load-balancing-no-history
+spec:
+  schedulingDomain: nova
+  description: |
+    Variant of kvm-general-purpose-load-balancing used for committed-resource
+    reservation scheduling and capacity probes. Identical filter/weigher chain
+    but does not write placement history, to avoid polluting history with
+    internal CR controller calls.
+  type: filter-weigher
+  ignorePreselection: true
+  createHistory: false
+  filters:
+    - name: filter_correct_az
+      description: |
+        This step will filter out hosts whose aggregate information indicates they
+        are not placed in the requested availability zone.
+    - name: filter_host_instructions
+      description: |
+        This step will consider the `ignore_hosts` and `force_hosts` instructions
+        from the nova scheduler request spec to filter out or exclusively allow
+        certain hosts.
+    - name: filter_status_conditions
+      description: |
+        This step will filter out hosts for which the hypervisor status conditions
+        do not meet the expected values, for example, that the hypervisor is ready
+        and not disabled.
+    - name: filter_capabilities
+      description: |
+        This step will filter out hosts that do not meet the compute capabilities
+        requested by the nova flavor extra specs, like `{"arch": "x86_64",
+        "maxphysaddr:bits": 46, ...}`.
+
+        Note: currently, advanced boolean/numeric operators for the capabilities
+        like `>`, `!`, ... are not supported because they are not used by any of our
+        flavors in production.
+    - name: filter_has_requested_traits
+      description: |
+        This step filters hosts that do not have the requested traits given by the
+        nova flavor extra spec: "trait:<trait>": "forbidden" means the host must
+        not have the specified trait. "trait:<trait>": "required" means the host
+        must have the specified trait.
+    - name: filter_external_customer
+      description: |
+        This step prefix-matches the domain name for external customer domains and
+        filters out hosts that are not intended for external customers. It considers
+        the `CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE` trait on hosts as well as the
+        `domain_name` scheduler hint from the nova request spec.
+      params:
+        - {key: domainNamePrefixes, stringListValue: ["iaas-"]}
+    - name: filter_has_accelerators
+      description: |
+        This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if
+        the nova flavor extra specs request accelerators via "accel:device_profile".
+    - name: filter_instance_group_affinity
+      description: |
+        This step selects hosts in the instance group specified in the nova
+        scheduler request spec.
+    - name: filter_instance_group_anti_affinity
+      description: |
+        This step selects hosts not in the instance group specified in the nova
+        scheduler request spec, but only until the max_server_per_host limit is
+        reached (default = 1).
+    - name: filter_has_enough_capacity
+      description: |
+        This step will filter out hosts that do not have enough available capacity
+        to host the requested flavor. If enabled, this step will subtract the
+        current reservations residing on this host from the available capacity.
+      params:
+        - {key: lockReserved, boolValue: false}
+    - name: filter_allowed_projects
+      description: |
+        This step filters hosts based on allowed projects defined in the
+        hypervisor resource. Note that hosts allowing all projects are still
+        accessible and will not be filtered out. In this way some hypervisors
+        are made accessible to some projects only.
+    - name: filter_aggregate_metadata
+      description: |
+        This step filters hosts based on metadata defined in their aggregates. For
+        example, if an aggregate has the metadata "filter_tenant_id": "<project_id>",
+        only hosts in that aggregate that match the project ID in the nova request
+        will pass this filter.
+    - name: filter_live_migratable
+      description: |
+        This step ensures that the target host of a live migration can accept
+        the migrating VM, by checking cpu architecture, cpu features, emulated
+        devices, and cpu modes.
+    - name: filter_requested_destination
+      description: |
+        This step filters hosts based on the `requested_destination` instruction
+        from the nova scheduler request spec. It supports filtering by host and
+        by aggregates. Aggregates use AND logic between list elements, with
+        comma-separated UUIDs within an element using OR logic.
+    - name: filter_quota_enforcement
+      description: |
+        This step enforces project quota by checking whether the request has
+        headroom under the project's committed resources or pay-as-you-go quota.
+        If a matching CommittedResource has unused capacity, the request is accepted.
+        Otherwise, PAYG headroom is checked for ram, cores, and instances.
+        Rejects all hosts if neither tier has headroom.
+        When dryRun is true the filter runs in shadow mode: it logs and emits
+        the cortex_nova_filter_quota_enforcement_decisions_total metric for
+        would-be rejects but never actually removes hosts.
+      params:
+        - {key: dryRun, boolValue: true}
+  weighers:
+    - name: kvm_prefer_smaller_hosts
+      params:
+        - {key: resourceWeights, floatMapValue: {"memory": 1.0}}
+      description: |
+        This step pulls virtual machines onto smaller hosts (by capacity). This
+        ensures that larger hosts are not overly fragmented with small VMs,
+        and can still accommodate larger VMs when they need to be scheduled.
+    - name: kvm_instance_group_soft_affinity
+      description: |
+        This weigher implements the "soft affinity" and "soft anti-affinity" policy
+        for instance groups in nova.
+
+        It assigns a weight to each host based on how many instances of the same
+        instance group are already running on that host. The more instances of the
+        same group on a host, the lower (for soft-anti-affinity) or higher
+        (for soft-affinity) the weight, which makes it less likely or more likely,
+        respectively, for the scheduler to choose that host for new instances of
+        the same group.
+    - name: kvm_binpack
+      multiplier: -1.0 # inverted = balancing
+      params:
+        - {key: resourceWeights, floatMapValue: {"memory": 1.0}}
+      description: |
+        This step implements a balancing weigher for workloads on kvm hypervisors,
+        which is the opposite of binpacking. Instead of pulling the requested vm
+        into the smallest gaps possible, it spreads the load to ensure
+        workloads are balanced across hosts. In this pipeline, the balancing will
+        focus on general purpose virtual machines.
+    - name: kvm_failover_evacuation
+      description: |
+        This weigher prefers hosts with active failover reservations during
+        evacuation requests. Hosts matching a failover reservation where the
+        VM is allocated get a higher weight, encouraging placement on
+        pre-reserved failover capacity. For non-evacuation requests, this
+        weigher has no effect.
+    - name: kvm_committed_resource_reservation
+      description: |
+        This weigher boosts hosts that have a ready CommittedResourceReservation
+        matching the request's project, resource group, and availability zone,
+        with enough free memory capacity for the requested VM. Hosts without a
+        matching reservation or without enough free capacity receive a lower weight.
+---
+apiVersion: cortex.cloud/v1alpha1
+kind: Pipeline
+metadata:
+  name: kvm-hana-bin-packing-no-history
+spec:
+  schedulingDomain: nova
+  description: |
+    Variant of kvm-hana-bin-packing used for committed-resource reservation
+    scheduling and capacity probes. Identical filter/weigher chain but does not
+    write placement history, to avoid polluting history with internal CR
+    controller calls.
+  type: filter-weigher
+  ignorePreselection: true
+  createHistory: false
+  filters:
+    - name: filter_correct_az
+      description: |
+        This step will filter out hosts whose aggregate information indicates they
+        are not placed in the requested availability zone.
+    - name: filter_host_instructions
+      description: |
+        This step will consider the `ignore_hosts` and `force_hosts` instructions
+        from the nova scheduler request spec to filter out or exclusively allow
+        certain hosts.
+    - name: filter_status_conditions
+      description: |
+        This step will filter out hosts for which the hypervisor status conditions
+        do not meet the expected values, for example, that the hypervisor is ready
+        and not disabled.
+    - name: filter_capabilities
+      description: |
+        This step will filter out hosts that do not meet the compute capabilities
+        requested by the nova flavor extra specs, like `{"arch": "x86_64",
+        "maxphysaddr:bits": 46, ...}`.
+
+        Note: currently, advanced boolean/numeric operators for the capabilities
+        like `>`, `!`, ... are not supported because they are not used by any of our
+        flavors in production.
+    - name: filter_has_requested_traits
+      description: |
+        This step filters hosts that do not have the requested traits given by the
+        nova flavor extra spec: "trait:<trait>": "forbidden" means the host must
+        not have the specified trait. "trait:<trait>": "required" means the host
+        must have the specified trait.
+    - name: filter_external_customer
+      description: |
+        This step prefix-matches the domain name for external customer domains and
+        filters out hosts that are not intended for external customers. It considers
+        the `CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE` trait on hosts as well as the
+        `domain_name` scheduler hint from the nova request spec.
+      params:
+        - {key: domainNamePrefixes, stringListValue: ["iaas-"]}
+    - name: filter_has_accelerators
+      description: |
+        This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if
+        the nova flavor extra specs request accelerators via "accel:device_profile".
+    - name: filter_instance_group_affinity
+      description: |
+        This step selects hosts in the instance group specified in the nova
+        scheduler request spec.
+    - name: filter_instance_group_anti_affinity
+      description: |
+        This step selects hosts not in the instance group specified in the nova
+        scheduler request spec, but only until the max_server_per_host limit is
+        reached (default = 1).
+    - name: filter_has_enough_capacity
+      description: |
+        This step will filter out hosts that do not have enough available capacity
+        to host the requested flavor. If enabled, this step will subtract the
+        current reservations residing on this host from the available capacity.
+      params:
+        - {key: lockReserved, boolValue: false}
+    - name: filter_allowed_projects
+      description: |
+        This step filters hosts based on allowed projects defined in the
+        hypervisor resource. Note that hosts allowing all projects are still
+        accessible and will not be filtered out. In this way some hypervisors
+        are made accessible to some projects only.
+    - name: filter_aggregate_metadata
+      description: |
+        This step filters hosts based on metadata defined in their aggregates. For
+        example, if an aggregate has the metadata "filter_tenant_id": "<project_id>",
+        only hosts in that aggregate that match the project ID in the nova request
+        will pass this filter.
+    - name: filter_live_migratable
+      description: |
+        This step ensures that the target host of a live migration can accept
+        the migrating VM, by checking cpu architecture, cpu features, emulated
+        devices, and cpu modes.
+    - name: filter_requested_destination
+      description: |
+        This step filters hosts based on the `requested_destination` instruction
+        from the nova scheduler request spec. It supports filtering by host and
+        by aggregates. Aggregates use AND logic between list elements, with
+        comma-separated UUIDs within an element using OR logic.
+    - name: filter_quota_enforcement
+      description: |
+        This step enforces project quota by checking whether the request has
+        headroom under the project's committed resources or pay-as-you-go quota.
+        If a matching CommittedResource has unused capacity, the request is accepted.
+        Otherwise, PAYG headroom is checked for ram, cores, and instances.
+        Rejects all hosts if neither tier has headroom.
+        When dryRun is true the filter runs in shadow mode: it logs and emits
+        the cortex_nova_filter_quota_enforcement_decisions_total metric for
+        would-be rejects but never actually removes hosts.
+      params:
+        - {key: dryRun, boolValue: true}
+  weighers:
+    - name: kvm_prefer_smaller_hosts
+      params:
+        - {key: resourceWeights, floatMapValue: {"memory": 1.0}}
+      description: |
+        This step pulls virtual machines onto smaller hosts (by capacity). This
+        ensures that larger hosts are not overly fragmented with small VMs,
+        and can still accommodate larger VMs when they need to be scheduled.
+    - name: kvm_instance_group_soft_affinity
+      description: |
+        This weigher implements the "soft affinity" and "soft anti-affinity" policy
+        for instance groups in nova.
+        It assigns a weight to each host based on how many instances of the same
+        instance group are already running on that host. The more instances of the
+        same group on a host, the lower (for soft-anti-affinity) or higher
+        (for soft-affinity) the weight, which makes it less likely or more likely,
+        respectively, for the scheduler to choose that host for new instances of
+        the same group.
+    - name: kvm_binpack
+      params:
+        - {key: resourceWeights, floatMapValue: {"memory": 1.0}}
+      description: |
+        This step implements a binpacking weigher for workloads on kvm hypervisors.
+        It pulls the requested vm into the smallest gaps possible, to ensure
+        other hosts with less allocation stay free for bigger vms.
+        In this pipeline, the binpacking will focus on hana virtual machines.
+    - name: kvm_failover_evacuation
+      description: |
+        This weigher prefers hosts with active failover reservations during
+        evacuation requests. Hosts matching a failover reservation where the
+        VM is allocated get a higher weight, encouraging placement on
+        pre-reserved failover capacity. For non-evacuation requests, this
+        weigher has no effect.
+    - name: kvm_committed_resource_reservation
+      description: |
+        This weigher boosts hosts that have a ready CommittedResourceReservation
+        matching the request's project, resource group, and availability zone,
+        with enough free memory capacity for the requested VM. Hosts without a
+        matching reservation or without enough free capacity receive a lower weight.
 {{- end }}
diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml
@@ -139,7 +139,7 @@ cortex-scheduling-controllers:
     # Pipeline used for the empty-state capacity probe (ignores allocations and reservations).
     capacityTotalPipeline: "kvm-report-capacity"
     # Pipeline used for the current-state capacity probe (considers current VM allocations).
-    capacityPlaceablePipeline: "kvm-general-purpose-load-balancing"
+    capacityPlaceablePipeline: "kvm-general-purpose-load-balancing-no-history"
     # How often the capacity controller re-runs its scheduler probes.
     capacityReconcileInterval: 5m
     enabledTasks:
@@ -154,11 +154,9 @@ cortex-scheduling-controllers:
     committedResourceReservationController:
       # Maps flavor group IDs to pipeline names; "*" acts as catch-all fallback
       flavorGroupPipelines:
-        "2152": "kvm-hana-bin-packing"  # HANA flavor group
-        "2101": "kvm-general-purpose-load-balancing"  # General Purpose flavor group
-        "*": "kvm-general-purpose-load-balancing"  # Catch-all fallback
+        "*": "kvm-general-purpose-load-balancing-no-history"  # Catch-all fallback
       # Fallback pipeline when no flavorGroupPipelines entry matches
-      pipelineDefault: "kvm-general-purpose-load-balancing"
+      pipelineDefault: "kvm-general-purpose-load-balancing-no-history"
       # How often to re-verify active Reservation CRDs (healthy state)
       requeueIntervalActive: "5m"
       # Back-off interval when knowledge is unavailable
diff --git a/internal/scheduling/reservations/capacity/config.go b/internal/scheduling/reservations/capacity/config.go
@@ -47,7 +47,7 @@ func DefaultConfig() Config {
 	return Config{
 		ReconcileInterval: metav1.Duration{Duration: 5 * time.Minute},
 		TotalPipeline:     "kvm-report-capacity",
-		PlaceablePipeline: "kvm-general-purpose-load-balancing",
+		PlaceablePipeline: "kvm-general-purpose-load-balancing-no-history",
 		SchedulerURL:      "http://localhost:8080/scheduler/nova/external",
 	}
 }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -47,7 +47,7 @@ func DefaultConfig() Config {` |
| 47 | 47 | `return Config{` |
| 48 | 48 | `ReconcileInterval: metav1.Duration{Duration: 5 * time.Minute},` |
| 49 | 49 | `TotalPipeline: "kvm-report-capacity",` |
| 50 | | `- PlaceablePipeline: "kvm-general-purpose-load-balancing",` |
| | 50 | `+ PlaceablePipeline: "kvm-general-purpose-load-balancing-no-history",` |
| 51 | 51 | `SchedulerURL: "http://localhost:8080/scheduler/nova/external",` |
| 52 | 52 | `}` |
| 53 | 53 | `}` |