@@ -5,13 +5,127 @@ package e2e
55
66import (
77 "context"
8+ "encoding/json"
9+ "fmt"
10+ "math"
811 "net/http"
912 "net/url"
13+ "slices"
14+ "strings"
15+ "time"
1016
1117 . "github.com/onsi/ginkgo/v2"
1218 . "github.com/onsi/gomega"
19+
20+ corev1 "k8s.io/api/core/v1"
21+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22+ "k8s.io/apimachinery/pkg/types"
23+
24+ "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-06-01/compute"
25+
26+ machinev1beta1 "github.com/openshift/api/machine/v1beta1"
27+
28+ "github.com/Azure/ARO-RP/pkg/api"
29+ "github.com/Azure/ARO-RP/pkg/api/validate"
30+ "github.com/Azure/ARO-RP/pkg/util/stringutils"
1331)
1432
const (
	// machineLabelInstanceType is the label key read from Machine objects
	// to obtain their instance type.
	machineLabelInstanceType = "machine.openshift.io/instance-type"

	// nodeLabelInstanceType is the label key read from Node objects to
	// obtain their instance type.
	nodeLabelInstanceType = "node.kubernetes.io/instance-type"

	// masterMachineRoleLabelSelector is the label selector used to list the
	// Machine objects that carry the "master" machine role.
	masterMachineRoleLabelSelector = "machine.openshift.io/cluster-api-machine-role=master"
)
38+
39+ func getControlPlaneVMs (ctx context.Context ) []compute.VirtualMachine {
40+ oc , err := clients .OpenshiftClusters .Get (ctx , vnetResourceGroup , clusterName )
41+ Expect (err ).NotTo (HaveOccurred ())
42+ clusterResourceGroup := stringutils .LastTokenByte (* oc .ClusterProfile .ResourceGroupID , '/' )
43+ vms , err := clients .VirtualMachines .List (ctx , clusterResourceGroup )
44+ Expect (err ).NotTo (HaveOccurred ())
45+ return slices .DeleteFunc (vms , func (vm compute.VirtualMachine ) bool {
46+ Expect (vm .Name ).ToNot (BeNil ())
47+ return ! strings .Contains (* vm .Name , "master" )
48+ })
49+ }
50+
51+ // getControlPlaneVMSize retrieves the VM size of one of the control plane
52+ // (master) VMs in the cluster by listing all VMs in the cluster resource group
53+ // and returning the size of the first VM whose name contains "master".
54+ func getControlPlaneVMSize (ctx context.Context ) string {
55+ vms := getControlPlaneVMs (ctx )
56+ Expect (vms ).NotTo (BeEmpty ())
57+ Expect (vms [0 ].HardwareProfile ).NotTo (BeNil ())
58+ return string (vms [0 ].HardwareProfile .VMSize )
59+ }
60+
61+ // nextLargerSupportedMasterVMSize returns the supported master VM size in the
62+ // same family as currentVMSize that has the smallest core count strictly
63+ // greater than currentVMSize's core count. It returns an error if currentVMSize
64+ // is not in the supported master list, or if no larger size exists in the same
65+ // family.
66+ func nextLargerSupportedMasterVMSize (currentVMSize string ) (string , error ) {
67+ supportedMasterSizes := validate .SupportedVMSizesByRole (validate .VMRoleMaster )
68+ currentInfo , ok := supportedMasterSizes [api .VMSize (currentVMSize )]
69+ if ! ok {
70+ return "" , fmt .Errorf ("current VM size %q is not in the supported master list" , currentVMSize )
71+ }
72+
73+ targetSku := ""
74+ targetCores := math .MaxInt
75+ for size , info := range supportedMasterSizes {
76+ if info .Family != currentInfo .Family {
77+ continue
78+ }
79+ if info .CoreCount <= currentInfo .CoreCount {
80+ continue
81+ }
82+ if info .CoreCount < targetCores {
83+ targetCores = info .CoreCount
84+ targetSku = string (size )
85+ }
86+ }
87+
88+ if targetSku == "" {
89+ return "" , fmt .Errorf ("no supported master VM size larger than %q (family %s, %d cores) is available" , currentVMSize , currentInfo .Family , currentInfo .CoreCount )
90+ }
91+ return targetSku , nil
92+ }
93+
94+ // validateMasterVMSizeLabels makes sure that master machine and node Resources in the cluster have the correct vmsize labels. It verifies that the following are equal to the targetSku
95+ // - metadata.labels."machine.openshift.io/instance-type" for machine
96+ // - spec.ProviderSpec.value.vmSize for machine
97+ // - metadata.labels."node.kubernetes.io/instance-type" for node
98+ // for each of the master nodes
99+ //
100+ // There is no return value, as this is supposed to be called directly from ginkgo test cases. This function validates the labels via [github.com/onsi/gomega.Expect] statements
101+ func validateMasterVMSizeLabels (ctx context.Context , targetSku string ) {
102+ masterMachinesList , err := clients .MachineAPI .MachineV1beta1 ().Machines ("openshift-machine-api" ).List (ctx , metav1.ListOptions {
103+ LabelSelector : masterMachineRoleLabelSelector ,
104+ })
105+ Expect (err ).ToNot (HaveOccurred ())
106+
107+ for _ , ma := range masterMachinesList .Items {
108+ By (fmt .Sprintf ("Checking machine and node labels for %s" , ma .GetName ()))
109+ sizeLabelVal , ok := ma .GetObjectMeta ().GetLabels ()[machineLabelInstanceType ]
110+ Expect (ok ).To (BeTrue ())
111+ Expect (sizeLabelVal ).To (Equal (targetSku ))
112+
113+ var machineProvSpec machinev1beta1.AzureMachineProviderSpec
114+ Expect (json .Unmarshal (ma .Spec .ProviderSpec .Value .Raw , & machineProvSpec )).ToNot (HaveOccurred ())
115+ Expect (machineProvSpec .VMSize ).To (Equal (targetSku ))
116+
117+ Expect (ma .Status .NodeRef ).ToNot (BeNil ())
118+
119+ var curNode corev1.Node
120+ err = clients .KubeClient .Get (ctx , types.NamespacedName {Name : ma .Status .NodeRef .Name }, & curNode )
121+ Expect (err ).ToNot (HaveOccurred ())
122+
123+ nodeSizeLabelVal , ok := curNode .GetLabels ()[nodeLabelInstanceType ]
124+ Expect (ok ).To (BeTrue ())
125+ Expect (nodeSizeLabelVal ).To (Equal (targetSku ))
126+ }
127+ }
128+
15129var _ = Describe ("[Admin API] Resize control plane" , func () {
16130 BeforeEach (skipIfNotInDevelopmentEnv )
17131
@@ -39,4 +153,110 @@ var _ = Describe("[Admin API] Resize control plane", func() {
39153 Expect (err ).NotTo (HaveOccurred ())
40154 Expect (resp .StatusCode ).To (Equal (http .StatusBadRequest ))
41155 })
156+
157+ It ("should not resize when size is already the same" , func (ctx context.Context ) {
158+ By ("Getting the current machine size" )
159+ preResizeVMSize := getControlPlaneVMSize (ctx )
160+ Expect (preResizeVMSize ).ToNot (BeZero ())
161+
162+ By (fmt .Sprintf ("Resizing to the current machine size: %s" , preResizeVMSize ))
163+
164+ params := url.Values {
165+ "deallocateVM" : []string {"false" },
166+ "vmSize" : []string {preResizeVMSize },
167+ }
168+
169+ resp , err := adminRequest (ctx , http .MethodPost ,
170+ "/admin" + clusterResourceID + "/resizecontrolplane" ,
171+ params , true , nil , nil )
172+ Expect (err ).NotTo (HaveOccurred ())
173+ Expect (resp .StatusCode ).To (Equal (http .StatusOK ))
174+
175+ controlPlaneVms := getControlPlaneVMs (ctx )
176+ Expect (controlPlaneVms ).ToNot (BeEmpty ())
177+ for _ , vm := range controlPlaneVms {
178+ Expect (vm .HardwareProfile ).ToNot (BeNil ())
179+ Expect (string (vm .HardwareProfile .VMSize )).To (Equal (preResizeVMSize ))
180+ }
181+ })
182+
183+ It ("Should not attempt to resize if there is no quota" , func (ctx context.Context ) {
184+ By ("Finding a supported Master VM Size without Quota" )
185+ usageRes , err := clients .Usages .List (ctx , _env .Location ())
186+ Expect (err ).ToNot (HaveOccurred ())
187+ supportedSizes := validate .SupportedVMSizesByRole (validate .VMRoleMaster )
188+ // looking for supported vms with 0 quota
189+ targetSku := ""
190+ for size , sizeInfo := range supportedSizes {
191+ for _ , u := range usageRes {
192+ if u .Name == nil ||
193+ u .Name .Value == nil ||
194+ * u .Name .Value != sizeInfo .Family ||
195+ u .Limit == nil {
196+ continue
197+ }
198+
199+ if * u .Limit == 0 {
200+ targetSku = size .String ()
201+ }
202+ }
203+ }
204+
205+ if targetSku == "" {
206+ Skip ("Can't run test. No supported SKU without quota found" )
207+ }
208+
209+ By (fmt .Sprintf ("Trying to resize controlplane vms to %s" , targetSku ))
210+ params := url.Values {
211+ "deallocateVM" : []string {"false" },
212+ "vmSize" : []string {targetSku },
213+ }
214+
215+ out := api.CloudError {}
216+ resp , err := adminRequest (ctx , http .MethodPost , "/admin" + clusterResourceID + "/resizecontrolplane" , params , true , nil , & out )
217+
218+ Expect (err ).NotTo (HaveOccurred ())
219+ Expect (resp .StatusCode ).To (Equal (http .StatusBadRequest ))
220+ Expect (out .Message ).To (Equal ("Pre-flight validation failed." ))
221+ Expect (out .Details ).To (HaveLen (1 ))
222+ Expect (out .Details [0 ].Code ).To (Equal ("ResourceQuotaExceeded" ))
223+ })
224+
225+ It ("should do the resize when target size is different" , Label (slow ), FlakeAttempts (1 ), Serial , func (ctx context.Context ) {
226+ By ("Getting the current machine size" )
227+ preResizeVMSize := getControlPlaneVMSize (ctx )
228+ Expect (preResizeVMSize ).ToNot (BeZero ())
229+
230+ // Pick the next-larger VM size within the same family from the
231+ // supported-master list. This keeps the resize on a well-tested size
232+ // while avoiding arbitrary family swaps.
233+ targetSku , err := nextLargerSupportedMasterVMSize (preResizeVMSize )
234+ if err != nil {
235+ Skip (err .Error ())
236+ }
237+
238+ By (fmt .Sprintf ("Resizing from %s to %s" , preResizeVMSize , targetSku ))
239+ params := url.Values {
240+ "deallocateVM" : []string {"false" },
241+ "vmSize" : []string {targetSku },
242+ }
243+
244+ resp , err := adminRequest (ctx , http .MethodPost , "/admin" + clusterResourceID + "/resizecontrolplane" , params , true , nil , nil )
245+
246+ Expect (err ).NotTo (HaveOccurred ())
247+ Expect (resp .StatusCode ).To (Equal (http .StatusOK ))
248+
249+ By ("Validating vm size after resize" )
250+ controlPlaneVms := getControlPlaneVMs (ctx )
251+ Expect (controlPlaneVms ).ToNot (BeEmpty ())
252+ for _ , vm := range controlPlaneVms {
253+ Expect (vm .HardwareProfile ).ToNot (BeNil ())
254+ Expect (string (vm .HardwareProfile .VMSize )).To (Equal (targetSku ))
255+ Expect (vm .ProvisioningState ).ToNot (BeNil ())
256+ Expect (* vm .ProvisioningState ).To (Equal (string (compute .ProvisioningStateSucceeded )))
257+ }
258+
259+ By ("Validating machine and node labels" )
260+ validateMasterVMSizeLabels (ctx , targetSku )
261+ }, NodeTimeout (30 * time .Minute ))
42262})
0 commit comments