|
| 1 | +package oci |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + "time" |
| 7 | + |
| 8 | + "github.com/oracle/oci-cloud-controller-manager/pkg/flexcidr" |
| 9 | + "github.com/oracle/oci-cloud-controller-manager/pkg/oci/client" |
| 10 | + "github.com/oracle/oci-go-sdk/v65/core" |
| 11 | + "github.com/pkg/errors" |
| 12 | + "go.uber.org/zap" |
| 13 | + v1 "k8s.io/api/core/v1" |
| 14 | + "k8s.io/apimachinery/pkg/types" |
| 15 | + utilruntime "k8s.io/apimachinery/pkg/util/runtime" |
| 16 | + "k8s.io/apimachinery/pkg/util/wait" |
| 17 | + coreinformers "k8s.io/client-go/informers/core/v1" |
| 18 | + clientset "k8s.io/client-go/kubernetes" |
| 19 | + "k8s.io/client-go/tools/cache" |
| 20 | + "k8s.io/client-go/util/workqueue" |
| 21 | +) |
| 22 | + |
// flexCIDRRetryDelay is the flat delay used when a node must be re-queued
// because its backing instance or the cluster IP family is not ready yet
// (deliberately not the rate-limited backoff used for hard errors).
const flexCIDRRetryDelay = time.Minute
| 24 | + |
// FlexCIDRController watches Node objects and reconciles their pod CIDRs
// from the flex CIDR configuration carried on each node's primary VNIC.
type FlexCIDRController struct {
	nodeInformer    coreinformers.NodeInformer    // source of Node events and node lister
	serviceInformer coreinformers.ServiceInformer // used to derive the cluster IP family from Services
	kubeClient      clientset.Interface           // used to patch node pod CIDRs
	cloud           *CloudProvider                // resolves providerID when the node spec lacks one
	queue           workqueue.RateLimitingInterface // holds node names pending reconciliation
	logger          *zap.SugaredLogger
	instanceCache   cache.Store      // best-effort cache of fetched OCI instances
	ociClient       client.Interface // OCI compute/networking API access
}
| 35 | + |
| 36 | +func NewFlexCIDRController( |
| 37 | + nodeInformer coreinformers.NodeInformer, |
| 38 | + serviceInformer coreinformers.ServiceInformer, |
| 39 | + kubeClient clientset.Interface, |
| 40 | + cloud *CloudProvider, |
| 41 | + logger *zap.SugaredLogger, |
| 42 | + instanceCache cache.Store, |
| 43 | + ociClient client.Interface) *FlexCIDRController { |
| 44 | + |
| 45 | + controller := &FlexCIDRController{ |
| 46 | + nodeInformer: nodeInformer, |
| 47 | + serviceInformer: serviceInformer, |
| 48 | + kubeClient: kubeClient, |
| 49 | + cloud: cloud, |
| 50 | + queue: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()), |
| 51 | + logger: logger, |
| 52 | + instanceCache: instanceCache, |
| 53 | + ociClient: ociClient, |
| 54 | + } |
| 55 | + |
| 56 | + controller.nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ |
| 57 | + AddFunc: func(obj interface{}) { |
| 58 | + node := obj.(*v1.Node) |
| 59 | + controller.queue.Add(node.Name) |
| 60 | + }, |
| 61 | + UpdateFunc: func(_, newObj interface{}) { |
| 62 | + node := newObj.(*v1.Node) |
| 63 | + controller.queue.Add(node.Name) |
| 64 | + }, |
| 65 | + }) |
| 66 | + |
| 67 | + return controller |
| 68 | +} |
| 69 | + |
| 70 | +func (fcc *FlexCIDRController) Run(stopCh <-chan struct{}) { |
| 71 | + defer utilruntime.HandleCrash() |
| 72 | + defer fcc.queue.ShutDown() |
| 73 | + |
| 74 | + fcc.logger.Info("Starting flex CIDR controller") |
| 75 | + |
| 76 | + if !cache.WaitForCacheSync(stopCh, fcc.nodeInformer.Informer().HasSynced, fcc.serviceInformer.Informer().HasSynced) { |
| 77 | + utilruntime.HandleError(fmt.Errorf("timed out waiting for flex CIDR controller caches to sync")) |
| 78 | + return |
| 79 | + } |
| 80 | + |
| 81 | + wait.Until(fcc.runWorker, time.Second, stopCh) |
| 82 | +} |
| 83 | + |
| 84 | +func (fcc *FlexCIDRController) runWorker() { |
| 85 | + for fcc.processNextItem() { |
| 86 | + } |
| 87 | +} |
| 88 | + |
| 89 | +func (fcc *FlexCIDRController) processNextItem() bool { |
| 90 | + key, quit := fcc.queue.Get() |
| 91 | + if quit { |
| 92 | + return false |
| 93 | + } |
| 94 | + defer fcc.queue.Done(key) |
| 95 | + |
| 96 | + if err := fcc.processItem(key.(string)); err != nil { |
| 97 | + fcc.logger.Errorf("Error processing flex CIDR for node %s (will retry): %v", key, err) |
| 98 | + fcc.queue.AddRateLimited(key) |
| 99 | + } else { |
| 100 | + fcc.queue.Forget(key) |
| 101 | + } |
| 102 | + |
| 103 | + return true |
| 104 | +} |
| 105 | + |
// processItem reconciles the pod CIDRs of the node named key against the
// flex CIDRs derived from its instance's primary VNIC. A non-nil return
// triggers rate-limited retry in processNextItem; transient "not ready yet"
// states instead re-queue via AddAfter and return nil so backoff does not
// escalate.
func (fcc *FlexCIDRController) processItem(key string) error {
	logger := fcc.logger.With("node", key)

	node, err := fcc.nodeInformer.Lister().Get(key)
	if err != nil {
		// NOTE(review): a NotFound error (node deleted) is returned and
		// retried like any other error; confirm whether deleted nodes
		// should instead be dropped from the queue.
		return err
	}

	// A node with pod CIDRs already set but no providerID cannot be mapped
	// to an OCI instance, so there is nothing further to reconcile.
	if len(node.Spec.PodCIDRs) > 0 && len(node.Spec.ProviderID) == 0 {
		logger.Debug("node already has podCIDRs but providerID is empty, skipping")
		return nil
	}

	instance, instanceID, err := fcc.getInstanceByNode(node, logger)
	if err != nil {
		return err
	}
	if instance == nil {
		// Defensive: getInstanceByNode as written never returns a nil
		// instance together with a nil error.
		return nil
	}

	// Best-effort cache insert; failure only degrades later lookups.
	if err := fcc.instanceCache.Add(instance); err != nil {
		logger.With(zap.Error(err)).Debug("failed to add instance to cache")
	}

	// VNIC data is not reliable until the instance is running; re-queue with
	// the flat retry delay rather than rate-limited backoff.
	if instance.LifecycleState != core.InstanceLifecycleStateRunning {
		logger.Infof("instance %s not running yet, requeueing", instanceID)
		fcc.queue.AddAfter(key, flexCIDRRetryDelay)
		return nil
	}

	// Nodes whose instance metadata carries no flex CIDR configuration are
	// not managed by this controller.
	config, hasConfig := flexcidr.ParsePrimaryVnicConfig(instance)
	if !hasConfig {
		logger.Debug("instance metadata does not include flex CIDR configuration, skipping")
		return nil
	}

	// The cluster IP family is derived from existing Services; early in
	// cluster bring-up it may not be determinable yet, so re-queue.
	clusterIPFamily, err := flexcidr.GetClusterIpFamily(context.Background(), fcc.serviceInformer.Lister())
	if err != nil {
		logger.With(zap.Error(err)).Info("cluster IP family not ready yet, requeueing")
		fcc.queue.AddAfter(key, flexCIDRRetryDelay)
		return nil
	}

	primaryVNIC, err := fcc.ociClient.Compute().GetPrimaryVNICForInstance(context.Background(), *instance.CompartmentId, instanceID)
	if err != nil {
		return errors.Wrap(err, "GetPrimaryVNICForInstance")
	}

	// NOTE(review): Networking(nil) — presumably nil selects the default
	// networking client configuration; confirm against client.Interface.
	flexCIDRManager := &flexcidr.FlexCIDR{
		Logger:            logger,
		PrimaryVnicConfig: config,
		ClusterIpFamily:   clusterIPFamily,
		OciCoreClient:     fcc.ociClient.Networking(nil),
	}

	flexCIDRs, err := flexCIDRManager.GetOrCreateFlexCidrList(*primaryVNIC.Id)
	if err != nil {
		return err
	}
	if !flexCIDRManager.ValidateFlexCidrList(flexCIDRs) {
		return fmt.Errorf("computed flex CIDRs %v are invalid", flexCIDRs)
	}
	// Order-insensitive comparison avoids issuing a no-op patch when the
	// node already carries the expected CIDRs.
	if flexcidr.StringSlicesEqualIgnoreOrder(node.Spec.PodCIDRs, flexCIDRs) {
		logger.Debugf("node already has expected podCIDRs %v", flexCIDRs)
		return nil
	}

	return flexcidr.PatchNodePodCIDRs(context.Background(), fcc.kubeClient, node.Name, flexCIDRs, logger)
}
| 176 | + |
// getInstanceByNode resolves the OCI compute instance backing node, returning
// the instance, its instance OCID, and any error. The providerID from the
// node spec is used when set; otherwise it is looked up through the cloud
// provider by node name.
func (fcc *FlexCIDRController) getInstanceByNode(node *v1.Node, logger *zap.SugaredLogger) (*core.Instance, string, error) {
	// `timeout` is a package-level value declared elsewhere in this package;
	// every OCI call in this helper shares the one deadline.
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	providerID := node.Spec.ProviderID
	var err error
	if providerID == "" {
		// Freshly registered nodes may not have a providerID yet; ask the
		// cloud provider for it by node name.
		providerID, err = fcc.cloud.InstanceID(ctx, types.NodeName(node.Name))
		if err != nil {
			return nil, "", err
		}
	}

	instanceID, err := MapProviderIDToResourceID(providerID)
	if err != nil {
		logger.With(zap.Error(err)).Error("failed to map providerID to instanceID")
		return nil, "", err
	}

	instance, err := fcc.ociClient.Compute().GetInstance(ctx, instanceID)
	if err != nil {
		logger.With(zap.Error(err)).Error("failed to fetch instance")
		return nil, "", err
	}

	return instance, instanceID, nil
}
0 commit comments