Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion cmd/mapt/cmd/aws/services/snc.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ const (

sncProfile = "profile"
sncProfileDesc = "comma separated list of profiles to apply on the SNC cluster. Profiles available: virtualization, serverless-serving, serverless-eventing, serverless, servicemesh, ai, nvidia. The ai profile automatically includes servicemesh and serverless-serving as prerequisites and raises the minimum instance size to 16 vCPUs. The nvidia profile installs NFD and the NVIDIA GPU Operator"

operatorChannel = "operator-channel"
operatorChannelDesc = "override the OLM subscription channel for an operator (--operator-channel serverless-operator=preview,nfd=4.17)"
catalogSource = "catalog-source"
catalogSourceDesc = "override the OLM catalog source with a custom index image (--catalog-source serverless-operator=quay.io/my-org/my-index:latest)"
)

func GetOpenshiftSNCCmd() *cobra.Command {
Expand Down Expand Up @@ -92,7 +97,9 @@ func createSNC() *cobra.Command {
PullSecretFile: viper.GetString(pullSecretFile),
Timeout: viper.GetString(params.Timeout),
ServiceEndpoints: params.NetworkServiceEndpoints(),
Profiles: profiles}); err != nil {
Profiles: profiles,
OperatorChannels: viper.GetStringMapString(operatorChannel),
CatalogSources: viper.GetStringMapString(catalogSource)}); err != nil {
return err
}
return nil
Expand All @@ -107,6 +114,8 @@ func createSNC() *cobra.Command {
flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
flagSet.StringToStringP(params.Tags, "", nil, params.TagsDesc)
flagSet.StringSliceP(sncProfile, "", []string{}, sncProfileDesc)
flagSet.StringToStringP(operatorChannel, "", nil, operatorChannelDesc)
flagSet.StringToStringP(catalogSource, "", nil, catalogSourceDesc)
params.AddComputeRequestFlags(flagSet)
params.AddSpotFlags(flagSet)
params.AddNetworkFlags(flagSet, awsParams.ServiceEndpointsDesc)
Expand Down
50 changes: 50 additions & 0 deletions docs/aws/openshift-snc.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,56 @@ Multiple profiles can be specified as a comma-separated list (e.g., `--profile v
| `nvidia` | Installs the [NVIDIA GPU Operator](https://docs.nvidia.com/datacenter/cloud-native/openshift/latest/install-gpu-ocp.html) on the cluster. Automatically installs [Node Feature Discovery](https://docs.redhat.com/en/documentation/openshift_container_platform/latest/html/specialized_hardware_and_driver_enablement/psap-node-feature-discovery-operator) (NFD) as a prerequisite and creates a ClusterPolicy with the recommended OpenShift defaults (CRI-O runtime, OCP driver toolkit). The cluster must run on a GPU-capable instance type (e.g. `g4dn`, `g5`, `p4d`).|


### Operator overrides

Profiles install operators using the default OLM channel (`stable`) and catalog (`redhat-operators`). Two flags allow overriding these per operator, which is useful for testing pre-release operator builds:

#### `--operator-channel`

Override the OLM subscription channel for a specific operator:

```bash
mapt aws openshift-snc create \
--profile serverless-serving \
--operator-channel serverless-operator=candidate
```

Multiple operators can be overridden at once:

```bash
--operator-channel serverless-operator=preview,nfd=4.17
```

#### `--catalog-source`

Use a custom index image instead of the default catalog. This creates a `CatalogSource` CR in `openshift-marketplace` and points the operator's subscription to it:

```bash
mapt aws openshift-snc create \
--profile nvidia \
--catalog-source gpu-operator-certified=quay.io/my-team/gpu-operator-index:test-v1.0
```

Both flags can be combined:

```bash
mapt aws openshift-snc create \
--profile ai \
--operator-channel serverless-operator=candidate \
--catalog-source rhods-operator=quay.io/my-team/rhoai-index:nightly
```

When neither flag is provided, operators use the defaults: channel `stable` and catalog `redhat-operators` (unless overridden in the profile definition, e.g. `gpu-operator-certified` and `nfd` use `certified-operators`).

For both `--operator-channel` and `--catalog-source`, the key of each entry is the operator's package name as it appears in OLM. The operators installed by each profile are:

| Profile | Operator package names |
|---------|----------------------|
| `serverless-serving` / `serverless-eventing` / `serverless` | `serverless-operator` |
| `servicemesh` | `servicemeshoperator3` |
| `ai` | `rhods-operator`, `servicemeshoperator`, `authorino-operator`, `serverless-operator` |
| `nvidia` | `gpu-operator-certified`, `nfd` |

### Adding new profiles

To add a new profile:
Expand Down
27 changes: 18 additions & 9 deletions pkg/provider/aws/action/snc/snc.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ type openshiftSNCRequest struct {
pullSecretFile *string
serviceEndpoints []string
allocationData *allocation.AllocationResult
profiles []string
diskSize *int
profiles []string
operatorChannels map[string]string
catalogSources map[string]string
diskSize *int
}

func (r *openshiftSNCRequest) validate() error {
Expand All @@ -67,10 +69,13 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiSNC.SNCArgs) (_ *apiSNC.SNCResult
if err != nil {
return nil, err
}
// Validate profiles
// Validate profiles and operator overrides
if err := profile.Validate(args.Profiles); err != nil {
return nil, err
}
if err := profile.ValidateOperatorOverrides(args.OperatorChannels, args.CatalogSources); err != nil {
return nil, err
}
// Compose request
prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main")
r := openshiftSNCRequest{
Expand All @@ -82,8 +87,10 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiSNC.SNCArgs) (_ *apiSNC.SNCResult
pullSecretFile: &args.PullSecretFile,
timeout: &args.Timeout,
serviceEndpoints: args.ServiceEndpoints,
profiles: args.Profiles,
diskSize: args.ComputeRequest.DiskSize}
profiles: args.Profiles,
operatorChannels: args.OperatorChannels,
catalogSources: args.CatalogSources,
diskSize: args.ComputeRequest.DiskSize}
if args.Spot != nil {
r.spot = args.Spot.Spot
}
Expand Down Expand Up @@ -290,10 +297,12 @@ func (r *openshiftSNCRequest) deploy(ctx *pulumi.Context) error {
deletedWith = c.AutoscalingGroup
}
if err := profile.Deploy(ctx, r.profiles, &profile.DeployArgs{
K8sProvider: k8sProvider,
Kubeconfig: kubeconfig,
Prefix: *r.prefix,
DeletedWith: deletedWith,
K8sProvider: k8sProvider,
Kubeconfig: kubeconfig,
Prefix: *r.prefix,
DeletedWith: deletedWith,
OperatorChannels: r.operatorChannels,
CatalogSources: r.catalogSources,
}); err != nil {
return err
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/target/service/snc/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ type SNCArgs struct {
Spot *spotTypes.SpotArgs
Timeout string
ServiceEndpoints []string
Profiles []string
Profiles []string
OperatorChannels map[string]string
CatalogSources map[string]string
}

type SNCResults struct {
Expand Down
10 changes: 10 additions & 0 deletions pkg/target/service/snc/profile/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,18 @@ func installOperator(ctx *pulumi.Context, args *DeployArgs, oi operatorInstall)
catalogSource = catalogSourceRedHat
}

if override, ok := args.OperatorChannels[oi.packageName]; ok {
channel = override
}
if cs, ok := args.catalogSourceCRs[oi.packageName]; ok {
catalogSource = cs.Name
}

deps := append([]pulumi.Resource{}, args.Deps...)
deps = append(deps, oi.extraDeps...)
if cs, ok := args.catalogSourceCRs[oi.packageName]; ok {
deps = append(deps, cs.Resource)
}

// If ogName is provided, create a dedicated namespace and OperatorGroup.
if oi.ogName != "" {
Expand Down
68 changes: 68 additions & 0 deletions pkg/target/service/snc/profile/profile.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package profile

import (
"crypto/sha256"
"fmt"
"maps"
"slices"

"github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes"
"github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/apiextensions"
corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1"
metav1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/meta/v1"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
Expand Down Expand Up @@ -63,6 +65,18 @@ type DeployArgs struct {
// so that Pulumi skips deleting them individually during destroy — the
// resources disappear when the VM is terminated.
DeletedWith pulumi.Resource
// OperatorChannels maps operator packageName to an OLM channel override.
OperatorChannels map[string]string
// CatalogSources maps operator packageName to a custom index image URL.
CatalogSources map[string]string

// catalogSourceCRs maps packageName to the CatalogSource CR info.
catalogSourceCRs map[string]catalogSourceInfo
}

// catalogSourceInfo describes the custom CatalogSource created for a single
// operator package by ensureCatalogSources.
type catalogSourceInfo struct {
	// Name is the metadata name of the CatalogSource; installOperator uses it
	// as the catalog source of the operator's subscription.
	Name string
	// Resource is the Pulumi resource of the CatalogSource; installOperator
	// appends it to the operator's dependency list.
	Resource pulumi.Resource
}

// Validate checks that all requested profiles are supported and
Expand All @@ -88,6 +102,10 @@ func Validate(profiles []string) error {
// The AI profile implicitly brings in Service Mesh v2 (Maistra) and
// serverless-serving as prerequisites for Kserve.
func Deploy(ctx *pulumi.Context, profiles []string, args *DeployArgs) error {
if err := args.ensureCatalogSources(ctx); err != nil {
return err
}

needServing := false
needEventing := false
needAI := false
Expand Down Expand Up @@ -194,6 +212,56 @@ func (a *DeployArgs) newNamespace(ctx *pulumi.Context, name string, nsName pulum
a.k8sOpts(extra...)...)
}

// ValidateOperatorOverrides checks the maps received from --operator-channel
// and --catalog-source: every entry must have a non-empty package name (key)
// and a non-empty value (channel or index image respectively).
//
// Channels are checked before catalogs, and the entries of each map are
// visited in sorted key order, so the returned error is deterministic even
// when several entries are invalid. Nil or empty maps are valid.
func ValidateOperatorOverrides(channels, catalogs map[string]string) error {
	// check returns an error for the first invalid entry of overrides,
	// walking the keys in sorted order because map iteration order is random.
	check := func(overrides map[string]string, flag, valueDesc string) error {
		pkgs := make([]string, 0, len(overrides))
		for pkg := range overrides {
			pkgs = append(pkgs, pkg)
		}
		slices.Sort(pkgs)
		for _, pkg := range pkgs {
			if value := overrides[pkg]; pkg == "" || value == "" {
				return fmt.Errorf("invalid --%s: both package name and %s must be non-empty (got %q=%q)", flag, valueDesc, pkg, value)
			}
		}
		return nil
	}

	if err := check(channels, "operator-channel", "channel"); err != nil {
		return err
	}
	return check(catalogs, "catalog-source", "index image")
}

// ensureCatalogSources registers one CatalogSource custom resource in the
// openshift-marketplace namespace for every package/index-image pair in
// a.CatalogSources, and records each one in a.catalogSourceCRs (keyed by
// package name) so that operator subscriptions can reference them.
//
// Each resource is named "mapt-cs-<package>-<hash>", where <hash> is the first
// eight hex characters of the SHA-256 digest of the index image. When no
// catalog sources were requested the method returns nil without touching
// a.catalogSourceCRs. A registration error is returned unchanged.
func (a *DeployArgs) ensureCatalogSources(ctx *pulumi.Context) error {
	if len(a.CatalogSources) == 0 {
		return nil
	}

	a.catalogSourceCRs = make(map[string]catalogSourceInfo, len(a.CatalogSources))
	for pkg, image := range a.CatalogSources {
		// Four digest bytes render as the same eight hex characters that
		// prefix the full hex encoding of the digest.
		digest := sha256.Sum256([]byte(image))
		name := fmt.Sprintf("mapt-cs-%s-%x", pkg, digest[:4])

		spec := map[string]any{
			"sourceType":  "grpc",
			"image":       image,
			"displayName": fmt.Sprintf("MAPT custom catalog for %s", pkg),
			"publisher":   "MAPT",
		}
		crArgs := &apiextensions.CustomResourceArgs{
			ApiVersion: pulumi.String("operators.coreos.com/v1alpha1"),
			Kind:       pulumi.String("CatalogSource"),
			Metadata: &metav1.ObjectMetaArgs{
				Name:      pulumi.String(name),
				Namespace: pulumi.String("openshift-marketplace"),
			},
			OtherFields: map[string]any{"spec": spec},
		}

		// The CatalogSource waits on the same dependencies as the rest of
		// the profile resources.
		resource, err := apiextensions.NewCustomResource(ctx, name, crArgs,
			a.k8sOpts(pulumi.DependsOn(a.Deps))...)
		if err != nil {
			return err
		}
		a.catalogSourceCRs[pkg] = catalogSourceInfo{Name: name, Resource: resource}
	}
	return nil
}

// k8sOpts returns the common Pulumi resource options for K8s resources:
// the K8s provider and (when set) the DeletedWith option. Extra options
// (e.g. DependsOn) can be appended.
Expand Down