diff --git a/Makefile b/Makefile index ea64bccd6..3e948eb93 100644 --- a/Makefile +++ b/Makefile @@ -232,11 +232,11 @@ endif .PHONY: helm-dependency-update helm-dependency-update: - @helm dependency update chart/infra-server + @helm dependency update chart/infra-server >&2 create-namespaces: - @kubectl create namespace argo >/dev/null 2>&1 || echo "namespace/argo already exists"; exit 0 - @kubectl create namespace monitoring >/dev/null 2>&1 || echo "namespace/monitoring already exists"; exit 0 + @kubectl create namespace argo >/dev/null 2>&1 || echo "namespace/argo already exists" >&2; exit 0 + @kubectl create namespace monitoring >/dev/null 2>&1 || echo "namespace/monitoring already exists" >&2; exit 0 ## Render template .PHONY: helm-template diff --git a/chart/infra-server/argo-values.yaml b/chart/infra-server/argo-values.yaml index 0ac701bb8..a0802e4d2 100644 --- a/chart/infra-server/argo-values.yaml +++ b/chart/infra-server/argo-values.yaml @@ -3,6 +3,9 @@ argo-workflows: server: authModes: - server + # We install the CRDs separately, because they cannot be upgraded with Helm through the dependent chart. 
+ crds: + install: false controller: # Default values that will apply to all Workflows from this controller, unless overridden on the Workflow-level diff --git a/chart/infra-server/static/flavors.yaml b/chart/infra-server/static/flavors.yaml index c2da01641..a95a70578 100644 --- a/chart/infra-server/static/flavors.yaml +++ b/chart/infra-server/static/flavors.yaml @@ -504,7 +504,7 @@ name: OpenShift 4.x Perf&Scale description: OpenShift 4.x Perf&Scale availability: stable - workflow: configuration/workflow-openshift-4-perf-scale.yaml + workflow: configuration/workflow-openshift-4.yaml aliases: - ocp-4-perf-scale parameters: @@ -576,6 +576,11 @@ value: false kind: optional + - name: install-hypershift + description: should Hypershift be installed + value: false + kind: optional + - name: trusted-certs-enabled description: Should trusted certificates be created value: false @@ -589,6 +594,13 @@ Consult OCP documentation for details. {{ .Chart.Annotations.ocpCredentialsMode }} is the value used by stackrox CI. + - name: keep-failed-cluster + description: Keep failed cluster + value: false + kind: optional + help: | + *Only for debugging infra issues.* If in doubt, please keep to false. 
+ - name: ssd-storage-class description: Ensure an SSD StorageClass is the default StorageClass for the cluster value: true diff --git a/chart/infra-server/static/test-qa-demo.yaml b/chart/infra-server/static/test-qa-demo.yaml index f9c9514ea..f3ec25922 100644 --- a/chart/infra-server/static/test-qa-demo.yaml +++ b/chart/infra-server/static/test-qa-demo.yaml @@ -23,8 +23,8 @@ spec: - name: whalesay container: - image: docker/whalesay:latest - command: [cowsay] + image: busybox:latest + command: [echo] args: - "hello world to: " - '{{ "{{" }}workflow.parameters.name{{ "}}" }}' diff --git a/chart/infra-server/static/test-simulate.yaml b/chart/infra-server/static/test-simulate.yaml index ba7c5f311..f1deced94 100644 --- a/chart/infra-server/static/test-simulate.yaml +++ b/chart/infra-server/static/test-simulate.yaml @@ -26,9 +26,12 @@ spec: templates: - name: start - steps: - - - name: create - template: simulate + dag: + tasks: + - name: create + templateRef: + name: test-simulate + template: simulate arguments: parameters: - name: delay-seconds @@ -37,12 +40,20 @@ spec: value: '{{ "{{" }}workflow.parameters.create-outcome{{ "}}" }}' - name: test-gcs value: '{{ "{{"}}workflow.parameters.test-gcs{{ "}}" }}' - - - name: wait - template: wait + + - name: wait + dependencies: [create] + templateRef: + name: common + template: wait + - name: stop - steps: - - - name: destroy - template: simulate + dag: + tasks: + - name: destroy + templateRef: + name: test-simulate + template: simulate arguments: parameters: - name: delay-seconds @@ -51,60 +62,3 @@ spec: value: '{{ "{{" }}workflow.parameters.destroy-outcome{{ "}}" }}' - name: test-gcs value: '{{ "{{"}}workflow.parameters.test-gcs{{ "}}" }}' - - - name: simulate - inputs: - parameters: - - name: delay-seconds - - name: outcome - - name: test-gcs - script: - image: gcr.io/google.com/cloudsdktool/google-cloud-cli:stable - command: [bash] - source: | - set -x - - delay() { - start=0 - while sleep 1; do - if [[ $((start++)) -ge 
{{ "{{" }}inputs.parameters.delay-seconds{{ "}}" }} ]]; then - break - fi - done - } - - upload_or_delete_gcs_object() { - gcloud auth activate-service-account --key-file /tmp/google-credentials.json - gcloud config set core/disable_prompts True - - BUCKET_NAME="infra-e2e-upload-test" - FILE="{{ "{{" }}workflow.name{{ "}}" }}" - touch "${FILE}" - - DESTINATION="gs://${BUCKET_NAME}/${FILE}" - - if gsutil -q stat "${DESTINATION}"; then - echo "File exists. Deleting..." - gsutil rm "${DESTINATION}" - else - echo "File does not exist. Proceeding to upload." - gsutil cp "${FILE}" "${DESTINATION}" - fi - } - - if [[ "{{ "{{" }}inputs.parameters.test-gcs{{ "}}" }}" == "true" ]]; then - upload_or_delete_gcs_object - fi - - if [[ {{ "{{" }}inputs.parameters.delay-seconds{{ "}}" }} -gt 0 ]]; then - delay - fi - - [[ "{{ "{{" }}inputs.parameters.outcome{{ "}}" }}" == "success" ]] || exit 1 - - volumeMounts: - - name: credentials - mountPath: /tmp - - - name: wait - suspend: {} diff --git a/chart/infra-server/static/workflow-openshift-4-perf-scale.yaml b/chart/infra-server/static/workflow-openshift-4-perf-scale.yaml deleted file mode 100644 index 707d16f16..000000000 --- a/chart/infra-server/static/workflow-openshift-4-perf-scale.yaml +++ /dev/null @@ -1,184 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: openshift-4-perf-scale- -spec: - entrypoint: start - onExit: stop - arguments: - parameters: - - name: name - - name: openshift-version - value: "" - - name: master-node-type - value: "" - - name: master-node-count - value: "" - - name: worker-node-type - value: "" - - name: worker-node-count - value: "" - - name: region - value: "" - - name: pull-secret - value: "" - - name: fips-enabled - - name: trusted-certs-enabled - - name: credentials-mode - - name: ssd-storage-class - volumeClaimTemplates: - - metadata: - name: data - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 10Mi - volumes: - - name: credentials 
- secret: - secretName: openshift-4-gcp-service-account - - templates: - - name: start - steps: - - - name: create - template: create - - - - name: gather - template: gather - - - - name: wait - template: wait - - - name: stop - steps: - - - name: destroy - template: destroy - - - name: create - outputs: - artifacts: - - name: kubeconfig - path: /data/auth/kubeconfig - archive: - none: {} - - name: kubeadmin-password - path: /data/auth/kubeadmin-password - archive: - none: {} - - name: url - path: /data/url - archive: - none: {} - - name: dotenv - path: /data/dotenv - archive: - none: {} - - name: SSH_ACCESS - path: /data/ssh/SSH_ACCESS.md - archive: - none: {} - - name: data - path: /data - archive: - tar: {} - container: - image: quay.io/stackrox-io/ci:automation-flavors-openshift-4-{{ .Chart.Annotations.automationFlavorsVersion }} - imagePullPolicy: Always - command: - - entrypoint.sh - args: - - create - - '{{ "{{" }}workflow.parameters.name{{ "}}" }}' - - ocp.infra.rox.systems - env: - - name: GOOGLE_CREDENTIALS - valueFrom: - secretKeyRef: - name: openshift-4-gcp-service-account - key: google-credentials.json - - name: GCP_PROJECT - value : "acs-team-temp-dev" - - name: PULL_SECRET - valueFrom: - secretKeyRef: - name: redhat-pull-secret - key: REDHAT_PULL_SECRET - - name: USER_PULL_SECRET - value: '{{ "{{" }}workflow.parameters.pull-secret{{ "}}" }}' - - name: OPENSHIFT_VERSION - value: '{{ "{{" }}workflow.parameters.openshift-version{{ "}}" }}' - - name: MASTER_NODE_COUNT - value: '{{ "{{" }}workflow.parameters.master-node-count{{ "}}" }}' - - name: WORKER_NODE_COUNT - value: '{{ "{{" }}workflow.parameters.worker-node-count{{ "}}" }}' - - name: MASTER_NODE_TYPE - value: '{{ "{{" }}workflow.parameters.master-node-type{{ "}}" }}' - - name: WORKER_NODE_TYPE - value: '{{ "{{" }}workflow.parameters.worker-node-type{{ "}}" }}' - - name: REGION - value: '{{ "{{" }}workflow.parameters.region{{ "}}" }}' - - name: FIPS_ENABLED - value: '{{ "{{" 
}}workflow.parameters.fips-enabled{{ "}}" }}' - - name: TRUSTED_CERTS_ENABLED - value: '{{ "{{" }}workflow.parameters.trusted-certs-enabled{{ "}}" }}' - - name: CREDENTIALS_MODE - value: '{{ "{{" }}workflow.parameters.credentials-mode{{ "}}" }}' - - name: SSD_STORAGE_CLASS - value: '{{ "{{" }}workflow.parameters.ssd-storage-class{{ "}}" }}' - volumeMounts: - - name: data - mountPath: /data - - - name: gather - script: - image: busybox - command: [sh] - source: | - cd /data - . ./dotenv - echo "${OPENSHIFT_CONSOLE_URL}" > cluster-console-url - echo "${OPENSHIFT_CONSOLE_USERNAME}" > cluster-console-username - echo "${OPENSHIFT_CONSOLE_PASSWORD}" > cluster-console-password - volumeMounts: - - name: data - mountPath: /data - outputs: - artifacts: - - name: cluster-console-url - path: /data/cluster-console-url - archive: - none: {} - - name: cluster-console-username - path: /data/cluster-console-username - archive: - none: {} - - name: cluster-console-password - path: /data/cluster-console-password - archive: - none: {} - - - name: wait - suspend: {} - - - name: destroy - container: - image: quay.io/stackrox-io/ci:automation-flavors-openshift-4-{{ .Chart.Annotations.automationFlavorsVersion }} - imagePullPolicy: Always - command: - - entrypoint.sh - args: - - destroy - - '{{ "{{" }}workflow.parameters.name{{ "}}" }}' - env: - - name: GOOGLE_CREDENTIALS - valueFrom: - secretKeyRef: - name: openshift-4-gcp-service-account - key: google-credentials.json - - name: GCP_PROJECT - value : "acs-team-temp-dev" - volumeMounts: - - name: data - mountPath: /data diff --git a/chart/infra-server/templates/secrets.yaml b/chart/infra-server/templates/secrets.yaml index 2147de435..ddf06699f 100644 --- a/chart/infra-server/templates/secrets.yaml +++ b/chart/infra-server/templates/secrets.yaml @@ -46,9 +46,6 @@ data: workflow-openshift-4-demo.yaml: |- {{- tpl (.Files.Get "static/workflow-openshift-4-demo.yaml" ) . 
| b64enc | nindent 4 }} - workflow-openshift-4-perf-scale.yaml: |- - {{- tpl (.Files.Get "static/workflow-openshift-4-perf-scale.yaml" ) . | b64enc | nindent 4 }} - workflow-eks.yaml: |- {{- tpl (.Files.Get "static/workflow-eks.yaml" ) . | b64enc | nindent 4 }} diff --git a/chart/infra-server/templates/workflowtemplates/common.yaml b/chart/infra-server/templates/workflowtemplates/common.yaml new file mode 100644 index 000000000..a8f6bad03 --- /dev/null +++ b/chart/infra-server/templates/workflowtemplates/common.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: argoproj.io/v1alpha1 +kind: WorkflowTemplate +metadata: + name: common + namespace: default +spec: + templates: + - name: wait + suspend: {} diff --git a/chart/infra-server/templates/workflowtemplates/test-simulate.yaml b/chart/infra-server/templates/workflowtemplates/test-simulate.yaml new file mode 100644 index 000000000..c36a5a86c --- /dev/null +++ b/chart/infra-server/templates/workflowtemplates/test-simulate.yaml @@ -0,0 +1,61 @@ +--- +apiVersion: argoproj.io/v1alpha1 +kind: WorkflowTemplate +metadata: + name: test-simulate + namespace: default +spec: + templates: + - name: simulate + inputs: + parameters: + - name: delay-seconds + - name: outcome + - name: test-gcs + script: + image: gcr.io/google.com/cloudsdktool/google-cloud-cli:stable + command: [bash] + source: | + set -x + + delay() { + start=0 + while sleep 1; do + if [[ $((start++)) -ge {{ "{{" }}inputs.parameters.delay-seconds{{ "}}" }} ]]; then + break + fi + done + } + + upload_or_delete_gcs_object() { + gcloud auth activate-service-account --key-file /tmp/google-credentials.json + gcloud config set core/disable_prompts True + + BUCKET_NAME="infra-e2e-upload-test" + FILE="{{ "{{" }}workflow.name{{ "}}" }}" + touch "${FILE}" + + DESTINATION="gs://${BUCKET_NAME}/${FILE}" + + if gsutil -q stat "${DESTINATION}"; then + echo "File exists. Deleting..." + gsutil rm "${DESTINATION}" + else + echo "File does not exist. Proceeding to upload." 
+ gsutil cp "${FILE}" "${DESTINATION}" + fi + } + + if [[ "{{ "{{" }}inputs.parameters.test-gcs{{ "}}" }}" == "true" ]]; then + upload_or_delete_gcs_object + fi + + if [[ {{ "{{" }}inputs.parameters.delay-seconds{{ "}}" }} -gt 0 ]]; then + delay + fi + + [[ "{{ "{{" }}inputs.parameters.outcome{{ "}}" }}" == "success" ]] || exit 1 + + volumeMounts: + - name: credentials + mountPath: /tmp diff --git a/pkg/service/cluster/cluster.go b/pkg/service/cluster/cluster.go index c69f2dfc4..ecfeda4a8 100644 --- a/pkg/service/cluster/cluster.go +++ b/pkg/service/cluster/cluster.go @@ -733,7 +733,14 @@ func determinePodName(node v1alpha1.NodeStatus) string { parts := strings.Split(node.ID, "-") baseName := strings.Join(parts[:len(parts)-1], "-") randomNumber := parts[len(parts)-1] - return fmt.Sprintf("%s-%s-%s", baseName, node.TemplateName, randomNumber) + + var templateName string + if node.TemplateRef != nil { + templateName = node.TemplateRef.Template + } else { + templateName = node.TemplateName + } + return fmt.Sprintf("%s-%s-%s", baseName, templateName, randomNumber) } func (s *clusterImpl) startSlackCheck() { diff --git a/scripts/deploy/helm.sh b/scripts/deploy/helm.sh index df4ee2853..a13f250ed 100755 --- a/scripts/deploy/helm.sh +++ b/scripts/deploy/helm.sh @@ -25,6 +25,14 @@ check_not_empty() { done } +install_crds() { + argo_chart_file=$(find "chart/infra-server/charts" -name "argo-workflows-*.tgz" 2>/dev/null | head -1) + ARGO_WORKFLOWS_APP_VERSION="$(tar -xzOf "${argo_chart_file}" argo-workflows/Chart.yaml | yq eval '.appVersion' -)" + echo "Using argo-workflows app version: ${ARGO_WORKFLOWS_APP_VERSION}" >&2 + kubectl apply --kustomize \ + "https://github.com/argoproj/argo-workflows/manifests/base/crds/minimal?ref=${ARGO_WORKFLOWS_APP_VERSION}" >&2 +} + template() { # Need to use helm upgrade --dry-run to have .Capabilities context available helm upgrade \ @@ -102,4 +110,5 @@ diff() { } check_not_empty TASK TAG ENVIRONMENT +install_crds eval "$TASK"