diff --git a/.github/workflows/cdn-shared-publish.yml b/.github/workflows/cdn-shared-publish.yml index d5f595c..e1b803c 100644 --- a/.github/workflows/cdn-shared-publish.yml +++ b/.github/workflows/cdn-shared-publish.yml @@ -5,24 +5,47 @@ on: workflow_call: inputs: AWS_REGION: - required: true + description: "Region for AWS resources." + required: false type: string - GHA_ROLE: - required: true + default: us-east-1 + DOMAIN: + description: "Indicates the standard CDN or a custom domain for the URL to the CDN (only standard or custom accepted)." + required: false type: string + default: standard ENVIRONMENT: + description: "The AWS environment where the resources will be deployed." required: true type: string - S3URI: + GHA_ROLE: + description: "The IAM Role linked to the OIDC connection." required: true type: string - DOMAIN: + S3URI: + description: "Legacy (deprecated) full S3 URI for the sync target in AWS." required: false type: string - default: standard + SOURCE_PATH: + description: "The path in the caller repository containing the files to sync to the S3 bucket." + required: false + type: string + default: . SYNC_PARAMS: + description: "Additional parameters for the aws s3 sync command, specific to the caller repository." required: false type: string + TARGET_PATH: + description: "The prefix in the S3 bucket to which the repository files should be synced (must start with slash)." + required: false + type: string + default: / + + +permissions: + # These are the minimum permissions to allow for OIDC connection to AWS + id-token: write + contents: read # Set defaults defaults: @@ -31,97 +54,189 @@ defaults: jobs: publish: - name: Publish content to CDN + # Start with validating the inputs from the caller workflow and prepping + # environment variables for the synchronization job. 
+ name: Publish runs-on: ubuntu-latest - permissions: - id-token: write - contents: read steps: - - uses: actions/checkout@v4 - - - name: DEV Configure AWS credentials - # Only run this step if the environment is "dev" - if: ${{ inputs.ENVIRONMENT == 'dev' }} - uses: aws-actions/configure-aws-credentials@v4 + - uses: actions/checkout@v6 with: - role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCT_DEV }}:role/${{ inputs.GHA_ROLE }} - aws-region: ${{ inputs.AWS_REGION }} - - - name: STAGE Configure AWS credentials - # Only run this step if the environment is "stage" - if: ${{ inputs.ENVIRONMENT == 'stage' }} - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCT_STAGE }}:role/${{ inputs.GHA_ROLE }} - aws-region: ${{ inputs.AWS_REGION }} + persist-credentials: false - - name: PROD Configure AWS credentials - # Only run this step if the environment is "prod" - if: ${{ inputs.ENVIRONMENT == 'prod' }} - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCT_PROD }}:role/${{ inputs.GHA_ROLE }} - aws-region: ${{ inputs.AWS_REGION }} - - - name: Sync custom domain CDN S3 content - # Only run this step if this is custom domain content (e.g., a folder at the root of bucket) - if: ${{ inputs.DOMAIN == 'custom' }} + - name: Validate + # Verify that the DOMAIN & ENVIRONMENT inputs are using the correct + # values. Verify that the SOURCE_PATH and TARGET_PATH inputs are + # formatted correctly and ensure this supports legacy caller workflows. + # Validate the SYNC_PARAMS input to only allow `--exclude` and + # `--include` parameters and then construct a single VALID_SYNC_PARAMS + # environment variable with proper quoting for use in the sync step. 
+ id: validate + env: + DOMAIN: ${{ inputs.DOMAIN }} + ENVIRONMENT: ${{ inputs.ENVIRONMENT }} + SOURCE_PATH: ${{ inputs.SOURCE_PATH }} + S3URI: ${{ inputs.S3URI }} + TARGET_PATH: ${{ inputs.TARGET_PATH }} + SYNC_PARAMS: ${{ inputs.SYNC_PARAMS }} run: | - if [ '${{ inputs.SYNC_PARAMS }}' != '' ]; then - aws s3 sync . ${{ inputs.S3URI }} --delete --exclude ".github/*" --exclude ".git/*" --exclude ".gitignore" ${{ inputs.SYNC_PARAMS }} + case "$DOMAIN" in + standard|custom) + echo "Valid DOMAIN=$DOMAIN input, proceed." + ;; + *) + echo "Invalid DOMAIN=$DOMAIN input, exiting." + exit 1 + ;; + esac + case "$ENVIRONMENT" in + dev|stage|prod) + echo "Valid ENVIRONMENT=$ENVIRONMENT input, proceed." + ;; + *) + echo "Invalid ENVIRONMENT=$ENVIRONMENT input, exiting." + exit 1 + ;; + esac + if [[ "${SOURCE_PATH:0:1}" == "." ]]; then + echo "Valid SOURCE_PATH=$SOURCE_PATH, proceed." else - aws s3 sync . ${{ inputs.S3URI }} --delete --exclude ".github/*" --exclude ".git/*" --exclude ".gitignore" + echo "Invalid SOURCE_PATH=$SOURCE_PATH, exiting." + exit 1 fi - echo "Content is synchronized to ${{ inputs.S3URI }}" >> $GITHUB_STEP_SUMMARY - - name: Sync standard CDN S3 content - # Only run this step if this is standard content (e.g., a subfolder of the cdn/ folder) - if: ${{ inputs.DOMAIN == 'standard' }} - run: | - if [ '${{ inputs.SYNC_PARAMS }}' != '' ]; then - aws s3 sync ./$(echo ${{ inputs.S3URI }} | awk -F/ '{print $5}') ${{ inputs.S3URI }} --delete --exclude ".github/*" --exclude ".git/*" --exclude ".gitignore" ${{ inputs.SYNC_PARAMS }} + if [[ "$S3URI" == "" ]]; then + if [[ "${TARGET_PATH:0:1}" == "/" ]]; then + echo "Valid TARGET_PATH=$TARGET_PATH, proceed." + else + echo "Invalid TARGET_PATH=$TARGET_PATH, exiting." + exit 1 + fi else - aws s3 sync ./$(echo ${{ inputs.S3URI }} | awk -F/ '{print $5}') ${{ inputs.S3URI }} --delete --exclude ".github/*" --exclude ".git/*" --exclude ".gitignore" + echo "Legacy caller workflow that passed an S3_URI value." 
+ if [[ "$DOMAIN" == "standard" ]]; then + echo "LEGACY_TARGET_PATH=/$(echo "$S3URI" | awk -F/ '{print $5}')" >> $GITHUB_ENV + echo "LEGACY_SOURCE_PATH=$(echo "$S3URI" | awk -F/ '{print $5}')" >> $GITHUB_ENV + else + echo "LEGACY_TARGET_PATH=/$(echo "$S3URI" | awk -F/ '{print $4}')" >> $GITHUB_ENV + fi + echo "LEGACY=true" >> $GITHUB_ENV fi - echo "Content is synchronized to ${{ inputs.S3URI }}" >> $GITHUB_STEP_SUMMARY - - name: Invalidate cache - run: | - if [ '${{ inputs.DOMAIN }}' == 'standard' ]; then - aws cloudfront create-invalidation --distribution-id $(aws ssm get-parameter --name "/tfvars/libraries-website/standard-cdn-id" --query 'Parameter.Value' --output text) --paths "/*" - echo "The cache for the $(echo ${{ inputs.S3URI }} | awk -F/ '{print $5}') folder has been cleared." >> $GITHUB_STEP_SUMMARY + if [[ -n "$SYNC_PARAMS" ]]; then + temp_params="${SYNC_PARAMS//--include/}" + temp_params="${temp_params//--exclude/}" + # If there's still a -- in there, it's an invalid flag. Shell metacharacters and newlines are rejected too, because the value is later expanded by eval and appended to GITHUB_ENV + if [[ $temp_params =~ -- || $SYNC_PARAMS == *[$'\n'\;\&\|\<\>\$\`\(\)]* ]]; then + echo "Invalid SYNC_PARAMS: only --include and --exclude parameters are allowed, exiting." + exit 1 + fi + echo "Valid SYNC_PARAMS, proceed." + echo "VALID_SYNC_PARAMS=--exclude \".github/*\" --exclude \".git/*\" --exclude \".gitignore\" $SYNC_PARAMS" >> $GITHUB_ENV else - aws cloudfront create-invalidation --distribution-id $(aws ssm get-parameter --name "/tfvars/libraries-website/custom-cdn-id" --query 'Parameter.Value' --output text) --paths "/$(echo ${{ inputs.S3URI }} | awk -F/ '{print $5}')/*" - echo "The cache for the $(echo ${{ inputs.S3URI }} | awk -F/ '{print $4}') site has been cleared." >> $GITHUB_STEP_SUMMARY + echo "VALID_SYNC_PARAMS=--exclude \".github/*\" --exclude \".git/*\" --exclude \".gitignore\"" >> $GITHUB_ENV fi - - name: Generate DEV Summary - # Only run this step if the environment is "dev" - if: ${{ inputs.ENVIRONMENT == 'dev' }} + - name: Set Environment + # Prepare environment variables for the synchronization job.
+ id: env + env: + AWS_DEV_ACCT: ${{ secrets.AWS_ACCT_DEV }} + AWS_STAGE_ACCT: ${{ secrets.AWS_ACCT_STAGE }} + AWS_PROD_ACCT: ${{ secrets.AWS_ACCT_PROD }} + ENVIRONMENT: ${{ inputs.ENVIRONMENT }} + GHA_ROLE: ${{ inputs.GHA_ROLE }} + run: | + case "$ENVIRONMENT" in + dev) + echo "AWS_ROLE=arn:aws:iam::$AWS_DEV_ACCT:role/$GHA_ROLE" >> $GITHUB_ENV + echo "CDN_DOMAIN=dev1.mitlibrary.net" >> $GITHUB_ENV + echo "AWS_ROLE and CDN_DOMAIN set for synchronization job to Dev1" + ;; + stage) + echo "AWS_ROLE=arn:aws:iam::$AWS_STAGE_ACCT:role/$GHA_ROLE" >> $GITHUB_ENV + echo "CDN_DOMAIN=stage.mitlibrary.net" >> $GITHUB_ENV + echo "AWS_ROLE and CDN_DOMAIN set for synchronization job to Stage-Workloads" + ;; + prod) + echo "AWS_ROLE=arn:aws:iam::$AWS_PROD_ACCT:role/$GHA_ROLE" >> $GITHUB_ENV + echo "CDN_DOMAIN=libraries.mit.edu" >> $GITHUB_ENV + echo "AWS_ROLE and CDN_DOMAIN set for synchronization job to Prod-Workloads" + ;; + esac + + - name: Configure AWS Credentials + id: aws_credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + aws-region: ${{ inputs.AWS_REGION }} + role-to-assume: ${{ env.AWS_ROLE }} + + - name: Set S3 Target URI + # Set the correct S3 URI for the synchronization job (a trailing slash on TARGET_PATH is stripped so the default / does not yield an empty // key segment) + id: s3_target + env: + AWS_REGION: ${{ inputs.AWS_REGION }} + DOMAIN: ${{ inputs.DOMAIN }} + TARGET_PATH: ${{ env.LEGACY && env.LEGACY_TARGET_PATH || inputs.TARGET_PATH }} run: | - if [ '${{ inputs.DOMAIN }}' == 'standard' ]; then - echo "The updates to https://cdn.dev1.mitlibrary.net/$(echo ${{ inputs.S3URI }} | awk -F/ '{print $5}') are now available" >> $GITHUB_STEP_SUMMARY + BUCKET=$(aws ssm get-parameter \ + --region "$AWS_REGION" \ + --name "/tfvars/libraries-website/cdn-origin-bucket-name" \ + --query 'Parameter.Value' \ + --output text) + if [[ "$DOMAIN" == "standard" ]]; then + echo "DISTRIBUTION_ID=$(aws ssm get-parameter \ + --name "/tfvars/libraries-website/standard-cdn-id" \ + --query 'Parameter.Value' \ + --output text)" >> $GITHUB_ENV + echo
"S3_URI=s3://$BUCKET/cdn${TARGET_PATH%/}/" >> $GITHUB_ENV else - echo "The updates to https://$(echo ${{ inputs.S3URI }} | awk -F/ '{print $4}').dev1.mitlibrary.net site are now available" >> $GITHUB_STEP_SUMMARY + echo "DISTRIBUTION_ID=$(aws ssm get-parameter \ + --name "/tfvars/libraries-website/custom-cdn-id" \ + --query 'Parameter.Value' \ + --output text)" >> $GITHUB_ENV + echo "S3_URI=s3://$BUCKET${TARGET_PATH%/}/" >> $GITHUB_ENV fi - - name: Generate STAGE Summary - # Only run this step if the environment is "stage" - if: ${{ inputs.ENVIRONMENT == 'stage' }} + - name: Sync To CDN S3 Bucket + env: + S3_URI: ${{ env.S3_URI }} + SOURCE_PATH: ${{ env.LEGACY && env.LEGACY_SOURCE_PATH || inputs.SOURCE_PATH }} + VALID_SYNC_PARAMS: ${{ env.VALID_SYNC_PARAMS }} run: | - if [ '${{ inputs.DOMAIN }}' == 'standard' ]; then - echo "The updates to https://cdn.stage.mitlibrary.net/$(echo ${{ inputs.S3URI }} | awk -F/ '{print $5}') are now available" >> $GITHUB_STEP_SUMMARY + echo "### Content synchronization to $S3_URI." >> $GITHUB_STEP_SUMMARY + if [[ "$S3_URI" == *"cdn/"* ]]; then + echo "Standard CDN content is synchronizing" else - echo "The updates to https://$(echo ${{ inputs.S3URI }} | awk -F/ '{print $4}').stage.mitlibrary.net site are now available" >> $GITHUB_STEP_SUMMARY + echo "Custom CDN content is synchronizing" fi + cd "$GITHUB_WORKSPACE" + eval "aws s3 sync \"$SOURCE_PATH\" \"$S3_URI\" \ + --delete \ + $VALID_SYNC_PARAMS" + echo "Content is synchronized to $S3_URI."
>> $GITHUB_STEP_SUMMARY - - name: Generate PROD Summary - # Only run this step if the environment is "prod" - if: ${{ inputs.ENVIRONMENT == 'prod' }} + - name: Invalidate cache + env: + CDN_DOMAIN: ${{ env.CDN_DOMAIN }} + DISTRIBUTION_ID: ${{ env.DISTRIBUTION_ID }} + DOMAIN: ${{ inputs.DOMAIN }} + TARGET_PATH: ${{ env.LEGACY && env.LEGACY_TARGET_PATH || inputs.TARGET_PATH }} run: | - if [ '${{ inputs.DOMAIN }}' == 'standard' ]; then - echo "The updates to https://cdn.libraries.mit.edu/$(echo ${{ inputs.S3URI }} | awk -F/ '{print $5}') are now available" >> $GITHUB_STEP_SUMMARY + echo "### CDN cache invalidation" >> $GITHUB_STEP_SUMMARY + echo "Start CDN Cache invalidation." + INVALIDATION_ID=$(aws cloudfront create-invalidation \ + --distribution-id "$DISTRIBUTION_ID" \ + --paths "${TARGET_PATH%/}/*" \ + --query 'Invalidation.Id' \ + --output text) + aws cloudfront wait invalidation-completed \ + --distribution-id "$DISTRIBUTION_ID" \ + --id "$INVALIDATION_ID" + echo "The cache has been cleared." >> $GITHUB_STEP_SUMMARY + if [[ "$DOMAIN" == "standard" ]]; then + echo "The updates to https://cdn.$CDN_DOMAIN$TARGET_PATH are now available." >> $GITHUB_STEP_SUMMARY else - echo "The updates to https://$(echo ${{ inputs.S3URI }} | awk -F/ '{print $4}').libraries.mit.edu site are now available" >> $GITHUB_STEP_SUMMARY + echo "The updates to the https://${TARGET_PATH#/}.$CDN_DOMAIN site are now available." >> $GITHUB_STEP_SUMMARY fi diff --git a/README.md b/README.md index c923c86..79b3d74 100644 --- a/README.md +++ b/README.md @@ -266,34 +266,34 @@ It also assumes that the appropriate infrastructure is in place, particularly th ## Automated Publishing to CDN -There are multiple static HTML repositories (future-of-libraries and open-access-task-force) that will benefit from automated publishing to the S3-based CDN in our AWS Organization.
The publishing automation (for both stage & prod) is handled by one shared workflow, [cdn-shared-publish.yml](./.github/workflows/cdn-shared-publish.yml), that covers all three tiers (dev/stage/prod) as well as both the standard CDN and the custom domain CDN. +There are multiple static HTML repositories ([future-of-libraries](https://github.com/MITLibraries/future-of-libraries-static) and [grandchallenges](https://github.com/MITLibraries/grandchallenges-static)) that benefit from automated publishing to the S3-based CDN in our AWS Organization. Additionally, the [web-images-static](https://github.com/MITLibraries/web-images-static) repository also benefits from automated publishing to the S3-based CDN. The publishing automation is handled by one shared workflow, [cdn-shared-publish.yml](./.github/workflows/cdn-shared-publish.yml), which covers all three tiers (dev/stage/prod) as well as both the standard CDN and the custom domain CDN. -This workflow assumes that the calling repository is structured in a very particular way! - -- For custom domain repos, all the content to be published to the `` folder in the S3 bucket **must** live at the root of the repository. -- For standard domain repos, all the content to be published to the `cdn/` folder in the S3 bucket **must** live in a top level folder named ``. - - For a custom domain example see [future-of-libraries-static](https://github.com/mitlibraries/future-of-libraries-static). - - For a standard CDN example see [web-images-static](https://github.com/mitlibraries/web-images-static). +- For a custom domain example see [future-of-libraries-static](https://github.com/mitlibraries/future-of-libraries-static). +- For a standard CDN example see [web-images-static](https://github.com/mitlibraries/web-images-static). ### CDN Requirements -The following values must be passed in to the shared workflow from the caller workflow: +There are a number of inputs to the shared workflow, some optional and some required.
Here's a summary of the inputs. -- `AWS_REGION` (*string*, **required**): the region where the S3 bucket lives -- `DOMAIN` (*string*, **optional**): the default value is `standard` which refers to the standard CDN. If the content in question is associated with the custom domain CDN, then the caller workflow must pass the value `custom` instead of relying on the default. -- `ENVIRONMENT` (*string*, **required**): either `stage` or `prod` (this workflow is not intended for the `dev` environment) +- `AWS_REGION` (*string*, **optional**, default = `us-east-1`): the region where the S3 bucket lives +- `DOMAIN` (*string*, **optional**, default = `standard`): the default value of `standard` refers to the standard CDN. If the content in question is associated with the custom domain CDN, then the caller workflow must pass the value `custom` instead of relying on the default. +- `ENVIRONMENT` (*string*, **required**): one of `dev`, `stage`, or `prod` - `GHA_ROLE` (*string*, **required**): the OIDC role (managed by the [mitlib-tf-workloads-libraries-website](https://github.com/MITLibraries/mitlib-tf-workloads-libraries-website) repository) -- `SYNC_PARAMS` (*string*, **optional**): this is a string that is appended to the `aws s3 sync` command. If nothing is passed from the caller workflow, it is ignored. This is intended to be used for adding additional `--exclude` arguments for any other files/folders in the web content repo that shouldn't be published to the S3 bucket for the site. - - The typical use for the web dev is to exclude additional top level folders (e.g., `--exclude "docs/*"`) or exclude the top level README (`--exclude "README.md"`). - - It **can** be used to exclude everything except for one top level folder (e.g., `--exclude "*" --include "use_only_this_folder/*"`). +- **[deprecated]** `S3URI` (*string*, **optional**, **no default**): the full S3 URI (including the path) where the files should be uploaded. 
This was the old way of handling the target for the content synchronization. The source and target are now specified with `SOURCE_PATH` and `TARGET_PATH`, detailed below. +- `SOURCE_PATH` (*string*, **optional**, default = `.`): this is the relative path in the caller repository to the content that should be synced to the S3 bucket; it must start with `.` (e.g., `./dist`). The default value of `.` references the root of the repository. The combination of `SOURCE_PATH` and `TARGET_PATH` (see below) fully replaces the `S3URI` input. +- `SYNC_PARAMS` (*string*, **optional**, **no default**): this is a string that is appended to the `aws s3 sync` command. If nothing is passed from the caller workflow, the value for this in the workflow is `""`. This is intended to be used for adding additional `--exclude` arguments for any other files/folders in the web content repo that shouldn't be published to the S3 bucket for the site; only `--exclude` and `--include` parameters are accepted. + - The typical use for the web developer is to exclude additional top level folders (e.g., `--exclude "docs/*"`) or exclude the top level README (`--exclude "README.md"`). + - It **can** be used to exclude everything except for one top level folder (e.g., `--exclude "*" --include "use_only_this_folder/*"`) - for more details on the additional parameters that can be used for `SYNC_PARAMS` see - [AWS CLI s3 reference](https://awscli.amazonaws.com/v2/documentation/api/latest/reference/s3/index.html) - [AWS CLI s3 sync reference](https://awscli.amazonaws.com/v2/documentation/api/latest/reference/s3/sync.html) - - The fixed behavior of this workflow is to ignore the `.gitignore` file, the `.git` directory, and the `.github` directory.
-- `S3URI` (*string*, **required**): the full S3 URI (including the path) where the files should be uploaded + - The fixed behavior of this workflow is to ignore the `.gitignore` file, the `.git` directory, and the `.github` directory. +- `TARGET_PATH` (*string*, **optional**, default = `/`): this is the prefix in the S3 bucket where the caller repository content should be synchronized; it must start with `/`. The combination of `SOURCE_PATH` (see above) and `TARGET_PATH` fully replaces the `S3URI` input. To make life easy for the web developers, the [mitlib-tf-workloads-libraries-website](https://github.com/MITLibraries/mitlib-tf-workloads-libraries-website) repository generates the correct caller workflow for the custom domain sites and stores it as a Terraform output in TfCloud. This can be copy/pasted into the repository containing the content to be published to the CDN. +**NOTE**: The `S3URI` input is deprecated (replaced by `SOURCE_PATH` and `TARGET_PATH`) and will be removed once all the legacy caller workflows are updated. The default values for `SOURCE_PATH` and `TARGET_PATH` match the behavior of the `S3URI` method. + ## Automated Lambda@Edge Deployments There are multiple Lambda@Edge functions in our CloudFront distributions. The Lambda update & deployment as well as the CloudFront re-deployment (via Terraform) are centralized here to make it easier to add additional Lambda functions in the future. See [cf-lambda-deploy.yml](./.github/workflows/cf-lambda-shared-deploy.yml) for the actual workflow. See [Lambda@Edge CloudFront Deployment Model](https://mitlibraries.atlassian.net/l/cp/SP3QNj1s) for an overview of the deployment process.