|
#!/bin/bash

# Checks for usage of iframe and non-cloudflare URL for videos in documentation.
# Assumes that Jekyll markdown documents compile and render correctly, i.e.
# no malformed HTML tags or liquid tags.
#
# The helper script is sourced via a path relative to the current working
# directory, so run this from a directory where ./scripts/shellUtils.sh
# resolves (presumably the repository root).
#
# Exit status: 1 if any violation is reported (or a required tool is missing),
# 0 otherwise.
set -eu

# Expected to provide the title/error/success reporting helpers used below.
source ./scripts/shellUtils.sh
title "Enforce no iframe usage for videos and cloudflare CDN links"

# pcregrep runs inside process substitutions below, where a failure is not
# caught by "set -e". Without this check a missing binary would produce no
# matches and the script would wrongly report "No violations.".
if ! command -v pcregrep > /dev/null; then
    error "pcregrep is required but was not found on PATH"
    exit 1
fi

HAS_VIOLATION=false

# Use diff to find all changed markdown files in the docs/ directory compared
# to the current HEAD commit on main. This will mirror behavior on the workflow
# when the local copy of origin/main is up-to-date. We exclude the README.md
# files from linting. Only modified (M), added (A) and renamed (R) files are
# listed, so deleted files are never passed to pcregrep.
CHANGED_FILES="$(git diff origin/main..HEAD --name-only --diff-filter=MAR -- ':docs/*.md' ':(exclude,icase)*README.md')"

# RegEx to match the opening iframe tags.
# Matches iframes like: <iframe src="https://myembeddedvideo.com" allowfullscreen width="10" height="10" allow="autoplay">
# Broken into 2 consecutive non-capturing groups "(?:[...])".
# Non-capturing groups are part of the matching pattern, but not returned in matches.
# First Group: "(?:<iframe[^>]*?)"
#   - Checks for the iframe opening tag "<iframe", and "[^>]*?" non-greedily
#     consumes characters except for ">" (as few characters as possible
#     up to the closing ">").
# Second Group: OR of three attributes "(?:\s*width=...|\s*height=...|\s*src=...)+"
#   - The "+" checks for at least one of the attributes in the group, in any order.
#     This is what we use to check if it's an embed.
#   - Each attribute consists of a named capturing group that saves the quoted
#     value after the "=":
#       - [\"'](?<width>[^\"']+)[\"']
#         This matches the opening quotation mark, followed by the named
#         capturing group (?<width>[...]) where [^\"']+ matches one or more
#         characters that aren't a quotation mark (so an empty attribute value
#         does not match), followed by the closing quotation mark.
# Finally, "[^>]*?>" consumes all remaining characters after the attributes and
# closes the iframe tag with ">". We consider there to be an iframe in the file
# if all this is satisfied.
IFRAME_REGEX="(?:<iframe[^>]*?)(?:\s*width=[\"'](?<width>[^\"']+)[\"']|\s*height=[\"'](?<height>[^\"']+)[\"']|\s*src=[\"'](?<src>[^\"']+)[\"'])+[^>]*?>"
while IFS= read -r FILE; do
    # A here-string always supplies at least one line, so when no files
    # changed FILE is empty; skip it rather than asking pcregrep to open "".
    [[ -n "$FILE" ]] || continue
    # "pcregrep -n" prints each matching line as "<line number>:<line>".
    while IFS= read -r MATCH; do
        error "$FILE:$MATCH Do not use iframes for video embeds."
        HAS_VIOLATION=true
    done < <(pcregrep -n "$IFRAME_REGEX" "$FILE")
done <<< "$CHANGED_FILES"

# RegEx to match the liquid Jekyll tag for included videos.
# Matches includes like: {% include video.html src="https://incorrectlyembeddedvideo.com" %}
#
# The regex begins by checking for the opening "{% include video.html" with any
# white spacing that wouldn't break the liquid tag. Followed by some white space.
#
# Next we have one non-capturing group "(?:[...])+" for attributes "thumbnail=..." and "src=...".
# We expect at least one of these elements to appear.
# Each attribute is formatted as: \s*ATTR=[\"'](?<ATTR>[^\"']+)[\"']
#   - This is zero or more whitespace followed by the opening "ATTR=", opening
#     quote with a non-empty string inside, and closing quote. We capture the
#     value of the attribute using a named capturing group "(?<ATTR>[...])".
# We close the regex with optional white space and the ending "%}". The whole
# matching line is then checked against the CDN URL pattern below.
VIDEO_INCLUDE_REGEX="{%\s*include\s+video\.html\s+(?:\s*thumbnail=[\"'](?<thumbnail>[^\"']+)[\"']|\s*src=[\"'](?<src>[^\"']+)[\"'])+\s*%}"

# RegEx to match a cloudflare CDN URL. Expects a leading subdomain and a
# trailing path.
# We match "https://" followed by one or more dot-separated hostname labels
# (letters, digits and hyphens; usually customer information), then
# ".cloudflarestream.com/" and the optional remaining non-space characters
# "\S*" (usually the content ID). The hostname labels deliberately exclude "/"
# so that "cloudflarestream.com" appearing in the path of another host, e.g.
# "https://other.host/x.cloudflarestream.com/", is not accepted.
CDN_REGEX="https:\/\/[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*\.cloudflarestream\.com\/\S*"
while IFS= read -r FILE; do
    # Skip the empty entry produced when there are no changed files.
    [[ -n "$FILE" ]] || continue
    while IFS= read -r MATCH; do
        # NOTE(review): MATCH is the whole matching line ("<line number>:<line>"),
        # so a Cloudflare URL anywhere on that line (for example in thumbnail=)
        # satisfies this check even if src= points elsewhere.
        if ! pcregrep -q "$CDN_REGEX" <<< "$MATCH"; then
            error "$FILE:$MATCH Video URL must be from Cloudflare CDN."
            HAS_VIOLATION=true
        fi
    done < <(pcregrep -n "$VIDEO_INCLUDE_REGEX" "$FILE")
done <<< "$CHANGED_FILES"

if [[ $HAS_VIOLATION == true ]]; then
    error "Documentation has video violations"
    exit 1
fi

success "No violations."
0 commit comments