diff --git a/.github/workflows/update-waf-firewall.yml b/.github/workflows/update-waf-firewall.yml
new file mode 100644
index 00000000..5f59c00f
--- /dev/null
+++ b/.github/workflows/update-waf-firewall.yml
@@ -0,0 +1,144 @@
+name: Update WAF IP blacklists
+
+# Replaces the contents of an AWS WAFv2 IP set with the block_ips list kept in
+# this repository. Pushes to main target the production IP set; pushes to any
+# other listed branch target the staging IP set.
+on:
+  push:
+    branches:
+      - 'main'
+      - 'staging'
+      - 'waf_block_ip'
+
+# NOTE(review): the job below authenticates with static access keys, so
+# id-token: write (OIDC) looks unused - confirm before tightening.
+permissions:
+  id-token: write
+  contents: read
+
+# Run one update per branch at a time so concurrent pushes do not race on the
+# IP set lock token.
+concurrency:
+  group: update-waf-ipset-${{ github.ref_name }}
+  cancel-in-progress: false
+
+env:
+  AWS_REGION: us-east-1
+  SCOPE: REGIONAL
+  IPSET_NAME_STAGING: ipset-block-ohm-staging
+  IPSET_NAME_PROD: ipset-block-ohm-production
+  # Block lists are stored as YAML files.
+  FILE_STAGING: firewall/ip-blacklist-staging.yaml
+  FILE_PROD: firewall/ip-blacklist-production.yaml
+  # Pinned yq release; bump deliberately instead of tracking "latest".
+  YQ_VERSION: v4.44.3
+
+jobs:
+  update-waf-ipset:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Install jq and yq
+        run: |
+          sudo apt-get update && sudo apt-get install -y jq
+          # Fetch a pinned release so an upstream change cannot silently alter
+          # how the block list is parsed.
+          sudo wget -q "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64" -O /usr/bin/yq
+          sudo chmod +x /usr/bin/yq
+
+      - name: Resolve target env, IP set and file
+        id: target
+        env:
+          REF_NAME: ${{ github.ref_name }}
+        run: |
+          # main maps to production; every other triggering branch maps to
+          # staging. The ref name arrives through the environment instead of
+          # being interpolated into the script text.
+          if [[ "$REF_NAME" == "main" ]]; then
+            echo "IPSET_NAME=${IPSET_NAME_PROD}" >> "$GITHUB_OUTPUT"
+            echo "IP_FILE=${FILE_PROD}" >> "$GITHUB_OUTPUT"
+          else
+            echo "IPSET_NAME=${IPSET_NAME_STAGING}" >> "$GITHUB_OUTPUT"
+            echo "IP_FILE=${FILE_STAGING}" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Build IP list from YAML
+        id: iplist
+        shell: bash
+        env:
+          IP_FILE: ${{ steps.target.outputs.IP_FILE }}
+        run: |
+          TMP=$(mktemp)
+          if [[ ! -f "$IP_FILE" ]]; then
+            echo "File $IP_FILE not found" >&2
+            exit 1
+          fi
+          # Write each entry of the block_ips list on its own line.
+          yq '.block_ips[]' "$IP_FILE" > "$TMP"
+
+          # Stop early on an empty list; otherwise the update step would fail
+          # with a bare --addresses argument error.
+          if [[ ! -s "$TMP" ]]; then
+            echo "No entries found under block_ips in $IP_FILE" >&2
+            exit 1
+          fi
+
+          # Reject entries that are not shaped like an IPv4 or IPv6 address
+          # with an optional prefix length.
+          INVALID=$(grep -Ev '^([0-9]{1,3}\.){3}[0-9]{1,3}(/[0-9]{1,2})?$|^([0-9a-fA-F:]+)(/[0-9]{1,3})?$' "$TMP" || true)
+          if [[ -n "$INVALID" ]]; then
+            echo "Invalid entries:"; echo "$INVALID"; exit 1
+          fi
+
+          sort -u "$TMP" > "${TMP}.uniq"
+          LIST=$(paste -sd' ' "${TMP}.uniq")
+          echo "addresses=$LIST" >> "$GITHUB_OUTPUT"
+          echo "Addresses to apply:"; cat "${TMP}.uniq"
+
+      - name: Get IP set Id and LockToken
+        id: getipset
+        env:
+          NAME: ${{ steps.target.outputs.IPSET_NAME }}
+        run: |
+          DATA=$(aws wafv2 list-ip-sets --scope "$SCOPE" --region "$AWS_REGION" --query "IPSets[?Name=='${NAME}'].[Id,ARN]" --output json)
+          if [[ "$DATA" == "[]" ]]; then
+            echo "IP set ${NAME} not found in ${AWS_REGION}" >&2; exit 1
+          fi
+          ID=$(echo "$DATA" | jq -r '.[0][0]')
+          LOCK=$(aws wafv2 get-ip-set --scope "$SCOPE" --region "$AWS_REGION" --id "$ID" --name "$NAME" --query "LockToken" --output text)
+          echo "IPSET_ID=$ID" >> "$GITHUB_OUTPUT"
+          echo "LOCK_TOKEN=$LOCK" >> "$GITHUB_OUTPUT"
+
+      - name: Update IP set (replace full list)
+        env:
+          IPSET_ID: ${{ steps.getipset.outputs.IPSET_ID }}
+          IPSET_NAME: ${{ steps.target.outputs.IPSET_NAME }}
+          LOCK_TOKEN: ${{ steps.getipset.outputs.LOCK_TOKEN }}
+          ADDRESSES: ${{ steps.iplist.outputs.addresses }}
+        run: |
+          # ADDRESSES is space-separated; split it into one argument per entry
+          # without exposing it to glob expansion.
+          read -r -a ADDRESS_ARGS <<< "$ADDRESSES"
+          aws wafv2 update-ip-set \
+            --scope "$SCOPE" \
+            --region "$AWS_REGION" \
+            --id "$IPSET_ID" \
+            --name "$IPSET_NAME" \
+            --lock-token "$LOCK_TOKEN" \
+            --addresses "${ADDRESS_ARGS[@]}"
+
+      - name: Summary
+        env:
+          IPSET_NAME: ${{ steps.target.outputs.IPSET_NAME }}
+        run: |
+          echo "Updated IP set: ${IPSET_NAME}"
diff --git a/firewall/README.md b/firewall/README.md
new file mode 100644
index 00000000..e8fc9786
--- /dev/null
+++ b/firewall/README.md
@@ -0,0 +1,4 @@
+## Blocking High-Probability Bot IPs Based on Traffic Patterns
+
+We’re blocking IPs with a high probability of being bots. Analysis shows these IPs generated excessive traffic on the site, following clear bot-like patterns.
+
diff --git a/firewall/ip-blacklist-production.yaml b/firewall/ip-blacklist-production.yaml
new file mode 100644
index 00000000..2f4c23b7
--- /dev/null
+++ b/firewall/ip-blacklist-production.yaml
@@ -0,0 +1,46 @@
+block_ips:
+  - 2.189.5.0/24
+  - 31.46.225.0/24
+  - 43.128.0.0/11
+  - 46.151.194.0/24
+  - 47.79.196.0/24
+  - 47.79.218.0/23
+  - 49.51.0.0/16
+  - 101.32.0.0/15
+  - 106.71.168.0/24
+  - 119.28.0.0/16
+  - 124.156.0.0/16
+  - 129.226.0.0/16
+  - 138.199.0.0/16
+  - 143.244.49.0/24
+  - 143.244.50.0/24
+  - 143.244.56.0/24
+  - 143.244.60.0/24
+  - 150.109.0.0/16
+  - 156.146.43.0/24
+  - 156.146.56.0/24
+  - 157.131.223.0/24
+  - 160.238.138.0/24
+  - 162.62.0.0/16
+  - 162.198.71.0/24
+  - 169.150.207.0/24
+  - 169.150.220.0/24
+  - 169.150.236.0/24
+  - 169.150.247.0/24
+  - 169.150.249.0/24
+  - 170.106.0.0/16
+  - 172.56.13.0/24
+  - 173.88.145.0/24
+  - 180.191.169.0/24
+  - 185.59.220.0/24
+  - 185.93.1.0/24
+  - 185.93.2.0/24
+  - 185.111.111.0/24
+  - 190.197.0.0/24
+  - 192.184.146.0/24
+  - 195.91.2.0/24
+  - 195.181.163.0/24
+  - 205.194.32.0/24
+  - 209.184.121.0/24
+  - 212.102.40.0/24
+  - 213.136.70.0/24
diff --git a/firewall/ip-blacklist-staging.yaml b/firewall/ip-blacklist-staging.yaml
new file mode 100644
index 00000000..940a2c3b
--- /dev/null
+++ b/firewall/ip-blacklist-staging.yaml
@@ -0,0 +1,10 @@
+block_ips:
+  # DataForSEO Bot
+  - 136.243.220.208/29
+  - 136.243.228.176/29
+  - 136.243.228.192/29
+  # Bingbot (Microsoft)
+  - 157.55.39.0/24
+  - 207.46.13.0/24
+  # Thinkbot and APNIC (Asia-Pacific Network Information Centre); no need for access to staging
+  - 43.0.0.0/8
diff --git a/firewall/queries/boots.sql b/firewall/queries/boots.sql
new file mode 100644
index 00000000..2f6b683c
--- /dev/null
+++ b/firewall/queries/boots.sql
@@ -0,0 +1,68 @@
+/*
+ * Creates the external table that exposes the ALB access logs stored in S3.
+ * NOTE(review): the input.regex below ends at classification_reason; confirm
+ * it still matches current log lines if the log format gains trailing fields.
+ */
+
+CREATE EXTERNAL TABLE IF NOT EXISTS alb_logs (
+    type STRING,
+    time STRING,
+    elb STRING,
+    client_ip STRING,
+    client_port INT,
+    target_ip STRING,
+    target_port INT,
+    request_processing_time DOUBLE,
+    target_processing_time DOUBLE,
+    response_processing_time DOUBLE,
+    elb_status_code INT,
+    target_status_code STRING,
+    received_bytes BIGINT,
+    sent_bytes BIGINT,
+    request_verb STRING,
+    request_url STRING,
+    request_proto STRING,
+    user_agent STRING,
+    ssl_cipher STRING,
+    ssl_protocol STRING,
+    target_group_arn STRING,
+    trace_id STRING,
+    domain_name STRING,
+    chosen_cert_arn STRING,
+    matched_rule_priority STRING,
+    request_creation_time STRING,
+    actions_executed STRING,
+    redirect_url STRING,
+    error_reason STRING,
+    target_port_list STRING,
+    target_status_code_list STRING,
+    classification STRING,
+    classification_reason STRING
+)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
+WITH SERDEPROPERTIES (
+    'serialization.format' = '1',
+    'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\s]+?)\" \"([^\s]+)\" \"([^ ]*)\" \"([^ ]*)\"'
+)
+LOCATION 's3://openhistoricalmap-elb-logs/alb_production/AWSLogs/618380242247/elasticloadbalancing/us-east-1/';
+
+/*
+ * Lists the 50 most active user agent / client IP pairs of the last 48 hours
+ * whose user agent contains "bot", "spider" or "crawler".
+ */
+SELECT
+    user_agent,
+    client_ip,
+    COUNT(*) AS request_count
+FROM
+    alb_logs
+WHERE
+    from_iso8601_timestamp(time) >= (now() - interval '48' hour)
+    AND
+    (LOWER(user_agent) LIKE '%bot%' OR LOWER(user_agent) LIKE '%spider%' OR LOWER(user_agent) LIKE '%crawler%')
+GROUP BY
+    user_agent,
+    client_ip
+ORDER BY
+    request_count DESC
+LIMIT 50;