Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions .github/workflows/update-waf-firewall.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Replaces the contents of an AWS WAFv2 IP set with the CIDR list stored in
# this repository under firewall/.
#
#   push to main              -> IPSET_NAME_PROD    <- FILE_PROD
#   push to any other branch  -> IPSET_NAME_STAGING <- FILE_STAGING
#   listed below
#
# The IP set is overwritten with the full list from the file on every run.
name: Update WAF IP blacklists

on:
  push:
    branches:
      - 'main'
      - 'staging'
      - 'waf_block_ip'

# NOTE(review): id-token: write is only used for OIDC role assumption; the
# credentials step below authenticates with static access keys. Confirm whether
# this permission is still required.
permissions:
  id-token: write
  contents: read

# Runs that target the same IP set are serialised so that two pushes cannot
# race between reading the LockToken and calling update-ip-set. Every branch
# other than main targets the staging IP set, hence the two-way grouping.
concurrency:
  group: "update-waf-ipset-${{ github.ref_name == 'main' && 'production' || 'staging' }}"
  cancel-in-progress: false

env:
  AWS_REGION: us-east-1
  SCOPE: REGIONAL
  IPSET_NAME_STAGING: ipset-block-ohm-staging
  IPSET_NAME_PROD: ipset-block-ohm-production
  # Block-list files (YAML), one per environment.
  FILE_STAGING: firewall/ip-blacklist-staging.yaml
  FILE_PROD: firewall/ip-blacklist-production.yaml

jobs:
  update-waf-ipset:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Install jq and yq
        env:
          # Pinned release: a binary installed as root must not silently change
          # whenever upstream publishes a new "latest". Bump deliberately.
          YQ_VERSION: v4.44.3
        run: |
          set -euo pipefail
          # Only hit apt when jq is not already available on the runner.
          if ! command -v jq >/dev/null 2>&1; then
            sudo apt-get update && sudo apt-get install -y jq
          fi
          sudo wget -q "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64" -O /usr/bin/yq
          sudo chmod +x /usr/bin/yq

      - name: Resolve target env, IP set and file
        id: target
        env:
          # Passed through the environment instead of being pasted into the
          # script text, so the value is never parsed as shell code.
          REF_NAME: ${{ github.ref_name }}
        run: |
          # main -> production; every other triggering branch -> staging.
          if [[ "$REF_NAME" == "main" ]]; then
            {
              echo "IPSET_NAME=${IPSET_NAME_PROD}"
              echo "IP_FILE=${FILE_PROD}"
            } >> "$GITHUB_OUTPUT"
          else
            {
              echo "IPSET_NAME=${IPSET_NAME_STAGING}"
              echo "IP_FILE=${FILE_STAGING}"
            } >> "$GITHUB_OUTPUT"
          fi

      - name: Build IP list from YAML
        id: iplist
        shell: bash
        env:
          IP_FILE: ${{ steps.target.outputs.IP_FILE }}
        run: |
          set -euo pipefail
          TMP=$(mktemp)
          trap 'rm -f "$TMP" "${TMP}.uniq"' EXIT

          if [[ ! -f "$IP_FILE" ]]; then
            echo "File $IP_FILE not found" >&2
            exit 1
          fi

          # Emit every entry of the 'block_ips' list on its own line.
          yq '.block_ips[]' "$IP_FILE" > "$TMP"

          # An empty list would reach the update step as a bare `--addresses`
          # and fail there with an unhelpful CLI error; stop here instead.
          if [[ ! -s "$TMP" ]]; then
            echo "No addresses found under 'block_ips' in $IP_FILE" >&2
            exit 1
          fi

          # Reject anything that is not shaped like an IPv4/IPv6 address with
          # an optional /prefix.
          # NOTE(review): WAFv2 expects CIDR notation; entries without a
          # /prefix pass this check - confirm whether they should be rejected.
          INVALID=$(grep -Ev '^([0-9]{1,3}\.){3}[0-9]{1,3}(/[0-9]{1,2})?$|^([0-9a-fA-F:]+)(/[0-9]{1,3})?$' "$TMP" || true)
          if [[ -n "$INVALID" ]]; then
            echo "Invalid entries:"; echo "$INVALID"; exit 1
          fi

          # De-duplicate, then publish as a single space-separated line.
          sort -u "$TMP" > "${TMP}.uniq"
          LIST=$(paste -sd' ' "${TMP}.uniq")
          echo "addresses=$LIST" >> "$GITHUB_OUTPUT"
          echo "Addresses to apply:"; cat "${TMP}.uniq"

      - name: Get IP set Id and LockToken
        id: getipset
        env:
          IPSET_NAME: ${{ steps.target.outputs.IPSET_NAME }}
        run: |
          set -euo pipefail
          DATA=$(aws wafv2 list-ip-sets --scope "$SCOPE" --region "$AWS_REGION" --query "IPSets[?Name=='${IPSET_NAME}'].[Id,ARN]" --output json)
          # `// empty` yields an empty string for both "[]" and a null result,
          # so the not-found check does not depend on exact JSON formatting.
          ID=$(echo "$DATA" | jq -r '.[0][0] // empty')
          if [[ -z "$ID" ]]; then
            echo "IP set ${IPSET_NAME} not found in ${AWS_REGION}" >&2; exit 1
          fi
          LOCK=$(aws wafv2 get-ip-set --scope "$SCOPE" --region "$AWS_REGION" --id "$ID" --name "$IPSET_NAME" --query "LockToken" --output text)
          echo "IPSET_ID=$ID" >> "$GITHUB_OUTPUT"
          echo "LOCK_TOKEN=$LOCK" >> "$GITHUB_OUTPUT"

      - name: Update IP set (replace full list)
        env:
          IPSET_ID: ${{ steps.getipset.outputs.IPSET_ID }}
          IPSET_NAME: ${{ steps.target.outputs.IPSET_NAME }}
          LOCK_TOKEN: ${{ steps.getipset.outputs.LOCK_TOKEN }}
          ADDRESSES: ${{ steps.iplist.outputs.addresses }}
        run: |
          set -euo pipefail
          # Split the space-separated list into one argument per address
          # without letting the shell glob or re-interpret the values.
          read -r -a ADDR_ARGS <<< "$ADDRESSES"
          aws wafv2 update-ip-set \
            --scope "$SCOPE" \
            --region "$AWS_REGION" \
            --id "$IPSET_ID" \
            --name "$IPSET_NAME" \
            --lock-token "$LOCK_TOKEN" \
            --addresses "${ADDR_ARGS[@]}"

      - name: Summary
        env:
          IPSET_NAME: ${{ steps.target.outputs.IPSET_NAME }}
        run: |
          echo "Updated IP set: ${IPSET_NAME}"
4 changes: 4 additions & 0 deletions firewall/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## Blocking High-Probability Bot IPs Based on Traffic Patterns

We’re blocking IPs with a high probability of being bots. Analysis shows these IPs generated excessive traffic on the site, following clear bot-like patterns.

46 changes: 46 additions & 0 deletions firewall/ip-blacklist-production.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
# CIDR ranges applied to the production WAF IP set.
# Every entry under `block_ips` is sent to the IP set; the whole list replaces
# whatever the IP set held before.
block_ips:
  - 2.189.5.0/24
  - 31.46.225.0/24
  - 43.128.0.0/11
  - 46.151.194.0/24
  - 47.79.196.0/24
  - 47.79.218.0/23
  - 49.51.0.0/16
  - 101.32.0.0/15
  - 106.71.168.0/24
  - 119.28.0.0/16
  - 124.156.0.0/16
  - 129.226.0.0/16
  - 138.199.0.0/16
  - 143.244.49.0/24
  - 143.244.50.0/24
  - 143.244.56.0/24
  - 143.244.60.0/24
  - 150.109.0.0/16
  - 156.146.43.0/24
  - 156.146.56.0/24
  - 157.131.223.0/24
  - 160.238.138.0/24
  - 162.62.0.0/16
  - 162.198.71.0/24
  - 169.150.207.0/24
  - 169.150.220.0/24
  - 169.150.236.0/24
  - 169.150.247.0/24
  - 169.150.249.0/24
  - 170.106.0.0/16
  - 172.56.13.0/24
  - 173.88.145.0/24
  - 180.191.169.0/24
  - 185.59.220.0/24
  - 185.93.1.0/24
  - 185.93.2.0/24
  - 185.111.111.0/24
  - 190.197.0.0/24
  - 192.184.146.0/24
  - 195.91.2.0/24
  - 195.181.163.0/24
  - 205.194.32.0/24
  - 209.184.121.0/24
  - 212.102.40.0/24
  - 213.136.70.0/24
11 changes: 11 additions & 0 deletions firewall/ip-blacklist-staging.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# CIDR ranges applied to the staging WAF IP set.
# Every entry under `block_ips` is sent to the IP set; the whole list replaces
# whatever the IP set held before.
block_ips:
  # DataForSEO Bot
  - 136.243.220.208/29
  - 136.243.228.176/29
  - 136.243.228.192/29
  # Bingbot (Microsoft)
  - 157.55.39.0/24
  - 207.46.13.0/24
  # Thinkbot and the wider APNIC (Asia-Pacific Network Information Centre)
  # range; this traffic does not need access to staging.
  - 43.0.0.0/8

61 changes: 61 additions & 0 deletions firewall/queries/boots.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
 * Creates the external table `alb_logs` over the load balancer access logs
 * stored under the S3 LOCATION given at the end of the statement.
 *
 * Each log line is parsed with RegexSerDe: 'input.regex' contains 33 capture
 * groups, one per column, in the same order as the column list below. Adding,
 * removing or reordering a column therefore requires the matching change in
 * the regex.
 *
 * `time` and `request_creation_time` are declared as STRING; queries convert
 * them when they need a timestamp.
 *
 * IF NOT EXISTS makes the statement safe to re-run.
 *
 * NOTE(review): the table declares no partitions, so every query reads all
 * objects under LOCATION - confirm this is acceptable for the log volume.
 */

CREATE EXTERNAL TABLE IF NOT EXISTS alb_logs (
type STRING,
time STRING,
elb STRING,
client_ip STRING,
client_port INT,
target_ip STRING,
target_port INT,
request_processing_time DOUBLE,
target_processing_time DOUBLE,
response_processing_time DOUBLE,
elb_status_code INT,
target_status_code STRING,
received_bytes BIGINT,
sent_bytes BIGINT,
request_verb STRING,
request_url STRING,
request_proto STRING,
user_agent STRING,
ssl_cipher STRING,
ssl_protocol STRING,
target_group_arn STRING,
trace_id STRING,
domain_name STRING,
chosen_cert_arn STRING,
matched_rule_priority STRING,
request_creation_time STRING,
actions_executed STRING,
redirect_url STRING,
error_reason STRING,
target_port_list STRING,
target_status_code_list STRING,
classification STRING,
classification_reason STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1',
'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\s]+?)\" \"([^\s]+)\" \"([^ ]*)\" \"([^ ]*)\"'
)
LOCATION 's3://openhistoricalmap-elb-logs/alb_production/AWSLogs/618380242247/elasticloadbalancing/us-east-1/';

/*
 * Lists the 50 busiest (user_agent, client_ip) pairs from the last 48 hours
 * among requests whose user agent contains "bot", "spider" or "crawler"
 * (case-insensitive), ordered by request count, highest first.
 */
WITH recent_bot_requests AS (
    -- Narrow to the time window and to self-identified crawlers first.
    SELECT
        user_agent,
        client_ip
    FROM alb_logs
    WHERE from_iso8601_timestamp(time) >= (now() - interval '48' hour)
      AND (
            LOWER(user_agent) LIKE '%bot%'
         OR LOWER(user_agent) LIKE '%spider%'
         OR LOWER(user_agent) LIKE '%crawler%'
      )
)
SELECT
    user_agent,
    client_ip,
    COUNT(*) AS request_count
FROM recent_bot_requests
GROUP BY 1, 2
ORDER BY request_count DESC
LIMIT 50;