Skip to content

Commit 507c339

Browse files
chore: 슬랙 웹 훅 URL 값을 도플러로 분리, alertmanager 리포지토리 등록 (#539)
* fix: 프로메테우스 v3 오탈자 수정 * chore: 슬랙 웹 훅 URL 값을 도플러로 분리. alertmanager 리포지토리 등록.
1 parent d397e2c commit 507c339

4 files changed

Lines changed: 127 additions & 2 deletions

File tree

.github/workflows/deploy.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ jobs:
135135
"GITHUB_TOKEN=$(aws ssm get-parameter --name /tt/github-token --with-decryption --query Parameter.Value --output text --region ap-northeast-2)",
136136
"echo \"$GITHUB_TOKEN\" | docker login ghcr.io -u ${{ github.actor }} --password-stdin 2>/dev/null",
137137
138+
"set -a && source doppler.env && set +a",
139+
"envsubst < monitoring/alertmanager/alertmanager.yml > /tmp/alertmanager-resolved.yml",
140+
"cp /tmp/alertmanager-resolved.yml monitoring/alertmanager/alertmanager.yml",
141+
138142
"doppler run --project \"$DOPPLER_PROJECT\" --config \"$DOPPLER_CONFIG\" -- docker compose pull",
139143
"doppler run --project \"$DOPPLER_PROJECT\" --config \"$DOPPLER_CONFIG\" -- docker compose up -d --force-recreate",
140144

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ src/main/generated/
6060
!docker/monitoring/prometheus/prometheus.yml
6161
!docker/monitoring/prometheus/rules/*.yml
6262
!docker/monitoring/grafana/provisioning/**/*.yml
63+
!docker/monitoring/alertmanager/alertmanager.yml
6364

6465
### custom ###
6566
src/main/resources/application-secret.yml

docker/docker-compose.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,6 @@ services:
396396
ports:
397397
- "9093:9093"
398398
environment:
399-
- SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL}
400399
volumes:
401400
- ./monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
402401
- alertmanager_data:/alertmanager
@@ -462,4 +461,4 @@ volumes:
462461
redis-node-6-data:
463462
prometheus_data:
464463
grafana_data:
465-
alertmanager_data:
464+
alertmanager_data:
docker/monitoring/alertmanager/alertmanager.yml

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
# Alertmanager configuration: routes alerts to Slack receivers.
#
# Each domain uses its own SLACK_WEBHOOK_XXX environment variable.
# The ${...} placeholders below are filled in by `envsubst` during deployment
# (see .github/workflows/deploy.yml) before Alertmanager reads this file.
#
# Routes and inhibit rules use the `matchers` syntax (Alertmanager 0.22+)
# instead of the deprecated `match` / `source_match` / `target_match` fields.

global:
  resolve_timeout: 5m

# Routing configuration
route:
  # Fallback receiver for alerts that match none of the child routes.
  receiver: 'slack-auth-default'
  group_by: ['alertname', 'severity', 'domain']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h

  routes:
    # ==================== CRITICAL (immediate notification) ====================
    # Surge in account lockouts - possible attack
    - matchers:
        - 'alertname="HighAccountLockRate"'
      receiver: 'slack-security-critical'
      group_wait: 10s
      repeat_interval: 30m

    # Security threat detected
    - matchers:
        - 'alertname="SecurityThreatDetected"'
      receiver: 'slack-security-critical'
      group_wait: 10s
      repeat_interval: 30m

    # Spike in login failures - possible brute force
    - matchers:
        - 'alertname="LoginFailureSpike"'
      receiver: 'slack-security-critical'
      group_wait: 10s
      repeat_interval: 30m

    # ==================== WARNING ====================
    # Login success rate has dropped
    - matchers:
        - 'alertname="LowLoginSuccessRate"'
      receiver: 'slack-auth-warning'
      repeat_interval: 2h

    # Email delivery failure rate has increased
    - matchers:
        - 'alertname="HighEmailFailureRate"'
      receiver: 'slack-email-warning'
      repeat_interval: 2h

    # Spike in email verification failures
    - matchers:
        - 'alertname="HighVerificationFailureRate"'
      receiver: 'slack-email-warning'
      repeat_interval: 2h

    # Rate limit triggered many times
    - matchers:
        - 'alertname="RateLimitTriggered"'
      receiver: 'slack-auth-warning'
      repeat_interval: 1h

    # ==================== INFO ====================
    # Sign-up / withdrawal statistics (both labels must match)
    - matchers:
        - 'domain="member"'
        - 'severity="info"'
      receiver: 'slack-member-info'
      repeat_interval: 24h

# ==================== Receivers for the Auth domain ====================
receivers:
  # Default (alerts not matched by any route)
  - name: 'slack-auth-default'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_AUTH}'
        channel: '#tt-auth-alerts'
        send_resolved: true

  # Security critical - immediate response required
  - name: 'slack-security-critical'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_AUTH}'
        channel: '#tt-auth-alerts'
        send_resolved: true

  # Auth warning - needs attention
  - name: 'slack-auth-warning'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_AUTH}'
        channel: '#tt-auth-alerts'
        send_resolved: true

  # Email warning
  - name: 'slack-email-warning'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_AUTH}'
        channel: '#tt-auth-alerts'
        send_resolved: true

  # Member informational alerts (daily reports, etc.); no "resolved" messages
  - name: 'slack-member-info'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_AUTH}'
        channel: '#tt-auth-alerts'
        send_resolved: false

# Alert inhibition rules
inhibit_rules:
  # While a critical alert is firing, suppress warnings with the same alertname
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname']

  # While ServiceDown is firing, suppress every other alert for that instance
  - source_matchers:
      - 'alertname="ServiceDown"'
    target_matchers:
      - 'alertname=~".+"'
    equal: ['instance']

0 commit comments

Comments
 (0)