Skip to content

Commit b431047

Browse files
committed
Add CloudWatch SNS monitoring scenario
- Add specification for CloudWatch monitoring with SNS alerts scenario
- Implement Python scenario demonstrating alarm creation and SNS notifications
- Add integration tests for scenario
- Add snippet markers for documentation injection
- Update SNS and CloudWatch metadata files with new scenario
1 parent 2e84d96 commit b431047

6 files changed

Lines changed: 786 additions & 0 deletions

File tree

.doc_gen/metadata/cloudwatch_metadata.yaml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,3 +1157,45 @@ cloudwatch_Scenario_GettingStarted:
11571157
- cwt.abapv1.getting_started_with_cwt
11581158
services:
11591159
cloudwatch: {PutMetricAlarm, DisableAlarmActions, DescribeAlarms, DeleteAlarms}
1160+
1161+
cloudwatch_Scenario_SnsMonitoring:
1162+
title: Monitor infrastructure with &CW; alarms and &SNS; notifications using an &AWS; SDK
1163+
title_abbrev: Monitor infrastructure with &CW; and &SNS;
1164+
synopsis: create &CW; alarms that send &SNS; notifications when infrastructure metrics breach thresholds.
1165+
category: Scenarios
1166+
guide_topic:
1167+
title: Using &CW; alarms with &SNS;
1168+
url: cloudwatch/latest/monitoring/alarm-actions.html
1169+
languages:
1170+
Python:
1171+
versions:
1172+
- sdk_version: 3
1173+
github: python/example_code/sns
1174+
excerpts:
1175+
- description: Create an &SNS; topic and email subscription for alarm notifications.
1176+
snippet_tags:
1177+
- python.example_code.sns.CreateTopicForAlarm
1178+
- description: Create a &CW; alarm with an &SNS; action.
1179+
snippet_tags:
1180+
- python.example_code.cloudwatch.PutMetricAlarmWithSns
1181+
- description: Create a &CW; dashboard to visualize metrics.
1182+
snippet_tags:
1183+
- python.example_code.cloudwatch.PutDashboard
1184+
- description: Publish metric data to trigger the alarm.
1185+
snippet_tags:
1186+
- python.example_code.cloudwatch.PutMetricDataForAlarm
1187+
- description: Check the alarm state.
1188+
snippet_tags:
1189+
- python.example_code.cloudwatch.DescribeAlarmsState
1190+
- description: Retrieve alarm history.
1191+
snippet_tags:
1192+
- python.example_code.cloudwatch.DescribeAlarmHistory
1193+
- description: Clean up monitoring resources.
1194+
snippet_tags:
1195+
- python.example_code.cloudwatch.DeleteMonitoringResources
1196+
- description: Run the complete scenario.
1197+
snippet_tags:
1198+
- python.example_code.sns.Scenario_CloudWatchSnsMonitoring
1199+
services:
1200+
sns: {CreateTopic, Subscribe, Unsubscribe, DeleteTopic}
1201+
cloudwatch: {PutMetricAlarm, PutMetricData, DescribeAlarms, DescribeAlarmHistory, DeleteAlarms, PutDashboard, DeleteDashboards}

.doc_gen/metadata/sns_metadata.yaml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1393,3 +1393,42 @@ sns_UsageSmsTopic:
13931393
- sns.java2.PublishTextSMS.main
13941394
services:
13951395
sns: {}
1396+
1397+
sns_Scenario_CloudWatchSnsMonitoring:
1398+
title: Monitor infrastructure with &CW; alarms and &SNS; notifications using an &AWS; SDK
1399+
title_abbrev: Monitor infrastructure with &CW; and &SNS;
1400+
synopsis: create &CW; alarms that send &SNS; notifications when infrastructure metrics breach thresholds.
1401+
category: Scenarios
1402+
languages:
1403+
Python:
1404+
versions:
1405+
- sdk_version: 3
1406+
github: python/example_code/sns
1407+
excerpts:
1408+
- description: Create an &SNS; topic and email subscription for alarm notifications.
1409+
snippet_tags:
1410+
- python.example_code.sns.CreateTopicForAlarm
1411+
- description: Create a &CW; alarm with an &SNS; action.
1412+
snippet_tags:
1413+
- python.example_code.cloudwatch.PutMetricAlarmWithSns
1414+
- description: Create a &CW; dashboard to visualize metrics.
1415+
snippet_tags:
1416+
- python.example_code.cloudwatch.PutDashboard
1417+
- description: Publish metric data to trigger the alarm.
1418+
snippet_tags:
1419+
- python.example_code.cloudwatch.PutMetricDataForAlarm
1420+
- description: Check the alarm state.
1421+
snippet_tags:
1422+
- python.example_code.cloudwatch.DescribeAlarmsState
1423+
- description: Retrieve alarm history.
1424+
snippet_tags:
1425+
- python.example_code.cloudwatch.DescribeAlarmHistory
1426+
- description: Clean up monitoring resources.
1427+
snippet_tags:
1428+
- python.example_code.cloudwatch.DeleteMonitoringResources
1429+
- description: Run the complete scenario.
1430+
snippet_tags:
1431+
- python.example_code.sns.Scenario_CloudWatchSnsMonitoring
1432+
services:
1433+
sns: {CreateTopic, Subscribe, Unsubscribe, DeleteTopic}
1434+
cloudwatch: {PutMetricAlarm, PutMetricData, DescribeAlarms, DescribeAlarmHistory, DeleteAlarms, PutDashboard, DeleteDashboards}
Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""
5+
Purpose
6+
7+
Shows how to use Amazon CloudWatch to monitor infrastructure metrics and send
8+
alerts via Amazon SNS when thresholds are breached.
9+
10+
This scenario demonstrates:
11+
1. Creating an SNS topic and email subscription for alarm notifications
12+
2. Creating a CloudWatch alarm that monitors a custom metric, and a dashboard to visualize it
13+
3. Publishing metric data to trigger alarm state changes
14+
4. Retrieving alarm history to view state transitions
15+
5. Cleaning up all created resources
16+
"""
17+
18+
import logging
19+
import sys
20+
import time
21+
from datetime import datetime, timezone
22+
23+
import boto3
24+
from botocore.exceptions import ClientError
25+
26+
sys.path.append("../..")
27+
from demo_tools import question as q
28+
29+
logger = logging.getLogger(__name__)
30+
31+
32+
# snippet-start:[python.example_code.sns.Scenario_CloudWatchSnsMonitoring]
33+
class CloudWatchSnsMonitoringScenario:
    """Runs an interactive scenario demonstrating CloudWatch monitoring with SNS alerts.

    The scenario creates an SNS topic with an email subscription, a CloudWatch
    alarm and dashboard on a custom metric, publishes metric data to drive the
    alarm into the ALARM state, shows the alarm history, and finally offers to
    delete everything it created.
    """

    # Seconds to wait between alarm state checks, and how many checks to make
    # before giving up. The defaults (5 s x 12) give the one-minute wait that
    # the on-screen message promises.
    POLL_INTERVAL_SECONDS = 5
    MAX_STATE_POLLS = 12

    def __init__(self, sns_resource, cloudwatch_client):
        """
        :param sns_resource: A Boto3 SNS resource.
        :param cloudwatch_client: A Boto3 CloudWatch client.
        """
        self.sns_resource = sns_resource
        self.cloudwatch_client = cloudwatch_client
        # Everything below is filled in by _setup_phase and read by
        # _cleanup_phase to decide what has to be deleted.
        self.topic = None
        self.subscription = None
        self.alarm_name = None
        self.namespace = None
        self.metric_name = "ErrorCount"
        self.dashboard_name = None

    def run_scenario(self):
        """Runs the CloudWatch SNS monitoring scenario.

        Any exception raised by a phase is logged and reported to the user;
        cleanup always runs afterwards so partially created resources can be
        removed.
        """
        print("-" * 88)
        print("Welcome to the CloudWatch Monitoring with SNS Alerts Scenario.")
        print("-" * 88)
        print(
            "This scenario demonstrates how to:\n"
            "1. Create an SNS topic for alarm notifications\n"
            "2. Create a CloudWatch alarm that monitors a custom metric\n"
            "3. Publish metric data to trigger the alarm\n"
            "4. Receive email notifications when the alarm state changes\n"
        )

        try:
            self._setup_phase()
            self._publish_metrics_phase()
            self._demonstrate_alarm_phase()
        except Exception as e:
            # Top-level boundary of an interactive demo: report and fall
            # through to cleanup instead of crashing with a traceback.
            logger.exception("Scenario failed: %s", e)
            print(f"\nThe scenario encountered an error: {e}")
        finally:
            self._cleanup_phase()

    @staticmethod
    def _is_confirmed_subscription(subscription):
        """Return True when the subscription carries a real ARN.

        Until the recipient confirms an email subscription, the identifier
        returned by Subscribe is a placeholder rather than an ARN, and it
        cannot be passed to Unsubscribe.
        """
        arn = getattr(subscription, "arn", None)
        return isinstance(arn, str) and arn.startswith("arn:")

    def _setup_phase(self):
        """Setup phase: Create SNS topic, subscription, alarm, and dashboard."""
        # snippet-start:[python.example_code.sns.CreateTopicForAlarm]
        print("\n" + "-" * 88)
        print("Setup Phase")
        print("-" * 88)

        email = q.ask("Enter an email address to receive alarm notifications: ", q.non_empty)

        print("\nCreating SNS topic...")
        self.topic = self.sns_resource.create_topic(Name="cloudwatch-alarms-topic")
        print(f"SNS topic created: {self.topic.arn}")

        print("Creating email subscription...")
        self.subscription = self.topic.subscribe(Protocol="email", Endpoint=email)
        print("Email subscription created. Please check your email and confirm the subscription.")
        # snippet-end:[python.example_code.sns.CreateTopicForAlarm]

        # snippet-start:[python.example_code.cloudwatch.PutMetricAlarmWithSns]
        self.alarm_name = q.ask("\nEnter a name for the CloudWatch alarm: ", q.non_empty)

        namespace_input = input(
            "Enter a name for the custom metric namespace (default: CustomApp/Monitoring): "
        ).strip()
        self.namespace = namespace_input or "CustomApp/Monitoring"

        print(f"\nCreating CloudWatch alarm '{self.alarm_name}'...")
        print(f"Alarm will trigger when {self.metric_name} >= 10 for 1 evaluation period (1 minute).")

        self.cloudwatch_client.put_metric_alarm(
            AlarmName=self.alarm_name,
            ComparisonOperator="GreaterThanOrEqualToThreshold",
            EvaluationPeriods=1,
            MetricName=self.metric_name,
            Namespace=self.namespace,
            Period=60,
            Statistic="Average",
            Threshold=10.0,
            ActionsEnabled=True,
            # Route ALARM transitions to the SNS topic created above.
            AlarmActions=[self.topic.arn],
            AlarmDescription="Alarm when error count exceeds threshold",
        )
        print("CloudWatch alarm created successfully.")
        # snippet-end:[python.example_code.cloudwatch.PutMetricAlarmWithSns]

        # snippet-start:[python.example_code.cloudwatch.PutDashboard]
        # Imported here so the documentation excerpt for this snippet is
        # self-contained.
        import json

        dashboard_name = "monitoring-dashboard"
        region = self.cloudwatch_client.meta.region_name
        print(f"\nCreating CloudWatch dashboard '{dashboard_name}'...")

        dashboard_body = {
            "widgets": [
                {
                    "type": "metric",
                    "properties": {
                        "metrics": [[self.namespace, self.metric_name]],
                        "period": 60,
                        "stat": "Average",
                        "region": region,
                        "title": "Error Count Monitoring",
                    },
                }
            ]
        }

        self.cloudwatch_client.put_dashboard(
            DashboardName=dashboard_name,
            DashboardBody=json.dumps(dashboard_body),
        )
        # Record the dashboard only once it exists, so cleanup never tries to
        # delete a dashboard that was not created.
        self.dashboard_name = dashboard_name

        print(f"Dashboard '{self.dashboard_name}' created successfully.")
        print(f"View at: https://console.aws.amazon.com/cloudwatch/home?region={region}#dashboards:name={self.dashboard_name}")
        print("-" * 88)
        # snippet-end:[python.example_code.cloudwatch.PutDashboard]

    def _publish_metrics_phase(self):
        """Publish metric data to demonstrate alarm triggering.

        Publishes one value below the alarm threshold, reports the alarm
        state, then publishes one value above the threshold and polls until
        the alarm reports ALARM or MAX_STATE_POLLS checks have been made.
        """
        # snippet-start:[python.example_code.cloudwatch.PutMetricDataForAlarm]
        print("\n" + "-" * 88)
        print("Publishing Metric Data")
        print("-" * 88)

        print("Publishing normal metric data (ErrorCount = 5)...")
        self.cloudwatch_client.put_metric_data(
            Namespace=self.namespace,
            MetricData=[
                {
                    "MetricName": self.metric_name,
                    "Value": 5.0,
                    "Unit": "Count",
                    "Timestamp": datetime.now(timezone.utc),
                }
            ],
        )
        print("Metric data published successfully.")
        # snippet-end:[python.example_code.cloudwatch.PutMetricDataForAlarm]

        # snippet-start:[python.example_code.cloudwatch.DescribeAlarmsState]
        # Brief pause before reading the alarm state back.
        time.sleep(self.POLL_INTERVAL_SECONDS)
        print("\nChecking alarm state...")
        alarms = self.cloudwatch_client.describe_alarms(AlarmNames=[self.alarm_name])
        if alarms["MetricAlarms"]:
            alarm = alarms["MetricAlarms"][0]
            print(f"Current alarm state: {alarm['StateValue']}")
            print(f"Alarm reason: {alarm['StateReason']}")
        # snippet-end:[python.example_code.cloudwatch.DescribeAlarmsState]

        print("\nPublishing high metric data to trigger alarm (ErrorCount = 15)...")
        self.cloudwatch_client.put_metric_data(
            Namespace=self.namespace,
            MetricData=[
                {
                    "MetricName": self.metric_name,
                    "Value": 15.0,
                    "Unit": "Count",
                    "Timestamp": datetime.now(timezone.utc),
                }
            ],
        )
        print("Metric data published successfully.")

        print("\nWaiting for alarm state to change (this may take up to 1 minute)...")
        for _ in range(self.MAX_STATE_POLLS):
            time.sleep(self.POLL_INTERVAL_SECONDS)
            alarms = self.cloudwatch_client.describe_alarms(AlarmNames=[self.alarm_name])
            if not alarms["MetricAlarms"]:
                continue
            alarm = alarms["MetricAlarms"][0]
            if alarm["StateValue"] == "ALARM":
                print(f"Alarm state changed to: {alarm['StateValue']}")
                print(f"Alarm reason: {alarm['StateReason']}")
                print("\nAn email notification has been sent to your email address.")
                print("Check your email for the alarm notification.")
                break
        else:
            # The loop ran out without seeing ALARM; say so rather than
            # ending the phase silently.
            print("The alarm has not entered the ALARM state yet. It may still change shortly.")
        print("-" * 88)

    def _demonstrate_alarm_phase(self):
        """Retrieve and display alarm history."""
        # snippet-start:[python.example_code.cloudwatch.DescribeAlarmHistory]
        print("\n" + "-" * 88)
        print("Alarm History")
        print("-" * 88)

        print("Retrieving alarm history...")
        history = self.cloudwatch_client.describe_alarm_history(
            AlarmName=self.alarm_name,
            HistoryItemType="StateUpdate",
            MaxRecords=5,
        )

        if history["AlarmHistoryItems"]:
            print("\nRecent alarm state changes:")
            for i, item in enumerate(history["AlarmHistoryItems"], 1):
                timestamp = item["Timestamp"].strftime("%Y-%m-%dT%H:%M:%SZ")
                print(f"{i}. {timestamp}: {item['HistorySummary']}")
        else:
            print("No alarm history available yet.")
        print("-" * 88)
        # snippet-end:[python.example_code.cloudwatch.DescribeAlarmHistory]

    def _cleanup_phase(self):
        """Cleanup phase: Delete all created resources.

        Does nothing when the scenario created no resources. Otherwise asks
        the user whether to delete them; each deletion is attempted
        independently so that one failure does not leave the remaining
        resources behind.
        """
        # snippet-start:[python.example_code.cloudwatch.DeleteMonitoringResources]
        # Run cleanup if anything was created, not only when the alarm name
        # was reached: the topic and subscription are created first.
        if not (self.topic or self.subscription or self.alarm_name or self.dashboard_name):
            return

        print("\n" + "-" * 88)
        print("Cleanup")
        print("-" * 88)

        delete_resources = q.ask(
            "Delete all resources created by this scenario? (y/n) ", q.is_yesno
        )

        if delete_resources:
            # Each step is (start message, success message, delete action).
            steps = []
            if self.dashboard_name:
                steps.append((
                    "\nDeleting CloudWatch dashboard...",
                    "Dashboard deleted successfully.",
                    lambda: self.cloudwatch_client.delete_dashboards(
                        DashboardNames=[self.dashboard_name]
                    ),
                ))
            if self.alarm_name:
                steps.append((
                    "\nDeleting CloudWatch alarm...",
                    "Alarm deleted successfully.",
                    lambda: self.cloudwatch_client.delete_alarms(
                        AlarmNames=[self.alarm_name]
                    ),
                ))
            if self.subscription:
                if self._is_confirmed_subscription(self.subscription):
                    steps.append((
                        "\nUnsubscribing from SNS topic...",
                        "Subscription removed.",
                        self.subscription.delete,
                    ))
                else:
                    # An unconfirmed subscription has no ARN to unsubscribe;
                    # deleting the topic removes its subscriptions.
                    print("\nSubscription was never confirmed; it is removed together with the topic.")
            if self.topic:
                steps.append((
                    "\nDeleting SNS topic...",
                    "SNS topic deleted successfully.",
                    self.topic.delete,
                ))

            all_deleted = True
            for start_message, done_message, delete_action in steps:
                print(start_message)
                try:
                    delete_action()
                except ClientError as e:
                    # Report and keep going so later resources still get deleted.
                    all_deleted = False
                    print(f"Error during cleanup: {e}")
                else:
                    print(done_message)

            if all_deleted:
                print("\nAll resources cleaned up successfully.")
            else:
                print("\nSome resources could not be deleted. Review the errors above.")
        else:
            print("\nResources will remain active.")
            print(f"SNS Topic ARN: {self.topic.arn if self.topic else 'N/A'}")
            print(f"Alarm Name: {self.alarm_name or 'N/A'}")

        print("-" * 88)
        print("CloudWatch Monitoring with SNS Alerts scenario completed.")
        # snippet-end:[python.example_code.cloudwatch.DeleteMonitoringResources]
277+
278+
279+
# snippet-end:[python.example_code.sns.Scenario_CloudWatchSnsMonitoring]
280+
281+
if __name__ == "__main__":
    # Emit INFO-level records in a compact "LEVEL: message" format.
    logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)

    # Build the AWS handles first, then hand them to the scenario and run it.
    sns = boto3.resource("sns")
    cloudwatch = boto3.client("cloudwatch")
    CloudWatchSnsMonitoringScenario(sns, cloudwatch).run_scenario()

0 commit comments

Comments
 (0)