Skip to content

Commit 7260eb4

Browse files
authored
feat(aws): AWSX-2034 Continue if we fail to validate api key but storage of failed event enabled (#1026)
* feat(aws): AWSX-2034 Continue if we fail to validate api key but storage of failed event enabled Signed-off-by: Vincent Boutour <vincent.boutour@datadoghq.com> * feat(aws): AWSX-2034 Adding failed metric for event Signed-off-by: Vincent Boutour <vincent.boutour@datadoghq.com> * fixup! feat(aws): AWSX-2034 Continue if we fail to validate api key but storage of failed event enabled Signed-off-by: Vincent Boutour <vincent.boutour@datadoghq.com> --------- Signed-off-by: Vincent Boutour <vincent.boutour@datadoghq.com>
1 parent 987f806 commit 7260eb4

9 files changed

Lines changed: 90 additions & 61 deletions

File tree

.github/workflows/aws_unit_test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ jobs:
1818
AWS_DEFAULT_REGION: us-east-1
1919
DD_API_KEY: "11111111111111111111111111111111"
2020
DD_ADDITIONAL_TARGET_LAMBDAS: "ironmaiden,megadeth"
21+
DD_STORE_FAILED_EVENTS: "true"
2122
run: |
2223
pip install boto3 mock approvaltests
2324
python -m unittest discover ./aws/logs_monitoring/

aws/logs_monitoring/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ If you can't install the Forwarder using the provided CloudFormation template, y
9595
8. Set environment variable `DD_STORE_FAILED_EVENTS` to `true` to enable the forwarder to also store event data in the S3 bucket. In case of exceptions when sending logs, metrics, or traces to intake, the forwarder stores the relevant data in the S3 bucket. On custom invocations, i.e. on receiving an event with the `retry` keyword set to a non-empty string (which can be manually triggered - see below), the forwarder retries sending the stored events. When successful, it clears up the storage in the bucket.
9696

9797
```bash
98-
aws lambda invoke --function-name <function-name> --payload '{"retry":"true"}' out
98+
aws lambda invoke --function-name <function-name> --payload '{"retry":"true"}' --cli-binary-format raw-in-base64-out --log-type Tail /dev/stdout
9999
```
100100

101101
<div class="alert alert-warning">

aws/logs_monitoring/forwarder.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ def _forward_logs(self, logs, key=None):
128128
if DD_STORE_FAILED_EVENTS and len(failed_logs) > 0 and not key:
129129
self.storage.store_data(RetryPrefix.LOGS, failed_logs)
130130

131+
if len(failed_logs) > 0:
132+
# Report the number of failed logs (a count, like "logs_forwarded" below), not the payloads themselves.
send_event_metric("logs_failed", len(failed_logs))
133+
131134
send_event_metric("logs_forwarded", len(logs_to_forward) - len(failed_logs))
132135

133136
def _forward_metrics(self, metrics, key=None):
@@ -156,6 +159,9 @@ def _forward_metrics(self, metrics, key=None):
156159
if DD_STORE_FAILED_EVENTS and len(failed_metrics) > 0 and not key:
157160
self.storage.store_data(RetryPrefix.METRICS, failed_metrics)
158161

162+
if len(failed_metrics) > 0:
163+
# Report the number of failed metrics (a count, like "metrics_forwarded" below), not the payloads themselves.
send_event_metric("metrics_failed", len(failed_metrics))
164+
159165
send_event_metric("metrics_forwarded", len(metrics) - len(failed_metrics))
160166

161167
def _forward_traces(self, traces, key=None):

aws/logs_monitoring/lambda_function.py

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from hashlib import sha1
1010

1111
import boto3
12-
import requests
1312
from datadog import api
1413
from datadog_lambda.wrapper import datadog_lambda_wrapper
1514

@@ -22,8 +21,9 @@
2221
DD_API_URL,
2322
DD_FORWARDER_VERSION,
2423
DD_RETRY_KEYWORD,
25-
DD_SITE,
2624
DD_SKIP_SSL_VALIDATION,
25+
DD_STORE_FAILED_EVENTS,
26+
is_api_key_valid,
2727
)
2828
from steps.enrichment import enrich
2929
from steps.parsing import parse
@@ -33,42 +33,12 @@
3333
logger = logging.getLogger()
3434
logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper()))
3535

36-
# DD_API_KEY must be set
37-
if DD_API_KEY == "<YOUR_DATADOG_API_KEY>" or DD_API_KEY == "":
38-
raise Exception(
39-
"Missing Datadog API key. Set DD_API_KEY environment variable. "
40-
"See: https://docs.datadoghq.com/serverless/forwarder/"
41-
)
42-
# Check if the API key is the correct number of characters
43-
if len(DD_API_KEY) != 32:
44-
raise Exception(
45-
f"""
46-
Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}.
47-
Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys
48-
"""
49-
)
50-
# Validate the API key
51-
logger.debug("Validating the Datadog API key")
5236

53-
with requests.Session() as s:
54-
retries = requests.adapters.Retry(
55-
total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]
37+
if not is_api_key_valid() and not DD_STORE_FAILED_EVENTS:
38+
raise Exception(
39+
"Failed to check if API Key is valid and no storage of failed events, aborting."
5640
)
5741

58-
s.mount("http://", requests.adapters.HTTPAdapter(max_retries=retries))
59-
s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))
60-
61-
validation_res = s.get(
62-
"{}/api/v1/validate?api_key={}".format(DD_API_URL, DD_API_KEY),
63-
verify=(not DD_SKIP_SSL_VALIDATION),
64-
timeout=10,
65-
)
66-
if not validation_res.ok:
67-
raise Exception(
68-
f"Datadog API key validation failed (HTTP {validation_res.status_code}). "
69-
f"Verify your API key is correct and DD_SITE matches your Datadog account region (current: {DD_SITE}). "
70-
"See: https://docs.datadoghq.com/getting_started/site/"
71-
)
7242

7343
# Force the layer to use the exact same API key and host as the forwarder
7444
api._api_key = DD_API_KEY

aws/logs_monitoring/logs/datadog_http_client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def __init__(
6868
self._session = None
6969
self._ssl_validation = not skip_ssl_validation
7070
self._futures = []
71+
7172
if logger.isEnabledFor(logging.DEBUG):
7273
logger.debug(
7374
f"Initialized http client for logs intake: "

aws/logs_monitoring/settings.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import boto3
1212
import botocore.config
13+
import requests
1314

1415
logger = logging.getLogger()
1516
logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper()))
@@ -230,6 +231,49 @@ def __init__(self, name, pattern, placeholder, enabled=True):
230231
DD_API_KEY = DD_API_KEY.strip()
231232
os.environ["DD_API_KEY"] = DD_API_KEY
232233

234+
235+
def is_api_key_valid():
    """Check the configured Datadog API key, locally and against the Datadog API.

    The key is first checked for presence and length; those failures are
    configuration errors and always raise. The key is then submitted to the
    ``/api/v1/validate`` endpoint under ``DD_API_URL``.

    Returns:
        bool: True when the validation endpoint answers with a successful
        status. False when the endpoint rejects the key or the request
        itself fails (connection error, timeout, retries exhausted), so the
        caller can decide whether to abort or keep running.

    Raises:
        Exception: if ``DD_API_KEY`` is empty, is still the template
            placeholder, or is not exactly 32 characters long.
    """
    # DD_API_KEY must be set
    if DD_API_KEY == "<YOUR_DATADOG_API_KEY>" or DD_API_KEY == "":
        raise Exception(
            "Missing Datadog API key. Set DD_API_KEY environment variable. "
            "See: https://docs.datadoghq.com/serverless/forwarder/"
        )

    # Check if the API key is the correct number of characters
    if len(DD_API_KEY) != 32:
        raise Exception(
            f"""
            Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}.
            Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys
            """
        )

    # Validate the API key
    logger.debug("Validating the Datadog API key")

    with requests.Session() as s:
        retries = requests.adapters.Retry(
            total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]
        )

        s.mount("http://", requests.adapters.HTTPAdapter(max_retries=retries))
        s.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))

        try:
            validation_res = s.get(
                "{}/api/v1/validate?api_key={}".format(DD_API_URL, DD_API_KEY),
                verify=(not DD_SKIP_SSL_VALIDATION),
                timeout=10,
            )
        except requests.exceptions.RequestException as err:
            # Connection errors, timeouts and exhausted retries on the
            # status_forcelist codes surface as exceptions rather than as a
            # non-OK response. Report them as "not validated" instead of
            # crashing, so the caller's DD_STORE_FAILED_EVENTS fallback can
            # apply. The exception text contains the request URL, which
            # embeds the API key, so redact it before logging.
            logger.error(
                "Datadog API key validation request failed (%s): %s",
                type(err).__name__,
                str(err).replace(DD_API_KEY, "<redacted>"),
            )
            return False

        if not validation_res.ok:
            logger.error(
                f"Datadog API key validation failed (HTTP {validation_res.status_code}). Verify your API key is correct and DD_SITE matches your Datadog account region (current: {DD_SITE}). See: https://docs.datadoghq.com/getting_started/site/"
            )
            return False

    return True
275+
276+
233277
# DD_MULTILINE_LOG_REGEX_PATTERN: Multiline Log Regular Expression Pattern
234278
DD_MULTILINE_LOG_REGEX_PATTERN = get_env_var(
235279
"DD_MULTILINE_LOG_REGEX_PATTERN", default=None

aws/logs_monitoring/tests/run_unit_tests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22

33
export DD_API_KEY=11111111111111111111111111111111
44
export DD_ADDITIONAL_TARGET_LAMBDAS=ironmaiden,megadeth
5+
export DD_STORE_FAILED_EVENTS="true"
56
export DD_S3_BUCKET_NAME=dd-s3-bucket
67
python3 -m unittest discover .

aws/logs_monitoring/tools/integration_tests/docker-compose.yml

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,50 +27,54 @@ services:
2727
AWS_SECURITY_TOKEN: "${AWS_SECURITY_TOKEN}"
2828
AWS_SESSION_TOKEN: "${AWS_SESSION_TOKEN}"
2929
AWS_DEFAULT_REGION: us-east-1
30-
DD_LOG_LEVEL: ${LOG_LEVEL:-info}
30+
DD_ADDITIONAL_TARGET_LAMBDAS: "${EXTERNAL_LAMBDAS}"
3131
DD_API_KEY: abcdefghijklmnopqrstuvwxyz012345 # Must be 32 characters exactly
32-
DD_URL: recorder # Used for logs intake
33-
DD_PORT: 8080 # API port to use
34-
DD_SITE: datadog.com
3532
DD_API_URL: http://recorder:8080
36-
DD_LOGS_INTAKE_URL: recorder:8080
37-
DD_TRACE_INTAKE_URL: http://recorder:8080
38-
DD_NO_SSL: "true"
39-
DD_SKIP_SSL_VALIDATION: "true"
40-
DD_USE_COMPRESSION: "false"
41-
DD_ADDITIONAL_TARGET_LAMBDAS: "${EXTERNAL_LAMBDAS}"
42-
DD_S3_BUCKET_NAME: "${DD_S3_BUCKET_NAME}"
4333
DD_FETCH_LAMBDA_TAGS: "${DD_FETCH_LAMBDA_TAGS:-false}"
4434
DD_FETCH_LOG_GROUP_TAGS: "${DD_FETCH_LOG_GROUP_TAGS:-false}"
4535
DD_FETCH_STEP_FUNCTIONS_TAGS: "${DD_FETCH_STEP_FUNCTIONS_TAGS:-false}"
46-
DD_STORE_FAILED_EVENTS: "false"
36+
DD_LOG_LEVEL: ${LOG_LEVEL:-info}
37+
DD_LOGS_INTAKE_URL: recorder:8080
38+
DD_NO_SSL: "true"
39+
DD_PORT: 8080 # API port to use
40+
DD_S3_BUCKET_NAME: "${DD_S3_BUCKET_NAME}"
41+
DD_SITE: datadog.com
42+
DD_SKIP_SSL_VALIDATION: "true"
43+
DD_STORE_FAILED_EVENTS: "${DD_STORE_FAILED_EVENTS:-true}"
4744
DD_TRACE_ENABLED: "true"
45+
DD_TRACE_INTAKE_URL: http://recorder:8080
46+
DD_URL: recorder # Used for logs intake
47+
DD_USE_COMPRESSION: "false"
4848
expose:
4949
- 8080
5050
depends_on:
5151
recorder:
5252
condition: service_healthy
5353
healthcheck:
54-
test: ["CMD", "curl", "-f", "http://localhost:8080/2015-03-31/functions/function/invocations"]
55-
interval: 10s
56-
timeout: 5s
57-
retries: 3
58-
54+
test:
55+
[
56+
"CMD",
57+
"curl",
58+
"-f",
59+
"http://localhost:8080/2015-03-31/functions/function/invocations",
60+
]
61+
interval: 10s
62+
timeout: 5s
5963

6064
tester:
6165
image: ${PYTHON_BASE}
6266
command: /bin/sh -c 'pip install "deepdiff<6" && python -m unittest discover'
6367
volumes:
6468
- ./tester:/tester
65-
- ${SNAPSHOTS_DIR_NAME}:/snapshots
69+
- "${SNAPSHOTS_DIR_NAME}:/snapshots"
6670
working_dir: /tester
6771
environment:
68-
RECORDER_URL: http://recorder:8080/recording
6972
FORWARDER_URL: http://forwarder:8080/2015-03-31/functions/function/invocations
70-
UPDATE_SNAPSHOTS: ${UPDATE_SNAPSHOTS:-false}
71-
SNAPSHOTS_DIR_NAME: ${SNAPSHOTS_DIR_NAME}
73+
RECORDER_URL: http://recorder:8080/recording
74+
SNAPSHOTS_DIR_NAME: "${SNAPSHOTS_DIR_NAME}"
75+
UPDATE_SNAPSHOTS: "${UPDATE_SNAPSHOTS:-false}"
7276
depends_on:
73-
forwarder:
74-
condition: service_healthy
75-
recorder:
76-
condition: service_healthy
77+
forwarder:
78+
condition: service_healthy
79+
recorder:
80+
condition: service_healthy

aws/logs_monitoring/tools/integration_tests/integration_tests.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ CACHE_TEST=false
2323
DD_FETCH_LAMBDA_TAGS="true"
2424
DD_FETCH_LOG_GROUP_TAGS="true"
2525
DD_FETCH_STEP_FUNCTIONS_TAGS="true"
26+
DD_STORE_FAILED_EVENTS="true"
2627

2728
script_start_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
2829
echo "Starting script time: $script_start_time"
@@ -154,6 +155,7 @@ LOG_LEVEL=${LOG_LEVEL} \
154155
DD_FETCH_LAMBDA_TAGS=${DD_FETCH_LAMBDA_TAGS} \
155156
DD_FETCH_LOG_GROUP_TAGS=${DD_FETCH_LOG_GROUP_TAGS} \
156157
DD_FETCH_STEP_FUNCTIONS_TAGS=${DD_FETCH_STEP_FUNCTIONS_TAGS} \
158+
DD_STORE_FAILED_EVENTS=${DD_STORE_FAILED_EVENTS} \
157159
docker compose up --build --abort-on-container-exit
158160

159161
if [ $ADDITIONAL_LAMBDA == true ]; then

0 commit comments

Comments
 (0)