Skip to content

Commit 43bec96

Browse files
committed
refactor: Improve error handling for exception
Signed-off-by: Vincent Boutour <vincent.boutour@datadoghq.com>
1 parent 8edda81 commit 43bec96

13 files changed

Lines changed: 73 additions & 62 deletions

aws/logs_monitoring/caching/base_tags_cache.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,9 @@ def write_cache_to_s3(self, data):
6060
DD_S3_BUCKET_NAME, self.get_cache_name_with_prefix()
6161
)
6262
s3_object.put(Body=(bytes(json.dumps(data).encode("UTF-8"))))
63-
except ClientError:
63+
except ClientError as e:
6464
send_forwarder_internal_metrics("s3_cache_write_failure")
65-
self.logger.debug("Unable to write new cache to S3", exc_info=True)
65+
self.logger.debug(f"Unable to write new cache to S3: {e}", exc_info=True)
6666

6767
def acquire_s3_cache_lock(self):
6868
"""Acquire cache lock"""
@@ -76,16 +76,16 @@ def acquire_s3_cache_lock(self):
7676
last_modified_unix_time = get_last_modified_time(file_content)
7777
if last_modified_unix_time + DD_S3_CACHE_LOCK_TTL_SECONDS >= time():
7878
return False
79-
except Exception:
80-
self.logger.debug("Unable to get cache lock file")
79+
except Exception as e:
80+
self.logger.debug(f"Unable to get cache lock file: {e}")
8181

8282
# lock file doesn't exist, create file to acquire lock
8383
try:
8484
cache_lock_object.put(Body=(bytes("lock".encode("UTF-8"))))
8585
send_forwarder_internal_metrics("s3_cache_lock_acquired")
8686
self.logger.debug("S3 cache lock acquired")
87-
except ClientError:
88-
self.logger.debug("Unable to write S3 cache lock file", exc_info=True)
87+
except ClientError as e:
88+
self.logger.debug(f"Unable to write S3 cache lock file: {e}", exc_info=True)
8989
return False
9090

9191
return True
@@ -99,9 +99,9 @@ def release_s3_cache_lock(self):
9999
cache_lock_object.delete()
100100
send_forwarder_internal_metrics("s3_cache_lock_released")
101101
self.logger.debug("S3 cache lock released")
102-
except ClientError:
102+
except ClientError as e:
103103
send_forwarder_internal_metrics("s3_cache_lock_release_failure")
104-
self.logger.debug("Unable to release S3 cache lock", exc_info=True)
104+
self.logger.debug(f"Unable to release S3 cache lock: {e}", exc_info=True)
105105

106106
def get_cache_from_s3(self):
107107
"""Retrieves tags cache from s3 and returns the body along with
@@ -113,9 +113,9 @@ def get_cache_from_s3(self):
113113
file_content = cache_object.get()
114114
tags_cache = json.loads(file_content["Body"].read().decode("utf-8"))
115115
last_modified_unix_time = get_last_modified_time(file_content)
116-
except:
116+
except Exception as e:
117117
send_forwarder_internal_metrics("s3_cache_fetch_failure")
118-
self.logger.debug("Unable to fetch cache from S3", exc_info=True)
118+
self.logger.debug(f"Unable to fetch cache from S3: {e}", exc_info=True)
119119
return {}, -1
120120

121121
return tags_cache, last_modified_unix_time

aws/logs_monitoring/caching/cloudwatch_log_group_cache.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -103,10 +103,10 @@ def _get_log_group_tags_from_cache(self, cache_file_name):
103103
)
104104
tags_cache = json.loads(response.get("Body").read().decode("utf-8"))
105105
last_modified_unix_time = int(response.get("LastModified").timestamp())
106-
except Exception:
106+
except Exception as e:
107107
send_forwarder_internal_metrics("loggroup_cache_fetch_failure")
108108
self.logger.exception(
109-
"Failed to get log group tags from cache", exc_info=True
109+
f"Failed to get log group tags from cache: {e}", exc_info=True
110110
)
111111
return None, -1
112112

@@ -120,10 +120,10 @@ def _update_log_group_tags_cache(self, log_group, tags):
120120
Key=cache_file_name,
121121
Body=(bytes(json.dumps(tags).encode("UTF-8"))),
122122
)
123-
except Exception:
123+
except Exception as e:
124124
send_forwarder_internal_metrics("loggroup_cache_write_failure")
125125
self.logger.exception(
126-
"Failed to update log group tags cache", exc_info=True
126+
f"Failed to update log group tags cache: {e}", exc_info=True
127127
)
128128

129129
def _is_expired(self, last_modified):
@@ -150,8 +150,8 @@ def _get_log_group_tags(self, log_group_arn):
150150
response = self.cloudwatch_logs_client.list_tags_for_resource(
151151
resourceArn=log_group_arn
152152
)
153-
except Exception:
154-
self.logger.exception("Failed to get log group tags", exc_info=True)
153+
except Exception as e:
154+
self.logger.exception(f"Failed to get log group tags: {e}", exc_info=True)
155155
formatted_tags = None
156156
if response is not None:
157157
formatted_tags = [

aws/logs_monitoring/caching/lambda_cache.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,8 @@ def build_tags_cache(self):
4343

4444
except ClientError as e:
4545
self.logger.exception(
46-
"Encountered a ClientError when trying to fetch tags. You may need to give "
47-
"this Lambda's role the 'tag:GetResources' permission"
46+
f"Failed to fetch Lambda tags: {e}. "
47+
"Add 'tag:GetResources' permission to the Forwarder's IAM role."
4848
)
4949
additional_tags = [
5050
f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}"

aws/logs_monitoring/caching/s3_tags_cache.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ def build_tags_cache(self):
4242
except ClientError as e:
4343
self.logger.exception(
4444
"Encountered a ClientError when trying to fetch tags. You may need to give "
45-
"this Lambda's role the 'tag:GetResources' permission"
45+
f"this Lambda's role the 'tag:GetResources' permission: {e}"
4646
)
4747
additional_tags = [
4848
f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}"

aws/logs_monitoring/caching/step_functions_cache.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def build_tags_cache(self):
5151
except ClientError as e:
5252
self.logger.exception(
5353
"Encountered a ClientError when trying to fetch tags. You may need to give "
54-
"this Lambda's role the 'tag:GetResources' permission"
54+
f"this Lambda's role the 'tag:GetResources' permission: {e}"
5555
)
5656
additional_tags = [
5757
f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}"

aws/logs_monitoring/forwarder.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -116,8 +116,10 @@ def _forward_logs(self, logs, key=None):
116116
for batch in batcher.batch(logs_to_forward):
117117
try:
118118
client.send(batch)
119-
except Exception:
120-
logger.exception(f"Exception while forwarding log batch {batch}")
119+
except Exception as e:
120+
logger.exception(
121+
f"Exception while forwarding log batch {batch}: {e}"
122+
)
121123
failed_logs.extend(batch)
122124
else:
123125
if logger.isEnabledFor(logging.DEBUG):
@@ -142,9 +144,9 @@ def _forward_metrics(self, metrics, key=None):
142144
for metric in metrics:
143145
try:
144146
send_log_metric(metric)
145-
except Exception:
147+
except Exception as e:
146148
logger.exception(
147-
f"Exception while forwarding metric {json.dumps(metric)}"
149+
f"Exception while forwarding metric {json.dumps(metric)}: {e}"
148150
)
149151
failed_metrics.append(metric)
150152
else:
@@ -168,9 +170,9 @@ def _forward_traces(self, traces, key=None):
168170
try:
169171
serialized_trace_paylods = json.dumps(traces)
170172
self.trace_connection.send_traces(serialized_trace_paylods)
171-
except Exception:
173+
except Exception as e:
172174
logger.exception(
173-
f"Exception while forwarding traces {serialized_trace_paylods}"
175+
f"Exception while forwarding traces {serialized_trace_paylods}: {e}"
174176
)
175177
if DD_STORE_FAILED_EVENTS and not key:
176178
self.storage.store_data(RetryPrefix.TRACES, traces)

aws/logs_monitoring/lambda_function.py

Lines changed: 24 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4,41 +4,46 @@
44
# Copyright 2021 Datadog, Inc.
55

66
import json
7-
import os
8-
import boto3
97
import logging
10-
import requests
8+
import os
119
from hashlib import sha1
1210

13-
from datadog_lambda.wrapper import datadog_lambda_wrapper
11+
import boto3
12+
import requests
1413
from datadog import api
15-
from enhanced_lambda_metrics import parse_and_submit_enhanced_metrics
16-
from steps.parsing import parse
17-
from steps.enrichment import enrich
18-
from steps.transformation import transform
19-
from steps.splitting import split
14+
from datadog_lambda.wrapper import datadog_lambda_wrapper
15+
2016
from caching.cache_layer import CacheLayer
17+
from enhanced_lambda_metrics import parse_and_submit_enhanced_metrics
2118
from forwarder import Forwarder
2219
from settings import (
20+
DD_ADDITIONAL_TARGET_LAMBDAS,
2321
DD_API_KEY,
24-
DD_SKIP_SSL_VALIDATION,
2522
DD_API_URL,
2623
DD_FORWARDER_VERSION,
27-
DD_ADDITIONAL_TARGET_LAMBDAS,
2824
DD_RETRY_KEYWORD,
25+
DD_SITE,
26+
DD_SKIP_SSL_VALIDATION,
2927
)
28+
from steps.enrichment import enrich
29+
from steps.parsing import parse
30+
from steps.splitting import split
31+
from steps.transformation import transform
3032

3133
logger = logging.getLogger()
3234
logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper()))
3335

3436
# DD_API_KEY must be set
3537
if DD_API_KEY == "<YOUR_DATADOG_API_KEY>" or DD_API_KEY == "":
36-
raise Exception("Missing Datadog API key")
38+
raise Exception(
39+
"Missing Datadog API key. Set DD_API_KEY environment variable. "
40+
"See: https://docs.datadoghq.com/serverless/forwarder/"
41+
)
3742
# Check if the API key is the correct number of characters
3843
if len(DD_API_KEY) != 32:
3944
raise Exception(
40-
"The API key is not the expected length. "
41-
"Please confirm that your API key is correct"
45+
f"Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}. "
46+
f"Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys"
4247
)
4348
# Validate the API key
4449
logger.debug("Validating the Datadog API key")
@@ -57,7 +62,11 @@
5762
timeout=10,
5863
)
5964
if not validation_res.ok:
60-
raise Exception("The API key is not valid.")
65+
raise Exception(
66+
f"Datadog API key validation failed (HTTP {validation_res.status_code}). "
67+
f"Verify your API key is correct and DD_SITE matches your Datadog account region (current: {DD_SITE}). "
68+
"See: https://docs.datadoghq.com/getting_started/site/"
69+
)
6170

6271
# Force the layer to use the exact same API key and host as the forwarder
6372
api._api_key = DD_API_KEY

aws/logs_monitoring/logs/datadog_http_client.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -95,8 +95,8 @@ def send(self, logs):
9595
"""
9696
try:
9797
data = self._scrubber.scrub("[{}]".format(",".join(logs)))
98-
except ScrubbingException:
99-
raise Exception("could not scrub the payload")
98+
except ScrubbingException as e:
99+
raise Exception(f"could not scrub the payload: {e}")
100100
if DD_USE_COMPRESSION:
101101
data = compress_logs(data, DD_COMPRESSION_LEVEL)
102102

aws/logs_monitoring/logs/datadog_matcher.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,5 +44,5 @@ def match(self, log):
4444

4545
return True
4646

47-
except ScrubbingException:
48-
raise Exception("could not filter the payload")
47+
except Exception as e:
48+
raise Exception(f"Failed to filter log: {e}")

aws/logs_monitoring/logs/helpers.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -34,16 +34,16 @@ def compileRegex(rule, pattern):
3434
if pattern == "":
3535
# If pattern is an empty string, raise exception
3636
raise Exception(
37-
"No pattern provided:\nAdd pattern or remove {} environment variable".format(
38-
rule
39-
)
37+
f"Empty pattern for {rule}. Set a valid regex pattern or remove the {rule} environment variable."
4038
)
4139
try:
4240
return re.compile(pattern)
43-
except Exception:
41+
except re.error as e:
4442
raise Exception(
45-
"could not compile {} regex with pattern: {}".format(rule, pattern)
43+
f"Invalid regex pattern for {rule}: '{pattern}'. Regex error: {e}"
4644
)
45+
except Exception as e:
46+
raise Exception(f"Failed to compile {rule} regex pattern '{pattern}': {e}")
4747

4848

4949
def add_retry_tag(log):

0 commit comments

Comments
 (0)