Skip to content

Commit 7466374

Browse files
authored
fix: Fix deadlock when using Snap Start with a secret for the API key (#853)
# Problem

When a Lambda function (1) uses Snap Start and (2) specifies the Datadog API key using `DD_API_KEY_SECRET_ARN`, the extension encounters a deadlock. On a `RwLock`, the extension first acquires a read lock: https://github.com/DataDog/datadog-lambda-extension/blob/daf633dd003447d78261e7c371838b5af21073a1/bottlecap/src/secrets/decrypt.rs#L45 and then, while that read guard is still held, tries to acquire a write lock: https://github.com/DataDog/datadog-lambda-extension/blob/daf633dd003447d78261e7c371838b5af21073a1/bottlecap/src/secrets/decrypt.rs#L65 The write lock can never be granted while the read guard is alive, so the call never finishes. This causes the function to time out. This bug was introduced in #717.

# This PR

Fix this bug by removing the `RwLock` usage. `AwsCredentials` is only created and used once in `resolve_secrets()`, and `resolve_secrets()` is only called once, so there is no need to protect this struct with a lock.

# Testing

Tested on a Lambda function with:
- Python 3.13 runtime
- Snap Start
- using `DD_API_KEY_SECRET_ARN`

Before:
- The function timed out.
- Data failed to be sent to Datadog.

After:
- The function finished without timing out.
- Data was sent to Datadog successfully.

# Notes

Jira: https://datadoghq.atlassian.net/browse/SLES-2482
1 parent c58a8ce commit 7466374

2 files changed

Lines changed: 27 additions & 42 deletions

File tree

bottlecap/src/bin/bottlecap/main.rs

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use bottlecap::{
2323
},
2424
config::{
2525
self, Config,
26-
aws::{AwsConfig, AwsCredentials, build_lambda_function_arn},
26+
aws::{AwsConfig, build_lambda_function_arn},
2727
flush_strategy::FlushStrategy,
2828
},
2929
event_bus::{Event, EventBus},
@@ -85,7 +85,7 @@ use dogstatsd::{
8585
use reqwest::Client;
8686
use std::{collections::hash_map, env, path::Path, sync::Arc};
8787
use tokio::time::{Duration, Instant};
88-
use tokio::{sync::Mutex as TokioMutex, sync::RwLock, sync::mpsc::Sender, task::JoinHandle};
88+
use tokio::{sync::Mutex as TokioMutex, sync::mpsc::Sender, task::JoinHandle};
8989
use tokio_util::sync::CancellationToken;
9090
use tracing::{debug, error};
9191
use tracing_subscriber::EnvFilter;
@@ -236,7 +236,7 @@ impl PendingFlushHandles {
236236
async fn main() -> anyhow::Result<()> {
237237
let start_time = Instant::now();
238238
init_ustr();
239-
let (aws_config, aws_credentials, config) = load_configs(start_time);
239+
let (aws_config, config) = load_configs(start_time);
240240

241241
enable_logging_subsystem(&config);
242242
log_fips_status(&aws_config.region);
@@ -254,7 +254,7 @@ async fn main() -> anyhow::Result<()> {
254254
.map_err(|e| anyhow::anyhow!("Failed to register extension: {e:?}"))?;
255255

256256
let aws_config = Arc::new(aws_config);
257-
let api_key_factory = create_api_key_factory(&config, &aws_config, aws_credentials);
257+
let api_key_factory = create_api_key_factory(&config, &aws_config);
258258

259259
match extension_loop_active(
260260
Arc::clone(&aws_config),
@@ -285,14 +285,13 @@ fn init_ustr() {
285285
});
286286
}
287287

288-
fn load_configs(start_time: Instant) -> (AwsConfig, AwsCredentials, Arc<Config>) {
288+
fn load_configs(start_time: Instant) -> (AwsConfig, Arc<Config>) {
289289
// First load the AWS configuration
290290
let aws_config = AwsConfig::from_env(start_time);
291-
let aws_credentials = AwsCredentials::from_env();
292291
let lambda_directory: String =
293292
env::var("LAMBDA_TASK_ROOT").unwrap_or_else(|_| "/var/task".to_string());
294293
let config = Arc::new(config::get_config(Path::new(&lambda_directory)));
295-
(aws_config, aws_credentials, config)
294+
(aws_config, config)
296295
}
297296

298297
fn enable_logging_subsystem(config: &Arc<Config>) {
@@ -318,21 +317,15 @@ fn enable_logging_subsystem(config: &Arc<Config>) {
318317
debug!("Logging subsystem enabled");
319318
}
320319

321-
fn create_api_key_factory(
322-
config: &Arc<Config>,
323-
aws_config: &Arc<AwsConfig>,
324-
aws_credentials: AwsCredentials,
325-
) -> Arc<ApiKeyFactory> {
320+
fn create_api_key_factory(config: &Arc<Config>, aws_config: &Arc<AwsConfig>) -> Arc<ApiKeyFactory> {
326321
let config = Arc::clone(config);
327322
let aws_config = Arc::clone(aws_config);
328-
let aws_credentials = Arc::new(RwLock::new(aws_credentials));
329323

330324
Arc::new(ApiKeyFactory::new_from_resolver(Arc::new(move || {
331325
let config = Arc::clone(&config);
332326
let aws_config = Arc::clone(&aws_config);
333-
let aws_credentials = Arc::clone(&aws_credentials);
334327

335-
Box::pin(async move { resolve_secrets(config, aws_config, aws_credentials).await })
328+
Box::pin(async move { resolve_secrets(config, aws_config).await })
336329
})))
337330
}
338331

bottlecap/src/secrets/decrypt.rs

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,11 @@ use serde_json::Value;
1313
use sha2::{Digest, Sha256};
1414
use std::io::Error;
1515
use std::sync::Arc;
16-
use tokio::{sync::RwLock, time::Instant};
16+
use tokio::time::Instant;
1717
use tracing::debug;
1818
use tracing::error;
1919

20-
pub async fn resolve_secrets(
21-
config: Arc<Config>,
22-
aws_config: Arc<AwsConfig>,
23-
aws_credentials: Arc<RwLock<AwsCredentials>>,
24-
) -> Option<String> {
20+
pub async fn resolve_secrets(config: Arc<Config>, aws_config: Arc<AwsConfig>) -> Option<String> {
2521
let api_key_candidate =
2622
if !config.api_key_secret_arn.is_empty() || !config.kms_api_key.is_empty() {
2723
let before_decrypt = Instant::now();
@@ -42,36 +38,32 @@ pub async fn resolve_secrets(
4238
}
4339
};
4440

45-
let aws_credentials_read = aws_credentials.read().await;
41+
let mut aws_credentials = AwsCredentials::from_env();
4642

47-
if aws_credentials_read.aws_secret_access_key.is_empty()
48-
&& aws_credentials_read.aws_access_key_id.is_empty()
49-
&& !aws_credentials_read
43+
if aws_credentials.aws_secret_access_key.is_empty()
44+
&& aws_credentials.aws_access_key_id.is_empty()
45+
&& !aws_credentials
5046
.aws_container_credentials_full_uri
5147
.is_empty()
52-
&& !aws_credentials_read
53-
.aws_container_authorization_token
54-
.is_empty()
48+
&& !aws_credentials.aws_container_authorization_token.is_empty()
5549
{
5650
// We're in Snap Start
57-
let credentials =
58-
match get_snapstart_credentials(&aws_credentials_read, &client).await {
59-
Ok(credentials) => credentials,
60-
Err(err) => {
61-
error!("Error getting Snap Start credentials: {}", err);
62-
return None;
63-
}
64-
};
65-
let mut aws_credentials_write = aws_credentials.write().await;
66-
aws_credentials_write.aws_access_key_id = credentials["AccessKeyId"]
51+
let credentials = match get_snapstart_credentials(&aws_credentials, &client).await {
52+
Ok(credentials) => credentials,
53+
Err(err) => {
54+
error!("Error getting Snap Start credentials: {}", err);
55+
return None;
56+
}
57+
};
58+
aws_credentials.aws_access_key_id = credentials["AccessKeyId"]
6759
.as_str()
6860
.unwrap_or_default()
6961
.to_string();
70-
aws_credentials_write.aws_secret_access_key = credentials["SecretAccessKey"]
62+
aws_credentials.aws_secret_access_key = credentials["SecretAccessKey"]
7163
.as_str()
7264
.unwrap_or_default()
7365
.to_string();
74-
aws_credentials_write.aws_session_token = credentials["Token"]
66+
aws_credentials.aws_session_token = credentials["Token"]
7567
.as_str()
7668
.unwrap_or_default()
7769
.to_string();
@@ -82,15 +74,15 @@ pub async fn resolve_secrets(
8274
&client,
8375
config.api_key_secret_arn.clone(),
8476
aws_config,
85-
&aws_credentials_read,
77+
&aws_credentials,
8678
)
8779
.await
8880
} else {
8981
decrypt_aws_kms(
9082
&client,
9183
config.kms_api_key.clone(),
9284
aws_config,
93-
&aws_credentials_read,
85+
&aws_credentials,
9486
)
9587
.await
9688
};

0 commit comments

Comments
 (0)