Skip to content
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions LICENSE-3rdparty.yml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions datadog-live-debugger/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ http-body-util = "0.1"
bytes = "1.11.1"

regex = "1.9.3"
regex-lite = "0.1"
percent-encoding = "2.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
Expand Down
2 changes: 1 addition & 1 deletion datadog-live-debugger/src/redacted_names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ pub unsafe fn add_redacted_type<I: AsRef<[u8]>>(name: I) {
regex_str.push('|')
}
let name = String::from_utf8_lossy(name);
regex_str.push_str(regex::escape(&name[..name.len() - 1]).as_str());
regex_str.push_str(regex_lite::escape(&name[..name.len() - 1]).as_str());
regex_str.push_str(".*");
} else {
let added_types = &mut (*(&*ADDED_REDACTED_TYPES as *const Vec<Vec<u8>>).cast_mut());
Expand Down
1 change: 0 additions & 1 deletion datadog-remote-config/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ tracing = { version = "0.1", default-features = false, optional = true }
serde = "1.0"
serde_json = { version = "1.0", features = ["raw_value"] }
serde_with = "3"
regex = "1.0"

# Test feature
hyper-util = { workspace = true, features = ["service"], optional = true }
Expand Down
15 changes: 11 additions & 4 deletions datadog-remote-config/src/config/agent_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,18 @@ use serde::Deserialize;
#[cfg(feature = "test")]
use serde::Serialize;

use regex::Regex;
use serde::de::{self, Deserializer};

/// Returns `true` when `s` is one or more ASCII digits immediately followed by either
/// `-with-debug` or `-with-content`, and nothing else.
fn is_valid_suffixed_case_id(s: &str) -> bool {
    // Accepted suffixes, tried in order; a string can end with at most one of them.
    let numeric_part = ["-with-debug", "-with-content"]
        .iter()
        .find_map(|suffix| s.strip_suffix(*suffix));

    match numeric_part {
        Some(digits) => !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()),
        None => false,
    }
}

fn deserialize_case_id<'de, D>(deserializer: D) -> Result<String, D::Error>
where
D: Deserializer<'de>,
Expand All @@ -23,9 +32,7 @@ where
if s.chars().all(|c| c.is_ascii_digit()) {
return Ok(s);
}
let re = Regex::new(r"^\d+-(with-debug|with-content)$")
.map_err(|_| de::Error::custom("Invalid case_id format"))?;
if re.is_match(&s) {
if is_valid_suffixed_case_id(&s) {
return Ok(s);
}
Err(de::Error::custom(
Expand Down
2 changes: 1 addition & 1 deletion datadog-sidecar-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

use proc_macro::TokenStream;
use quote::{format_ident, quote};
use syn::FnArg::Typed;
use syn::__private::Span;
use syn::parse::{Parse, ParseStream};
use syn::FnArg::Typed;
use syn::{parse_macro_input, parse_quote, Arm, Ident, ItemTrait, Pat, TraitItem};

fn snake_to_camel(ident_str: &str) -> String {
Expand Down
2 changes: 1 addition & 1 deletion libdd-common-ffi/src/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ mod tests {
let vec = vec![0, 2, 4, 6];
let ffi_vec: Vec<u8> = Vec::from(vec.clone());

for (a, b) in vec.iter().zip(ffi_vec.into_iter()) {
for (a, b) in vec.iter().zip(&ffi_vec) {
assert_eq!(a, b)
}
}
Expand Down
1 change: 0 additions & 1 deletion libdd-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ multer = { version = "3.1", optional = true }
bytes = { version = "1.11.1" }
pin-project = "1"
rand = { version = "0.8", optional = true }
regex = "1.5"
# Use hickory-dns instead of the default system DNS resolver to avoid fork safety issues.
# The default resolver can hold locks or other global state that can cause deadlocks
# or corruption when the process forks (e.g., in PHP-FPM or other forking environments).
Expand Down
25 changes: 17 additions & 8 deletions libdd-common/src/azure_app_services.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

use regex::Regex;
use std::env;
use std::sync::LazyLock;

Expand Down Expand Up @@ -104,13 +103,23 @@ impl AzureMetadata {
}

fn extract_resource_group(s: Option<String>) -> Option<String> {
#[allow(clippy::unwrap_used)]
let re: Regex = Regex::new(r".+\+(.+)-.+webspace(-Linux)?").unwrap();

s.as_ref().and_then(|text| {
re.captures(text)
.and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()))
})
// /.+\+(.+)-.+webspace(-Linux)?/
let text = s.as_ref()?;
Comment thread
Aaalibaba42 marked this conversation as resolved.
Outdated
let (before_plus, after_plus) = text.rsplit_once('+')?;
if before_plus.is_empty() {
return None;
}
let webspace_pos = after_plus.rfind("webspace")?;
let before_webspace = &after_plus[..webspace_pos];
let dash_pos = before_webspace.rfind('-')?;
if dash_pos + 1 >= before_webspace.len() {
return None;
}
let resource_group = &before_webspace[..dash_pos];
if resource_group.is_empty() {
return None;
}
Some(resource_group.to_string())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way, the behavior of this parser differs from the original regexp. For example, `foo+bar-baz-webspace-Linux` is accepted by the original regexp but not by this function (which basically doesn't allow a `-` immediately before `webspace`). On the other hand, the function doesn't check anything after `webspace`, so for example `foo+bar-bazwebspace-MacOS` is accepted, while it is not by the original regexp. Not sure whether this is important/intentional, or whether the original regexp was wrong, but just in case 👀

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep — since this was mostly about capturing the correct thing, I omitted that. I don't know whether it's relevant; I should check with the consumers of this.

}

/*
Expand Down
100 changes: 74 additions & 26 deletions libdd-common/src/entity_id/unix/container_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,85 @@

//! This module provides functions to parse the container id from the cgroup file
use super::CgroupFileParsingError;
use regex::Regex;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::sync::LazyLock;

const UUID_SOURCE: &str =
r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}";
const CONTAINER_SOURCE: &str = r"[0-9a-f]{64}";
const TASK_SOURCE: &str = r"[0-9a-f]{32}-\d+";

pub(crate) static LINE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
#[allow(clippy::unwrap_used)]
Regex::new(r"^\d+:[^:]*:(.+)$").unwrap()
});

pub(crate) static CONTAINER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
#[allow(clippy::unwrap_used)]
Regex::new(&format!(
r"({UUID_SOURCE}|{CONTAINER_SOURCE}|{TASK_SOURCE})(?:.scope)? *$"
))
.unwrap()
});

/// Returns `true` if `b` is an ASCII digit or one of the lowercase ASCII letters `a` through `f`.
fn is_lowercase_hex(b: u8) -> bool {
    b.is_ascii_digit() || (b'a'..=b'f').contains(&b)
}

/// Try to match `[0-9a-f]{64}` at the end of `s`.
///
/// Returns the trailing 64-byte slice of `s` when every byte in it is a lowercase hex digit.
/// Returns `None` otherwise, including when `s` is shorter than 64 bytes.
fn try_match_hex64(s: &str) -> Option<&str> {
    let start = s.len().checked_sub(64)?;
    // `get` instead of indexing: when `start` lands inside a multi-byte UTF-8 character, `&s[start..]`
    // would panic. Such a tail starts with a non-hex byte, so "no match" is the correct answer.
    let candidate = s.get(start..)?;
    candidate.bytes().all(is_lowercase_hex).then_some(candidate)
}

/// Try to match a UUID `[0-9a-f]{8}[-_][0-9a-f]{4}[-_]..[-_][0-9a-f]{12}` (36 chars) at the end.
fn try_match_uuid(s: &str) -> Option<&str> {
if s.len() < 36 {
return None;
}

let candidate = &s[s.len() - 36..];
const TEMPLATE: &[u8; 36] = b"hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh";
Comment thread
yannham marked this conversation as resolved.
Outdated
candidate
.as_bytes()
.iter()
.zip(TEMPLATE)
.all(|(&c, &t)| match t {
b'h' => is_lowercase_hex(c),
b'-' => matches!(c, b'-' | b'_'),
_ => false,
})
.then_some(candidate)
}

/// Try to match `[0-9a-f]{32}-\d+` at the end of `s`.
///
/// The text after the last `-` must be one or more ASCII digits, and the 32 bytes right before
/// that `-` must be lowercase hex digits. On success the returned slice covers the hex run, the
/// dash and the digits; otherwise `None` is returned.
fn try_match_task_id(s: &str) -> Option<&str> {
    let (prefix, digits) = s.rsplit_once('-')?;
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    let hex_start = prefix.len().checked_sub(32)?;
    // `get` instead of indexing: when `hex_start` lands inside a multi-byte UTF-8 character,
    // `prefix[hex_start..]` would panic. Such a run cannot be all hex, so "no match" is correct.
    let hex_run = prefix.get(hex_start..)?;
    if !hex_run.bytes().all(is_lowercase_hex) {
        return None;
    }
    // `prefix` is a prefix of `s`, so `hex_start` is a valid char boundary in `s` as well.
    s.get(hex_start..)
}

/// Pull a container ID off the end of a cgroup path.
///
/// Trailing whitespace is ignored, then a single `.scope` suffix is dropped if present. The
/// remaining text is tested, in this order, for a trailing 64-digit hex ID, a trailing UUID and a
/// trailing task ID; the first one that matches is returned.
pub(super) fn extract_container_id_from_path(path: &str) -> Option<&str> {
    let trimmed = path.trim_end();
    let candidate = trimmed.strip_suffix(".scope").unwrap_or(trimmed);

    if let Some(id) = try_match_hex64(candidate) {
        return Some(id);
    }
    if let Some(id) = try_match_uuid(candidate) {
        return Some(id);
    }
    try_match_task_id(candidate)
}

/// Parse a cgroup line (`^\d+:[^:]*:(.+)$`) and extract a container ID from the path component.
fn parse_line(line: &str) -> Option<&str> {
// unwrap is OK since if regex matches then the groups must exist
#[allow(clippy::unwrap_used)]
LINE_REGEX
.captures(line)
.and_then(|captures| CONTAINER_REGEX.captures(captures.get(1).unwrap().as_str()))
.map(|captures| captures.get(1).unwrap().as_str())
let mut parts = line.splitn(3, ':');
let hierarchy_id = parts.next()?;
let _controllers = parts.next()?;
let path = parts.next()?;

if hierarchy_id.is_empty()
|| !hierarchy_id.bytes().all(|b| b.is_ascii_digit())
|| path.is_empty()
{
return None;
}
extract_container_id_from_path(path)
}

/// Extract container id contained in the cgroup file located at `cgroup_path`
Expand Down
41 changes: 26 additions & 15 deletions libdd-common/src/entity_id/unix/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,27 @@ pub static ENTITY_ID: LazyLock<Option<&'static str>> = LazyLock::new(|| {
#[cfg(test)]
mod tests {
use super::*;
use regex::Regex;

static IN_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"in-\d+").unwrap());
static CI_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(r"ci-{}", container_id::CONTAINER_REGEX.as_str())).unwrap()
});
/// Shape that `matches_entity_id_kind` checks an entity id against.
enum EntityIdKind {
/// `in-` followed by one or more ASCII digits.
Inode,
/// `ci-` followed by text that `container_id::extract_container_id_from_path` accepts.
ContainerId,
}

/// Check whether `entity_id` has the shape described by `kind`.
///
/// * `Inode`: the literal prefix `in-` followed by at least one ASCII digit and nothing else.
/// * `ContainerId`: the literal prefix `ci-` followed by text from which
///   `container_id::extract_container_id_from_path` can extract an ID.
fn matches_entity_id_kind(entity_id: &str, kind: &EntityIdKind) -> bool {
    match kind {
        EntityIdKind::Inode => entity_id.strip_prefix("in-").is_some_and(|inode| {
            !inode.is_empty() && inode.bytes().all(|b| b.is_ascii_digit())
        }),
        EntityIdKind::ContainerId => entity_id
            .strip_prefix("ci-")
            .is_some_and(|id| container_id::extract_container_id_from_path(id).is_some()),
    }
}

/// The following test can only be run in isolation because of caching behaviour
fn test_entity_id(filename: &str, expected_result: Option<&Regex>) {
fn test_entity_id(filename: &str, expected_kind: Option<EntityIdKind>) {
let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests"));

let entity_id = compute_entity_id(
Expand All @@ -121,12 +133,11 @@ mod tests {
test_root_dir.join("cgroup").as_path(),
);

if let Some(regex) = expected_result {
if let Some(kind) = expected_kind {
let id = entity_id.as_deref().unwrap();
assert!(
regex.is_match(entity_id.as_deref().unwrap()),
"testing get_entity_id with file {}: {} is not matching the expected regex",
filename,
entity_id.as_deref().unwrap_or("None")
matches_entity_id_kind(id, &kind),
"testing get_entity_id with file {filename}: {id} did not match expected format",
);
} else {
assert_eq!(
Expand All @@ -139,19 +150,19 @@ mod tests {
#[cfg_attr(miri, ignore)]
#[test]
fn test_entity_id_for_v2() {
test_entity_id("cgroup.v2", Some(&*IN_REGEX))
test_entity_id("cgroup.v2", Some(EntityIdKind::Inode))
}

#[cfg_attr(miri, ignore)]
#[test]
fn test_entity_id_for_v1() {
test_entity_id("cgroup.linux", Some(&*IN_REGEX))
test_entity_id("cgroup.linux", Some(EntityIdKind::Inode))
}

#[cfg_attr(miri, ignore)]
#[test]
fn test_entity_id_for_container_id() {
test_entity_id("cgroup.docker", Some(&*CI_REGEX))
test_entity_id("cgroup.docker", Some(EntityIdKind::ContainerId))
}

#[cfg_attr(miri, ignore)]
Expand Down
Loading
Loading