diff --git a/CHANGELOG.md b/CHANGELOG.md index 721db3b612..5661e35d82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- `opentelemetry-sdk`: fix YAML structure injection via environment variable substitution in declarative file configuration; values containing newlines are now emitted as quoted YAML scalars per spec requirement + ([#5091](https://github.com/open-telemetry/opentelemetry-python/pull/5091)) - `opentelemetry-sdk`: Add `create_logger_provider`/`configure_logger_provider` to declarative file configuration, enabling LoggerProvider instantiation from config files without reading env vars ([#4990](https://github.com/open-telemetry/opentelemetry-python/pull/4990)) - `opentelemetry-sdk`: Add `service` resource detector support to declarative file configuration via `detection_development.detectors[].service` diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/_configuration/file/_env_substitution.py b/opentelemetry-sdk/src/opentelemetry/sdk/_configuration/file/_env_substitution.py index 0a42809e34..f12e1b0e28 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/_configuration/file/_env_substitution.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/_configuration/file/_env_substitution.py @@ -81,6 +81,23 @@ def replace_var(match) -> str: f"Environment variable '{var_name}' not found and no default provided" ) + # Per spec: "It MUST NOT be possible to inject YAML structures by + # environment variables." Newlines are the primary injection vector — + # a value like "legit\nmalicious_key: val" would create extra YAML + # keys if substituted verbatim. Wrap such values in a YAML + # double-quoted scalar so the newline is treated as literal text. + # Simple values (no newlines) are returned as-is so that YAML type + # coercion still applies per spec ("Node types MUST be interpreted + # after environment variable substitution takes place"). + if "\n" in value or "\r" in value: + escaped = ( + value.replace("\\", "\\\\") + .replace('"', '\\"') + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t") + ) + return f'"{escaped}"' return value return re.sub(pattern, replace_var, text) diff --git a/opentelemetry-sdk/tests/_configuration/file/test_env_substitution.py b/opentelemetry-sdk/tests/_configuration/file/test_env_substitution.py index 1a8bdc9aa2..3db60432c9 100644 --- a/opentelemetry-sdk/tests/_configuration/file/test_env_substitution.py +++ b/opentelemetry-sdk/tests/_configuration/file/test_env_substitution.py @@ -16,6 +16,8 @@ import unittest from unittest.mock import patch +import yaml + from opentelemetry.sdk._configuration.file import ( EnvSubstitutionError, substitute_env_vars, @@ -115,3 +117,45 @@ def test_only_dollar_signs(self): """Test string with only escaped dollar signs.""" result = substitute_env_vars("$$$$") self.assertEqual(result, "$$") + + def test_newline_in_value_prevents_yaml_injection(self): + """Values containing newlines must not inject YAML structure. + + Per spec: "It MUST NOT be possible to inject YAML structures by + environment variables." A value like "legit\\nmalicious_key: val" + must be emitted as a quoted scalar, not raw YAML. + """ + with patch.dict( + os.environ, + {"SERVICE_NAME": "legit-service\nmalicious_key: injected_value"}, + ): + result = substitute_env_vars( + "file_format: '0.1'\nservice_name: ${SERVICE_NAME}" + ) + parsed = yaml.safe_load(result) + self.assertNotIn("malicious_key", parsed) + self.assertIn("legit-service", parsed["service_name"]) + + def test_newline_in_value_preserved_as_literal(self): + """Newline within a value is preserved as a literal newline character.""" + with patch.dict(os.environ, {"MULTI": "line1\nline2"}): + result = substitute_env_vars("key: ${MULTI}") + parsed = yaml.safe_load(result) + self.assertEqual(parsed["key"], "line1\nline2") + + def test_carriage_return_in_value_is_escaped(self): + """Carriage return in value is escaped, not injected.""" + with patch.dict(os.environ, {"VAL": "text\r\nmore"}): + result = substitute_env_vars("key: ${VAL}") + parsed = yaml.safe_load(result) + self.assertIsInstance(parsed["key"], str) + + def test_type_coercion_preserved_for_simple_values(self): + """Simple values without newlines still undergo YAML type coercion per spec.""" + with patch.dict(os.environ, {"BOOL_VAL": "true", "INT_VAL": "42"}): + bool_result = yaml.safe_load( + substitute_env_vars("key: ${BOOL_VAL}") + ) + int_result = yaml.safe_load(substitute_env_vars("key: ${INT_VAL}")) + self.assertIs(bool_result["key"], True) + self.assertEqual(int_result["key"], 42)