Update SSI auto-injection tests to validate workload selection policies (#6501)

annacai21 · web-flow · commit fe4c8b875243 · 2026-03-30T12:05:48.000Z
Co-authored-by: anna.cai <anna.cai@datadoghq.com>
diff --git a/manifests/java.yml b/manifests/java.yml
@@ -2765,7 +2765,6 @@ manifest:
   tests/auto_inject/test_auto_inject_install.py::TestSimpleInstallerAutoInjectManualProfiling::test_profiling:
     - declaration: bug (SCP-962)
       component_version: '>=1.5.0'
-  tests/auto_inject/test_blocklist_auto_inject.py::TestAutoInjectBlockListInstallManualHost::test_builtin_block_args: bug (INPLAT-1018)
   tests/debugger/test_debugger_capture_expressions.py::Test_Debugger_Line_Capture_Expressions:
     - weblog_declaration:
         "*": missing_feature
diff --git a/tests/auto_inject/test_blocklist_auto_inject.py b/tests/auto_inject/test_blocklist_auto_inject.py
@@ -5,8 +5,8 @@
 from utils.onboarding.injection_log_parser import command_injection_skipped
 
 
-class _AutoInjectBlockListBaseTest:
-    """Base class to test the block list on auto instrumentation"""
+class _AutoInjectWorkloadSelectionBaseTest:
+    """Base class to test workload selection policies on auto instrumentation."""
 
     def _execute_remote_command(self, ssh_client, command):
         """Execute remote command and get remote log file from the vm. You can use this method using env variables or using injection config file"""
@@ -32,35 +32,37 @@ def _execute_remote_command(self, ssh_client, command):
 @features.host_block_list
 @scenarios.installer_auto_injection
 @irrelevant(condition=context.weblog_variant == "test-app-dotnet-iis")
-class TestAutoInjectBlockListInstallManualHost(_AutoInjectBlockListBaseTest):
-    builtin_args_commands_block = {
+class TestAutoInjectWorkloadSelectionInstallManualHost(_AutoInjectWorkloadSelectionBaseTest):
+    """Test that auto instrumentation respects workload selection policies (excluded specific commands and args)."""
+
+    # Commands with args excluded by workload selection policy per language (should not be instrumented)
+    commands_excluded_by_workload_policy = {
         "java": ["java -version", "MY_ENV_VAR=hello java -version"],
-        "donet": [
+        "dotnet": [
             "dotnet restore",
             "dotnet build -c Release",
-            "sudo -E dotnet publish",
+            "dotnet publish",
             "MY_ENV_VAR=hello dotnet build -c Release",
         ],
     }
 
-    builtin_args_commands_injected = {
+    # Commands with args included by workload selection policy per language (should be instrumented)
+    commands_not_excluded_by_workload_policy = {
         "java": [
             "java -jar myjar.jar",
             "sudo -E java -jar myjar.jar",
             "version=-version java -jar myjar.jar",
             "java -Dversion=-version -jar myapp.jar",
         ],
-        "donet": [
+        "dotnet": [
             "dotnet run -- -p build",
             "dotnet build.dll -- -p build",
             "sudo -E dotnet run myapp.dll -- -p build",
-            "sudo dotnet publish",
             "MY_ENV_VAR=build dotnet myapp.dll",
         ],
     }
 
-    builtin_commands_not_injected = [
-        "ps -fea",
+    no_language_found_commands = [
         "touch myfile.txt",
         "hello=hola cat myfile.txt",
         "ls -la",
@@ -72,45 +74,51 @@ class TestAutoInjectBlockListInstallManualHost(_AutoInjectBlockListBaseTest):
         or "alpine" in context.weblog_variant
         or "buildpack" in context.weblog_variant
     )
-    def test_builtin_block_commands(self):
-        """Check that commands are skipped from the auto injection. This commands are defined on the buildIn processes to block"""
+    def test_no_language_found_commands(self):
+        """Check that commands with no language found are skipped from auto injection."""
         virtual_machine = context.virtual_machine
-        logger.info(f"[{virtual_machine.get_ip()}] Executing commands that should be blocked")
+        logger.info(f"[{virtual_machine.get_ip()}] Executing commands with no language found")
         ssh_client = virtual_machine.get_ssh_connection()
-        for command in self.builtin_commands_not_injected:
+        for command in self.no_language_found_commands:
             local_log_file = self._execute_remote_command(ssh_client, command)
-            assert command_injection_skipped(command, local_log_file), f"The command {command} was instrumented!"
+            assert command_injection_skipped(command, local_log_file), (
+                f"The command '{command}' was allowed by auto injection but should have been denied"
+            )
 
     @irrelevant(
         condition="container" in context.weblog_variant
         or "alpine" in context.weblog_variant
         or "buildpack" in context.weblog_variant
     )
-    def test_builtin_block_args(self):
-        """Check that we are blocking command with args. These args are defined in the buildIn args ignore list for each language."""
+    def test_commands_denied_by_workload_selection(self):
+        """Check that commands are skipped from auto injection based on workload selection policies."""
         virtual_machine = context.virtual_machine
-        logger.info(f"[{virtual_machine.get_ip()}] Executing test_builtIn_block_args")
+        logger.info(f"[{virtual_machine.get_ip()}] Executing commands that are denied by workload selection policies")
         language = context.library.name
-        if language in self.builtin_args_commands_block:
-            ssh_client = virtual_machine.get_ssh_connection()
-            for command in self.builtin_args_commands_block[language]:
-                local_log_file = self._execute_remote_command(ssh_client, command)
-                assert command_injection_skipped(command, local_log_file), f"The command {command} was instrumented!"
+        if language not in self.commands_excluded_by_workload_policy:
+            return
+        ssh_client = virtual_machine.get_ssh_connection()
+        for command in self.commands_excluded_by_workload_policy[language]:
+            local_log_file = self._execute_remote_command(ssh_client, command)
+            assert command_injection_skipped(command, local_log_file), (
+                f"The command '{command}' was allowed by auto injection but should have been denied"
+            )
 
     @irrelevant(
         condition="container" in context.weblog_variant
         or "alpine" in context.weblog_variant
         or "buildpack" in context.weblog_variant
     )
-    def test_builtin_instrument_args(self):
-        """Check that we are instrumenting the command with args that it should be instrumented. The args are not included on the buildIn args list"""
+    def test_commands_allowed_by_workload_selection(self):
+        """Check that commands are allowed to be instrumented based on workload selection policies."""
         virtual_machine = context.virtual_machine
-        logger.info(f"[{virtual_machine.get_ip()}] Executing test_builtIn_instrument_args")
+        logger.info(f"[{virtual_machine.get_ip()}] Executing commands that are allowed by workload selection policies")
         language = context.library.name
-        if language in self.builtin_args_commands_injected:
-            ssh_client = virtual_machine.get_ssh_connection()
-            for command in self.builtin_args_commands_injected[language]:
-                local_log_file = self._execute_remote_command(ssh_client, command)
-                assert command_injection_skipped(command, local_log_file) is False, (
-                    f"The command {command} was not instrumented, but it should be instrumented!"
-                )
+        if language not in self.commands_not_excluded_by_workload_policy:
+            return
+        ssh_client = virtual_machine.get_ssh_connection()
+        for command in self.commands_not_excluded_by_workload_policy[language]:
+            local_log_file = self._execute_remote_command(ssh_client, command)
+            assert command_injection_skipped(command, local_log_file) is False, (
+                f"The command '{command}' was denied by auto injection but should have been allowed"
+            )
diff --git a/tests/test_the_test/scenarios.json b/tests/test_the_test/scenarios.json
@@ -3150,13 +3150,13 @@
   "tests/auto_inject/test_auto_inject_install.py::TestContainerAutoInjectInstallScriptAppsec::test_appsec": [
     "CONTAINER_AUTO_INJECTION_INSTALL_SCRIPT_APPSEC"
   ],
-  "tests/auto_inject/test_blocklist_auto_inject.py::TestAutoInjectBlockListInstallManualHost::test_builtin_block_commands": [
+  "tests/auto_inject/test_blocklist_auto_inject.py::TestAutoInjectWorkloadSelectionInstallManualHost::test_no_language_found_commands": [
     "INSTALLER_AUTO_INJECTION"
   ],
-  "tests/auto_inject/test_blocklist_auto_inject.py::TestAutoInjectBlockListInstallManualHost::test_builtin_block_args": [
+  "tests/auto_inject/test_blocklist_auto_inject.py::TestAutoInjectWorkloadSelectionInstallManualHost::test_commands_denied_by_workload_selection": [
     "INSTALLER_AUTO_INJECTION"
   ],
-  "tests/auto_inject/test_blocklist_auto_inject.py::TestAutoInjectBlockListInstallManualHost::test_builtin_instrument_args": [
+  "tests/auto_inject/test_blocklist_auto_inject.py::TestAutoInjectWorkloadSelectionInstallManualHost::test_commands_allowed_by_workload_selection": [
     "INSTALLER_AUTO_INJECTION"
   ],
   "tests/debugger/test_debugger_code_origins.py::Test_Debugger_Code_Origins::test_code_origin_entry_present": [
diff --git a/utils/onboarding/injection_log_parser.py b/utils/onboarding/injection_log_parser.py
@@ -1,50 +1,52 @@
+import re
 from collections.abc import Callable
-import json
 from pathlib import Path
 
 from utils._logger import logger
 
+WLS_DENIED_INJECTION = "Workload selection denied injection"
+WLS_ALLOWED_INJECTION = "Workload selection allowed injection: continuing"
+NO_KNOWN_RUNTIME = "No known runtime was detected - not injecting!"
+
 
 def exclude_telemetry_logs_filter(line: str):
     return '"command":"telemetry"' not in line and '"caller":"telemetry/' not in line
 
 
 def command_injection_skipped(command_line: str, log_local_path: str):
-    """From parsed log, search on the list of logged commands
-    if one command has been skipped from the instrumentation
+    """Determine if the given command was skipped from auto injection
+    (e.g. by workload selection policies or no language matched).
     """
-    command, command_args = _parse_command(command_line)
-    logger.debug(f"- Checking command: {command_args}")
-    for command_desc in _get_commands_from_log_file(log_local_path, exclude_telemetry_logs_filter):
-        # First line contains the name of the intercepted command
-        first_line_json = json.loads(command_desc[0])
-        if command in first_line_json["inFilename"]:
-            # last line contains the skip message. The command was skipped by build-in deny list or by user deny list
-            last_line_json = json.loads(command_desc[-1])
-            # pylint: disable=R1705
-            if last_line_json["msg"] == "not injecting; on deny list":
-                logger.debug(f"    Command {command_args} was skipped by build-in deny list")
-                return True
-            elif last_line_json["msg"] == "not injecting; on user deny list":
-                logger.debug(f"    Command {command_args} was skipped by user defined deny process list")
-                return True
-            elif last_line_json["msg"] in ["error injecting", "error when parsing", "skipping"] and (
-                last_line_json["error"].startswith(
-                    (
-                        "skipping due to ignore rules for language",
-                        "error when parsing: skipping due to ignore rules for language",
-                    )
-                )
-            ):
-                logger.info(f"    Command {command_args} was skipped by ignore arguments")
-                return True
-            logger.info(f"    Missing injection deny: {last_line_json}")
-            return False
+    command, _ = _parse_command(command_line)
+    logger.debug(f"- Checking command: {command_line}")
+    for process_logs in _get_process_logs_from_log_file(log_local_path, exclude_telemetry_logs_filter):
+        process_exe = _get_exe_from_log_line(process_logs[0])
+        if process_exe is None or command != process_exe:
+            continue
+        if _process_chunk_means_skipped(process_logs):
+            logger.debug(f"    Command '{command_line}' was skipped by workload selection")
+            return True
+        logger.info(f"    Command '{command_line}' was allowed and injected")
+        return False
 
     logger.info(f"    Command {command} was NOT FOUND")
     raise ValueError(f"Command {command} was NOT FOUND")
 
 
+def _process_chunk_means_skipped(chunk: list[str]) -> bool:
+    """True if injection was skipped: denied by workload selection or no known runtime detected."""
+    text = "\n".join(chunk)
+    return WLS_DENIED_INJECTION in text or NO_KNOWN_RUNTIME in text
+
+
+def _get_exe_from_log_line(line: str) -> str | None:
+    """Extract executable name from the log line "process_exe: 'X'"."""
+    match = re.search(r"process_exe:\s*['\"]([^'\"]+)['\"]", line)
+    if match:
+        return Path(match.group(1)).name
+    return None
+
+
 def _parse_command(command: str):
     command_args = command.split()
     command = None
@@ -64,33 +66,33 @@ def _parse_command(command: str):
     return None, None
 
 
-def _get_commands_from_log_file(log_local_path: str, line_filter: Callable):
-    """From instrumentation log file, extract all commands parsed by dd-injection (the log level should be DEBUG)"""
+def _get_process_logs_from_log_file(log_local_path: str, line_filter: Callable):
+    r"""From instrumentation log file, extract all log lines per process.
 
-    store_as_command = False
-    command_lines = []
+    A process chunk starts at the line containing \"process_exe:\" and runs until the next \"process_exe:\". This includes WLS decision
+    lines and post-WLS lines like \"No known runtime was detected - not injecting!\".
+    """
+    process_logs: list[str] = []
     with open(log_local_path, encoding="utf-8") as f:
         for line in f:
             if not line_filter(line):
                 continue
-            if "starting process" in line:
-                store_as_command = True
+            if "process_exe:" in line:
+                if process_logs:
+                    yield process_logs.copy()
+                process_logs = [line]
                 continue
-            if "exiting process" in line:
-                store_as_command = False
-                yield command_lines.copy()
-                command_lines = []
-                continue
-
-            if store_as_command:
-                command_lines.append(line)
+            if process_logs:
+                process_logs.append(line)
+    if process_logs:
+        yield process_logs.copy()
 
 
 def main():
     log_file = "logs_onboarding_host_block_list/host_injection_21711f84-86b3-4125-9a5f-cd129195d99a.log"
     command = "java -Dversion=-version -jar myapp.jar"
     skipped = command_injection_skipped(command, log_file)
-    logger.info(f"The command was skiped? {skipped}")
+    logger.info(f"The command was skipped? {skipped}")
 
 
 if __name__ == "__main__":