Skip to content

Commit 9093884

Browse files
devatsecure and claude committed
fix: Improve Semgrep PATH resolution with python -m fallback
Add container-specific paths (/home/agentuser/.local/bin, /root/.local/bin) and python -m semgrep as a last-resort fallback when the binary is not on PATH (common in Docker USER switch scenarios and QEMU emulation). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1150bac commit 9093884

File tree

1 file changed

+61
-12
lines changed

1 file changed

+61
-12
lines changed

scripts/semgrep_scanner.py

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
import logging
1515
import shutil
1616
import subprocess
17+
import sys
1718
from dataclasses import asdict, dataclass
1819
from datetime import datetime, timezone
1920
from pathlib import Path
20-
from typing import Any, Optional
21+
from typing import Any, Optional, Union
2122

2223
logging.basicConfig(level=logging.INFO)
2324
logger = logging.getLogger(__name__)
@@ -63,30 +64,76 @@ def __init__(self, config: Optional[dict] = None):
6364
["*/test/*", "*/tests/*", "*/.git/*", "*/node_modules/*", "*/.venv/*", "*/venv/*", "*/build/*", "*/dist/*"],
6465
)
6566

66-
# Resolve semgrep binary path (shutil.which + common fallbacks)
67-
self._semgrep_bin = self._resolve_semgrep_path()
67+
# Resolve semgrep binary path (shutil.which + common fallbacks + python -m)
68+
# _semgrep_bin is either a str (binary path) or list[str] (e.g. ["python", "-m", "semgrep"])
69+
self._semgrep_bin: Optional[Union[str, list[str]]] = self._resolve_semgrep_path()
6870
if not self._semgrep_bin:
6971
logger.warning("Semgrep not installed. Install with: pip install semgrep")
7072

71-
def _resolve_semgrep_path(self) -> Optional[str]:
72-
"""Resolve the full path to the semgrep binary."""
73+
def _resolve_semgrep_path(self) -> Optional[Union[str, list[str]]]:
74+
"""Resolve the semgrep binary path or fall back to ``python -m semgrep``.
75+
76+
Returns:
77+
A string path to the semgrep binary, a list of args for the
78+
``python -m semgrep`` invocation, or ``None`` if semgrep is not
79+
available at all.
80+
"""
81+
# 1. Try PATH lookup first (fastest)
7382
bin_path = shutil.which("semgrep")
7483
if bin_path:
7584
return bin_path
76-
# Fallback: common install locations not always on PATH in subprocesses
77-
for candidate in ["/opt/homebrew/bin/semgrep", "/usr/local/bin/semgrep", "/usr/bin/semgrep"]:
85+
86+
# 2. Fallback: common install locations not always on PATH in containers
87+
for candidate in [
88+
"/usr/local/bin/semgrep",
89+
"/usr/bin/semgrep",
90+
"/opt/homebrew/bin/semgrep",
91+
"/home/agentuser/.local/bin/semgrep",
92+
"/root/.local/bin/semgrep",
93+
str(Path.home() / ".local" / "bin" / "semgrep"),
94+
]:
7895
if Path(candidate).is_file():
7996
return candidate
97+
98+
# 3. Last resort: try python -m semgrep (works when pip-installed but
99+
# the binary is not on PATH, e.g. Docker USER switch)
100+
try:
101+
result = subprocess.run(
102+
[sys.executable, "-m", "semgrep", "--version"],
103+
capture_output=True,
104+
text=True,
105+
timeout=10,
106+
)
107+
if result.returncode == 0:
108+
logger.info(
109+
"Semgrep binary not on PATH; using '%s -m semgrep' fallback",
110+
sys.executable,
111+
)
112+
return [sys.executable, "-m", "semgrep"]
113+
except (subprocess.SubprocessError, FileNotFoundError, OSError):
114+
pass
115+
80116
return None
81117

118+
def _semgrep_cmd_prefix(self) -> list[str]:
119+
"""Return the command prefix for invoking semgrep as a list.
120+
121+
Handles both the binary-path (str) and module-invocation (list) forms
122+
stored in ``self._semgrep_bin``.
123+
"""
124+
if isinstance(self._semgrep_bin, list):
125+
return list(self._semgrep_bin)
126+
return [self._semgrep_bin]
127+
82128
def _check_semgrep_installed(self) -> bool:
83129
"""Check if semgrep is available"""
84130
if not self._semgrep_bin:
85131
return False
86132
try:
87-
result = subprocess.run([self._semgrep_bin, "--version"], capture_output=True, text=True, timeout=5)
133+
cmd = self._semgrep_cmd_prefix() + ["--version"]
134+
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
88135
return result.returncode == 0
89-
except (subprocess.SubprocessError, FileNotFoundError):
136+
except (FileNotFoundError, subprocess.SubprocessError, OSError):
90137
return False
91138

92139
def scan(self, target_path: str, output_format: str = "json") -> dict[str, Any]:
@@ -112,8 +159,7 @@ def scan(self, target_path: str, output_format: str = "json") -> dict[str, Any]:
112159
return {"error": "path_not_found", "findings": []}
113160

114161
# Build semgrep command
115-
cmd = [
116-
self._semgrep_bin,
162+
cmd = self._semgrep_cmd_prefix() + [
117163
"--config",
118164
self.semgrep_rules if self.semgrep_rules != "auto" else "p/security-audit",
119165
"--json",
@@ -218,8 +264,11 @@ def _parse_semgrep_output(self, semgrep_output: dict) -> list[SemgrepFinding]:
218264

219265
def _get_semgrep_version(self) -> str:
220266
"""Get Semgrep version"""
267+
if not self._semgrep_bin:
268+
return "unknown"
221269
try:
222-
result = subprocess.run(["semgrep", "--version"], capture_output=True, text=True, timeout=5)
270+
cmd = self._semgrep_cmd_prefix() + ["--version"]
271+
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
223272
return result.stdout.strip()
224273
except Exception:
225274
return "unknown"

0 commit comments

Comments
 (0)