1414import logging
1515import shutil
1616import subprocess
17+ import sys
1718from dataclasses import asdict , dataclass
1819from datetime import datetime , timezone
1920from pathlib import Path
20- from typing import Any , Optional
21+ from typing import Any , Optional , Union
2122
2223logging .basicConfig (level = logging .INFO )
2324logger = logging .getLogger (__name__ )
@@ -63,30 +64,76 @@ def __init__(self, config: Optional[dict] = None):
6364 ["*/test/*" , "*/tests/*" , "*/.git/*" , "*/node_modules/*" , "*/.venv/*" , "*/venv/*" , "*/build/*" , "*/dist/*" ],
6465 )
6566
66- # Resolve semgrep binary path (shutil.which + common fallbacks)
67- self ._semgrep_bin = self ._resolve_semgrep_path ()
67+ # Resolve semgrep binary path (shutil.which + common fallbacks + python -m)
68+ # _semgrep_bin is either a str (binary path) or list[str] (e.g. ["python", "-m", "semgrep"])
69+ self ._semgrep_bin : Optional [Union [str , list [str ]]] = self ._resolve_semgrep_path ()
6870 if not self ._semgrep_bin :
6971 logger .warning ("Semgrep not installed. Install with: pip install semgrep" )
7072
71- def _resolve_semgrep_path (self ) -> Optional [str ]:
72- """Resolve the full path to the semgrep binary."""
73+ def _resolve_semgrep_path (self ) -> Optional [Union [str , list [str ]]]:
74+ """Resolve the semgrep binary path or fall back to ``python -m semgrep``.
75+
76+ Returns:
77+ A string path to the semgrep binary, a list of args for the
78+ ``python -m semgrep`` invocation, or ``None`` if semgrep is not
79+ available at all.
80+ """
81+ # 1. Try PATH lookup first (fastest)
7382 bin_path = shutil .which ("semgrep" )
7483 if bin_path :
7584 return bin_path
76- # Fallback: common install locations not always on PATH in subprocesses
77- for candidate in ["/opt/homebrew/bin/semgrep" , "/usr/local/bin/semgrep" , "/usr/bin/semgrep" ]:
85+
86+ # 2. Fallback: common install locations not always on PATH in containers
87+ for candidate in [
88+ "/usr/local/bin/semgrep" ,
89+ "/usr/bin/semgrep" ,
90+ "/opt/homebrew/bin/semgrep" ,
91+ "/home/agentuser/.local/bin/semgrep" ,
92+ "/root/.local/bin/semgrep" ,
93+ str (Path .home () / ".local" / "bin" / "semgrep" ),
94+ ]:
7895 if Path (candidate ).is_file ():
7996 return candidate
97+
98+ # 3. Last resort: try python -m semgrep (works when pip-installed but
99+ # the binary is not on PATH, e.g. Docker USER switch)
100+ try :
101+ result = subprocess .run (
102+ [sys .executable , "-m" , "semgrep" , "--version" ],
103+ capture_output = True ,
104+ text = True ,
105+ timeout = 10 ,
106+ )
107+ if result .returncode == 0 :
108+ logger .info (
109+ "Semgrep binary not on PATH; using '%s -m semgrep' fallback" ,
110+ sys .executable ,
111+ )
112+ return [sys .executable , "-m" , "semgrep" ]
113+ except (subprocess .SubprocessError , FileNotFoundError , OSError ):
114+ pass
115+
80116 return None
81117
118+ def _semgrep_cmd_prefix (self ) -> list [str ]:
119+ """Return the command prefix for invoking semgrep as a list.
120+
121+ Handles both the binary-path (str) and module-invocation (list) forms
122+ stored in ``self._semgrep_bin``.
123+ """
124+ if isinstance (self ._semgrep_bin , list ):
125+ return list (self ._semgrep_bin )
126+ return [self ._semgrep_bin ]
127+
82128 def _check_semgrep_installed (self ) -> bool :
83129 """Check if semgrep is available"""
84130 if not self ._semgrep_bin :
85131 return False
86132 try :
87- result = subprocess .run ([self ._semgrep_bin , "--version" ], capture_output = True , text = True , timeout = 5 )
133+ cmd = self ._semgrep_cmd_prefix () + ["--version" ]
134+ result = subprocess .run (cmd , capture_output = True , text = True , timeout = 10 )
88135 return result .returncode == 0
89- except (subprocess .SubprocessError , FileNotFoundError ):
136+ except (FileNotFoundError , subprocess .SubprocessError , OSError ):
90137 return False
91138
92139 def scan (self , target_path : str , output_format : str = "json" ) -> dict [str , Any ]:
@@ -112,8 +159,7 @@ def scan(self, target_path: str, output_format: str = "json") -> dict[str, Any]:
112159 return {"error" : "path_not_found" , "findings" : []}
113160
114161 # Build semgrep command
115- cmd = [
116- self ._semgrep_bin ,
162+ cmd = self ._semgrep_cmd_prefix () + [
117163 "--config" ,
118164 self .semgrep_rules if self .semgrep_rules != "auto" else "p/security-audit" ,
119165 "--json" ,
@@ -218,8 +264,11 @@ def _parse_semgrep_output(self, semgrep_output: dict) -> list[SemgrepFinding]:
218264
219265 def _get_semgrep_version (self ) -> str :
220266 """Get Semgrep version"""
267+ if not self ._semgrep_bin :
268+ return "unknown"
221269 try :
222- result = subprocess .run (["semgrep" , "--version" ], capture_output = True , text = True , timeout = 5 )
270+ cmd = self ._semgrep_cmd_prefix () + ["--version" ]
271+ result = subprocess .run (cmd , capture_output = True , text = True , timeout = 10 )
223272 return result .stdout .strip ()
224273 except Exception :
225274 return "unknown"
0 commit comments