diff --git a/backend/app/component/environment.py b/backend/app/component/environment.py index 15ee6a5c1..ebed2b7c2 100644 --- a/backend/app/component/environment.py +++ b/backend/app/component/environment.py @@ -7,14 +7,20 @@ import importlib from typing import Any, overload import threading +from utils.path_safety import sanitize_path traceroot_logger = traceroot.get_logger("env") # Thread-local storage for user-specific environment _thread_local = threading.local() +ALLOWED_ENV_ROOT = Path.home().resolve() + + # Default global environment path -default_env_path = os.path.join(os.path.expanduser("~"), ".eigent", ".env") +default_env_path = ( + sanitize_path(os.path.join(os.path.expanduser("~"), ".eigent", ".env"), ALLOWED_ENV_ROOT) or Path.home() +) load_dotenv(dotenv_path=default_env_path) @@ -23,28 +29,35 @@ def set_user_env_path(env_path: str | None = None): Set user-specific environment path for current thread. If env_path is None, uses default global environment. """ - traceroot_logger.info("Setting user environment path", extra={"env_path": env_path, "exists": env_path and os.path.exists(env_path) if env_path else None}) - - if env_path and os.path.exists(env_path): - _thread_local.env_path = env_path + sanitized_path = sanitize_path(env_path, ALLOWED_ENV_ROOT) + traceroot_logger.info( + "Setting user environment path", + extra={"env_path": sanitized_path or env_path, "exists": sanitized_path.exists() if sanitized_path else None}, + ) + + if sanitized_path and sanitized_path.exists(): + _thread_local.env_path = sanitized_path # Load user-specific environment variables - load_dotenv(dotenv_path=env_path, override=True) - traceroot_logger.info("User-specific environment loaded", extra={"env_path": env_path}) + load_dotenv(dotenv_path=sanitized_path, override=True) + traceroot_logger.info("User-specific environment loaded", extra={"env_path": str(sanitized_path)}) else: # Clear thread-local env_path to fall back to global if hasattr(_thread_local, 'env_path'): delattr(_thread_local, 'env_path') traceroot_logger.info("Reset to default global environment") - if env_path and not os.path.exists(env_path): - traceroot_logger.warning("User environment path does not exist, falling back to global", extra={"env_path": env_path}) + if env_path and (not sanitized_path or not (sanitized_path and sanitized_path.exists())): + traceroot_logger.warning( + "User environment path does not exist or is invalid, falling back to global", extra={"env_path": env_path} + ) def get_current_env_path() -> str: """ Get current environment path (either user-specific or default). """ - return getattr(_thread_local, 'env_path', default_env_path) + current = getattr(_thread_local, 'env_path', default_env_path) + return str(current) @overload diff --git a/backend/app/model/chat.py b/backend/app/model/chat.py index fc9010839..ee05e0fbf 100644 --- a/backend/app/model/chat.py +++ b/backend/app/model/chat.py @@ -6,6 +6,7 @@ from pydantic import BaseModel, Field, field_validator from camel.types import ModelType, RoleType from utils import traceroot_wrapper as traceroot +from utils.path_safety import safe_component, sanitize_path logger = traceroot.get_logger("chat_model") @@ -65,6 +66,11 @@ class Chat(BaseModel): extra_params: dict | None = None # For provider-specific parameters like Azure search_config: dict[str, str] | None = None # User-specific search engine configurations (e.g., GOOGLE_API_KEY, SEARCH_ENGINE_ID) + @staticmethod + def _safe_email(email: str) -> str: + """Sanitize email local part for filesystem use.""" + return re.sub(r'[\\/*?:"<>|\s]', "_", email.split("@")[0]).strip(".") + @field_validator("model_type") @classmethod def check_model_type(cls, model_type: str): @@ -85,14 +91,17 @@ def is_cloud(self): return self.api_url is not None and "44.247.171.124" in self.api_url def file_save_path(self, path: str | None = None): - email = re.sub(r'[\\/*?:"<>|\s]', "_", self.email.split("@")[0]).strip(".") + email = self._safe_email(self.email) + project_id = safe_component(self.project_id, "project_id") + task_id = safe_component(self.task_id, "task_id") + allowed_root = (Path.home() / "eigent").resolve() # Use project-based structure: project_{project_id}/task_{task_id} - save_path = Path.home() / "eigent" / email / f"project_{self.project_id}" / f"task_{self.task_id}" - if path is not None: - save_path = save_path / path - save_path.mkdir(parents=True, exist_ok=True) + base_path = allowed_root / email / f"project_{project_id}" / f"task_{task_id}" + target_path = base_path / path if path is not None else base_path + safe_path = sanitize_path(target_path, allowed_root) or target_path.resolve() + safe_path.mkdir(parents=True, exist_ok=True) - return str(save_path) + return str(safe_path) class SupplementChat(BaseModel): diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index 0ac7969d2..74c0c41e2 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -43,9 +43,24 @@ from camel.types import ModelPlatformType from camel.models import ModelProcessingError from utils import traceroot_wrapper as traceroot +from utils.path_safety import sanitize_path import os logger = traceroot.get_logger("chat_service") +ALLOWED_WORKDIR_ROOT = (Path.home() / "eigent").resolve() + + +def _normalize_working_directory(path_value: str | Path | None) -> Path | None: + """Normalize and constrain working directory under the allowed root.""" + sanitized = sanitize_path(path_value, ALLOWED_WORKDIR_ROOT) if path_value else None + if sanitized: + return sanitized + if path_value: + logger.warning( + "Rejected working directory outside allowed root or invalid", + extra={"working_directory": str(path_value)}, + ) + return None def format_task_context(task_data: dict, seen_files: set | None = None, skip_files: bool = False) -> str: @@ -66,10 +81,11 @@ def format_task_context(task_data: dict, seen_files: set | None = None, skip_fil # Skip file listing if requested if not skip_files: - working_directory = task_data.get('working_directory') + working_directory_raw = task_data.get('working_directory') + working_directory = _normalize_working_directory(working_directory_raw) if working_directory: try: - if os.path.exists(working_directory): + if working_directory.exists(): generated_files = [] for root, dirs, files in os.walk(working_directory): dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', '__pycache__', 'venv']] @@ -193,8 +209,10 @@ def build_conversation_context(task_lock: TaskLock, header: str = "=== CONVERSAT if isinstance(entry['content'], dict): formatted_context = format_task_context(entry['content'], skip_files=True) context += formatted_context + "\n\n" - if entry['content'].get('working_directory'): - working_directories.add(entry['content']['working_directory']) + if entry['content'].get('working_directory'): + normalized_path = _normalize_working_directory(entry['content']['working_directory']) + if normalized_path: + working_directories.add(str(normalized_path)) else: context += entry['content'] + "\n" elif entry['role'] == 'assistant': diff --git a/backend/app/utils/file_utils.py b/backend/app/utils/file_utils.py index caceb9847..f87000ffb 100644 --- a/backend/app/utils/file_utils.py +++ b/backend/app/utils/file_utils.py @@ -1,20 +1,39 @@ """File system utilities.""" +from pathlib import Path from app.component.environment import env from app.model.chat import Chat +from utils.path_safety import sanitize_path + + +def _resolve_and_validate_path(path: str | Path, fallback: Path, allowed_root: Path) -> Path: + """ + Resolve a candidate path and ensure it stays within the allowed working directory root. + Falls back to the provided safe path on any validation failure. + """ + sanitized = sanitize_path(path, allowed_root) + return sanitized if sanitized else fallback def get_working_directory(options: Chat, task_lock=None) -> str: """ Get the correct working directory for file operations. - First checks if there's an updated path from improve API call, - then falls back to environment variable or default path. + Uses a sanitized, canonical path based on user/project/task identifiers. """ if not task_lock: from app.service.task import get_task_lock_if_exists task_lock = get_task_lock_if_exists(options.project_id) - + + allowed_root = (Path.home() / "eigent").resolve() + base_path = Path(options.file_save_path()).resolve() + if task_lock and hasattr(task_lock, 'new_folder_path') and task_lock.new_folder_path: - return str(task_lock.new_folder_path) - else: - return env("file_save_path", options.file_save_path()) \ No newline at end of file + safe_path = _resolve_and_validate_path(task_lock.new_folder_path, base_path, allowed_root) + return str(safe_path) + + env_path = env("file_save_path") + if env_path: + safe_path = _resolve_and_validate_path(env_path, base_path, allowed_root) + return str(safe_path) + + return str(base_path) diff --git a/electron/main/index.ts b/electron/main/index.ts index fa62d7fa3..971232d62 100644 --- a/electron/main/index.ts +++ b/electron/main/index.ts @@ -1148,7 +1148,7 @@ async function createWindow() { // Use a dedicated partition for main window to isolate from webviews // This ensures main window's auth data (localStorage) is stored separately and persists across restarts partition: 'persist:main_window', - webSecurity: false, + webSecurity: true, preload, nodeIntegration: true, contextIsolation: true, diff --git a/resources/scripts/install-bun.js b/resources/scripts/install-bun.js index 735d69bf0..888cc4464 100644 --- a/resources/scripts/install-bun.js +++ b/resources/scripts/install-bun.js @@ -148,7 +148,7 @@ function detectPlatformAndArch() { function detectIsMusl() { try { // Simple check for Alpine Linux which uses MUSL - const output = execSync('cat /etc/os-release').toString() + const output = fs.readFileSync('/etc/os-release', 'utf8') return output.toLowerCase().includes('alpine') } catch (error) { return false diff --git a/resources/scripts/install-uv.js b/resources/scripts/install-uv.js index b35889085..882724b2a 100644 --- a/resources/scripts/install-uv.js +++ b/resources/scripts/install-uv.js @@ -160,7 +160,7 @@ function detectPlatformAndArch() { function detectIsMusl() { try { // Simple check for Alpine Linux which uses MUSL - const output = execSync("cat /etc/os-release").toString(); + const output = fs.readFileSync("/etc/os-release", "utf8"); return output.toLowerCase().includes("alpine"); } catch (error) { return false; diff --git a/server/app/controller/mcp/proxy_controller.py b/server/app/controller/mcp/proxy_controller.py index 0ec1a0cfd..5603f4030 100644 --- a/server/app/controller/mcp/proxy_controller.py +++ b/server/app/controller/mcp/proxy_controller.py @@ -20,6 +20,15 @@ def exa_search(search: ExaSearch, key: Key = Depends(key_must)): """Search using Exa API.""" EXA_API_KEY = env_not_empty("EXA_API_KEY") + secrets_to_redact = (EXA_API_KEY,) + + def _redact_secret(text: str) -> str: + redacted = text + for secret in secrets_to_redact: + if secret: + redacted = redacted.replace(secret, "[REDACTED]") + return redacted + try: # Validate input parameters if search.num_results is not None and not 0 < search.num_results <= 100: @@ -81,7 +90,11 @@ def exa_search(search: ExaSearch, key: Key = Depends(key_must)): logger.warning("Exa search validation error", extra={"error": str(e)}) raise HTTPException(status_code=500, detail="Internal server error") except Exception as e: - logger.error("Exa search failed", extra={"query": search.query, "error": str(e)}, exc_info=True) + logger.error( + "Exa search failed", + extra={"query": search.query, "error_type": type(e).__name__, "error": _redact_secret(str(e))}, + exc_info=False, + ) raise HTTPException(status_code=500, detail="Internal server error") @@ -93,6 +106,25 @@ def google_search(query: str, search_type: str = "web", key: Key = Depends(key_m GOOGLE_API_KEY = env_not_empty("GOOGLE_API_KEY") # https://cse.google.com/cse/all SEARCH_ENGINE_ID = env_not_empty("SEARCH_ENGINE_ID") + secrets_to_redact = (GOOGLE_API_KEY, SEARCH_ENGINE_ID) + + def _redact_secret(text: str) -> str: + redacted = text + for secret in secrets_to_redact: + if secret and isinstance(redacted, str): + redacted = redacted.replace(secret, "[REDACTED]") + return redacted + + def _redact_obj(obj): + """Recursively redact secrets from all string fields in a dict/list structure.""" + if isinstance(obj, dict): + return {k: _redact_obj(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [_redact_obj(item) for item in obj] + elif isinstance(obj, str): + return _redact_secret(obj) + else: + return obj # Using the first page start_page_idx = 1 @@ -183,14 +215,27 @@ def google_search(query: str, search_type: str = "web", key: Key = Depends(key_m } responses.append(response) - logger.info("Google search completed", extra={"query": query, "search_type": search_type, "result_count": len(responses)}) + logger.info("Google search completed", extra={"query": _redact_secret(query), "search_type": _redact_secret(search_type), "result_count": len(responses)}) else: error_info = data.get("error", {}) - logger.error("Google search API error", extra={"query": query, "api_error": error_info}) + sanitized_error = _redact_obj(error_info) + logger.error( + "Google search API error", + extra={"query": _redact_secret(query), "search_type": _redact_secret(search_type)}, + ) raise HTTPException(status_code=500, detail="Internal server error") except Exception as e: - logger.error("Google search failed", extra={"query": query, "search_type": search_type, "error": str(e)}, exc_info=True) + logger.error( + "Google search failed", + extra={ + "query": _redact_secret(query), + "search_type": _redact_secret(search_type), + "error_type": type(e).__name__, + "error": _redact_secret(str(e)), + }, + exc_info=False, + ) raise HTTPException(status_code=500, detail="Internal server error") return responses \ No newline at end of file diff --git a/server/app/controller/oauth/oauth_controller.py b/server/app/controller/oauth/oauth_controller.py index c43e50973..438cef525 100644 --- a/server/app/controller/oauth/oauth_controller.py +++ b/server/app/controller/oauth/oauth_controller.py @@ -1,3 +1,4 @@ +from urllib.parse import urlencode, quote from fastapi import APIRouter, Request, HTTPException from fastapi.responses import RedirectResponse, JSONResponse, HTMLResponse from app.component.environment import env @@ -34,33 +35,35 @@ def oauth_login(app: str, request: Request, state: Optional[str] = None): logger.error("OAuth login failed", extra={"provider": app, "error": str(e)}, exc_info=True) raise HTTPException(status_code=400, detail="OAuth login failed") - +ALLOWED_OAUTH_PROVIDERS = {"slack", "notion", "x", "googlesuite"} @router.get("/{app}/callback", name="OAuth Callback") @traceroot.trace() def oauth_callback(app: str, request: Request, code: Optional[str] = None, state: Optional[str] = None): """Handle OAuth provider callback and redirect to client app.""" - if not code: - logger.warning("OAuth callback missing code", extra={"provider": app}) - raise HTTPException(status_code=400, detail="Missing code parameter") + import re + CODE_STATE_REGEX = re.compile(r'^[A-Za-z0-9_\-]+$') + from starlette.datastructures import URL + + if app not in ALLOWED_OAUTH_PROVIDERS: + logger.warning("Invalid OAuth provider", extra={"provider": app, "code": code}) + raise HTTPException(status_code=400, detail="Invalid OAuth provider") + if not code or not CODE_STATE_REGEX.match(code): + logger.warning("OAuth callback missing or invalid code", extra={"provider": app, "code": code}) + raise HTTPException(status_code=400, detail="Missing or invalid code parameter") + if state and not CODE_STATE_REGEX.match(state): + logger.warning("OAuth callback invalid state", extra={"provider": app, "state": state}) + raise HTTPException(status_code=400, detail="Invalid state parameter") logger.info("OAuth callback received", extra={"provider": app, "has_state": state is not None}) - - redirect_url = f"eigent://callback/oauth?provider={app}&code={code}&state={state}" - html_content = f""" - - - OAuth Callback - - - -

Redirecting, please wait...

- - - - """ - return HTMLResponse(content=html_content) + + base_url = URL("eigent://callback/oauth") + redirect_url = base_url.include_query_params( + provider=app, + code=code, + state=state or "", + ) + + return RedirectResponse(str(redirect_url)) @router.post("/{app}/token", name="OAuth Fetch Token") diff --git a/server/app/controller/redirect_controller.py b/server/app/controller/redirect_controller.py index 3695a8fb4..a90e20e2e 100644 --- a/server/app/controller/redirect_controller.py +++ b/server/app/controller/redirect_controller.py @@ -1,72 +1,20 @@ -import json -from fastapi import APIRouter, Depends, Request -from fastapi_babel import _ -from fastapi.responses import HTMLResponse - +import re +from fastapi import APIRouter, Request,HTTPException +from fastapi.responses import RedirectResponse +from utils import traceroot_wrapper as traceroot +logger = traceroot.get_logger("server_redirect_controller") router = APIRouter(tags=["Redirect"]) @router.get("/redirect/callback") def redirect_callback(code: str, request: Request): - cookies = request.cookies - cookies_json = json.dumps(cookies) + from starlette.datastructures import URL + + if not re.match(r'^[A-Za-z0-9_-]+$', code): + logger.warning("redirect callback invalid code", extra={"code": code}) + raise HTTPException(status_code=400, detail="Invalid state parameter") - html_content = f""" - - - - - - Authorization successful - - - -
-

Authorization Successful

-

Redirecting to application...

-
Please wait...
-
- - - - """ - return HTMLResponse(content=html_content) + base_url = URL("eigent://callback") + redirect_url = base_url.include_query_params(code=code) + return RedirectResponse(str(redirect_url)) \ No newline at end of file diff --git a/server/app/model/chat/chat_snpshot.py b/server/app/model/chat/chat_snpshot.py index a1cb3a98a..7b39fc717 100644 --- a/server/app/model/chat/chat_snpshot.py +++ b/server/app/model/chat/chat_snpshot.py @@ -1,56 +1,71 @@ -from typing import Optional -from sqlalchemy import Column, Integer, text -from sqlmodel import Field -from app.model.abstract.model import AbstractModel, DefaultTimes -from pydantic import BaseModel -import os -import base64 -import time - -from app.component.sqids import encode_user_id - - -class ChatSnapshot(AbstractModel, DefaultTimes, table=True): - id: int = Field(default=None, primary_key=True) - user_id: int = Field(sa_column=(Column(Integer, server_default=text("0")))) - api_task_id: str = Field(index=True) - camel_task_id: str = Field(index=True) - browser_url: str - image_path: str - - @classmethod - def get_user_dir(cls, user_id: int) -> str: - return os.path.join("app", "public", "upload", encode_user_id(user_id)) - - @classmethod - def caclDir(cls, path: str) -> float: - """Return disk usage of path directory (in MB, rounded to 2 decimal places)""" - total_size = 0 - for dirpath, dirnames, filenames in os.walk(path): - for f in filenames: - fp = os.path.join(dirpath, f) - if os.path.isfile(fp): - total_size += os.path.getsize(fp) - size_mb = total_size / (1024 * 1024) - return round(size_mb, 2) - - -class ChatSnapshotIn(BaseModel): - api_task_id: str - user_id: Optional[int] = None - camel_task_id: str - browser_url: str - image_base64: str - - @staticmethod - def save_image(user_id: int, api_task_id: str, image_base64: str) -> str: - if "," in image_base64: - image_base64 = image_base64.split(",", 1)[1] - user_dir = encode_user_id(user_id) - folder = os.path.join("app", "public", "upload", user_dir, api_task_id) - os.makedirs(folder, exist_ok=True) - filename = f"{int(time.time() * 1000)}.jpg" - file_path = os.path.join(folder, filename) - with open(file_path, "wb") as f: - f.write(base64.b64decode(image_base64)) - return f"/public/upload/{user_dir}/{api_task_id}/{filename}" +from typing import Optional +from sqlalchemy import Column, Integer, text +from sqlmodel import Field +from app.model.abstract.model import AbstractModel, DefaultTimes +from pydantic import BaseModel, field_validator +from pathlib import Path +import os +import base64 +import time + +from app.component.sqids import encode_user_id +from utils.path_safety import safe_component, sanitize_path + + +UPLOAD_ROOT = (Path("app") / "public" / "upload").resolve() + + +class ChatSnapshot(AbstractModel, DefaultTimes, table=True): + id: int = Field(default=None, primary_key=True) + user_id: int = Field(sa_column=(Column(Integer, server_default=text("0")))) + api_task_id: str = Field(index=True) + camel_task_id: str = Field(index=True) + browser_url: str + image_path: str + + @classmethod + def get_user_dir(cls, user_id: int) -> str: + return os.path.join("app", "public", "upload", encode_user_id(user_id)) + + @classmethod + def caclDir(cls, path: str) -> float: + """Return disk usage of path directory (in MB, rounded to 2 decimal places)""" + total_size = 0 + for dirpath, dirnames, filenames in os.walk(path): + for f in filenames: + fp = os.path.join(dirpath, f) + if os.path.isfile(fp): + total_size += os.path.getsize(fp) + size_mb = total_size / (1024 * 1024) + return round(size_mb, 2) + + +class ChatSnapshotIn(BaseModel): + api_task_id: str + user_id: Optional[int] = None + camel_task_id: str + browser_url: str + image_base64: str + + @field_validator("api_task_id", "camel_task_id") + @classmethod + def validate_ids(cls, value: str, info): + return safe_component(value, info.field_name) + + @staticmethod + def save_image(user_id: int, api_task_id: str, image_base64: str) -> str: + if "," in image_base64: + image_base64 = image_base64.split(",", 1)[1] + safe_task_id = safe_component(api_task_id, "api_task_id") + user_dir = encode_user_id(user_id) + folder = sanitize_path(UPLOAD_ROOT / user_dir / safe_task_id, UPLOAD_ROOT) + if folder is None: + raise ValueError("Invalid upload path") + folder.mkdir(parents=True, exist_ok=True) + filename = f"{int(time.time() * 1000)}.jpg" + file_path = sanitize_path(folder / filename, UPLOAD_ROOT) + if file_path is None: + raise ValueError("Invalid upload path") + with open(file_path, "wb") as f: + f.write(base64.b64decode(image_base64)) + return f"/public/upload/{user_dir}/{safe_task_id}/{filename}" diff --git a/src/lib/oauth.ts b/src/lib/oauth.ts index 78f61d1bb..e5de6d87a 100644 --- a/src/lib/oauth.ts +++ b/src/lib/oauth.ts @@ -211,8 +211,17 @@ export class OAuth { async random(size: number) { const mask = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~"; - const randomUints = crypto.getRandomValues(new Uint8Array(size)); - return Array.from(randomUints).map(i => mask[i % mask.length]).join(''); + const maskLength = mask.length; + const out: string[] = []; + const maxUnbiased = 256 - (256 % maskLength); // rejection sampling to avoid modulo bias + + while (out.length < size) { + const byte = crypto.getRandomValues(new Uint8Array(1))[0]; + if (byte >= maxUnbiased) continue; + out.push(mask[byte % maskLength]); + } + + return out.join(''); } } diff --git a/utils/path_safety.py b/utils/path_safety.py new file mode 100644 index 000000000..8f4b4c913 --- /dev/null +++ b/utils/path_safety.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from pathlib import Path +import re +from typing import Pattern + +# Default safe pattern for path components (alphanumeric, underscore, dot, dash) +DEFAULT_SAFE_COMPONENT: Pattern[str] = re.compile(r"^[A-Za-z0-9_.-]+$") + + +def safe_component(value: str, field_name: str, pattern: Pattern[str] = DEFAULT_SAFE_COMPONENT) -> str: + """Validate a single path component against a safe pattern.""" + if not pattern.fullmatch(value): + raise ValueError(f"Invalid characters in {field_name}") + return value + + +def sanitize_path(path_value: str | Path | None, allowed_root: Path) -> Path | None: + """ + Resolve a path and ensure it stays under the allowed_root. + Returns the resolved Path if valid, otherwise None. + """ + if not path_value: + return None + try: + resolved = Path(path_value).expanduser().resolve() + resolved.relative_to(allowed_root) + return resolved + except Exception: + return None