Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions astrbot/core/backup/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ def _get_major_version(version_str: str) -> str:
return "0.0"


def _validate_path_within(target_path: Path, base_dir: Path) -> bool:
"""Validate that target_path is within base_dir after resolving symlinks.

Prevents path traversal attacks (CWE-22) by ensuring the resolved
target path is relative to the resolved base directory.
"""
try:
resolved = target_path.resolve(strict=False)
base_resolved = base_dir.resolve(strict=False)
return resolved.is_relative_to(base_resolved)
Comment on lines +62 to +71
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): Consider Python version compatibility and explicit strict behavior in _validate_path_within.

Path.is_relative_to was added in Python 3.9; if we still support 3.8 or earlier, this will raise at runtime. Also, Path.resolve() currently relies on the implicit strict=False default—please pass strict=False explicitly so the behavior is clear and stable if the default ever changes.

except (OSError, ValueError):
Comment on lines +68 to +72
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚨 suggestion (security): Broaden exception handling here to cover potential symlink loop errors

Path.resolve can raise RuntimeError on symlink loops. Since this helper is meant to harden against malicious paths, consider adding RuntimeError to the caught exceptions so these cases return False instead of propagating and potentially aborting the import.

Suggested implementation:

    try:
        resolved = target_path.resolve(strict=False)
        base_resolved = base_dir.resolve(strict=False)
        return resolved.is_relative_to(base_resolved)
    except (OSError, ValueError, RuntimeError):
        return False

return False
Comment on lines +62 to +73
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The current implementation of _validate_path_within uses string manipulation and os.sep to verify path boundaries. While functional, using pathlib.Path.relative_to is more idiomatic and robust. It correctly handles edge cases like the root directory (where str(base_resolved) + os.sep might result in // on Unix, causing startswith to fail) and is more readable.

Additionally, note that base_dir.resolve() is called every time this function is invoked. Since the base directory is constant within each import loop (e.g., kb_dir, attachments_dir), you could further optimize performance by resolving the base directory once in the caller and passing it to this validator.

Suggested change
def _validate_path_within(target_path: Path, base_dir: Path) -> bool:
"""Validate that target_path is within base_dir after resolving symlinks.
Prevents path traversal attacks (CWE-22) by ensuring the resolved
target path starts with the resolved base directory.
"""
try:
resolved = target_path.resolve()
base_resolved = base_dir.resolve()
return resolved == base_resolved or str(resolved).startswith(str(base_resolved) + os.sep)
except (OSError, ValueError):
return False
def _validate_path_within(target_path: Path, base_dir: Path) -> bool:
"""Validate that target_path is within base_dir after resolving symlinks.
Prevents path traversal attacks (CWE-22) by ensuring the resolved
target path is a subpath of the resolved base directory.
"""
try:
resolved = target_path.resolve()
base_resolved = base_dir.resolve()
resolved.relative_to(base_resolved)
return True
except (OSError, ValueError):
return False



CMD_CONFIG_FILE_PATH = os.path.join(get_astrbot_data_path(), "cmd_config.json")
KB_PATH = get_astrbot_knowledge_base_path()
DEFAULT_PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT = 5
Expand Down Expand Up @@ -765,6 +779,10 @@ async def _import_knowledge_bases(
try:
rel_path = name[len(media_prefix) :]
target_path = kb_dir / rel_path
# Validate path is within kb directory (CWE-22)
if not _validate_path_within(target_path, kb_dir):
logger.warning(f"媒体文件路径越界,已跳过: {target_path}")
continue
target_path.parent.mkdir(parents=True, exist_ok=True)
with zf.open(name) as src, open(target_path, "wb") as dst:
dst.write(src.read())
Expand Down Expand Up @@ -827,6 +845,11 @@ async def _import_attachments(
else:
target_path = attachments_dir / os.path.basename(name)

# Validate path is within attachments directory (CWE-22)
if not _validate_path_within(target_path, attachments_dir):
logger.warning(f"附件路径越界,已跳过: {target_path}")
continue

target_path.parent.mkdir(parents=True, exist_ok=True)
with zf.open(name) as src, open(target_path, "wb") as dst:
dst.write(src.read())
Expand Down Expand Up @@ -904,6 +927,10 @@ async def _import_directories(
continue

target_path = target_dir / rel_path
# Validate path is within target directory (CWE-22)
if not _validate_path_within(target_path, target_dir):
result.add_warning(f"文件路径越界,已跳过: {name}")
continue
target_path.parent.mkdir(parents=True, exist_ok=True)

with zf.open(name) as src, open(target_path, "wb") as dst:
Expand Down
Loading