video-analyzer-mcp/server.py at master · FP-sudo/video-analyzer-mcp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""Video Analyzer MCP Server.

Provides tools for video metadata extraction, frame capture,
audio transcription, and optional Gemini-based video analysis.
"""

import shutil
from pathlib import Path

from fastmcp import FastMCP

from video_utils import get_video_info as _get_video_info
from video_utils import extract_frames as _extract_frames
from transcription import transcribe_video as _transcribe_video
from gemini_analyzer import analyze_video_with_gemini as _analyze_gemini

# Module-level FastMCP server instance. The tool functions below register
# themselves on it via the @mcp.tool() decorator, and the __main__ guard at
# the bottom of the file calls mcp.run(). The `instructions` text is passed
# to FastMCP as-is.
mcp = FastMCP(
    "video-analyzer",
    instructions=(
        "Video analysis tools: extract metadata, frames, transcriptions, "
        "and optionally analyze with Gemini. Use extract_frames to get images, "
        "then Read tool to view them."
    ),
)


@mcp.tool()
def get_video_info(video_path: str) -> dict:
    """Return metadata describing a video file.

    Thin MCP tool wrapper: the path is forwarded unchanged to the
    ``video_utils.get_video_info`` helper and its result is returned as-is.

    Args:
        video_path: Absolute path to the video file.

    Returns:
        Dict with file_size_mb, duration_seconds, format_name,
        video codec/resolution/fps, and audio codec/sample_rate.
    """
    metadata = _get_video_info(video_path)
    return metadata


@mcp.tool()
def extract_frames(
    video_path: str,
    fps: float = 1.0,
    max_frames: int = 20,
    quality: int = 5,
    output_dir: str | None = None,
) -> dict:
    """Save frames from a video to disk as JPEG images.

    The images are written to disk rather than returned inline; view them
    afterwards with Claude Code's Read tool. All options are forwarded by
    keyword to the ``video_utils.extract_frames`` helper.

    Args:
        video_path: Absolute path to the video file.
        fps: Frames per second to extract (default: 1.0).
        max_frames: Maximum number of frames (default: 20).
        quality: JPEG quality 2=best, 31=worst (default: 5).
        output_dir: Directory to save frames. Auto-creates temp dir if omitted.

    Returns:
        Dict with output_dir, frame_count, and list of absolute frame_paths.
    """
    # Collect the tunables once so the helper call stays short; they are
    # still passed by keyword, exactly as named in this signature.
    extraction_options = {
        "fps": fps,
        "max_frames": max_frames,
        "quality": quality,
        "output_dir": output_dir,
    }
    return _extract_frames(video_path, **extraction_options)


@mcp.tool()
def transcribe_video(
    video_path: str,
    model: str = "base",
    language: str | None = None,
) -> dict:
    """Run Whisper speech-to-text on a video's audio track.

    Thin MCP tool wrapper around ``transcription.transcribe_video``, which
    extracts the audio and performs the transcription.

    Args:
        video_path: Absolute path to the video file.
        model: Whisper model (tiny/base/small/medium/large). Default: base.
        language: Language code (e.g. 'ja', 'en'). Auto-detect if omitted.

    Returns:
        Dict with full text, segments (start/end/text), and detected language.
    """
    whisper_options = {"model": model, "language": language}
    transcript = _transcribe_video(video_path, **whisper_options)
    return transcript


@mcp.tool()
def analyze_video_with_gemini(
    video_path: str,
    prompt: str = "Describe this video in detail.",
    model: str = "gemini-2.0-flash",
) -> dict:
    """Run an optional multimodal analysis of a video via Google Gemini.

    The video is uploaded to Gemini together with the prompt. The
    GEMINI_API_KEY environment variable must be set for this tool to work.

    Args:
        video_path: Absolute path to the video file.
        prompt: Analysis prompt to send with the video.
        model: Gemini model to use (default: gemini-2.0-flash).

    Returns:
        Dict with analysis text, or error if Gemini is not configured.
    """
    gemini_options = {"prompt": prompt, "model": model}
    analysis = _analyze_gemini(video_path, **gemini_options)
    return analysis


@mcp.tool()
def cleanup_frames(directory: str) -> dict:
    """Delete temporary frame extraction directory.

    Use this to clean up after extract_frames when you no longer need the images.

    The target is normalised first (``~`` expanded, symlinks resolved) and
    is only deleted when it passes a safety check: its final path component
    starts with ``video-analyzer-``, or it lives strictly below ``/tmp`` or
    ``/var/folders``. The temp roots themselves are never deleted.

    Args:
        directory: Path to the directory to delete (from extract_frames output_dir).

    Returns:
        Dict with a ``status`` key:

        * ``"deleted"`` / ``"not_found"`` - accompanied by ``path``.
        * ``"refused"`` / ``"error"`` - accompanied by ``reason``.
    """
    # An empty string would resolve to the current working directory, which
    # is never a legitimate clean-up target; refuse before touching the disk.
    if not directory or not directory.strip():
        return {
            "status": "refused",
            "reason": "Safety check: no directory was specified.",
        }

    path = Path(directory).expanduser().resolve()

    # `path` is fully resolved, so the temp roots must be compared in
    # resolved form too: where /tmp or /var/folders is a symlink (e.g. to
    # /private/tmp), a literal "/tmp/" prefix test can never match. The
    # literal spellings are kept as well so nothing previously accepted is
    # rejected now.
    temp_roots = set()
    for root in ("/tmp", "/var/folders"):
        temp_roots.add(Path(root))
        temp_roots.add(Path(root).resolve())

    # NOTE(review): the name-prefix rule accepts a matching directory
    # anywhere on disk (e.g. a checkout named "video-analyzer-mcp").
    # Consider limiting it to the temp roots once the naming scheme used by
    # extract_frames for auto-created directories is confirmed.
    has_known_prefix = path.name.startswith("video-analyzer-")
    # `in path.parents` means "strictly below": a root is not its own parent.
    under_temp_root = any(root in path.parents for root in temp_roots)

    if not (has_known_prefix or under_temp_root):
        return {
            "status": "refused",
            "reason": f"Safety check: will only delete directories starting with "
                      f"'video-analyzer-' or under /tmp. Got: {path}",
        }

    if not path.exists():
        return {"status": "not_found", "path": str(path)}

    if not path.is_dir():
        return {"status": "error", "reason": f"Not a directory: {path}"}

    # Report filesystem failures (permissions, files in use, ...) in the same
    # dict shape as the other failure paths instead of raising.
    try:
        shutil.rmtree(path)
    except OSError as exc:
        return {"status": "error", "reason": f"Failed to delete {path}: {exc}"}

    return {"status": "deleted", "path": str(path)}


# Start the MCP server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    mcp.run()