From 61d88ba2e0ea09acf52ad736d8f5c64904aaa2af Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 05:24:39 +0000 Subject: [PATCH 1/3] Add LLM RAG + MCP server system for BrownserverN5 Complete self-hosted system for document RAG with MCP server access: - FastAPI backend with ChromaDB vector store and sentence-transformers - MCP server (SSE + Streamable HTTP) with API key authentication - React web UI for document management, search, SMB browsing, and config - Docker Compose orchestration for Unraid deployment - SMB share browsing and document ingestion from LAN shares https://claude.ai/code/session_01ByAJeYptU8ZosBaBVw1rbd --- rag-mcp-server/.dockerignore | 7 + rag-mcp-server/.env.example | 15 + rag-mcp-server/.gitignore | 8 + rag-mcp-server/backend/Dockerfile | 19 + rag-mcp-server/backend/app/__init__.py | 0 rag-mcp-server/backend/app/config.py | 44 ++ rag-mcp-server/backend/app/main.py | 31 ++ rag-mcp-server/backend/app/models/__init__.py | 0 rag-mcp-server/backend/app/models/schemas.py | 89 ++++ .../backend/app/routers/__init__.py | 0 rag-mcp-server/backend/app/routers/admin.py | 77 ++++ .../backend/app/routers/documents.py | 110 +++++ rag-mcp-server/backend/app/routers/smb.py | 107 +++++ .../backend/app/services/__init__.py | 0 rag-mcp-server/backend/app/services/auth.py | 73 ++++ .../backend/app/services/document_parser.py | 83 ++++ .../backend/app/services/rag_engine.py | 145 +++++++ .../backend/app/services/smb_browser.py | 106 +++++ rag-mcp-server/backend/requirements.txt | 17 + rag-mcp-server/data/documents/.gitkeep | 0 rag-mcp-server/deploy.sh | 52 +++ rag-mcp-server/docker-compose.yml | 58 +++ rag-mcp-server/frontend/Dockerfile | 17 + rag-mcp-server/frontend/index.html | 13 + rag-mcp-server/frontend/nginx.conf | 30 ++ rag-mcp-server/frontend/package.json | 23 + rag-mcp-server/frontend/src/App.jsx | 54 +++ rag-mcp-server/frontend/src/index.css | 409 ++++++++++++++++++ rag-mcp-server/frontend/src/main.jsx | 10 + 
rag-mcp-server/frontend/src/pages/APIKeys.jsx | 154 +++++++ .../frontend/src/pages/Dashboard.jsx | 85 ++++ .../frontend/src/pages/Documents.jsx | 201 +++++++++ .../frontend/src/pages/MCPConfig.jsx | 152 +++++++ .../frontend/src/pages/SMBBrowser.jsx | 171 ++++++++ rag-mcp-server/frontend/src/pages/Search.jsx | 98 +++++ rag-mcp-server/frontend/src/services/api.js | 74 ++++ rag-mcp-server/frontend/vite.config.js | 11 + rag-mcp-server/mcp_server/Dockerfile | 12 + rag-mcp-server/mcp_server/requirements.txt | 5 + rag-mcp-server/mcp_server/server.py | 378 ++++++++++++++++ rag-mcp-server/stop.sh | 4 + 41 files changed, 2942 insertions(+) create mode 100644 rag-mcp-server/.dockerignore create mode 100644 rag-mcp-server/.env.example create mode 100644 rag-mcp-server/.gitignore create mode 100644 rag-mcp-server/backend/Dockerfile create mode 100644 rag-mcp-server/backend/app/__init__.py create mode 100644 rag-mcp-server/backend/app/config.py create mode 100644 rag-mcp-server/backend/app/main.py create mode 100644 rag-mcp-server/backend/app/models/__init__.py create mode 100644 rag-mcp-server/backend/app/models/schemas.py create mode 100644 rag-mcp-server/backend/app/routers/__init__.py create mode 100644 rag-mcp-server/backend/app/routers/admin.py create mode 100644 rag-mcp-server/backend/app/routers/documents.py create mode 100644 rag-mcp-server/backend/app/routers/smb.py create mode 100644 rag-mcp-server/backend/app/services/__init__.py create mode 100644 rag-mcp-server/backend/app/services/auth.py create mode 100644 rag-mcp-server/backend/app/services/document_parser.py create mode 100644 rag-mcp-server/backend/app/services/rag_engine.py create mode 100644 rag-mcp-server/backend/app/services/smb_browser.py create mode 100644 rag-mcp-server/backend/requirements.txt create mode 100644 rag-mcp-server/data/documents/.gitkeep create mode 100755 rag-mcp-server/deploy.sh create mode 100644 rag-mcp-server/docker-compose.yml create mode 100644 rag-mcp-server/frontend/Dockerfile 
create mode 100644 rag-mcp-server/frontend/index.html create mode 100644 rag-mcp-server/frontend/nginx.conf create mode 100644 rag-mcp-server/frontend/package.json create mode 100644 rag-mcp-server/frontend/src/App.jsx create mode 100644 rag-mcp-server/frontend/src/index.css create mode 100644 rag-mcp-server/frontend/src/main.jsx create mode 100644 rag-mcp-server/frontend/src/pages/APIKeys.jsx create mode 100644 rag-mcp-server/frontend/src/pages/Dashboard.jsx create mode 100644 rag-mcp-server/frontend/src/pages/Documents.jsx create mode 100644 rag-mcp-server/frontend/src/pages/MCPConfig.jsx create mode 100644 rag-mcp-server/frontend/src/pages/SMBBrowser.jsx create mode 100644 rag-mcp-server/frontend/src/pages/Search.jsx create mode 100644 rag-mcp-server/frontend/src/services/api.js create mode 100644 rag-mcp-server/frontend/vite.config.js create mode 100644 rag-mcp-server/mcp_server/Dockerfile create mode 100644 rag-mcp-server/mcp_server/requirements.txt create mode 100644 rag-mcp-server/mcp_server/server.py create mode 100755 rag-mcp-server/stop.sh diff --git a/rag-mcp-server/.dockerignore b/rag-mcp-server/.dockerignore new file mode 100644 index 00000000..173d7515 --- /dev/null +++ b/rag-mcp-server/.dockerignore @@ -0,0 +1,7 @@ +node_modules/ +dist/ +.git +.env +data/ +__pycache__/ +*.pyc diff --git a/rag-mcp-server/.env.example b/rag-mcp-server/.env.example new file mode 100644 index 00000000..e8b79d03 --- /dev/null +++ b/rag-mcp-server/.env.example @@ -0,0 +1,15 @@ +# API Key for external LLM access to the MCP server +# Generate with: python3 -c "import secrets; print(secrets.token_urlsafe(32))" +MCP_API_KEY=your-api-key-here + +# SMB Configuration (for accessing LAN file shares) +SMB_DEFAULT_USERNAME=guest +SMB_DEFAULT_PASSWORD= +SMB_DEFAULT_DOMAIN=WORKGROUP + +# Embedding model (sentence-transformers model name) +EMBEDDING_MODEL=all-MiniLM-L6-v2 + +# Server hostname (for display in UI) +SERVER_HOSTNAME=BrownserverN5 +SERVER_IP=192.168.1.52 diff --git 
a/rag-mcp-server/.gitignore b/rag-mcp-server/.gitignore new file mode 100644 index 00000000..d3302ff9 --- /dev/null +++ b/rag-mcp-server/.gitignore @@ -0,0 +1,8 @@ +node_modules/ +dist/ +.env +data/chromadb/ +data/config/ +__pycache__/ +*.pyc +.venv/ diff --git a/rag-mcp-server/backend/Dockerfile b/rag-mcp-server/backend/Dockerfile new file mode 100644 index 00000000..0e148e1b --- /dev/null +++ b/rag-mcp-server/backend/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-slim + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Pre-download the embedding model +RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')" + +COPY . . + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/rag-mcp-server/backend/app/__init__.py b/rag-mcp-server/backend/app/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rag-mcp-server/backend/app/config.py b/rag-mcp-server/backend/app/config.py new file mode 100644 index 00000000..e42b20cf --- /dev/null +++ b/rag-mcp-server/backend/app/config.py @@ -0,0 +1,44 @@ +import json +import os +from pathlib import Path + +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + chroma_persist_dir: str = "/app/data/chromadb" + documents_dir: str = "/app/data/documents" + config_dir: str = "/app/data/config" + embedding_model: str = "all-MiniLM-L6-v2" + server_hostname: str = "BrownserverN5" + server_ip: str = "192.168.1.52" + + class Config: + env_file = ".env" + + +settings = Settings() + +CONFIG_FILE = Path(settings.config_dir) / "server_config.json" + + +def _ensure_config(): + Path(settings.config_dir).mkdir(parents=True, exist_ok=True) + if not CONFIG_FILE.exists(): + default = { + "api_keys": [], + "smb_shares": [], + "collections": 
["default"], + "mcp_enabled": True, + } + CONFIG_FILE.write_text(json.dumps(default, indent=2)) + + +def load_config() -> dict: + _ensure_config() + return json.loads(CONFIG_FILE.read_text()) + + +def save_config(config: dict): + _ensure_config() + CONFIG_FILE.write_text(json.dumps(config, indent=2)) diff --git a/rag-mcp-server/backend/app/main.py b/rag-mcp-server/backend/app/main.py new file mode 100644 index 00000000..788fd077 --- /dev/null +++ b/rag-mcp-server/backend/app/main.py @@ -0,0 +1,31 @@ +import logging + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from app.routers import admin, documents, smb + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s: %(message)s") + +app = FastAPI( + title="RAG MCP Server - Backend API", + description="Document RAG engine with MCP server for BrownserverN5", + version="1.0.0", +) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(documents.router) +app.include_router(smb.router) +app.include_router(admin.router) + + +@app.get("/health") +async def health(): + return {"status": "ok", "service": "rag-backend"} diff --git a/rag-mcp-server/backend/app/models/__init__.py b/rag-mcp-server/backend/app/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rag-mcp-server/backend/app/models/schemas.py b/rag-mcp-server/backend/app/models/schemas.py new file mode 100644 index 00000000..a10aa8cc --- /dev/null +++ b/rag-mcp-server/backend/app/models/schemas.py @@ -0,0 +1,89 @@ +from typing import Optional + +from pydantic import BaseModel + + +class DocumentUpload(BaseModel): + collection: str = "default" + + +class QueryRequest(BaseModel): + query: str + collection: str = "default" + n_results: int = 5 + + +class QueryResult(BaseModel): + content: str + source: str + score: float + metadata: dict + + +class QueryResponse(BaseModel): 
+ results: list[QueryResult] + query: str + + +class CollectionInfo(BaseModel): + name: str + document_count: int + + +class SMBShareConfig(BaseModel): + name: str + server: str + share: str + username: str = "guest" + password: str = "" + domain: str = "WORKGROUP" + path: str = "/" + + +class SMBBrowseRequest(BaseModel): + server: str + share: str + path: str = "/" + username: str = "guest" + password: str = "" + domain: str = "WORKGROUP" + + +class SMBFileEntry(BaseModel): + name: str + is_directory: bool + size: int + last_modified: str + + +class APIKeyCreate(BaseModel): + name: str + description: str = "" + + +class APIKeyResponse(BaseModel): + name: str + key_prefix: str + description: str + created_at: str + active: bool + + +class IngestSMBRequest(BaseModel): + server: str + share: str + path: str + username: str = "guest" + password: str = "" + domain: str = "WORKGROUP" + collection: str = "default" + recursive: bool = True + + +class ServerStatus(BaseModel): + hostname: str + ip: str + mcp_enabled: bool + total_documents: int + collections: list[str] + api_keys_count: int diff --git a/rag-mcp-server/backend/app/routers/__init__.py b/rag-mcp-server/backend/app/routers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rag-mcp-server/backend/app/routers/admin.py b/rag-mcp-server/backend/app/routers/admin.py new file mode 100644 index 00000000..c1453b4a --- /dev/null +++ b/rag-mcp-server/backend/app/routers/admin.py @@ -0,0 +1,77 @@ +import logging + +from fastapi import APIRouter, HTTPException + +from app.config import load_config, save_config, settings +from app.models.schemas import APIKeyCreate, APIKeyResponse, ServerStatus +from app.services import auth, rag_engine + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/api/admin", tags=["admin"]) + + +@router.get("/status", response_model=ServerStatus) +async def get_status(): + config = load_config() + collections = rag_engine.list_collections() + total_docs = 
sum(c["document_count"] for c in collections) + return ServerStatus( + hostname=settings.server_hostname, + ip=settings.server_ip, + mcp_enabled=config.get("mcp_enabled", True), + total_documents=total_docs, + collections=[c["name"] for c in collections], + api_keys_count=len([k for k in config.get("api_keys", []) if k.get("active", True)]), + ) + + +@router.post("/api-keys") +async def create_api_key(req: APIKeyCreate): + result = auth.create_api_key(req.name, req.description) + return { + "name": result["name"], + "key": result["raw_key"], + "key_prefix": result["key_prefix"], + "description": result["description"], + "created_at": result["created_at"], + "message": "Save this key - it cannot be retrieved again", + } + + +@router.get("/api-keys", response_model=list[APIKeyResponse]) +async def list_api_keys(): + return auth.list_api_keys() + + +@router.delete("/api-keys/{name}") +async def delete_api_key(name: str): + if auth.delete_api_key(name): + return {"deleted": True, "name": name} + raise HTTPException(404, f"API key not found: {name}") + + +@router.post("/api-keys/{name}/revoke") +async def revoke_api_key(name: str): + if auth.revoke_api_key(name): + return {"revoked": True, "name": name} + raise HTTPException(404, f"API key not found: {name}") + + +@router.post("/mcp/toggle") +async def toggle_mcp(enabled: bool = True): + config = load_config() + config["mcp_enabled"] = enabled + save_config(config) + return {"mcp_enabled": enabled} + + +@router.get("/config") +async def get_config(): + config = load_config() + # Strip sensitive data + safe_config = {**config} + safe_config["api_keys"] = [ + {"name": k["name"], "key_prefix": k["key_prefix"], "active": k.get("active", True)} + for k in config.get("api_keys", []) + ] + return safe_config diff --git a/rag-mcp-server/backend/app/routers/documents.py b/rag-mcp-server/backend/app/routers/documents.py new file mode 100644 index 00000000..41256d92 --- /dev/null +++ 
b/rag-mcp-server/backend/app/routers/documents.py
@@ -0,0 +1,140 @@
+import logging
+import os
+import shutil
+from pathlib import Path
+
+from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+
+from app.config import settings
+from app.models.schemas import QueryRequest, QueryResponse, QueryResult
+from app.services import rag_engine
+from app.services.document_parser import can_parse, parse_file
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/api/documents", tags=["documents"])
+
+
+def _safe_component(value: str | None, kind: str) -> str:
+    """Return ``value`` if it is safe to use as a single path segment.
+
+    Collection and file names come from the client and are joined onto
+    ``settings.documents_dir``; rejecting separators and dot-segments keeps
+    every resulting path inside that directory.
+
+    Raises:
+        HTTPException: 400 if the value is empty, ".", ".." or contains
+            a path separator or NUL byte.
+    """
+    if not value or value in (".", "..") or any(c in value for c in "/\\\0"):
+        raise HTTPException(400, f"Invalid {kind}: {value!r}")
+    return value
+
+
+@router.post("/upload")
+async def upload_document(
+    file: UploadFile = File(...),
+    collection: str = Form("default"),
+):
+    """Parse an uploaded file, keep a copy on disk and index its text."""
+    filename = _safe_component(file.filename, "filename")
+    collection = _safe_component(collection, "collection")
+    if not can_parse(filename):
+        raise HTTPException(400, f"Unsupported file type: {filename}")
+
+    content = await file.read()
+    text = parse_file(content=content, filename=filename)
+    if not text.strip():
+        raise HTTPException(400, "Could not extract text from file")
+
+    # Save file to disk; /reindex rebuilds the collection from these copies
+    save_dir = Path(settings.documents_dir) / collection
+    save_dir.mkdir(parents=True, exist_ok=True)
+    save_path = save_dir / filename
+    save_path.write_bytes(content)
+
+    chunks = rag_engine.ingest_text(text, source=filename, collection_name=collection)
+    return {"filename": filename, "collection": collection, "chunks_created": chunks}
+
+
+@router.post("/query", response_model=QueryResponse)
+async def query_documents(req: QueryRequest):
+    """Run a similarity search and wrap the hits in a QueryResponse."""
+    results = rag_engine.query(req.query, collection_name=req.collection, n_results=req.n_results)
+    return QueryResponse(
+        results=[QueryResult(**r) for r in results],
+        query=req.query,
+    )
+
+
+@router.get("/list")
+async def list_documents(collection: str = "default"):
+    """List the document sources indexed in a collection."""
+    sources = rag_engine.list_documents(collection)
+    return {"collection": collection, "documents": sources}
+
+
+@router.delete("/{filename}")
+async def delete_document(filename: str, collection: str = "default"):
+    """Remove a document's chunks from the index and its copy from disk."""
+    filename = _safe_component(filename, "filename")
+    collection = _safe_component(collection, "collection")
+    deleted = rag_engine.delete_document(filename, collection)
+    # Also remove from disk
+    file_path = Path(settings.documents_dir) / collection / filename
+    if file_path.is_file():
+        file_path.unlink()
+    return {"deleted_chunks": deleted, "filename": filename}
+
+
+@router.post("/reindex")
+async def reindex_collection(collection: str = "default"):
+    """Drop a collection's index and rebuild it from the files saved on disk."""
+    collection = _safe_component(collection, "collection")
+    doc_dir = Path(settings.documents_dir) / collection
+    if not doc_dir.exists():
+        raise HTTPException(404, f"No documents directory for collection: {collection}")
+
+    # Delete existing collection and re-ingest. Deletion stays best-effort
+    # (the collection may not exist yet) but a failure is now logged.
+    try:
+        rag_engine.delete_collection(collection)
+    except Exception as e:
+        logger.warning(f"Could not delete collection {collection} before reindex: {e}")
+
+    total_chunks = 0
+    files_processed = 0
+    for file_path in doc_dir.iterdir():
+        if file_path.is_file() and can_parse(file_path.name):
+            text = parse_file(file_path=str(file_path))
+            if text.strip():
+                chunks = rag_engine.ingest_text(text, source=file_path.name, collection_name=collection)
+                total_chunks += chunks
+                files_processed += 1
+
+    return {
+        "collection": collection,
+        "files_processed": files_processed,
+        "total_chunks": total_chunks,
+    }
+
+
+@router.get("/collections")
+async def list_collections():
+    """List the collections known to the RAG engine."""
+    return {"collections": rag_engine.list_collections()}
+
+
+@router.post("/collections/{name}")
+async def create_collection(name: str):
+    """Create the named collection if it does not exist yet."""
+    rag_engine.get_or_create_collection(name)
+    return {"name": name, "created": True}
+
+
+@router.delete("/collections/{name}")
+async def delete_collection(name: str):
+    """Delete a collection's index; the default collection cannot be deleted."""
+    if name == "default":
+        raise HTTPException(400, "Cannot delete default collection")
+    rag_engine.delete_collection(name)
+    return {"name": name, "deleted": True}
diff --git a/rag-mcp-server/backend/app/routers/smb.py b/rag-mcp-server/backend/app/routers/smb.py
new file mode 100644
index 00000000..8c5d2525
--- /dev/null
+++ b/rag-mcp-server/backend/app/routers/smb.py
@@ -0,0 +1,117 @@
+import logging
+
+from fastapi import APIRouter, HTTPException
+
+from app.models.schemas import IngestSMBRequest, SMBBrowseRequest, SMBFileEntry
+from app.services import rag_engine, smb_browser
+from app.services.document_parser import can_parse, parse_file
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/api/smb", tags=["smb"])
+
+
+@router.post("/browse", response_model=list[SMBFileEntry])
+async def browse_smb(req: SMBBrowseRequest):
+    """List the entries of one directory on an SMB share."""
+    try:
+        entries = smb_browser.browse_share(
+            server=req.server,
+            share=req.share,
+            path=req.path,
+            username=req.username,
+            password=req.password,
+            domain=req.domain,
+        )
+        return entries
+    except Exception as e:
+        raise HTTPException(500, f"SMB browse failed: {str(e)}")
+
+
+@router.post("/shares")
+async def list_shares(server: str, username: str = "guest", password: str = "", domain: str = "WORKGROUP"):
+    """List the shares exposed by an SMB server."""
+    try:
+        shares = smb_browser.list_shares(server, username, password, domain)
+        return {"server": server, "shares": shares}
+    except Exception as e:
+        raise HTTPException(500, f"Failed to list shares: {str(e)}")
+
+
+@router.post("/ingest")
+async def ingest_from_smb(req: IngestSMBRequest):
+    """Ingest every parseable file in an SMB directory into a collection.
+
+    With ``req.recursive`` set, sub-directories are ingested as well. A file or
+    sub-directory that fails is recorded in ``errors`` and the run continues;
+    only a failure to list ``req.path`` itself raises HTTP 500.
+    """
+    try:
+        entries = smb_browser.browse_share(
+            server=req.server,
+            share=req.share,
+            path=req.path,
+            username=req.username,
+            password=req.password,
+            domain=req.domain,
+        )
+    except Exception as e:
+        raise HTTPException(500, f"SMB browse failed: {str(e)}")
+
+    total_chunks = 0
+    files_processed = 0
+    errors = []
+
+    for entry in entries:
+        if entry["is_directory"]:
+            if req.recursive:
+                # Recursively ingest subdirectories
+                sub_path = f"{req.path.rstrip('/')}/{entry['name']}"
+                try:
+                    sub_req = IngestSMBRequest(
+                        server=req.server,
+                        share=req.share,
+                        path=sub_path,
+                        username=req.username,
+                        password=req.password,
+                        domain=req.domain,
+                        collection=req.collection,
+                        recursive=True,
+                    )
+                    result = await ingest_from_smb(sub_req)
+                    total_chunks += result["total_chunks"]
+                    files_processed += result["files_processed"]
+                    # Surface per-file failures from the sub-directory instead of dropping them
+                    errors.extend(result["errors"])
+                except Exception as e:
+                    errors.append(f"{sub_path}: {str(e)}")
+            continue
+
+        if not can_parse(entry["name"]):
+            continue
+
+        file_path = f"{req.path.rstrip('/')}/{entry['name']}"
+        try:
+            content = smb_browser.read_file( +
server=req.server,
+                share=req.share,
+                path=file_path,
+                username=req.username,
+                password=req.password,
+                domain=req.domain,
+            )
+            text = parse_file(content=content, filename=entry["name"])
+            if text.strip():
+                source = f"smb://{req.server}/{req.share}{file_path}"
+                chunks = rag_engine.ingest_text(
+                    text, source=source, collection_name=req.collection
+                )
+                total_chunks += chunks
+                files_processed += 1
+        except Exception as e:
+            errors.append(f"{file_path}: {str(e)}")
+
+    return {
+        "files_processed": files_processed,
+        "total_chunks": total_chunks,
+        "errors": errors,
+    }
diff --git a/rag-mcp-server/backend/app/services/__init__.py b/rag-mcp-server/backend/app/services/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/rag-mcp-server/backend/app/services/auth.py b/rag-mcp-server/backend/app/services/auth.py
new file mode 100644
index 00000000..c9cd6a0e
--- /dev/null
+++ b/rag-mcp-server/backend/app/services/auth.py
@@ -0,0 +1,87 @@
+import hashlib
+import hmac
+import secrets
+from datetime import datetime, timezone
+
+from app.config import load_config, save_config
+
+
+def generate_api_key() -> str:
+    """Return a new random API key with the ``rmcp_`` prefix."""
+    return f"rmcp_{secrets.token_urlsafe(32)}"
+
+
+def hash_key(key: str) -> str:
+    """Return the hex SHA-256 digest of ``key``."""
+    return hashlib.sha256(key.encode()).hexdigest()
+
+
+def create_api_key(name: str, description: str = "") -> dict:
+    """Create an API key, persist its hashed entry and return it with the raw key.
+
+    The raw key is only available in this return value (under ``raw_key``);
+    the saved entry keeps the hash and a short display prefix.
+    """
+    config = load_config()
+    raw_key = generate_api_key()
+    key_entry = {
+        "name": name,
+        "key_hash": hash_key(raw_key),
+        "key_prefix": raw_key[:12] + "...",
+        "description": description,
+        "created_at": datetime.now(timezone.utc).isoformat(),
+        "active": True,
+    }
+    config.setdefault("api_keys", []).append(key_entry)
+    save_config(config)
+    return {"raw_key": raw_key, **key_entry}
+
+
+def validate_api_key(key: str) -> bool:
+    """Return True if ``key`` matches a stored key hash that is still active."""
+    # A missing or empty key can never be valid; bail out before hashing so a
+    # None from an absent header does not raise.
+    if not key:
+        return False
+    config = load_config()
+    hashed = hash_key(key)
+    for entry in config.get("api_keys", []):
+        # compare_digest avoids leaking how many leading characters matched
+        if hmac.compare_digest(entry["key_hash"], hashed) and entry.get("active", True):
+            return True
+    return False
+
+
+def list_api_keys() -> list[dict]:
+    config = +
load_config() + return [ + { + "name": e["name"], + "key_prefix": e["key_prefix"], + "description": e.get("description", ""), + "created_at": e["created_at"], + "active": e.get("active", True), + } + for e in config.get("api_keys", []) + ] + + +def revoke_api_key(name: str) -> bool: + config = load_config() + for entry in config.get("api_keys", []): + if entry["name"] == name: + entry["active"] = False + save_config(config) + return True + return False + + +def delete_api_key(name: str) -> bool: + config = load_config() + keys = config.get("api_keys", []) + original_len = len(keys) + config["api_keys"] = [e for e in keys if e["name"] != name] + if len(config["api_keys"]) < original_len: + save_config(config) + return True + return False diff --git a/rag-mcp-server/backend/app/services/document_parser.py b/rag-mcp-server/backend/app/services/document_parser.py new file mode 100644 index 00000000..1cd8639b --- /dev/null +++ b/rag-mcp-server/backend/app/services/document_parser.py @@ -0,0 +1,83 @@ +import logging +from io import BytesIO +from pathlib import Path + +import chardet + +logger = logging.getLogger(__name__) + +SUPPORTED_EXTENSIONS = { + ".txt", ".md", ".py", ".js", ".ts", ".json", ".yaml", ".yml", + ".xml", ".html", ".css", ".csv", ".log", ".cfg", ".ini", ".conf", + ".sh", ".bash", ".zsh", ".bat", ".ps1", ".sql", ".r", ".go", + ".java", ".c", ".cpp", ".h", ".hpp", ".rs", ".toml", ".zig", + ".pdf", ".docx", ".xlsx", +} + + +def can_parse(filename: str) -> bool: + return Path(filename).suffix.lower() in SUPPORTED_EXTENSIONS + + +def parse_file(file_path: str | None = None, content: bytes | None = None, filename: str = "") -> str: + if file_path: + filename = filename or file_path + with open(file_path, "rb") as f: + content = f.read() + + if content is None: + return "" + + ext = Path(filename).suffix.lower() + + try: + if ext == ".pdf": + return _parse_pdf(content) + elif ext == ".docx": + return _parse_docx(content) + elif ext == ".xlsx": + return 
_parse_xlsx(content)
+        else:
+            return _parse_text(content)
+    except Exception as e:
+        logger.error(f"Failed to parse {filename}: {e}")
+        return ""
+
+
+def _parse_text(content: bytes) -> str:
+    detected = chardet.detect(content)
+    encoding = detected.get("encoding", "utf-8") or "utf-8"
+    try:
+        return content.decode(encoding)
+    except (UnicodeDecodeError, LookupError):
+        return content.decode("utf-8", errors="replace")
+
+
+def _parse_pdf(content: bytes) -> str:
+    from PyPDF2 import PdfReader
+    reader = PdfReader(BytesIO(content))
+    text_parts = []
+    for page in reader.pages:
+        text = page.extract_text()
+        if text:
+            text_parts.append(text)
+    return "\n\n".join(text_parts)
+
+
+def _parse_docx(content: bytes) -> str:
+    from docx import Document
+    doc = Document(BytesIO(content))
+    return "\n\n".join(p.text for p in doc.paragraphs if p.text.strip())
+
+
+def _parse_xlsx(content: bytes) -> str:
+    from openpyxl import load_workbook
+    wb = load_workbook(BytesIO(content), read_only=True)
+    text_parts = []
+    for sheet in wb.sheetnames:
+        ws = wb[sheet]
+        text_parts.append(f"--- Sheet: {sheet} ---")
+        for row in ws.iter_rows(values_only=True):
+            cells = [str(c) if c is not None else "" for c in row]
+            text_parts.append("\t".join(cells))
+    return "\n".join(text_parts)
diff --git a/rag-mcp-server/backend/app/services/rag_engine.py b/rag-mcp-server/backend/app/services/rag_engine.py
new file mode 100644
index 00000000..2f626222
--- /dev/null
+++ b/rag-mcp-server/backend/app/services/rag_engine.py
@@ -0,0 +1,174 @@
+import hashlib
+import logging
+import os
+from pathlib import Path
+
+import chromadb
+from chromadb.config import Settings as ChromaSettings
+from sentence_transformers import SentenceTransformer
+
+from app.config import settings
+
+logger = logging.getLogger(__name__)
+
+_embedding_model: SentenceTransformer | None = None
+_chroma_client: chromadb.ClientAPI | None = None
+
+
+def get_embedding_model() -> SentenceTransformer:
+    """Return the shared embedding model, loading it on first use."""
+    global _embedding_model
+    if _embedding_model is None:
+        logger.info(f"Loading embedding model: {settings.embedding_model}")
+        _embedding_model = SentenceTransformer(settings.embedding_model)
+        logger.info("Embedding model loaded")
+    return _embedding_model
+
+
+def get_chroma_client() -> chromadb.ClientAPI:
+    """Return the shared persistent ChromaDB client, creating it on first use."""
+    global _chroma_client
+    if _chroma_client is None:
+        Path(settings.chroma_persist_dir).mkdir(parents=True, exist_ok=True)
+        _chroma_client = chromadb.PersistentClient(
+            path=settings.chroma_persist_dir,
+            settings=ChromaSettings(anonymized_telemetry=False),
+        )
+        logger.info(f"ChromaDB initialized at {settings.chroma_persist_dir}")
+    return _chroma_client
+
+
+def get_or_create_collection(name: str = "default"):
+    """Return the named collection, creating it with cosine distance if missing."""
+    client = get_chroma_client()
+    model = get_embedding_model()
+    dim = model.get_sentence_embedding_dimension()
+    return client.get_or_create_collection(
+        name=name,
+        metadata={"hnsw:space": "cosine", "dimension": dim},
+    )
+
+
+def chunk_text(text: str, chunk_size: int = 512, overlap: int = 64) -> list[str]:
+    """Split ``text`` into overlapping chunks of whitespace-separated words.
+
+    Consecutive chunks share ``overlap`` words. Text that contains no words
+    is returned unchanged as a single chunk.
+
+    Raises:
+        ValueError: if ``chunk_size`` is not positive or ``overlap`` is outside
+            ``[0, chunk_size)``; such values used to make the loop run forever
+            or skip words.
+    """
+    if chunk_size <= 0:
+        raise ValueError("chunk_size must be positive")
+    if not 0 <= overlap < chunk_size:
+        raise ValueError("overlap must be >= 0 and smaller than chunk_size")
+
+    words = text.split()
+    if not words:
+        return [text]
+
+    chunks = []
+    for start in range(0, len(words), chunk_size - overlap):
+        chunks.append(" ".join(words[start:start + chunk_size]))
+        # Stop once a chunk reaches the last word: one more step would only
+        # emit a tail that is already fully contained in this chunk.
+        if start + chunk_size >= len(words):
+            break
+    return chunks
+
+
+def compute_doc_id(source: str, chunk_idx: int) -> str:
+    """Return a stable 16-hex-character id derived from source and chunk index."""
+    return hashlib.sha256(f"{source}::{chunk_idx}".encode()).hexdigest()[:16]
+
+
+def ingest_text(text: str, source: str, collection_name: str = "default", metadata: dict | None = None):
+    """Chunk, embed and store ``text`` under ``source``; return the chunk count.
+
+    Existing chunks for the same ``source`` are replaced, so re-ingesting a
+    document never leaves chunks of an older, longer version behind.
+    """
+    model = get_embedding_model()
+    collection = get_or_create_collection(collection_name)
+    chunks = chunk_text(text)
+    if not chunks:
+        return 0
+
+    ids = [compute_doc_id(source, i) for i in range(len(chunks))]
+    embeddings = model.encode(chunks).tolist()
+    metadatas = [
+        {**(metadata or {}), "source": source, "chunk_index": i}
+        for i in range(len(chunks))
+    ]
+
+    # upsert only overwrites matching ids; drop the source's old chunks first
+    # so a shorter re-ingest does not keep stale higher-index chunks.
+    collection.delete(where={"source": source})
+    collection.upsert(ids=ids, documents=chunks, embeddings=embeddings, metadatas=metadatas)
+    logger.info(f"Ingested +
{len(chunks)} chunks from {source} into {collection_name}") + return len(chunks) + + +def query(query_text: str, collection_name: str = "default", n_results: int = 5) -> list[dict]: + model = get_embedding_model() + collection = get_or_create_collection(collection_name) + + if collection.count() == 0: + return [] + + query_embedding = model.encode([query_text]).tolist() + results = collection.query( + query_embeddings=query_embedding, + n_results=min(n_results, collection.count()), + include=["documents", "metadatas", "distances"], + ) + + output = [] + for doc, meta, dist in zip( + results["documents"][0], + results["metadatas"][0], + results["distances"][0], + ): + output.append({ + "content": doc, + "source": meta.get("source", "unknown"), + "score": round(1 - dist, 4), + "metadata": meta, + }) + return output + + +def delete_document(source: str, collection_name: str = "default"): + collection = get_or_create_collection(collection_name) + results = collection.get(where={"source": source}) + if results["ids"]: + collection.delete(ids=results["ids"]) + logger.info(f"Deleted {len(results['ids'])} chunks for {source}") + return len(results["ids"]) + return 0 + + +def list_documents(collection_name: str = "default") -> list[str]: + collection = get_or_create_collection(collection_name) + if collection.count() == 0: + return [] + results = collection.get(include=["metadatas"]) + sources = set() + for meta in results["metadatas"]: + if "source" in meta: + sources.add(meta["source"]) + return sorted(sources) + + +def list_collections() -> list[dict]: + client = get_chroma_client() + collections = client.list_collections() + return [{"name": c.name, "document_count": c.count()} for c in collections] + + +def delete_collection(name: str): + client = get_chroma_client() + client.delete_collection(name) + logger.info(f"Deleted collection {name}") diff --git a/rag-mcp-server/backend/app/services/smb_browser.py b/rag-mcp-server/backend/app/services/smb_browser.py new file 
mode 100644 index 00000000..53517504 --- /dev/null +++ b/rag-mcp-server/backend/app/services/smb_browser.py @@ -0,0 +1,106 @@ +import logging +from datetime import datetime, timezone +from io import BytesIO + +from smbprotocol.connection import Connection +from smbprotocol.session import Session +from smbprotocol.tree import TreeConnect +from smbprotocol.open import ( + Open, + CreateDisposition, + CreateOptions, + FileAttributes, + FilePipePrinterAccessMask, + ImpersonationLevel, + ShareAccess, +) +from smbprotocol.file_info import ( + FileInformationClass, +) +import smbprotocol + +logger = logging.getLogger(__name__) + + +def _normalize_path(path: str) -> str: + path = path.replace("/", "\\").strip("\\") + return path + + +def browse_share( + server: str, + share: str, + path: str = "/", + username: str = "guest", + password: str = "", + domain: str = "WORKGROUP", +) -> list[dict]: + """Browse files and directories in an SMB share.""" + import smbclient + + smbclient.register_session(server, username=username, password=password, port=445) + + smb_path = f"\\\\{server}\\{share}" + if path and path != "/": + normalized = _normalize_path(path) + smb_path = f"{smb_path}\\{normalized}" + + entries = [] + try: + for entry in smbclient.scandir(smb_path): + stat = entry.stat() + entries.append({ + "name": entry.name, + "is_directory": entry.is_dir(), + "size": stat.st_size if not entry.is_dir() else 0, + "last_modified": datetime.fromtimestamp( + stat.st_mtime, tz=timezone.utc + ).isoformat(), + }) + except Exception as e: + logger.error(f"Failed to browse {smb_path}: {e}") + raise + + entries.sort(key=lambda x: (not x["is_directory"], x["name"].lower())) + return entries + + +def read_file( + server: str, + share: str, + path: str, + username: str = "guest", + password: str = "", + domain: str = "WORKGROUP", +) -> bytes: + """Read a file from an SMB share.""" + import smbclient + + smbclient.register_session(server, username=username, password=password, port=445) + + 
normalized = _normalize_path(path) + smb_path = f"\\\\{server}\\{share}\\{normalized}" + + with smbclient.open_file(smb_path, mode="rb") as f: + return f.read() + + +def list_shares( + server: str, + username: str = "guest", + password: str = "", + domain: str = "WORKGROUP", +) -> list[str]: + """List available shares on an SMB server.""" + import smbclient + + smbclient.register_session(server, username=username, password=password, port=445) + + shares = [] + try: + for entry in smbclient.scandir(f"\\\\{server}"): + shares.append(entry.name) + except Exception as e: + logger.error(f"Failed to list shares on {server}: {e}") + raise + return sorted(shares) diff --git a/rag-mcp-server/backend/requirements.txt b/rag-mcp-server/backend/requirements.txt new file mode 100644 index 00000000..04451d51 --- /dev/null +++ b/rag-mcp-server/backend/requirements.txt @@ -0,0 +1,17 @@ +fastapi==0.115.6 +uvicorn[standard]==0.34.0 +chromadb==0.5.23 +sentence-transformers==3.3.1 +python-multipart==0.0.18 +pydantic==2.10.4 +pydantic-settings==2.7.1 +smbprotocol==1.14.0 +pypdf2==3.0.1 +python-docx==1.1.2 +openpyxl==3.1.5 +markdown==3.7 +beautifulsoup4==4.12.3 +chardet==5.2.0 +watchdog==6.0.0 +aiofiles==24.1.0 +httpx==0.28.1 diff --git a/rag-mcp-server/data/documents/.gitkeep b/rag-mcp-server/data/documents/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/rag-mcp-server/deploy.sh b/rag-mcp-server/deploy.sh new file mode 100755 index 00000000..435d84ff --- /dev/null +++ b/rag-mcp-server/deploy.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -e + +echo "=========================================" +echo " RAG MCP Server - Deployment Script" +echo " Target: BrownserverN5 (192.168.1.52)" +echo "=========================================" +echo "" + +# Create data directories +mkdir -p data/documents data/chromadb data/config + +# Copy env file if it doesn't exist +if [ ! -f .env ]; then + cp .env.example .env + echo "[!] Created .env from template. Edit it to set your API key." 
+ echo "" +fi + +echo "[1/3] Building Docker images..." +docker compose build + +echo "" +echo "[2/3] Starting services..." +docker compose up -d + +echo "" +echo "[3/3] Waiting for services to start..." +sleep 5 + +# Health checks +echo "" +echo "Health checks:" +curl -sf http://localhost:8900/health && echo " - Backend: OK" || echo " - Backend: STARTING (may take a moment for model download)" +curl -sf http://localhost:8901/health && echo " - MCP Server: OK" || echo " - MCP Server: STARTING" +curl -sf http://localhost:8902/ > /dev/null 2>&1 && echo " - Frontend: OK" || echo " - Frontend: STARTING" + +echo "" +echo "=========================================" +echo " Deployment complete!" +echo "" +echo " Web UI: http://192.168.1.52:8902" +echo " Backend API: http://192.168.1.52:8900" +echo " MCP Server: http://192.168.1.52:8901" +echo " MCP SSE: http://192.168.1.52:8901/sse" +echo " MCP HTTP: http://192.168.1.52:8901/mcp" +echo "=========================================" +echo "" +echo "Next steps:" +echo " 1. Open the Web UI to create an API key" +echo " 2. Upload or ingest documents via SMB" +echo " 3. 
Configure your LLM to connect to the MCP server" diff --git a/rag-mcp-server/docker-compose.yml b/rag-mcp-server/docker-compose.yml new file mode 100644 index 00000000..0253ad50 --- /dev/null +++ b/rag-mcp-server/docker-compose.yml @@ -0,0 +1,58 @@ +version: "3.8" + +services: + backend: + build: + context: ./backend + dockerfile: Dockerfile + container_name: rag-mcp-backend + restart: unless-stopped + ports: + - "8900:8000" + volumes: + - ./data/documents:/app/data/documents + - ./data/chromadb:/app/data/chromadb + - ./data/config:/app/data/config + environment: + - CHROMA_PERSIST_DIR=/app/data/chromadb + - DOCUMENTS_DIR=/app/data/documents + - CONFIG_DIR=/app/data/config + - EMBEDDING_MODEL=all-MiniLM-L6-v2 + networks: + - rag-network + + mcp-server: + build: + context: ./mcp_server + dockerfile: Dockerfile + container_name: rag-mcp-server + restart: unless-stopped + ports: + - "8901:8001" + volumes: + - ./data/config:/app/data/config + environment: + - BACKEND_URL=http://backend:8000 + - CONFIG_DIR=/app/data/config + depends_on: + - backend + networks: + - rag-network + + frontend: + build: + context: ./frontend + dockerfile: Dockerfile + container_name: rag-mcp-frontend + restart: unless-stopped + ports: + - "8902:80" + depends_on: + - backend + - mcp-server + networks: + - rag-network + +networks: + rag-network: + driver: bridge diff --git a/rag-mcp-server/frontend/Dockerfile b/rag-mcp-server/frontend/Dockerfile new file mode 100644 index 00000000..3c7d8fd2 --- /dev/null +++ b/rag-mcp-server/frontend/Dockerfile @@ -0,0 +1,17 @@ +FROM node:20-alpine AS build + +WORKDIR /app + +COPY package.json ./ +RUN npm install + +COPY . . 
+RUN npm run build + +FROM nginx:alpine +COPY --from=build /app/dist /usr/share/nginx/html +COPY nginx.conf /etc/nginx/conf.d/default.conf + +EXPOSE 80 + +CMD ["nginx", "-g", "daemon off;"] diff --git a/rag-mcp-server/frontend/index.html b/rag-mcp-server/frontend/index.html new file mode 100644 index 00000000..ed9fff14 --- /dev/null +++ b/rag-mcp-server/frontend/index.html @@ -0,0 +1,13 @@ + + + + + + RAG MCP Server - BrownserverN5 + + + +
+ + + diff --git a/rag-mcp-server/frontend/nginx.conf b/rag-mcp-server/frontend/nginx.conf new file mode 100644 index 00000000..77a1004c --- /dev/null +++ b/rag-mcp-server/frontend/nginx.conf @@ -0,0 +1,30 @@ +server { + listen 80; + server_name _; + root /usr/share/nginx/html; + index index.html; + + # Frontend SPA + location / { + try_files $uri $uri/ /index.html; + } + + # Proxy API requests to backend + location /api/ { + proxy_pass http://backend:8000/api/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + client_max_body_size 100M; + } + + # Proxy MCP requests + location /mcp/ { + proxy_pass http://mcp-server:8001/mcp/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } +} diff --git a/rag-mcp-server/frontend/package.json b/rag-mcp-server/frontend/package.json new file mode 100644 index 00000000..4e4c6460 --- /dev/null +++ b/rag-mcp-server/frontend/package.json @@ -0,0 +1,23 @@ +{ + "name": "rag-mcp-frontend", + "private": true, + "version": "1.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview" + }, + "dependencies": { + "react": "^18.3.1", + "react-dom": "^18.3.1", + "react-router-dom": "^6.28.0", + "lucide-react": "^0.468.0" + }, + "devDependencies": { + "@types/react": "^18.3.12", + "@types/react-dom": "^18.3.1", + "@vitejs/plugin-react": "^4.3.4", + "vite": "^6.0.3" + } +} diff --git a/rag-mcp-server/frontend/src/App.jsx b/rag-mcp-server/frontend/src/App.jsx new file mode 100644 index 00000000..7cedcf76 --- /dev/null +++ b/rag-mcp-server/frontend/src/App.jsx @@ -0,0 +1,54 @@ +import React, { useState } from 'react' +import Dashboard from './pages/Dashboard' +import Documents from './pages/Documents' +import Search from './pages/Search' +import SMBBrowser from 
'./pages/SMBBrowser' +import MCPConfig from './pages/MCPConfig' +import APIKeys from './pages/APIKeys' + +const PAGES = [ + { id: 'dashboard', label: 'Dashboard', icon: '~' }, + { id: 'documents', label: 'Documents', icon: '#' }, + { id: 'search', label: 'Search', icon: '?' }, + { id: 'smb', label: 'SMB Browser', icon: '>' }, + { id: 'mcp', label: 'MCP Server', icon: '*' }, + { id: 'apikeys', label: 'API Keys', icon: 'K' }, +] + +const PAGE_COMPONENTS = { + dashboard: Dashboard, + documents: Documents, + search: Search, + smb: SMBBrowser, + mcp: MCPConfig, + apikeys: APIKeys, +} + +export default function App() { + const [activePage, setActivePage] = useState('dashboard') + const PageComponent = PAGE_COMPONENTS[activePage] + + return ( +
+ +
+ +
+
+ ) +} diff --git a/rag-mcp-server/frontend/src/index.css b/rag-mcp-server/frontend/src/index.css new file mode 100644 index 00000000..5ad15efe --- /dev/null +++ b/rag-mcp-server/frontend/src/index.css @@ -0,0 +1,409 @@ +:root { + --bg-primary: #0f172a; + --bg-secondary: #1e293b; + --bg-card: #1e293b; + --bg-input: #334155; + --text-primary: #f1f5f9; + --text-secondary: #94a3b8; + --text-muted: #64748b; + --accent: #3b82f6; + --accent-hover: #2563eb; + --success: #22c55e; + --warning: #f59e0b; + --danger: #ef4444; + --border: #334155; + --radius: 8px; +} + +* { box-sizing: border-box; margin: 0; padding: 0; } + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + background: var(--bg-primary); + color: var(--text-primary); + line-height: 1.6; +} + +.app-layout { + display: flex; + min-height: 100vh; +} + +.sidebar { + width: 240px; + background: var(--bg-secondary); + border-right: 1px solid var(--border); + padding: 1rem 0; + position: fixed; + top: 0; + left: 0; + bottom: 0; + overflow-y: auto; +} + +.sidebar-header { + padding: 0.5rem 1.25rem 1.5rem; + border-bottom: 1px solid var(--border); + margin-bottom: 0.5rem; +} + +.sidebar-header h1 { + font-size: 1.1rem; + font-weight: 700; + color: var(--accent); +} + +.sidebar-header p { + font-size: 0.75rem; + color: var(--text-muted); + margin-top: 0.25rem; +} + +.nav-item { + display: flex; + align-items: center; + gap: 0.75rem; + padding: 0.6rem 1.25rem; + color: var(--text-secondary); + text-decoration: none; + font-size: 0.9rem; + cursor: pointer; + transition: all 0.15s; + border: none; + background: none; + width: 100%; + text-align: left; +} + +.nav-item:hover { + background: var(--bg-input); + color: var(--text-primary); +} + +.nav-item.active { + background: var(--accent); + color: white; +} + +.main-content { + flex: 1; + margin-left: 240px; + padding: 2rem; +} + +.page-header { + margin-bottom: 1.5rem; +} + +.page-header h2 { + font-size: 1.5rem; + font-weight: 600; 
+} + +.page-header p { + color: var(--text-secondary); + font-size: 0.875rem; + margin-top: 0.25rem; +} + +.card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 1.25rem; + margin-bottom: 1rem; +} + +.card-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 1rem; +} + +.card-header h3 { + font-size: 1rem; + font-weight: 600; +} + +.stats-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + gap: 1rem; + margin-bottom: 1.5rem; +} + +.stat-card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 1.25rem; +} + +.stat-card .label { + font-size: 0.75rem; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.stat-card .value { + font-size: 1.75rem; + font-weight: 700; + margin-top: 0.25rem; +} + +.btn { + display: inline-flex; + align-items: center; + gap: 0.5rem; + padding: 0.5rem 1rem; + border-radius: var(--radius); + font-size: 0.875rem; + font-weight: 500; + cursor: pointer; + border: 1px solid transparent; + transition: all 0.15s; +} + +.btn-primary { + background: var(--accent); + color: white; + border-color: var(--accent); +} + +.btn-primary:hover { background: var(--accent-hover); } + +.btn-danger { + background: var(--danger); + color: white; +} + +.btn-danger:hover { opacity: 0.9; } + +.btn-outline { + background: transparent; + color: var(--text-secondary); + border-color: var(--border); +} + +.btn-outline:hover { + border-color: var(--accent); + color: var(--accent); +} + +.btn-sm { padding: 0.3rem 0.6rem; font-size: 0.8rem; } + +.input, .select, .textarea { + width: 100%; + padding: 0.5rem 0.75rem; + background: var(--bg-input); + color: var(--text-primary); + border: 1px solid var(--border); + border-radius: var(--radius); + font-size: 0.875rem; + outline: none; + transition: border-color 0.15s; +} + +.input:focus, 
.select:focus, .textarea:focus { + border-color: var(--accent); +} + +.textarea { min-height: 80px; resize: vertical; } + +.form-group { + margin-bottom: 1rem; +} + +.form-group label { + display: block; + font-size: 0.8rem; + color: var(--text-secondary); + margin-bottom: 0.3rem; + font-weight: 500; +} + +.form-row { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + gap: 1rem; +} + +.table { + width: 100%; + border-collapse: collapse; +} + +.table th, .table td { + text-align: left; + padding: 0.6rem 0.75rem; + border-bottom: 1px solid var(--border); + font-size: 0.875rem; +} + +.table th { + color: var(--text-muted); + font-weight: 500; + font-size: 0.75rem; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.badge { + display: inline-flex; + align-items: center; + padding: 0.15rem 0.5rem; + border-radius: 999px; + font-size: 0.75rem; + font-weight: 500; +} + +.badge-success { background: #16a34a33; color: var(--success); } +.badge-warning { background: #f59e0b33; color: var(--warning); } +.badge-danger { background: #ef444433; color: var(--danger); } + +.file-browser { + border: 1px solid var(--border); + border-radius: var(--radius); + overflow: hidden; +} + +.file-item { + display: flex; + align-items: center; + gap: 0.75rem; + padding: 0.5rem 0.75rem; + border-bottom: 1px solid var(--border); + cursor: pointer; + transition: background 0.1s; +} + +.file-item:hover { background: var(--bg-input); } +.file-item:last-child { border-bottom: none; } + +.file-item .name { flex: 1; font-size: 0.875rem; } +.file-item .size { color: var(--text-muted); font-size: 0.8rem; } +.file-item .date { color: var(--text-muted); font-size: 0.8rem; } + +.upload-zone { + border: 2px dashed var(--border); + border-radius: var(--radius); + padding: 2rem; + text-align: center; + cursor: pointer; + transition: all 0.15s; +} + +.upload-zone:hover { + border-color: var(--accent); + background: rgba(59, 130, 246, 0.05); +} + +.upload-zone.active { 
+ border-color: var(--accent); + background: rgba(59, 130, 246, 0.1); +} + +.alert { + padding: 0.75rem 1rem; + border-radius: var(--radius); + margin-bottom: 1rem; + font-size: 0.875rem; +} + +.alert-success { background: #16a34a22; border: 1px solid #16a34a55; color: var(--success); } +.alert-error { background: #ef444422; border: 1px solid #ef444455; color: var(--danger); } +.alert-info { background: #3b82f622; border: 1px solid #3b82f655; color: var(--accent); } + +.code-block { + background: #0f172a; + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 0.75rem; + font-family: 'Fira Code', monospace; + font-size: 0.8rem; + overflow-x: auto; + word-break: break-all; +} + +.search-results .result-item { + padding: 1rem; + border: 1px solid var(--border); + border-radius: var(--radius); + margin-bottom: 0.75rem; +} + +.result-item .source { + font-size: 0.75rem; + color: var(--accent); + margin-bottom: 0.25rem; +} + +.result-item .score { + font-size: 0.75rem; + color: var(--text-muted); + float: right; +} + +.result-item .content { + font-size: 0.875rem; + color: var(--text-secondary); + white-space: pre-wrap; + max-height: 150px; + overflow-y: auto; +} + +.loading { opacity: 0.6; pointer-events: none; } + +.spinner { + display: inline-block; + width: 1rem; + height: 1rem; + border: 2px solid var(--border); + border-top-color: var(--accent); + border-radius: 50%; + animation: spin 0.6s linear infinite; +} + +@keyframes spin { to { transform: rotate(360deg); } } + +.breadcrumb { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 0.875rem; + color: var(--text-muted); + margin-bottom: 1rem; +} + +.breadcrumb button { + background: none; + border: none; + color: var(--accent); + cursor: pointer; + font-size: 0.875rem; +} + +.breadcrumb button:hover { text-decoration: underline; } + +.tab-bar { + display: flex; + gap: 0; + border-bottom: 1px solid var(--border); + margin-bottom: 1rem; +} + +.tab { + padding: 0.5rem 1rem; + 
cursor: pointer; + font-size: 0.875rem; + color: var(--text-muted); + border: none; + background: none; + border-bottom: 2px solid transparent; + transition: all 0.15s; +} + +.tab:hover { color: var(--text-primary); } +.tab.active { color: var(--accent); border-bottom-color: var(--accent); } diff --git a/rag-mcp-server/frontend/src/main.jsx b/rag-mcp-server/frontend/src/main.jsx new file mode 100644 index 00000000..5cc59919 --- /dev/null +++ b/rag-mcp-server/frontend/src/main.jsx @@ -0,0 +1,10 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' +import App from './App' +import './index.css' + +ReactDOM.createRoot(document.getElementById('root')).render( + + + , +) diff --git a/rag-mcp-server/frontend/src/pages/APIKeys.jsx b/rag-mcp-server/frontend/src/pages/APIKeys.jsx new file mode 100644 index 00000000..8503c027 --- /dev/null +++ b/rag-mcp-server/frontend/src/pages/APIKeys.jsx @@ -0,0 +1,154 @@ +import React, { useState, useEffect } from 'react' +import { listAPIKeys, createAPIKey, deleteAPIKey, revokeAPIKey } from '../services/api' + +export default function APIKeys() { + const [keys, setKeys] = useState([]) + const [name, setName] = useState('') + const [description, setDescription] = useState('') + const [newKey, setNewKey] = useState(null) + const [loading, setLoading] = useState(false) + const [message, setMessage] = useState(null) + + const refresh = () => { + listAPIKeys().then(setKeys).catch(e => setMessage({ type: 'error', text: e.message })) + } + + useEffect(() => { refresh() }, []) + + const handleCreate = async () => { + if (!name.trim()) { + setMessage({ type: 'error', text: 'Enter a name for the API key' }) + return + } + setLoading(true) + try { + const result = await createAPIKey(name.trim(), description.trim()) + setNewKey(result.key) + setName('') + setDescription('') + setMessage({ type: 'success', text: 'API key created. Copy it now - it cannot be retrieved later.' 
}) + refresh() + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + setLoading(false) + } + + const handleDelete = async (keyName) => { + if (!confirm(`Delete API key "${keyName}"? This cannot be undone.`)) return + try { + await deleteAPIKey(keyName) + setMessage({ type: 'success', text: `Deleted key: ${keyName}` }) + refresh() + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + } + + const handleRevoke = async (keyName) => { + if (!confirm(`Revoke API key "${keyName}"?`)) return + try { + await revokeAPIKey(keyName) + setMessage({ type: 'success', text: `Revoked key: ${keyName}` }) + refresh() + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + } + + const copyToClipboard = (text) => { + navigator.clipboard.writeText(text) + setMessage({ type: 'info', text: 'Copied to clipboard' }) + } + + return ( +
+
+

API Keys

+

Manage API keys for MCP server authentication

+
+ + {message && ( +
+ {message.text} + +
+ )} + + {newKey && ( +
+ New API Key (save this now!): +
copyToClipboard(newKey)}> + {newKey} +
+

Click the key to copy. This is the only time it will be shown.

+ +
+ )} + +
+

Create New Key

+
+
+ + setName(e.target.value)} placeholder="e.g., claude-desktop" /> +
+
+ + setDescription(e.target.value)} placeholder="What this key is used for" /> +
+
+ +
+ +
+
+

Active Keys ({keys.filter(k => k.active).length})

+ +
+ {keys.length > 0 ? ( + + + + + + + + + + + + + {keys.map(k => ( + + + + + + + + + ))} + +
NameKey PrefixDescriptionCreatedStatusActions
{k.name}{k.key_prefix}{k.description || '-'}{new Date(k.created_at).toLocaleDateString()} + + {k.active ? 'Active' : 'Revoked'} + + + {k.active && ( + + )} + +
+ ) : ( +

No API keys yet. Create one to allow LLM access to your documents.

+ )} +
+
+ ) +} diff --git a/rag-mcp-server/frontend/src/pages/Dashboard.jsx b/rag-mcp-server/frontend/src/pages/Dashboard.jsx new file mode 100644 index 00000000..c1a2ded0 --- /dev/null +++ b/rag-mcp-server/frontend/src/pages/Dashboard.jsx @@ -0,0 +1,85 @@ +import React, { useState, useEffect } from 'react' +import { getStatus } from '../services/api' + +export default function Dashboard() { + const [status, setStatus] = useState(null) + const [error, setError] = useState(null) + + useEffect(() => { + getStatus() + .then(setStatus) + .catch(e => setError(e.message)) + }, []) + + if (error) return
{error}
+ if (!status) return
Loading...
+ + return ( +
+
+

Dashboard

+

System overview for {status.hostname} ({status.ip})

+
+ +
+
+
Total Documents
+
{status.total_documents}
+
+
+
Collections
+
{status.collections.length}
+
+
+
Active API Keys
+
{status.api_keys_count}
+
+
+
MCP Server
+
+ {status.mcp_enabled ? 'Active' : 'Disabled'} +
+
+
+ +
+
+

Collections

+
+ {status.collections.length > 0 ? ( + + + + {status.collections.map(c => ( + + ))} + +
Name
{c}
+ ) : ( +

No collections yet. Upload documents to get started.

+ )} +
+ +
+
+

MCP Connection Info

+
+
+ +
http://{status.ip}:8901/sse
+
+
+ +
http://{status.ip}:8901/mcp
+
+
+ +
http://{status.ip}:8901/mcp/info
+
+

+ Use your API key as a Bearer token in the Authorization header. +

+
+
+ ) +} diff --git a/rag-mcp-server/frontend/src/pages/Documents.jsx b/rag-mcp-server/frontend/src/pages/Documents.jsx new file mode 100644 index 00000000..1e8f3aa1 --- /dev/null +++ b/rag-mcp-server/frontend/src/pages/Documents.jsx @@ -0,0 +1,201 @@ +import React, { useState, useEffect, useRef } from 'react' +import { + uploadDocument, listDocuments, deleteDocument, + reindexCollection, listCollections, createCollection, deleteCollection +} from '../services/api' + +export default function Documents() { + const [collections, setCollections] = useState([]) + const [activeCollection, setActiveCollection] = useState('default') + const [documents, setDocuments] = useState([]) + const [loading, setLoading] = useState(false) + const [message, setMessage] = useState(null) + const [newCollection, setNewCollection] = useState('') + const [dragActive, setDragActive] = useState(false) + const fileInputRef = useRef() + + const refresh = async () => { + setLoading(true) + try { + const [colRes, docRes] = await Promise.all([ + listCollections(), + listDocuments(activeCollection), + ]) + setCollections(colRes.collections) + setDocuments(docRes.documents) + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + setLoading(false) + } + + useEffect(() => { refresh() }, [activeCollection]) + + const handleUpload = async (files) => { + setLoading(true) + let uploaded = 0 + for (const file of files) { + try { + await uploadDocument(file, activeCollection) + uploaded++ + } catch (e) { + setMessage({ type: 'error', text: `Failed to upload ${file.name}: ${e.message}` }) + } + } + if (uploaded > 0) { + setMessage({ type: 'success', text: `Uploaded ${uploaded} file(s)` }) + } + await refresh() + } + + const handleDelete = async (filename) => { + if (!confirm(`Delete "${filename}" from ${activeCollection}?`)) return + try { + await deleteDocument(filename, activeCollection) + setMessage({ type: 'success', text: `Deleted ${filename}` }) + await refresh() + } catch (e) { + 
setMessage({ type: 'error', text: e.message }) + } + } + + const handleReindex = async () => { + setLoading(true) + try { + const result = await reindexCollection(activeCollection) + setMessage({ type: 'success', text: `Reindexed: ${result.files_processed} files, ${result.total_chunks} chunks` }) + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + setLoading(false) + } + + const handleCreateCollection = async () => { + if (!newCollection.trim()) return + try { + await createCollection(newCollection.trim()) + setNewCollection('') + setMessage({ type: 'success', text: `Created collection: ${newCollection}` }) + await refresh() + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + } + + const handleDeleteCollection = async (name) => { + if (!confirm(`Delete collection "${name}" and all its documents?`)) return + try { + await deleteCollection(name) + if (activeCollection === name) setActiveCollection('default') + setMessage({ type: 'success', text: `Deleted collection: ${name}` }) + await refresh() + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + } + + const onDrop = (e) => { + e.preventDefault() + setDragActive(false) + if (e.dataTransfer.files.length) handleUpload(Array.from(e.dataTransfer.files)) + } + + return ( +
+
+

Documents

+

Manage document collections and upload files for RAG indexing

+
+ + {message && ( +
+ {message.text} + +
+ )} + +
+
+

Collections

+
+ setNewCollection(e.target.value)} + onKeyDown={e => e.key === 'Enter' && handleCreateCollection()} + style={{ width: '200px' }} + /> + +
+
+
+ {collections.map(c => ( +
+ + {c.name !== 'default' && ( + + )} +
+ ))} +
+
+ +
+
+

Upload to "{activeCollection}"

+ +
+ +
{ e.preventDefault(); setDragActive(true) }} + onDragLeave={() => setDragActive(false)} + onClick={() => fileInputRef.current?.click()} + > +

Drop files here or click to browse

+

+ Supports: TXT, MD, PDF, DOCX, XLSX, JSON, YAML, code files, and more +

+ handleUpload(Array.from(e.target.files))} + /> +
+
+ +
+
+

Documents in "{activeCollection}" ({documents.length})

+ +
+ {documents.length > 0 ? ( + + + + {documents.map(doc => ( + + + + + ))} + +
FilenameActions
{doc} + +
+ ) : ( +

No documents in this collection.

+ )} +
+
+ ) +} diff --git a/rag-mcp-server/frontend/src/pages/MCPConfig.jsx b/rag-mcp-server/frontend/src/pages/MCPConfig.jsx new file mode 100644 index 00000000..842fe64e --- /dev/null +++ b/rag-mcp-server/frontend/src/pages/MCPConfig.jsx @@ -0,0 +1,152 @@ +import React, { useState, useEffect } from 'react' +import { getStatus, toggleMCP } from '../services/api' + +export default function MCPConfig() { + const [status, setStatus] = useState(null) + const [loading, setLoading] = useState(false) + const [message, setMessage] = useState(null) + + const refresh = () => { + getStatus().then(setStatus).catch(e => setMessage({ type: 'error', text: e.message })) + } + + useEffect(() => { refresh() }, []) + + const handleToggle = async () => { + setLoading(true) + try { + const newState = !status.mcp_enabled + await toggleMCP(newState) + setMessage({ type: 'success', text: `MCP server ${newState ? 'enabled' : 'disabled'}` }) + refresh() + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + setLoading(false) + } + + if (!status) return
Loading...
+ + const serverIP = status.ip || '192.168.1.52' + + return ( +
+
+

MCP Server Configuration

+

Configure the Model Context Protocol server for cloud LLM access

+
+ + {message && ( +
+ {message.text} + +
+ )} + +
+
+

Server Status

+ +
+
+
+
Status
+
+ {status.mcp_enabled ? 'ACTIVE' : 'DISABLED'} +
+
+
+
Available Tools
+
4
+
+
+
Transport
+
SSE + HTTP
+
+
+
+ +
+

Connection Endpoints

+ +
+ +
http://{serverIP}:8901/sse
+
+ +
+ +
http://{serverIP}:8901/mcp
+
+ +
+ +
http://{serverIP}:8901/messages?session_id=SESSION_ID
+
+ +
+ +
http://{serverIP}:8901/mcp/info
+
+
+ +
+

Available MCP Tools

+ + + + + + + + + + + + + + + + + + + + +
Tool NameDescription
search_documentsSemantic search through indexed documents. Returns relevant chunks with sources and scores.
list_collectionsList all document collections with their document counts.
list_documentsList all documents in a specific collection.
get_server_statusGet current server status including document counts and available collections.
+
+ +
+

Claude Desktop Configuration

+

+ Add this to your Claude Desktop config file (claude_desktop_config.json): +

+
+{`{ + "mcpServers": { + "rag-documents": { + "url": "http://${serverIP}:8901/sse", + "headers": { + "Authorization": "Bearer YOUR_API_KEY_HERE" + } + } + } +}`} +
+
+ +
+

Authentication

+

+ All MCP endpoints require a valid API key passed as a Bearer token in the Authorization header. + Manage your API keys in the API Keys section. +

+
+ +
Authorization: Bearer rmcp_your_api_key_here
+
+
+
+ ) +} diff --git a/rag-mcp-server/frontend/src/pages/SMBBrowser.jsx b/rag-mcp-server/frontend/src/pages/SMBBrowser.jsx new file mode 100644 index 00000000..f912427f --- /dev/null +++ b/rag-mcp-server/frontend/src/pages/SMBBrowser.jsx @@ -0,0 +1,171 @@ +import React, { useState } from 'react' +import { browseSMB, ingestFromSMB, listCollections } from '../services/api' + +export default function SMBBrowser() { + const [server, setServer] = useState('192.168.1.52') + const [share, setShare] = useState('') + const [username, setUsername] = useState('guest') + const [password, setPassword] = useState('') + const [domain, setDomain] = useState('WORKGROUP') + const [currentPath, setCurrentPath] = useState('/') + const [entries, setEntries] = useState([]) + const [loading, setLoading] = useState(false) + const [message, setMessage] = useState(null) + const [ingestCollection, setIngestCollection] = useState('default') + const [pathHistory, setPathHistory] = useState(['/']) + + const browse = async (path = '/') => { + if (!server || !share) { + setMessage({ type: 'error', text: 'Enter server and share name' }) + return + } + setLoading(true) + setMessage(null) + try { + const result = await browseSMB(server, share, path, username, password, domain) + setEntries(result) + setCurrentPath(path) + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + setLoading(false) + } + + const navigateTo = (entry) => { + if (!entry.is_directory) return + const newPath = currentPath === '/' ? 
`/${entry.name}` : `${currentPath}/${entry.name}` + setPathHistory(prev => [...prev, newPath]) + browse(newPath) + } + + const navigateUp = () => { + if (pathHistory.length <= 1) return + const newHistory = pathHistory.slice(0, -1) + setPathHistory(newHistory) + browse(newHistory[newHistory.length - 1]) + } + + const navigateToIndex = (index) => { + const newHistory = pathHistory.slice(0, index + 1) + setPathHistory(newHistory) + browse(newHistory[newHistory.length - 1]) + } + + const handleIngest = async () => { + if (!server || !share) return + setLoading(true) + setMessage(null) + try { + const result = await ingestFromSMB({ + server, share, path: currentPath, + username, password, domain, + collection: ingestCollection, recursive: true, + }) + let text = `Ingested ${result.files_processed} files (${result.total_chunks} chunks)` + if (result.errors?.length) text += `. Errors: ${result.errors.length}` + setMessage({ type: 'success', text }) + } catch (e) { + setMessage({ type: 'error', text: e.message }) + } + setLoading(false) + } + + const formatSize = (bytes) => { + if (bytes === 0) return '-' + const units = ['B', 'KB', 'MB', 'GB'] + const i = Math.floor(Math.log(bytes) / Math.log(1024)) + return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${units[i]}` + } + + const pathParts = currentPath.split('/').filter(Boolean) + + return ( +
+
+

SMB File Browser

+

Browse and ingest documents from SMB shares on your LAN

+
+ + {message && ( +
+ {message.text} + +
+ )} + +
+

Connection

+
+
+ + setServer(e.target.value)} placeholder="192.168.1.x" /> +
+
+ + setShare(e.target.value)} placeholder="Documents" /> +
+
+ + setUsername(e.target.value)} /> +
+
+ + setPassword(e.target.value)} /> +
+
+ + setDomain(e.target.value)} /> +
+
+
+ +
+
+ + {entries.length > 0 && ( + <> +
+
+ + {pathParts.map((part, i) => ( + + / + + + ))} +
+ +
+ + +
+ setIngestCollection(e.target.value)} + placeholder="Collection" + /> + +
+ +
+ {entries.map((entry, i) => ( +
navigateTo(entry)}> + + {entry.is_directory ? '[D]' : ' F '} + + {entry.name} + {formatSize(entry.size)} + {entry.last_modified ? new Date(entry.last_modified).toLocaleDateString() : ''} +
+ ))} +
+
+ + )} +
+ ) +} diff --git a/rag-mcp-server/frontend/src/pages/Search.jsx b/rag-mcp-server/frontend/src/pages/Search.jsx new file mode 100644 index 00000000..67caf69e --- /dev/null +++ b/rag-mcp-server/frontend/src/pages/Search.jsx @@ -0,0 +1,98 @@ +import React, { useState, useEffect } from 'react' +import { queryDocuments, listCollections } from '../services/api' + +export default function Search() { + const [query, setQuery] = useState('') + const [collection, setCollection] = useState('default') + const [nResults, setNResults] = useState(5) + const [results, setResults] = useState(null) + const [collections, setCollections] = useState([]) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + + useEffect(() => { + listCollections().then(r => setCollections(r.collections)).catch(() => {}) + }, []) + + const handleSearch = async () => { + if (!query.trim()) return + setLoading(true) + setError(null) + try { + const res = await queryDocuments(query, collection, nResults) + setResults(res.results) + } catch (e) { + setError(e.message) + } + setLoading(false) + } + + return ( +
+
+

Search Documents

+

Semantic search across your indexed documents

+
+ +
+
+
+ + setQuery(e.target.value)} + onKeyDown={e => e.key === 'Enter' && handleSearch()} + /> +
+
+ + +
+
+ + setNResults(parseInt(e.target.value) || 5)} + /> +
+
+ +
+ + {error &&
{error}
} + + {results && ( +
+
+

Results ({results.length})

+
+
+ {results.length === 0 ? ( +

No results found. Try a different query or check if documents are indexed.

+ ) : ( + results.map((r, i) => ( +
+ Score: {r.score} +
{r.source}
+
{r.content}
+
+ )) + )} +
+
+ )} +
+ ) +} diff --git a/rag-mcp-server/frontend/src/services/api.js b/rag-mcp-server/frontend/src/services/api.js new file mode 100644 index 00000000..49bc01cf --- /dev/null +++ b/rag-mcp-server/frontend/src/services/api.js @@ -0,0 +1,74 @@ +const API_BASE = '/api'; + +async function request(path, options = {}) { + const res = await fetch(`${API_BASE}${path}`, { + headers: { 'Content-Type': 'application/json', ...options.headers }, + ...options, + }); + if (!res.ok) { + const error = await res.json().catch(() => ({ detail: res.statusText })); + throw new Error(error.detail || `Request failed: ${res.status}`); + } + return res.json(); +} + +// Documents +export const uploadDocument = async (file, collection = 'default') => { + const formData = new FormData(); + formData.append('file', file); + formData.append('collection', collection); + const res = await fetch(`${API_BASE}/documents/upload`, { method: 'POST', body: formData }); + if (!res.ok) throw new Error((await res.json()).detail || 'Upload failed'); + return res.json(); +}; + +export const queryDocuments = (query, collection = 'default', n_results = 5) => + request('/documents/query', { + method: 'POST', + body: JSON.stringify({ query, collection, n_results }), + }); + +export const listDocuments = (collection = 'default') => + request(`/documents/list?collection=${collection}`); + +export const deleteDocument = (filename, collection = 'default') => + request(`/documents/${encodeURIComponent(filename)}?collection=${collection}`, { method: 'DELETE' }); + +export const reindexCollection = (collection = 'default') => + request(`/documents/reindex?collection=${collection}`, { method: 'POST' }); + +export const listCollections = () => request('/documents/collections'); + +export const createCollection = (name) => + request(`/documents/collections/${encodeURIComponent(name)}`, { method: 'POST' }); + +export const deleteCollection = (name) => + request(`/documents/collections/${encodeURIComponent(name)}`, { method: 
'DELETE' }); + +// SMB +export const browseSMB = (server, share, path = '/', username = 'guest', password = '', domain = 'WORKGROUP') => + request('/smb/browse', { + method: 'POST', + body: JSON.stringify({ server, share, path, username, password, domain }), + }); + +export const listShares = (server, username = 'guest', password = '', domain = 'WORKGROUP') => + request(`/smb/shares?server=${server}&username=${username}&password=${password}&domain=${domain}`, { + method: 'POST', + }); + +export const ingestFromSMB = (config) => + request('/smb/ingest', { method: 'POST', body: JSON.stringify(config) }); + +// Admin +export const getStatus = () => request('/admin/status'); +export const createAPIKey = (name, description = '') => + request('/admin/api-keys', { method: 'POST', body: JSON.stringify({ name, description }) }); +export const listAPIKeys = () => request('/admin/api-keys'); +export const deleteAPIKey = (name) => + request(`/admin/api-keys/${encodeURIComponent(name)}`, { method: 'DELETE' }); +export const revokeAPIKey = (name) => + request(`/admin/api-keys/${encodeURIComponent(name)}/revoke`, { method: 'POST' }); +export const toggleMCP = (enabled) => + request(`/admin/mcp/toggle?enabled=${enabled}`, { method: 'POST' }); +export const getConfig = () => request('/admin/config'); diff --git a/rag-mcp-server/frontend/vite.config.js b/rag-mcp-server/frontend/vite.config.js new file mode 100644 index 00000000..6b9fb972 --- /dev/null +++ b/rag-mcp-server/frontend/vite.config.js @@ -0,0 +1,11 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], + server: { + proxy: { + '/api': 'http://localhost:8900', + } + } +}) diff --git a/rag-mcp-server/mcp_server/Dockerfile b/rag-mcp-server/mcp_server/Dockerfile new file mode 100644 index 00000000..c092fe9b --- /dev/null +++ b/rag-mcp-server/mcp_server/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.12-slim + +WORKDIR /app + +COPY requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8001 + +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8001"] diff --git a/rag-mcp-server/mcp_server/requirements.txt b/rag-mcp-server/mcp_server/requirements.txt new file mode 100644 index 00000000..9622a2ef --- /dev/null +++ b/rag-mcp-server/mcp_server/requirements.txt @@ -0,0 +1,5 @@ +fastapi==0.115.6 +uvicorn[standard]==0.34.0 +httpx==0.28.1 +pydantic==2.10.4 +sse-starlette==2.2.1 diff --git a/rag-mcp-server/mcp_server/server.py b/rag-mcp-server/mcp_server/server.py new file mode 100644 index 00000000..cb46aeea --- /dev/null +++ b/rag-mcp-server/mcp_server/server.py @@ -0,0 +1,378 @@ +""" +MCP (Model Context Protocol) Server with SSE transport. +Provides RAG-powered document search tools to cloud-based LLMs. +Authenticates via API key in the Authorization header. +""" + +import hashlib +import json +import logging +import os +import uuid +from pathlib import Path +from typing import AsyncGenerator + +import httpx +from fastapi import FastAPI, Header, HTTPException, Request +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel +from sse_starlette.sse import EventSourceResponse + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [MCP] %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + +BACKEND_URL = os.environ.get("BACKEND_URL", "http://backend:8000") +CONFIG_DIR = os.environ.get("CONFIG_DIR", "/app/data/config") +CONFIG_FILE = Path(CONFIG_DIR) / "server_config.json" + +app = FastAPI(title="RAG MCP Server", version="1.0.0") + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Store active SSE sessions +sessions: dict[str, dict] = {} + + +def load_config() -> dict: + if CONFIG_FILE.exists(): + return json.loads(CONFIG_FILE.read_text()) + return {"api_keys": [], "mcp_enabled": True} + + +def validate_api_key(key: str) -> bool: + 
config = load_config() + if not config.get("mcp_enabled", True): + return False + hashed = hashlib.sha256(key.encode()).hexdigest() + for entry in config.get("api_keys", []): + if entry["key_hash"] == hashed and entry.get("active", True): + return True + return False + + +def get_api_key(authorization: str | None) -> str: + if not authorization: + raise HTTPException(401, "Missing Authorization header") + if authorization.startswith("Bearer "): + key = authorization[7:] + else: + key = authorization + if not validate_api_key(key): + raise HTTPException(403, "Invalid or inactive API key") + return key + + +# --- MCP Protocol Implementation --- + +SERVER_INFO = { + "name": "rag-document-server", + "version": "1.0.0", +} + +SERVER_CAPABILITIES = { + "tools": {"listChanged": False}, + "resources": {"subscribe": False, "listChanged": False}, +} + +TOOLS = [ + { + "name": "search_documents", + "description": "Search through indexed documents using semantic similarity. Returns relevant document chunks with source information and relevance scores.", + "inputSchema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query to find relevant documents", + }, + "collection": { + "type": "string", + "description": "Document collection to search in (default: 'default')", + "default": "default", + }, + "n_results": { + "type": "integer", + "description": "Number of results to return (default: 5)", + "default": 5, + }, + }, + "required": ["query"], + }, + }, + { + "name": "list_collections", + "description": "List all available document collections with their document counts.", + "inputSchema": { + "type": "object", + "properties": {}, + }, + }, + { + "name": "list_documents", + "description": "List all documents in a specific collection.", + "inputSchema": { + "type": "object", + "properties": { + "collection": { + "type": "string", + "description": "Collection name (default: 'default')", + "default": "default", + }, + }, + }, + 
}, + { + "name": "get_server_status", + "description": "Get the current status of the RAG server including document counts and available collections.", + "inputSchema": { + "type": "object", + "properties": {}, + }, + }, +] + + +async def handle_tool_call(name: str, arguments: dict) -> dict: + async with httpx.AsyncClient(base_url=BACKEND_URL, timeout=60.0) as client: + if name == "search_documents": + resp = await client.post("/api/documents/query", json={ + "query": arguments["query"], + "collection": arguments.get("collection", "default"), + "n_results": arguments.get("n_results", 5), + }) + resp.raise_for_status() + data = resp.json() + results_text = [] + for r in data["results"]: + results_text.append( + f"**Source:** {r['source']} (score: {r['score']})\n{r['content']}" + ) + return { + "content": [{"type": "text", "text": "\n\n---\n\n".join(results_text) or "No results found."}], + "isError": False, + } + + elif name == "list_collections": + resp = await client.get("/api/documents/collections") + resp.raise_for_status() + data = resp.json() + text = "\n".join( + f"- **{c['name']}**: {c['document_count']} documents" + for c in data["collections"] + ) or "No collections found." + return {"content": [{"type": "text", "text": text}], "isError": False} + + elif name == "list_documents": + collection = arguments.get("collection", "default") + resp = await client.get(f"/api/documents/list?collection={collection}") + resp.raise_for_status() + data = resp.json() + docs = data.get("documents", []) + text = "\n".join(f"- {d}" for d in docs) or "No documents in this collection." 
+ return {"content": [{"type": "text", "text": text}], "isError": False} + + elif name == "get_server_status": + resp = await client.get("/api/admin/status") + resp.raise_for_status() + data = resp.json() + text = ( + f"**Server:** {data['hostname']} ({data['ip']})\n" + f"**MCP Enabled:** {data['mcp_enabled']}\n" + f"**Total Documents:** {data['total_documents']}\n" + f"**Collections:** {', '.join(data['collections']) or 'none'}\n" + f"**Active API Keys:** {data['api_keys_count']}" + ) + return {"content": [{"type": "text", "text": text}], "isError": False} + + else: + return { + "content": [{"type": "text", "text": f"Unknown tool: {name}"}], + "isError": True, + } + + +def handle_jsonrpc(request_body: dict) -> dict: + """Process a JSON-RPC request and return the response skeleton.""" + method = request_body.get("method", "") + req_id = request_body.get("id") + params = request_body.get("params", {}) + + if method == "initialize": + return { + "jsonrpc": "2.0", + "id": req_id, + "result": { + "protocolVersion": "2024-11-05", + "capabilities": SERVER_CAPABILITIES, + "serverInfo": SERVER_INFO, + }, + } + elif method == "notifications/initialized": + return None # No response for notifications + elif method == "tools/list": + return { + "jsonrpc": "2.0", + "id": req_id, + "result": {"tools": TOOLS}, + } + elif method == "tools/call": + # Handled async separately + return {"_async_tool_call": True, "id": req_id, "params": params} + elif method == "resources/list": + return { + "jsonrpc": "2.0", + "id": req_id, + "result": {"resources": []}, + } + elif method == "ping": + return {"jsonrpc": "2.0", "id": req_id, "result": {}} + else: + return { + "jsonrpc": "2.0", + "id": req_id, + "error": {"code": -32601, "message": f"Method not found: {method}"}, + } + + +# --- HTTP + SSE Transport --- + +@app.get("/sse") +async def sse_endpoint(request: Request, authorization: str | None = Header(None)): + get_api_key(authorization) + + session_id = str(uuid.uuid4()) + 
sessions[session_id] = {"active": True} + logger.info(f"New SSE session: {session_id}") + + async def event_generator() -> AsyncGenerator: + # Send the endpoint URL for the client to POST messages to + yield { + "event": "endpoint", + "data": f"/messages?session_id={session_id}", + } + + # Keep connection alive + import asyncio + try: + while sessions.get(session_id, {}).get("active", False): + if "response" in sessions.get(session_id, {}): + response = sessions[session_id].pop("response") + yield { + "event": "message", + "data": json.dumps(response), + } + await asyncio.sleep(0.1) + except asyncio.CancelledError: + pass + finally: + sessions.pop(session_id, None) + logger.info(f"SSE session ended: {session_id}") + + return EventSourceResponse(event_generator()) + + +@app.post("/messages") +async def handle_message( + request: Request, + session_id: str, + authorization: str | None = Header(None), +): + get_api_key(authorization) + + if session_id not in sessions: + raise HTTPException(404, "Session not found") + + body = await request.json() + logger.info(f"Received message: {body.get('method', 'unknown')}") + + result = handle_jsonrpc(body) + + if result is None: + return {"status": "ok"} + + if result.get("_async_tool_call"): + params = result["params"] + tool_name = params.get("name", "") + tool_args = params.get("arguments", {}) + try: + tool_result = await handle_tool_call(tool_name, tool_args) + except Exception as e: + tool_result = { + "content": [{"type": "text", "text": f"Error: {str(e)}"}], + "isError": True, + } + response = { + "jsonrpc": "2.0", + "id": result["id"], + "result": tool_result, + } + else: + response = result + + # Queue response for SSE delivery + if session_id in sessions: + sessions[session_id]["response"] = response + + return {"status": "ok"} + + +# --- Streamable HTTP Transport (newer MCP spec) --- + +@app.post("/mcp") +async def mcp_streamable( + request: Request, + authorization: str | None = Header(None), +): + """Streamable 
HTTP endpoint - handles single request/response MCP calls.""" + get_api_key(authorization) + + body = await request.json() + logger.info(f"MCP streamable request: {body.get('method', 'unknown')}") + + result = handle_jsonrpc(body) + + if result is None: + return {"jsonrpc": "2.0", "result": {}} + + if result.get("_async_tool_call"): + params = result["params"] + tool_name = params.get("name", "") + tool_args = params.get("arguments", {}) + try: + tool_result = await handle_tool_call(tool_name, tool_args) + except Exception as e: + tool_result = { + "content": [{"type": "text", "text": f"Error: {str(e)}"}], + "isError": True, + } + return { + "jsonrpc": "2.0", + "id": result["id"], + "result": tool_result, + } + + return result + + +@app.get("/health") +async def health(): + return {"status": "ok", "service": "mcp-server"} + + +@app.get("/mcp/info") +async def mcp_info(): + """Public endpoint showing MCP server capabilities.""" + return { + "name": SERVER_INFO["name"], + "version": SERVER_INFO["version"], + "protocol_version": "2024-11-05", + "tools": [{"name": t["name"], "description": t["description"]} for t in TOOLS], + "transports": ["sse", "streamable-http"], + "auth": "Bearer token (API key)", + } diff --git a/rag-mcp-server/stop.sh b/rag-mcp-server/stop.sh new file mode 100755 index 00000000..fb60cbb9 --- /dev/null +++ b/rag-mcp-server/stop.sh @@ -0,0 +1,4 @@ +#!/bin/bash +echo "Stopping RAG MCP Server..." +docker compose down +echo "All services stopped." 
From f48148990212ffbe755499845df7b00427274993 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 05:35:29 +0000 Subject: [PATCH 2/3] Fix critical bugs found during code review - ChromaDB list_collections: handle 0.5.x API returning strings - MCP SSE: replace dict-based response with asyncio.Queue (race condition) - MCP streamable: return 204 for notifications (JSON-RPC compliance) - SMB browser: remove unused low-level imports, improve list_shares error - Frontend: fix header merge bug in request(), clipboard fallback for HTTP - Remove unused deps (react-router-dom, lucide-react, markdown, etc.) - Switch PyPDF2 to pypdf to avoid deprecation warnings - Remove unused imports (os, shutil) from routers https://claude.ai/code/session_01ByAJeYptU8ZosBaBVw1rbd --- .../backend/app/routers/documents.py | 2 - .../backend/app/services/document_parser.py | 2 +- .../backend/app/services/rag_engine.py | 12 +++-- .../backend/app/services/smb_browser.py | 45 ++++++++----------- rag-mcp-server/backend/requirements.txt | 7 +-- rag-mcp-server/frontend/package.json | 6 +-- rag-mcp-server/frontend/src/pages/APIKeys.jsx | 16 ++++++- .../frontend/src/pages/SMBBrowser.jsx | 2 +- rag-mcp-server/frontend/src/services/api.js | 5 ++- rag-mcp-server/mcp_server/server.py | 24 +++++----- 10 files changed, 60 insertions(+), 61 deletions(-) diff --git a/rag-mcp-server/backend/app/routers/documents.py b/rag-mcp-server/backend/app/routers/documents.py index 41256d92..118570d8 100644 --- a/rag-mcp-server/backend/app/routers/documents.py +++ b/rag-mcp-server/backend/app/routers/documents.py @@ -1,6 +1,4 @@ import logging -import os -import shutil from pathlib import Path from fastapi import APIRouter, File, Form, HTTPException, UploadFile diff --git a/rag-mcp-server/backend/app/services/document_parser.py b/rag-mcp-server/backend/app/services/document_parser.py index 1cd8639b..5cc12a84 100644 --- a/rag-mcp-server/backend/app/services/document_parser.py +++ 
b/rag-mcp-server/backend/app/services/document_parser.py @@ -54,7 +54,7 @@ def _parse_text(content: bytes) -> str: def _parse_pdf(content: bytes) -> str: - from PyPDF2 import PdfReader + from pypdf import PdfReader reader = PdfReader(BytesIO(content)) text_parts = [] for page in reader.pages: diff --git a/rag-mcp-server/backend/app/services/rag_engine.py b/rag-mcp-server/backend/app/services/rag_engine.py index 2f626222..dbe19cdb 100644 --- a/rag-mcp-server/backend/app/services/rag_engine.py +++ b/rag-mcp-server/backend/app/services/rag_engine.py @@ -1,6 +1,5 @@ import hashlib import logging -import os from pathlib import Path import chromadb @@ -135,8 +134,15 @@ def list_documents(collection_name: str = "default") -> list[str]: def list_collections() -> list[dict]: client = get_chroma_client() - collections = client.list_collections() - return [{"name": c.name, "document_count": c.count()} for c in collections] + collection_names = client.list_collections() + result = [] + for name in collection_names: + try: + col = client.get_collection(name) + result.append({"name": name, "document_count": col.count()}) + except Exception: + result.append({"name": name, "document_count": 0}) + return result def delete_collection(name: str): diff --git a/rag-mcp-server/backend/app/services/smb_browser.py b/rag-mcp-server/backend/app/services/smb_browser.py index 53517504..a4d45c65 100644 --- a/rag-mcp-server/backend/app/services/smb_browser.py +++ b/rag-mcp-server/backend/app/services/smb_browser.py @@ -1,23 +1,7 @@ import logging from datetime import datetime, timezone -from io import BytesIO - -from smbprotocol.connection import Connection -from smbprotocol.session import Session -from smbprotocol.tree import TreeConnect -from smbprotocol.open import ( - Open, - CreateDisposition, - CreateOptions, - FileAttributes, - FilePipePrinterAccessMask, - ImpersonationLevel, - ShareAccess, -) -from smbprotocol.file_info import ( - FileInformationClass, -) -import smbprotocol + +import 
smbclient logger = logging.getLogger(__name__) @@ -36,8 +20,6 @@ def browse_share( domain: str = "WORKGROUP", ) -> list[dict]: """Browse files and directories in an SMB share.""" - import smbclient - smbclient.register_session(server, username=username, password=password, port=445) smb_path = f"\\\\{server}\\{share}" @@ -74,8 +56,6 @@ def read_file( domain: str = "WORKGROUP", ) -> bytes: """Read a file from an SMB share.""" - import smbclient - smbclient.register_session(server, username=username, password=password, port=445) normalized = _normalize_path(path) @@ -91,16 +71,27 @@ def list_shares( password: str = "", domain: str = "WORKGROUP", ) -> list[str]: - """List available shares on an SMB server.""" - import smbclient + """List available shares on an SMB server using ClientConfig.""" + from smbprotocol.connection import Connection + from smbprotocol.session import Session + from smbprotocol.tree import TreeConnect + import struct + import uuid as _uuid smbclient.register_session(server, username=username, password=password, port=445) + # Use smbclient to list shares by connecting to IPC$ and listing + # Fallback: try scanning the server root (works on some implementations) shares = [] try: for entry in smbclient.scandir(f"\\\\{server}"): shares.append(entry.name) - except Exception as e: - logger.error(f"Failed to list shares on {server}: {e}") - raise + except Exception: + # If scandir on server root fails, return a helpful error + logger.warning(f"Could not enumerate shares on {server} via scandir, " + "try specifying the share name directly") + raise ValueError( + f"Could not auto-discover shares on {server}. " + "Please enter the share name manually (e.g., 'Documents', 'Public')." 
+ ) return sorted(shares) diff --git a/rag-mcp-server/backend/requirements.txt b/rag-mcp-server/backend/requirements.txt index 04451d51..502020af 100644 --- a/rag-mcp-server/backend/requirements.txt +++ b/rag-mcp-server/backend/requirements.txt @@ -6,12 +6,7 @@ python-multipart==0.0.18 pydantic==2.10.4 pydantic-settings==2.7.1 smbprotocol==1.14.0 -pypdf2==3.0.1 +pypdf>=4.0.0 python-docx==1.1.2 openpyxl==3.1.5 -markdown==3.7 -beautifulsoup4==4.12.3 chardet==5.2.0 -watchdog==6.0.0 -aiofiles==24.1.0 -httpx==0.28.1 diff --git a/rag-mcp-server/frontend/package.json b/rag-mcp-server/frontend/package.json index 4e4c6460..d5645d64 100644 --- a/rag-mcp-server/frontend/package.json +++ b/rag-mcp-server/frontend/package.json @@ -10,13 +10,9 @@ }, "dependencies": { "react": "^18.3.1", - "react-dom": "^18.3.1", - "react-router-dom": "^6.28.0", - "lucide-react": "^0.468.0" + "react-dom": "^18.3.1" }, "devDependencies": { - "@types/react": "^18.3.12", - "@types/react-dom": "^18.3.1", "@vitejs/plugin-react": "^4.3.4", "vite": "^6.0.3" } diff --git a/rag-mcp-server/frontend/src/pages/APIKeys.jsx b/rag-mcp-server/frontend/src/pages/APIKeys.jsx index 8503c027..cbf0da46 100644 --- a/rag-mcp-server/frontend/src/pages/APIKeys.jsx +++ b/rag-mcp-server/frontend/src/pages/APIKeys.jsx @@ -57,8 +57,20 @@ export default function APIKeys() { } const copyToClipboard = (text) => { - navigator.clipboard.writeText(text) - setMessage({ type: 'info', text: 'Copied to clipboard' }) + if (navigator.clipboard?.writeText) { + navigator.clipboard.writeText(text) + .then(() => setMessage({ type: 'info', text: 'Copied to clipboard' })) + .catch(() => setMessage({ type: 'info', text: 'Select and copy the key manually' })) + } else { + // Fallback for HTTP contexts (non-HTTPS LAN) + const textarea = document.createElement('textarea') + textarea.value = text + document.body.appendChild(textarea) + textarea.select() + document.execCommand('copy') + document.body.removeChild(textarea) + setMessage({ type: 
'info', text: 'Copied to clipboard' }) + } } return ( diff --git a/rag-mcp-server/frontend/src/pages/SMBBrowser.jsx b/rag-mcp-server/frontend/src/pages/SMBBrowser.jsx index f912427f..ede90ed2 100644 --- a/rag-mcp-server/frontend/src/pages/SMBBrowser.jsx +++ b/rag-mcp-server/frontend/src/pages/SMBBrowser.jsx @@ -1,5 +1,5 @@ import React, { useState } from 'react' -import { browseSMB, ingestFromSMB, listCollections } from '../services/api' +import { browseSMB, ingestFromSMB } from '../services/api' export default function SMBBrowser() { const [server, setServer] = useState('192.168.1.52') diff --git a/rag-mcp-server/frontend/src/services/api.js b/rag-mcp-server/frontend/src/services/api.js index 49bc01cf..c95070d5 100644 --- a/rag-mcp-server/frontend/src/services/api.js +++ b/rag-mcp-server/frontend/src/services/api.js @@ -1,9 +1,10 @@ const API_BASE = '/api'; async function request(path, options = {}) { + const { headers, ...rest } = options; const res = await fetch(`${API_BASE}${path}`, { - headers: { 'Content-Type': 'application/json', ...options.headers }, - ...options, + ...rest, + headers: { 'Content-Type': 'application/json', ...headers }, }); if (!res.ok) { const error = await res.json().catch(() => ({ detail: res.statusText })); diff --git a/rag-mcp-server/mcp_server/server.py b/rag-mcp-server/mcp_server/server.py index cb46aeea..59fceee2 100644 --- a/rag-mcp-server/mcp_server/server.py +++ b/rag-mcp-server/mcp_server/server.py @@ -4,6 +4,7 @@ Authenticates via API key in the Authorization header. 
""" +import asyncio import hashlib import json import logging @@ -13,9 +14,8 @@ from typing import AsyncGenerator import httpx -from fastapi import FastAPI, Header, HTTPException, Request +from fastapi import FastAPI, Header, HTTPException, Request, Response from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel from sse_starlette.sse import EventSourceResponse logging.basicConfig(level=logging.INFO, format="%(asctime)s [MCP] %(levelname)s: %(message)s") @@ -35,7 +35,7 @@ allow_headers=["*"], ) -# Store active SSE sessions +# Store active SSE sessions: session_id -> {"active": bool, "queue": asyncio.Queue} sessions: dict[str, dict] = {} @@ -247,7 +247,8 @@ async def sse_endpoint(request: Request, authorization: str | None = Header(None get_api_key(authorization) session_id = str(uuid.uuid4()) - sessions[session_id] = {"active": True} + queue: asyncio.Queue = asyncio.Queue() + sessions[session_id] = {"active": True, "queue": queue} logger.info(f"New SSE session: {session_id}") async def event_generator() -> AsyncGenerator: @@ -257,17 +258,16 @@ async def event_generator() -> AsyncGenerator: "data": f"/messages?session_id={session_id}", } - # Keep connection alive - import asyncio try: while sessions.get(session_id, {}).get("active", False): - if "response" in sessions.get(session_id, {}): - response = sessions[session_id].pop("response") + try: + response = await asyncio.wait_for(queue.get(), timeout=1.0) yield { "event": "message", "data": json.dumps(response), } - await asyncio.sleep(0.1) + except asyncio.TimeoutError: + continue except asyncio.CancelledError: pass finally: @@ -317,9 +317,9 @@ async def handle_message( # Queue response for SSE delivery if session_id in sessions: - sessions[session_id]["response"] = response + await sessions[session_id]["queue"].put(response) - return {"status": "ok"} + return Response(status_code=202) # --- Streamable HTTP Transport (newer MCP spec) --- @@ -338,7 +338,7 @@ async def mcp_streamable( 
result = handle_jsonrpc(body) if result is None: - return {"jsonrpc": "2.0", "result": {}} + return Response(status_code=204) if result.get("_async_tool_call"): params = result["params"] From c1a4ab2fc1fa9c6b699ec74230ac561c73eec74d Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 09:08:15 +0000 Subject: [PATCH 3/3] Harden deployment: fix Docker, nginx, SMB, add README - Move .dockerignore to per-service build contexts (backend, mcp, frontend) - Add .gitkeep for data/chromadb/ and data/config/ dirs - Fix .gitignore to track .gitkeep files in data dirs - Fix nginx.conf: proxy /sse and /messages for full MCP access via frontend - Fix SMB list_shares: use request body model (password was in URL query) - Refactor SMB ingest: extract recursive logic into helper function - Remove unused imports (Optional, struct, uuid, smbprotocol internals) - Add deployment README with architecture, quick start, and API docs https://claude.ai/code/session_01ByAJeYptU8ZosBaBVw1rbd --- rag-mcp-server/.gitignore | 6 +- rag-mcp-server/README.md | 145 ++++++++++++++++++ rag-mcp-server/{ => backend}/.dockerignore | 8 +- rag-mcp-server/backend/app/models/schemas.py | 9 +- rag-mcp-server/backend/app/routers/smb.py | 89 ++++++----- .../backend/app/services/smb_browser.py | 17 +- rag-mcp-server/data/chromadb/.gitkeep | 0 rag-mcp-server/data/config/.gitkeep | 0 rag-mcp-server/frontend/.dockerignore | 4 + rag-mcp-server/frontend/nginx.conf | 29 +++- rag-mcp-server/frontend/src/services/api.js | 3 +- rag-mcp-server/mcp_server/.dockerignore | 5 + 12 files changed, 248 insertions(+), 67 deletions(-) create mode 100644 rag-mcp-server/README.md rename rag-mcp-server/{ => backend}/.dockerignore (63%) create mode 100644 rag-mcp-server/data/chromadb/.gitkeep create mode 100644 rag-mcp-server/data/config/.gitkeep create mode 100644 rag-mcp-server/frontend/.dockerignore create mode 100644 rag-mcp-server/mcp_server/.dockerignore diff --git a/rag-mcp-server/.gitignore b/rag-mcp-server/.gitignore 
index d3302ff9..68131c0d 100644 --- a/rag-mcp-server/.gitignore +++ b/rag-mcp-server/.gitignore @@ -1,8 +1,10 @@ node_modules/ dist/ .env -data/chromadb/ -data/config/ +data/chromadb/* +!data/chromadb/.gitkeep +data/config/* +!data/config/.gitkeep __pycache__/ *.pyc .venv/ diff --git a/rag-mcp-server/README.md b/rag-mcp-server/README.md new file mode 100644 index 00000000..52fb3191 --- /dev/null +++ b/rag-mcp-server/README.md @@ -0,0 +1,145 @@ +# RAG MCP Server for BrownserverN5 + +A self-hosted document RAG (Retrieval Augmented Generation) system with an MCP (Model Context Protocol) server, designed for deployment on Unraid. + +Cloud-based LLMs (Claude, GPT, etc.) connect via the MCP server to search your local documents using semantic similarity - your documents never leave your server. + +## Architecture + +``` + ┌─────────────────┐ + │ Web UI (:8902) │ + │ React + Nginx │ + └───────┬──────────┘ + │ + ┌───────────────┼───────────────┐ + │ │ + ┌────────┴────────┐ ┌─────────┴────────┐ + │ Backend (:8900) │ │ MCP Server(:8901)│ + │ FastAPI + RAG │◄───────────│ SSE + HTTP │ + │ ChromaDB │ │ API Key Auth │ + └────────┬────────┘ └──────────────────┘ + │ + ┌────────┴────────┐ + │ SMB Shares (LAN)│ + │ 192.168.1.x │ + └─────────────────┘ +``` + +**Three Docker services:** + +| Service | Internal Port | External Port | Purpose | +|---------|--------------|---------------|---------| +| Backend | 8000 | **8900** | FastAPI + ChromaDB RAG engine | +| MCP Server | 8001 | **8901** | MCP protocol for cloud LLMs | +| Frontend | 80 | **8902** | React management UI | + +## Quick Start + +### 1. Deploy on Unraid + +SSH into your server or use the Unraid terminal: + +```bash +cd /mnt/user/appdata # or wherever you keep app data +git clone <repo-url> rag-mcp-server +cd rag-mcp-server + +# Copy and edit environment config +cp .env.example .env + +# Deploy +chmod +x deploy.sh +./deploy.sh +``` + +### 2. Open the Web UI + +Navigate to `http://192.168.1.52:8902` in your browser. + +### 3.
Create an API Key + +Go to **API Keys** in the sidebar and create a key. Copy it immediately - it's shown only once. + +### 4. Upload Documents + +Use the **Documents** page to upload files, or use the **SMB Browser** to ingest documents from LAN shares. + +### 5. Connect Your LLM + +#### Claude Desktop / Claude Code +Add to your MCP config: +```json +{ + "mcpServers": { + "rag-documents": { + "url": "http://192.168.1.52:8901/sse", + "headers": { + "Authorization": "Bearer YOUR_API_KEY" + } + } + } +} +``` + +#### Streamable HTTP (alternative) +For clients that support it, use `http://192.168.1.52:8901/mcp` as the endpoint. + +## Supported File Types + +| Category | Extensions | +|----------|-----------| +| Text | `.txt`, `.md`, `.csv`, `.log`, `.ini`, `.conf`, `.cfg` | +| Code | `.py`, `.js`, `.ts`, `.go`, `.java`, `.c`, `.cpp`, `.rs`, `.zig`, `.sh`, `.sql` | +| Documents | `.pdf`, `.docx`, `.xlsx` | +| Data | `.json`, `.yaml`, `.yml`, `.xml`, `.html`, `.css`, `.toml` | + +## MCP Tools Available + +| Tool | Description | +|------|-------------| +| `search_documents` | Semantic search across indexed documents | +| `list_collections` | List all document collections | +| `list_documents` | List documents in a collection | +| `get_server_status` | Server status and stats | + +## API Endpoints + +### Backend (port 8900) +- `POST /api/documents/upload` - Upload and index a document +- `POST /api/documents/query` - Semantic search +- `GET /api/documents/list?collection=default` - List documents +- `DELETE /api/documents/{filename}` - Remove a document +- `POST /api/documents/reindex` - Re-index a collection +- `POST /api/smb/browse` - Browse SMB share +- `POST /api/smb/ingest` - Ingest from SMB share +- `GET /api/admin/status` - Server status + +### MCP Server (port 8901) +- `GET /sse` - SSE transport endpoint +- `POST /messages?session_id=X` - SSE message endpoint +- `POST /mcp` - Streamable HTTP endpoint +- `GET /mcp/info` - Server capabilities (public) + +## Data 
Storage + +All persistent data is stored in `./data/`: +- `documents/` - Uploaded document files +- `chromadb/` - Vector database +- `config/` - Server configuration and API key hashes + +## Management + +```bash +# Start +docker compose up -d + +# Stop +./stop.sh + +# View logs +docker compose logs -f + +# Rebuild after changes +docker compose build && docker compose up -d +``` diff --git a/rag-mcp-server/.dockerignore b/rag-mcp-server/backend/.dockerignore similarity index 63% rename from rag-mcp-server/.dockerignore rename to rag-mcp-server/backend/.dockerignore index 173d7515..9a931cbc 100644 --- a/rag-mcp-server/.dockerignore +++ b/rag-mcp-server/backend/.dockerignore @@ -1,7 +1,7 @@ -node_modules/ -dist/ +__pycache__/ +*.pyc +*.pyo .git .env +.venv data/ -__pycache__/ -*.pyc diff --git a/rag-mcp-server/backend/app/models/schemas.py b/rag-mcp-server/backend/app/models/schemas.py index a10aa8cc..07140257 100644 --- a/rag-mcp-server/backend/app/models/schemas.py +++ b/rag-mcp-server/backend/app/models/schemas.py @@ -1,5 +1,3 @@ -from typing import Optional - from pydantic import BaseModel @@ -40,6 +38,13 @@ class SMBShareConfig(BaseModel): path: str = "/" +class SMBListSharesRequest(BaseModel): + server: str + username: str = "guest" + password: str = "" + domain: str = "WORKGROUP" + + class SMBBrowseRequest(BaseModel): server: str share: str diff --git a/rag-mcp-server/backend/app/routers/smb.py b/rag-mcp-server/backend/app/routers/smb.py index 8c5d2525..2416fb50 100644 --- a/rag-mcp-server/backend/app/routers/smb.py +++ b/rag-mcp-server/backend/app/routers/smb.py @@ -2,7 +2,7 @@ from fastapi import APIRouter, HTTPException -from app.models.schemas import IngestSMBRequest, SMBBrowseRequest, SMBFileEntry +from app.models.schemas import IngestSMBRequest, SMBBrowseRequest, SMBFileEntry, SMBListSharesRequest from app.services import rag_engine, smb_browser from app.services.document_parser import can_parse, parse_file @@ -27,27 +27,32 @@ async def browse_smb(req: 
SMBBrowseRequest): @router.post("/shares") -async def list_shares(server: str, username: str = "guest", password: str = "", domain: str = "WORKGROUP"): +async def list_shares(req: SMBListSharesRequest): try: - shares = smb_browser.list_shares(server, username, password, domain) - return {"server": server, "shares": shares} + shares = smb_browser.list_shares(req.server, req.username, req.password, req.domain) + return {"server": req.server, "shares": shares} except Exception as e: raise HTTPException(500, f"Failed to list shares: {str(e)}") -@router.post("/ingest") -async def ingest_from_smb(req: IngestSMBRequest): +def _ingest_directory( + server: str, + share: str, + path: str, + username: str, + password: str, + domain: str, + collection: str, + recursive: bool, +) -> dict: + """Recursively ingest documents from an SMB path.""" try: entries = smb_browser.browse_share( - server=req.server, - share=req.share, - path=req.path, - username=req.username, - password=req.password, - domain=req.domain, + server=server, share=share, path=path, + username=username, password=password, domain=domain, ) except Exception as e: - raise HTTPException(500, f"SMB browse failed: {str(e)}") + return {"files_processed": 0, "total_chunks": 0, "errors": [f"{path}: {str(e)}"]} total_chunks = 0 files_processed = 0 @@ -55,45 +60,32 @@ async def ingest_from_smb(req: IngestSMBRequest): for entry in entries: if entry["is_directory"]: - if req.recursive: - # Recursively ingest subdirectories - sub_path = f"{req.path.rstrip('/')}/{entry['name']}" - try: - sub_req = IngestSMBRequest( - server=req.server, - share=req.share, - path=sub_path, - username=req.username, - password=req.password, - domain=req.domain, - collection=req.collection, - recursive=True, - ) - result = await ingest_from_smb(sub_req) - total_chunks += result["total_chunks"] - files_processed += result["files_processed"] - except Exception as e: - errors.append(f"{sub_path}: {str(e)}") + if recursive: + sub_path = 
f"{path.rstrip('/')}/{entry['name']}" + sub_result = _ingest_directory( + server, share, sub_path, + username, password, domain, + collection, recursive, + ) + total_chunks += sub_result["total_chunks"] + files_processed += sub_result["files_processed"] + errors.extend(sub_result["errors"]) continue if not can_parse(entry["name"]): continue - file_path = f"{req.path.rstrip('/')}/{entry['name']}" + file_path = f"{path.rstrip('/')}/{entry['name']}" try: content = smb_browser.read_file( - server=req.server, - share=req.share, - path=file_path, - username=req.username, - password=req.password, - domain=req.domain, + server=server, share=share, path=file_path, + username=username, password=password, domain=domain, ) text = parse_file(content=content, filename=entry["name"]) if text.strip(): - source = f"smb://{req.server}/{req.share}{file_path}" + source = f"smb://{server}/{share}{file_path}" chunks = rag_engine.ingest_text( - text, source=source, collection_name=req.collection + text, source=source, collection_name=collection, ) total_chunks += chunks files_processed += 1 @@ -105,3 +97,18 @@ async def ingest_from_smb(req: IngestSMBRequest): "total_chunks": total_chunks, "errors": errors, } + + +@router.post("/ingest") +async def ingest_from_smb(req: IngestSMBRequest): + result = _ingest_directory( + server=req.server, + share=req.share, + path=req.path, + username=req.username, + password=req.password, + domain=req.domain, + collection=req.collection, + recursive=req.recursive, + ) + return result diff --git a/rag-mcp-server/backend/app/services/smb_browser.py b/rag-mcp-server/backend/app/services/smb_browser.py index a4d45c65..6079e4f9 100644 --- a/rag-mcp-server/backend/app/services/smb_browser.py +++ b/rag-mcp-server/backend/app/services/smb_browser.py @@ -71,25 +71,18 @@ def list_shares( password: str = "", domain: str = "WORKGROUP", ) -> list[str]: - """List available shares on an SMB server using ClientConfig.""" - from smbprotocol.connection import Connection - 
from smbprotocol.session import Session - from smbprotocol.tree import TreeConnect - import struct - import uuid as _uuid - + """List available shares on an SMB server.""" smbclient.register_session(server, username=username, password=password, port=445) - # Use smbclient to list shares by connecting to IPC$ and listing - # Fallback: try scanning the server root (works on some implementations) shares = [] try: for entry in smbclient.scandir(f"\\\\{server}"): shares.append(entry.name) except Exception: - # If scandir on server root fails, return a helpful error - logger.warning(f"Could not enumerate shares on {server} via scandir, " - "try specifying the share name directly") + logger.warning( + f"Could not enumerate shares on {server} via scandir, " + "try specifying the share name directly" + ) raise ValueError( f"Could not auto-discover shares on {server}. " "Please enter the share name manually (e.g., 'Documents', 'Public')." diff --git a/rag-mcp-server/data/chromadb/.gitkeep b/rag-mcp-server/data/chromadb/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/rag-mcp-server/data/config/.gitkeep b/rag-mcp-server/data/config/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/rag-mcp-server/frontend/.dockerignore b/rag-mcp-server/frontend/.dockerignore new file mode 100644 index 00000000..aae5bf68 --- /dev/null +++ b/rag-mcp-server/frontend/.dockerignore @@ -0,0 +1,4 @@ +node_modules/ +dist/ +.git +.env diff --git a/rag-mcp-server/frontend/nginx.conf b/rag-mcp-server/frontend/nginx.conf index 77a1004c..a079938c 100644 --- a/rag-mcp-server/frontend/nginx.conf +++ b/rag-mcp-server/frontend/nginx.conf @@ -18,13 +18,32 @@ server { client_max_body_size 100M; } - # Proxy MCP requests - location /mcp/ { - proxy_pass http://mcp-server:8001/mcp/; + # Proxy MCP streamable HTTP endpoint + location /mcp { + proxy_pass http://mcp-server:8001/mcp; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Authorization 
$http_authorization; + } + + # Proxy MCP SSE endpoint + location /sse { + proxy_pass http://mcp-server:8001/sse; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Authorization $http_authorization; proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; + proxy_set_header Connection ""; + proxy_buffering off; + proxy_cache off; + proxy_read_timeout 86400s; + } + + # Proxy MCP message posting endpoint + location /messages { + proxy_pass http://mcp-server:8001/messages; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Authorization $http_authorization; } } diff --git a/rag-mcp-server/frontend/src/services/api.js b/rag-mcp-server/frontend/src/services/api.js index c95070d5..b73b4fde 100644 --- a/rag-mcp-server/frontend/src/services/api.js +++ b/rag-mcp-server/frontend/src/services/api.js @@ -54,8 +54,9 @@ export const browseSMB = (server, share, path = '/', username = 'guest', passwor }); export const listShares = (server, username = 'guest', password = '', domain = 'WORKGROUP') => - request(`/smb/shares?server=${server}&username=${username}&password=${password}&domain=${domain}`, { + request('/smb/shares', { method: 'POST', + body: JSON.stringify({ server, username, password, domain }), }); export const ingestFromSMB = (config) => diff --git a/rag-mcp-server/mcp_server/.dockerignore b/rag-mcp-server/mcp_server/.dockerignore new file mode 100644 index 00000000..15e0d71c --- /dev/null +++ b/rag-mcp-server/mcp_server/.dockerignore @@ -0,0 +1,5 @@ +__pycache__/ +*.pyc +*.pyo +.git +.env