-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlocal_directory_import.tool.json
More file actions
10 lines (10 loc) · 34.3 KB
/
local_directory_import.tool.json
File metadata and controls
10 lines (10 loc) · 34.3 KB
1
2
3
4
5
6
7
8
9
10
[
{
"id": "local_directory_import",
"name": "Local Directory Import",
"meta": {
"description": "Bulk import local filesystem folders into Open WebUI Knowledge Bases with SHA-256 deduplication."
},
"content": "\"\"\"\nLocal Directory Import Plugin for Open WebUI.\n\nBulk-imports files from a local drop folder into knowledge bases.\nEach immediate subfolder of the drop folder is auto-mapped to a knowledge base\nnamed after it (created if it does not exist). Files are copied to UPLOAD_DIR,\nregistered in the database, linked to the corresponding KB, and vectorized.\n\nAdmin-only access. The drop folder path is configured via Valves.\nReturns a JSON summary with per-KB breakdowns.\n\nNote: local filesystem only — not compatible with S3/GCS/Azure storage backends.\n\"\"\"\n\n__version__ = '0.1.0'\n\nimport asyncio\nimport hashlib\nimport inspect\nimport importlib\nimport json\nimport logging\nimport mimetypes\nimport pathlib\nimport shutil\nimport sys\nimport time\nimport uuid\nfrom contextlib import asynccontextmanager\nfrom dataclasses import asdict, dataclass, field\nfrom types import SimpleNamespace\n\nfrom fastapi import Request\nfrom pydantic import BaseModel, Field\nfrom sqlalchemy import select\n\nUPLOAD_DIR = None\nget_async_db = None\nFile = None\nFileForm = None\nFiles = None\nKnowledge = None\nKnowledgeForm = None\nKnowledges = None\nUserModel = None\nProcessFileForm = None\nprocess_file = None\n\n\ndef _get_mod(key: str):\n \"\"\"Return an already-loaded module from sys.modules, or import it fresh.\"\"\"\n mod = sys.modules.get(key)\n if mod is not None:\n return mod\n return importlib.import_module(key)\n\n\ndef _ensure_openwebui_imports() -> None:\n \"\"\"Resolve Open WebUI symbols lazily for wider version compatibility.\n\n Uses sys.modules first to avoid re-importing modules that are already\n loaded by the running Open WebUI process (which would cause SQLAlchemy\n duplicate table errors). Falls back to importlib for each prefix in turn.\n \"\"\"\n global UPLOAD_DIR\n global get_async_db\n global File\n global FileForm\n global Files\n global Knowledge\n global KnowledgeForm\n global Knowledges\n global UserModel\n global ProcessFileForm\n global process_file\n\n if all(\n sym is not None\n for sym in (\n UPLOAD_DIR,\n get_async_db,\n File,\n FileForm,\n Files,\n Knowledge,\n KnowledgeForm,\n Knowledges,\n ProcessFileForm,\n process_file,\n )\n ):\n return\n\n # Prefixes to try in order. backend.open_webui is intentionally omitted:\n # it aliases the same already-loaded modules and causes SQLAlchemy to\n # complain about duplicate table definitions.\n prefixes = ('open_webui', 'apps.webui')\n\n # db getter names vary across Open WebUI versions\n db_getter_names = ('get_async_db', 'get_session', 'get_db')\n\n errors = []\n for prefix in prefixes:\n try:\n config_mod = _get_mod(f'{prefix}.config')\n db_mod = _get_mod(f'{prefix}.internal.db')\n files_mod = _get_mod(f'{prefix}.models.files')\n knowledge_mod = _get_mod(f'{prefix}.models.knowledge')\n retrieval_mod = _get_mod(f'{prefix}.routers.retrieval')\n\n users_mod = None\n try:\n users_mod = _get_mod(f'{prefix}.models.users')\n except Exception:\n pass\n\n UPLOAD_DIR = getattr(config_mod, 'UPLOAD_DIR', None)\n\n # Accept whichever async-db getter this version exposes\n _get_async_db = None\n for name in db_getter_names:\n _get_async_db = getattr(db_mod, name, None)\n if _get_async_db is not None:\n break\n get_async_db = _get_async_db\n\n File = getattr(files_mod, 'File', None)\n FileForm = getattr(files_mod, 'FileForm', None)\n Files = getattr(files_mod, 'Files', None)\n Knowledge = getattr(knowledge_mod, 'Knowledge', None)\n KnowledgeForm = getattr(knowledge_mod, 'KnowledgeForm', None)\n Knowledges = getattr(knowledge_mod, 'Knowledges', None)\n UserModel = getattr(users_mod, 'UserModel', None) if users_mod else None\n ProcessFileForm = getattr(retrieval_mod, 'ProcessFileForm', None)\n process_file = getattr(retrieval_mod, 'process_file', None)\n\n required = {\n 'UPLOAD_DIR': UPLOAD_DIR,\n 'get_async_db': get_async_db,\n 'File': File,\n 'FileForm': FileForm,\n 'Files': Files,\n 'KnowledgeForm': KnowledgeForm,\n 'Knowledges': Knowledges,\n 'process_file': process_file,\n }\n missing = [name for name, value in required.items() if value is None]\n if missing:\n errors.append(f'{prefix}: missing {\", \".join(missing)}')\n continue\n\n return\n except Exception as exc:\n errors.append(f'{prefix}: {exc}')\n\n raise ImportError(\n 'Unable to resolve Open WebUI tool imports for this environment. '\n + ' | '.join(errors)\n )\n\nlog = logging.getLogger(__name__)\n\n\n# ---------------------------------------------------------------------------\n# Data structures\n# ---------------------------------------------------------------------------\n\n\n@dataclass\nclass ImportFileResult:\n relative_path: str\n filename: str\n file_id: str | None\n status: str\n error: str | None = None\n\n\n@dataclass\nclass KBImportSummary:\n kb_name: str\n knowledge_id: str | None\n kb_created: bool\n discovered: int\n imported: int\n linked: int\n processed: int\n failed: int\n skipped: int = 0\n files: list = field(default_factory=list)\n error: str | None = None\n duration_seconds: float = 0.0\n files_per_second: float = 0.0\n\n\n@dataclass\nclass ImportSummary:\n drop_folder: str\n total_discovered: int\n total_imported: int\n total_linked: int\n total_processed: int\n total_failed: int\n total_skipped: int = 0\n knowledge_bases: list = field(default_factory=list)\n error: str | None = None\n duration_seconds: float = 0.0\n files_per_second: float = 0.0\n\n\n# ---------------------------------------------------------------------------\n# Hash helpers\n# ---------------------------------------------------------------------------\n\n\ndef _hash_file(path: pathlib.Path, chunk_size: int = 65536) -> str:\n \"\"\"Return the hex SHA-256 digest of the file at *path*.\"\"\"\n h = hashlib.sha256()\n with path.open('rb') as fh:\n while True:\n chunk = fh.read(chunk_size)\n if not chunk:\n break\n h.update(chunk)\n return h.hexdigest()\n\n\nasync def _db_execute(db, statement):\n \"\"\"Execute a statement across async and sync SQLAlchemy session shapes.\"\"\"\n result = db.execute(statement)\n if inspect.isawaitable(result):\n return await result\n return result\n\n\nasync def _maybe_await(value):\n \"\"\"Await *value* when needed, otherwise return it directly.\"\"\"\n if inspect.isawaitable(value):\n return await value\n return value\n\n\nasync def _find_file_by_hash(file_hash: str, db) -> 'File | None':\n \"\"\"Return the first File record whose hash matches *file_hash*, or None.\"\"\"\n _ensure_openwebui_imports()\n result = await _db_execute(\n db,\n select(File).where(File.hash == file_hash).limit(1)\n )\n return result.scalars().first()\n\n\n# ---------------------------------------------------------------------------\n# Discovery helpers\n# ---------------------------------------------------------------------------\n\n\n# Directories that are known dev-tool artefacts and should never be imported.\n# Content directories that happen to start with '.' (e.g. .attachments) are\n# intentionally NOT in this list so their files are discovered and imported.\n_HIDDEN_DIR_BLOCKLIST = {\n '.git', '.github', '.svn', '.hg',\n '.tox', '.venv', '.env', '__pycache__',\n}\n\n\ndef _discover_subfolders(drop_folder: pathlib.Path) -> list:\n \"\"\"Return a sorted list of immediate subdirectories inside *drop_folder*.\n\n Directories whose names are in ``_HIDDEN_DIR_BLOCKLIST`` (e.g. ``.git``) are\n excluded. Other dot-prefixed directories such as ``.attachments`` are kept.\n \"\"\"\n return sorted(\n [p for p in drop_folder.iterdir() if p.is_dir() and p.name not in _HIDDEN_DIR_BLOCKLIST]\n )\n\n\ndef _is_supported_import_file(path: pathlib.Path) -> bool:\n \"\"\"Return True when *path* is an allowed text/structured doc file.\"\"\"\n suffix = path.suffix.lower()\n return suffix in {\n '.md',\n '.markdown',\n '.mdown',\n '.mkd',\n '.txt',\n '.json',\n '.yml',\n '.yaml',\n '.pdf',\n '.png',\n '.svg',\n }\n\n\ndef _discover_files(subfolder: pathlib.Path) -> list:\n \"\"\"Return supported doc files recursively inside *subfolder*.\n\n Files whose path passes through a directory listed in\n ``_HIDDEN_DIR_BLOCKLIST`` (e.g. ``.git``) are excluded. Directories whose\n names merely start with ``'.'`` but are not in the blocklist (e.g.\n ``.attachments``) are traversed normally.\n \"\"\"\n base_parts = len(subfolder.parts)\n return sorted(\n [\n p\n for p in subfolder.rglob('*')\n if p.is_file()\n and _is_supported_import_file(p)\n and not any(part in _HIDDEN_DIR_BLOCKLIST for part in p.parts[base_parts:])\n ]\n )\n\n\n# ---------------------------------------------------------------------------\n# File staging helpers\n# ---------------------------------------------------------------------------\n\n\ndef _copy_file_to_upload_dir(src: pathlib.Path, file_id: str, filename: str) -> pathlib.Path:\n \"\"\"Copy *src* into UPLOAD_DIR with a prefixed name and return the destination path.\"\"\"\n _ensure_openwebui_imports()\n dest = pathlib.Path(UPLOAD_DIR) / f'{file_id}_{filename}'\n shutil.copy(src, dest)\n return dest\n\n\nasync def _insert_file_record(\n user_id: str,\n file_id: str,\n filename: str,\n dest_path: pathlib.Path,\n relative_path: str,\n file_hash: str,\n) -> None:\n \"\"\"Create a File DB record for the staged file.\"\"\"\n _ensure_openwebui_imports()\n content_type = mimetypes.guess_type(filename)[0] or 'application/octet-stream'\n size = dest_path.stat().st_size\n await _maybe_await(\n Files.insert_new_file(\n user_id,\n FileForm(\n id=file_id,\n hash=file_hash,\n filename=filename,\n path=str(dest_path),\n data={'content': ''},\n meta={\n 'name': filename,\n 'content_type': content_type,\n 'size': size,\n 'source': relative_path,\n },\n ),\n )\n )\n\n\n# ---------------------------------------------------------------------------\n# Knowledge base helpers\n# ---------------------------------------------------------------------------\n\n\nasync def _find_or_create_kb(kb_name: str, user_id: str, db) -> tuple:\n \"\"\"Look up a KB by *kb_name*; create it if absent.\n\n Returns ``(knowledge_id, kb_created)`` where *kb_created* is ``True`` when a\n new knowledge base was created during this call.\n \"\"\"\n _ensure_openwebui_imports()\n\n existing = None\n lookup_errors = []\n if Knowledge is not None:\n try:\n result = await _db_execute(\n db,\n select(Knowledge).where(Knowledge.name == kb_name).limit(1)\n )\n existing = result.scalars().first()\n except Exception as exc:\n lookup_errors.append(f'orm lookup failed: {exc}')\n\n if existing is None and hasattr(Knowledges, 'get_knowledge_bases'):\n # Compatibility fallback for builds where the ORM class symbol is not exposed\n # or where the direct ORM query shape changed.\n try:\n kbs = await _call_knowledge_api(\n Knowledges.get_knowledge_bases,\n db=db,\n skip=0,\n limit=2000,\n )\n existing = next(\n (kb for kb in kbs if getattr(kb, 'name', None) == kb_name),\n None,\n )\n except Exception as exc:\n lookup_errors.append(f'knowledge list failed: {exc}')\n\n if existing:\n return (existing.id, False)\n\n knowledge_form = (\n KnowledgeForm(\n name=kb_name,\n description='Auto-created by local directory import',\n )\n if KnowledgeForm is not None\n else SimpleNamespace(\n name=kb_name,\n description='Auto-created by local directory import',\n )\n )\n\n try:\n new_kb = await _call_knowledge_api(\n Knowledges.insert_new_knowledge,\n user_id=user_id,\n id=user_id,\n form=knowledge_form,\n form_data=knowledge_form,\n knowledge_form=knowledge_form,\n data=knowledge_form,\n db=db,\n )\n except Exception as exc:\n lookup_errors.append(f'knowledge create failed: {exc}')\n raise RuntimeError('; '.join(lookup_errors)) from exc\n\n return (new_kb.id, True)\n\n\nasync def _call_knowledge_api(func, **candidate_values):\n \"\"\"Call an Open WebUI knowledge helper across signature variations.\"\"\"\n try:\n signature = inspect.signature(func)\n except (TypeError, ValueError):\n signature = None\n\n if signature is not None:\n args = []\n kwargs = {}\n missing = []\n for name, param in signature.parameters.items():\n if name in ('self', 'cls'):\n continue\n if param.kind == inspect.Parameter.VAR_KEYWORD:\n continue\n if param.kind == inspect.Parameter.VAR_POSITIONAL:\n continue\n if name in candidate_values:\n if param.kind == inspect.Parameter.POSITIONAL_ONLY:\n args.append(candidate_values[name])\n else:\n kwargs[name] = candidate_values[name]\n continue\n if param.default is inspect._empty:\n missing.append(name)\n\n if not missing:\n result = func(*args, **kwargs)\n if inspect.isawaitable(result):\n return await result\n return result\n\n fallback_calls = [\n ((candidate_values.get('user_id'), candidate_values.get('form')), {'db': candidate_values.get('db')}),\n ((candidate_values.get('form'),), {'user_id': candidate_values.get('user_id'), 'db': candidate_values.get('db')}),\n ((candidate_values.get('user_id'), candidate_values.get('form')), {}),\n ((), {'skip': candidate_values.get('skip'), 'limit': candidate_values.get('limit'), 'db': candidate_values.get('db')}),\n ((), {'skip': candidate_values.get('skip'), 'limit': candidate_values.get('limit')}),\n ]\n\n last_error = None\n for args, kwargs in fallback_calls:\n call_args = tuple(arg for arg in args if arg is not None)\n call_kwargs = {key: value for key, value in kwargs.items() if value is not None}\n try:\n result = func(*call_args, **call_kwargs)\n if inspect.isawaitable(result):\n return await result\n return result\n except TypeError as exc:\n last_error = exc\n continue\n\n if last_error is not None:\n raise last_error\n raise RuntimeError('Unable to call knowledge API with supported arguments')\n\n\nasync def _link_file_to_kb(knowledge_id: str, file_id: str, user_id: str, db) -> None:\n \"\"\"Link an existing file record to a knowledge base.\"\"\"\n _ensure_openwebui_imports()\n await _maybe_await(\n Knowledges.add_file_to_knowledge_by_id(\n knowledge_id=knowledge_id,\n file_id=file_id,\n user_id=user_id,\n db=db,\n )\n )\n\n\n# ---------------------------------------------------------------------------\n# Vectorization helper\n# ---------------------------------------------------------------------------\n\n\n# File types whose text content must be supplied inline because Open WebUI's\n# retrieval pipeline has no native loader for them.\n_INLINE_CONTENT_EXTENSIONS = {'.json', '.yml', '.yaml'}\n\n\nasync def _vectorize_file(\n request: Request,\n file_id: str,\n knowledge_id: str,\n user,\n db,\n file_path: pathlib.Path | None = None,\n) -> None:\n \"\"\"Vectorize a file into the KB's collection via the retrieval pipeline.\n\n For formats without a native Open WebUI loader (JSON, YAML), the file text\n is read here and passed as *content* on the form so the vectorizer does not\n attempt to extract it from disk and return empty content.\n \"\"\"\n _ensure_openwebui_imports()\n\n inline_content = None\n if file_path is not None and file_path.suffix.lower() in _INLINE_CONTENT_EXTENSIONS:\n try:\n inline_content = file_path.read_text(encoding='utf-8', errors='replace')\n except Exception:\n pass\n\n if ProcessFileForm is not None:\n form_kwargs = {'file_id': file_id, 'collection_name': knowledge_id}\n if inline_content is not None:\n form_kwargs['content'] = inline_content\n try:\n form = ProcessFileForm(**form_kwargs)\n except TypeError:\n # Older builds may not accept 'content'; fall back without it.\n form = ProcessFileForm(file_id=file_id, collection_name=knowledge_id)\n else:\n form = SimpleNamespace(\n file_id=file_id,\n collection_name=knowledge_id,\n content=inline_content,\n )\n\n await _maybe_await(\n process_file(\n request,\n form,\n user=user,\n db=db,\n )\n )\n\n\n@asynccontextmanager\nasync def _open_db_session():\n \"\"\"Yield a DB session across Open WebUI dependency shapes.\"\"\"\n _ensure_openwebui_imports()\n db_provider = get_async_db()\n\n if inspect.isawaitable(db_provider) and not hasattr(db_provider, '__aenter__'):\n db_provider = await db_provider\n\n if hasattr(db_provider, '__aenter__') and hasattr(db_provider, '__aexit__'):\n async with db_provider as db:\n yield db\n return\n\n if hasattr(db_provider, '__enter__') and hasattr(db_provider, '__exit__'):\n with db_provider as db:\n yield db\n return\n\n if inspect.isasyncgen(db_provider):\n try:\n db = await anext(db_provider)\n except StopAsyncIteration as exc:\n raise RuntimeError('get_async_db yielded no database session') from exc\n try:\n yield db\n finally:\n await db_provider.aclose()\n return\n\n if inspect.isgenerator(db_provider):\n try:\n db = next(db_provider)\n except StopIteration as exc:\n raise RuntimeError('get_async_db yielded no database session') from exc\n try:\n yield db\n finally:\n db_provider.close()\n return\n\n yield db_provider\n\n\n# ---------------------------------------------------------------------------\n# Tool class\n# ---------------------------------------------------------------------------\n\n\nclass Tools:\n class Valves(BaseModel):\n drop_folder: str = Field(\n default='/app/backend/data/drop',\n description=(\n 'Absolute path to the drop folder to import from. '\n 'Each immediate subfolder is mapped to a knowledge base. '\n 'Note: local filesystem only — not compatible with S3/GCS/Azure storage backends.'\n ),\n )\n detached_import: bool = Field(\n default=False,\n description=(\n 'When true, schedule import work in the background and return '\n 'immediately. Progress/errors are written to Open WebUI logs.'\n ),\n )\n\n def __init__(self):\n self.valves = self.Valves()\n\n async def _run_import_local_directory(\n self,\n __user__: dict,\n __request__: Request,\n ) -> str:\n \"\"\"Execute the full import pipeline and return JSON summary.\"\"\"\n overall_start = time.perf_counter()\n drop_folder = self.valves.drop_folder\n if not drop_folder:\n return json.dumps(\n asdict(\n ImportSummary(\n error='drop_folder valve is not configured',\n drop_folder='',\n total_discovered=0,\n total_imported=0,\n total_linked=0,\n total_processed=0,\n total_failed=0,\n knowledge_bases=[],\n )\n )\n )\n _ensure_openwebui_imports()\n\n # 1. Admin role guard — must be the first check\n if __user__.get('role') != 'admin':\n return json.dumps(\n asdict(\n ImportSummary(\n error='Access denied: admin role required',\n drop_folder=drop_folder,\n total_discovered=0,\n total_imported=0,\n total_linked=0,\n total_processed=0,\n total_failed=0,\n knowledge_bases=[],\n )\n )\n )\n\n # 2. Validate drop_folder exists and is a directory\n drop_path = pathlib.Path(drop_folder).resolve()\n if not drop_path.exists() or not drop_path.is_dir():\n return json.dumps(\n asdict(\n ImportSummary(\n error=f\"drop_folder '{drop_folder}' does not exist or is not a directory\",\n drop_folder=drop_folder,\n total_discovered=0,\n total_imported=0,\n total_linked=0,\n total_processed=0,\n total_failed=0,\n knowledge_bases=[],\n )\n )\n )\n\n user_id = __user__['id']\n user = UserModel(**__user__) if UserModel is not None else __user__\n kb_summaries = []\n\n # 3. Discover immediate subfolders\n subfolders = _discover_subfolders(drop_path)\n\n for subfolder in subfolders:\n kb_name = subfolder.name\n kb_start = time.perf_counter()\n\n # 4. Find or create the knowledge base for this subfolder\n async with _open_db_session() as db:\n try:\n knowledge_id, kb_created = await _find_or_create_kb(kb_name, user_id, db)\n except Exception as exc:\n log.error(\n 'local_import kb_find_or_create kb=%s error=%s',\n kb_name,\n str(exc),\n )\n kb_summaries.append(\n KBImportSummary(\n kb_name=kb_name,\n knowledge_id=None,\n kb_created=False,\n discovered=0,\n imported=0,\n linked=0,\n processed=0,\n failed=1,\n files=[],\n error=str(exc),\n duration_seconds=round(time.perf_counter() - kb_start, 3),\n files_per_second=0.0,\n )\n )\n continue\n\n kb_summary = KBImportSummary(\n kb_name=kb_name,\n knowledge_id=knowledge_id,\n kb_created=kb_created,\n discovered=0,\n imported=0,\n linked=0,\n processed=0,\n failed=0,\n skipped=0,\n files=[],\n )\n\n # 5. Discover and process files within this subfolder\n files = _discover_files(subfolder)\n kb_summary.discovered = len(files)\n\n for file_path in files:\n file_id = str(uuid.uuid4())\n filename = file_path.name\n relative_path = str(file_path.relative_to(subfolder))\n status = 'discovered'\n error = None\n\n # Hash check — skip files that haven't changed\n try:\n file_hash = _hash_file(file_path)\n existing = await _find_file_by_hash(file_hash, db)\n if existing is not None:\n kb_summary.skipped += 1\n log.info(\n 'local_import file=%s kb=%s status=skipped hash=%s',\n relative_path,\n kb_name,\n file_hash,\n )\n kb_summary.files.append(\n ImportFileResult(\n relative_path=relative_path,\n filename=filename,\n file_id=existing.id,\n status='skipped',\n )\n )\n continue\n except Exception as exc:\n error = str(exc)\n status = 'hash_failed'\n kb_summary.failed += 1\n log.error(\n 'local_import file=%s kb=%s status=%s reason=%s',\n relative_path,\n kb_name,\n status,\n error,\n )\n kb_summary.files.append(\n ImportFileResult(\n relative_path=relative_path,\n filename=filename,\n file_id=None,\n status=status,\n error=error,\n )\n )\n continue\n\n # Copy + insert file record\n try:\n dest = _copy_file_to_upload_dir(file_path, file_id, filename)\n await _insert_file_record(\n user_id, file_id, filename, dest, relative_path, file_hash\n )\n kb_summary.imported += 1\n status = 'imported'\n except Exception as exc:\n error = str(exc)\n status = 'import_failed'\n kb_summary.failed += 1\n log.info(\n 'local_import file=%s kb=%s status=%s reason=%s',\n relative_path,\n kb_name,\n status,\n error,\n )\n kb_summary.files.append(\n ImportFileResult(\n relative_path=relative_path,\n filename=filename,\n file_id=None,\n status=status,\n error=error,\n )\n )\n continue\n\n # Link file to KB\n try:\n await _link_file_to_kb(knowledge_id, file_id, user_id, db)\n kb_summary.linked += 1\n status = 'linked'\n except Exception as exc:\n error = str(exc)\n status = 'import_failed'\n kb_summary.failed += 1\n log.info(\n 'local_import file=%s kb=%s status=%s reason=%s',\n relative_path,\n kb_name,\n status,\n error,\n )\n kb_summary.files.append(\n ImportFileResult(\n relative_path=relative_path,\n filename=filename,\n file_id=file_id,\n status=status,\n error=error,\n )\n )\n continue\n\n # Vectorize — failures are non-fatal (FR-017)\n try:\n await _vectorize_file(\n __request__, file_id, knowledge_id, user, db,\n file_path=file_path,\n )\n kb_summary.processed += 1\n status = 'processed'\n except Exception as exc:\n error = str(exc)\n status = 'vectorization_failed'\n kb_summary.failed += 1\n\n log.info(\n 'local_import file=%s kb=%s status=%s reason=%s',\n relative_path,\n kb_name,\n status,\n error or '',\n )\n kb_summary.files.append(\n ImportFileResult(\n relative_path=relative_path,\n filename=filename,\n file_id=file_id,\n status=status,\n error=error,\n )\n )\n\n kb_summary.duration_seconds = round(time.perf_counter() - kb_start, 3)\n if kb_summary.duration_seconds > 0:\n kb_summary.files_per_second = round(\n kb_summary.discovered / kb_summary.duration_seconds,\n 3,\n )\n\n kb_summaries.append(kb_summary)\n\n # 6. Aggregate totals\n total_discovered = sum(kb.discovered for kb in kb_summaries)\n duration_seconds = round(time.perf_counter() - overall_start, 3)\n files_per_second = (\n round(total_discovered / duration_seconds, 3)\n if duration_seconds > 0\n else 0.0\n )\n\n summary = ImportSummary(\n drop_folder=drop_folder,\n total_discovered=total_discovered,\n total_imported=sum(kb.imported for kb in kb_summaries),\n total_linked=sum(kb.linked for kb in kb_summaries),\n total_processed=sum(kb.processed for kb in kb_summaries),\n total_failed=sum(kb.failed for kb in kb_summaries),\n total_skipped=sum(kb.skipped for kb in kb_summaries),\n knowledge_bases=kb_summaries,\n duration_seconds=duration_seconds,\n files_per_second=files_per_second,\n error=(\n 'One or more knowledge bases failed to import; '\n 'see knowledge_bases[*].error'\n if any(kb.error for kb in kb_summaries)\n else None\n ),\n )\n\n log.info(\n 'local_import summary drop_folder=%s total_discovered=%d '\n 'total_imported=%d total_linked=%d total_processed=%d total_failed=%d',\n drop_folder,\n summary.total_discovered,\n summary.total_imported,\n summary.total_linked,\n summary.total_processed,\n summary.total_failed,\n )\n\n return json.dumps(asdict(summary))\n\n async def _run_import_local_directory_detached(\n self,\n __user__: dict,\n __request__: Request,\n ) -> None:\n \"\"\"Execute import in a background task and log outcome.\"\"\"\n try:\n result = await self._run_import_local_directory(__user__, __request__)\n log.info('local_import detached_completed summary=%s', result)\n except Exception:\n log.exception('local_import detached_failed')\n\n async def import_local_directory(\n self,\n __user__: dict = {},\n __request__: Request = None,\n ) -> str:\n \"\"\"\n Import all files from the configured drop folder into knowledge bases.\n\n The drop folder path is set by the admin in the Valves configuration\n (drop_folder). Each immediate subfolder is mapped to a knowledge\n base with the same name (created automatically if it does not exist).\n All files within each subfolder (recursively) are copied to the upload\n directory, registered in the database, linked to the KB, and vectorized.\n\n :return: JSON string containing an ImportSummary with per-KB breakdowns.\n \"\"\"\n if getattr(self.valves, 'detached_import', False) is True:\n # Copy user payload to avoid accidental mutation after dispatch.\n user_copy = dict(__user__) if isinstance(__user__, dict) else __user__\n asyncio.create_task(\n self._run_import_local_directory_detached(user_copy, __request__)\n )\n return json.dumps({'status': 'dispatched'})\n\n return await self._run_import_local_directory(__user__, __request__)\n"
}
]