|
| 1 | +import base64 |
| 2 | +import datetime |
| 3 | +import json |
| 4 | +import mimetypes |
| 5 | +import os |
| 6 | +import re |
| 7 | +from pathlib import Path |
| 8 | +from typing import Any |
| 9 | + |
| 10 | +try: |
| 11 | + from openai import OpenAI |
| 12 | +except ImportError: # pragma: no cover - handled at runtime |
| 13 | + OpenAI = None |
| 14 | + |
| 15 | + |
| 16 | +class ImageMetadataStore: |
| 17 | + def __init__(self, store_path: Path): |
| 18 | + self.store_path = store_path |
| 19 | + self.store_path.parent.mkdir(parents=True, exist_ok=True) |
| 20 | + |
| 21 | + def _load(self) -> dict[str, Any]: |
| 22 | + if not self.store_path.exists(): |
| 23 | + return {} |
| 24 | + try: |
| 25 | + with self.store_path.open('r', encoding='utf-8') as f: |
| 26 | + data = json.load(f) |
| 27 | + return data if isinstance(data, dict) else {} |
| 28 | + except Exception: |
| 29 | + return {} |
| 30 | + |
| 31 | + def get(self, key: str) -> dict[str, Any] | None: |
| 32 | + return self._load().get(key) |
| 33 | + |
| 34 | + def set(self, key: str, value: dict[str, Any], overwrite: bool = True) -> tuple[dict[str, Any], bool]: |
| 35 | + data = self._load() |
| 36 | + if not overwrite and key in data: |
| 37 | + return data[key], False |
| 38 | + data[key] = value |
| 39 | + self._write(data) |
| 40 | + return value, True |
| 41 | + |
| 42 | + def _write(self, data: dict[str, Any]) -> None: |
| 43 | + tmp_path = self.store_path.with_name(self.store_path.name + ".tmp") |
| 44 | + with tmp_path.open('w', encoding='utf-8') as f: |
| 45 | + json.dump(data, f, ensure_ascii=False, indent=2) |
| 46 | + os.replace(tmp_path, self.store_path) |
| 47 | + |
| 48 | + |
| 49 | +class CaptionService: |
| 50 | + def __init__( |
| 51 | + self, |
| 52 | + model: str | None = None, |
| 53 | + base_url: str | None = None, |
| 54 | + api_key: str | None = None, |
| 55 | + api_mode: str | None = None, |
| 56 | + ): |
| 57 | + self.model = model or os.environ.get('TIKLOCAL_LLM_MODEL') |
| 58 | + self.base_url = base_url or os.environ.get('TIKLOCAL_LLM_BASE_URL') |
| 59 | + self.api_key = api_key or os.environ.get('OPENAI_API_KEY') |
| 60 | + self.api_mode = (api_mode or os.environ.get('TIKLOCAL_LLM_API') or 'auto').lower() |
| 61 | + self._client = None |
| 62 | + |
| 63 | + def _get_client(self): |
| 64 | + if OpenAI is None: |
| 65 | + raise RuntimeError("OpenAI 客户端未安装,请先安装 openai 依赖。") |
| 66 | + if not self.api_key: |
| 67 | + raise RuntimeError("未配置 OPENAI_API_KEY。") |
| 68 | + if not self.model: |
| 69 | + raise RuntimeError("未配置 TIKLOCAL_LLM_MODEL。") |
| 70 | + if self.base_url and "openrouter.ai" in self.base_url and "/api/v1" not in self.base_url: |
| 71 | + raise RuntimeError("OpenRouter base_url 需要包含 /api/v1,例如 https://openrouter.ai/api/v1") |
| 72 | + if self._client is None: |
| 73 | + kwargs = {"api_key": self.api_key} |
| 74 | + if self.base_url: |
| 75 | + kwargs["base_url"] = self.base_url |
| 76 | + self._client = OpenAI(**kwargs) |
| 77 | + return self._client |
| 78 | + |
| 79 | + def generate(self, image_path: Path, tags_limit: int = 5) -> dict[str, Any]: |
| 80 | + data_url = self._to_data_url(image_path) |
| 81 | + client = self._get_client() |
| 82 | + |
| 83 | + system_prompt = ( |
| 84 | + "你是我的私人媒体库助手。" |
| 85 | + "请仅基于图片可见信息,不要臆测地点、人物或事件。" |
| 86 | + "输出必须是严格 JSON。" |
| 87 | + ) |
| 88 | + user_prompt = ( |
| 89 | + "这是一张我从社交媒体保存的图片。" |
| 90 | + "请用中文、第一人称、带情绪的一句话给出图片标题," |
| 91 | + f"并给出 1 到 {tags_limit} 个标签。" |
| 92 | + "标签用简短词语,不要带 #。" |
| 93 | + "输出格式:{\"title\": \"...\", \"tags\": [\"...\", \"...\"]}。" |
| 94 | + ) |
| 95 | + |
| 96 | + api_mode = self._resolve_api_mode() |
| 97 | + text = "" |
| 98 | + if api_mode == "chat": |
| 99 | + response = client.chat.completions.create( |
| 100 | + model=self.model, |
| 101 | + messages=[ |
| 102 | + {"role": "system", "content": system_prompt}, |
| 103 | + { |
| 104 | + "role": "user", |
| 105 | + "content": [ |
| 106 | + {"type": "text", "text": user_prompt}, |
| 107 | + {"type": "image_url", "image_url": {"url": data_url}}, |
| 108 | + ], |
| 109 | + }, |
| 110 | + ], |
| 111 | + temperature=0.6, |
| 112 | + ) |
| 113 | + text = self._extract_text(response) |
| 114 | + else: |
| 115 | + try: |
| 116 | + response = client.responses.create( |
| 117 | + model=self.model, |
| 118 | + instructions=system_prompt, |
| 119 | + input=[ |
| 120 | + { |
| 121 | + "role": "user", |
| 122 | + "content": [ |
| 123 | + {"type": "input_text", "text": user_prompt}, |
| 124 | + {"type": "input_image", "image_url": data_url}, |
| 125 | + ], |
| 126 | + } |
| 127 | + ], |
| 128 | + temperature=0.6, |
| 129 | + ) |
| 130 | + text = self._extract_text(response) |
| 131 | + except Exception: |
| 132 | + # Fallback for OpenAI-compatible providers without Responses API |
| 133 | + response = client.chat.completions.create( |
| 134 | + model=self.model, |
| 135 | + messages=[ |
| 136 | + {"role": "system", "content": system_prompt}, |
| 137 | + { |
| 138 | + "role": "user", |
| 139 | + "content": [ |
| 140 | + {"type": "text", "text": user_prompt}, |
| 141 | + {"type": "image_url", "image_url": {"url": data_url}}, |
| 142 | + ], |
| 143 | + }, |
| 144 | + ], |
| 145 | + temperature=0.6, |
| 146 | + ) |
| 147 | + text = self._extract_text(response) |
| 148 | + if self._looks_like_html(text): |
| 149 | + raise RuntimeError("模型返回了 HTML 页面,请检查 base_url 或 model 是否正确。") |
| 150 | + |
| 151 | + parsed = self._parse_output(text, tags_limit) |
| 152 | + |
| 153 | + return { |
| 154 | + "title": parsed.get("title", ""), |
| 155 | + "tags": parsed.get("tags", []), |
| 156 | + "style": "first_person_emotion_zh", |
| 157 | + "model": self.model, |
| 158 | + "provider": "openai", |
| 159 | + "base_url": self.base_url or "", |
| 160 | + "created_at": datetime.datetime.utcnow().isoformat() + "Z", |
| 161 | + "prompt_version": 1, |
| 162 | + } |
| 163 | + |
| 164 | + def _to_data_url(self, image_path: Path) -> str: |
| 165 | + mime, _ = mimetypes.guess_type(image_path.name) |
| 166 | + mime = mime or "image/jpeg" |
| 167 | + with image_path.open("rb") as f: |
| 168 | + encoded = base64.b64encode(f.read()).decode("ascii") |
| 169 | + return f"data:{mime};base64,{encoded}" |
| 170 | + |
| 171 | + def _extract_text(self, response: Any) -> str: |
| 172 | + if isinstance(response, str): |
| 173 | + return response |
| 174 | + if hasattr(response, "output_text"): |
| 175 | + return response.output_text or "" |
| 176 | + if hasattr(response, "choices"): |
| 177 | + try: |
| 178 | + message = response.choices[0].message |
| 179 | + return message.content or "" |
| 180 | + except Exception: |
| 181 | + return "" |
| 182 | + if isinstance(response, dict): |
| 183 | + if response.get("output_text"): |
| 184 | + return response.get("output_text") or "" |
| 185 | + if response.get("choices"): |
| 186 | + message = response["choices"][0].get("message", {}) |
| 187 | + return message.get("content") or "" |
| 188 | + return "" |
| 189 | + |
| 190 | + def _resolve_api_mode(self) -> str: |
| 191 | + if self.api_mode in ("chat", "responses"): |
| 192 | + return self.api_mode |
| 193 | + if not self.base_url: |
| 194 | + return "responses" |
| 195 | + base = self.base_url.lower() |
| 196 | + if "openai.com" in base: |
| 197 | + return "responses" |
| 198 | + return "chat" |
| 199 | + |
| 200 | + def _looks_like_html(self, text: str) -> bool: |
| 201 | + if not text: |
| 202 | + return False |
| 203 | + lowered = text.lstrip().lower() |
| 204 | + head = lowered[:400] |
| 205 | + if lowered.startswith("<!doctype") or lowered.startswith("<html"): |
| 206 | + return True |
| 207 | + return "<html" in head or "<head" in head or "<body" in head |
| 208 | + |
| 209 | + def _parse_output(self, text: str, tags_limit: int) -> dict[str, Any]: |
| 210 | + data = None |
| 211 | + try: |
| 212 | + data = json.loads(text) |
| 213 | + except Exception: |
| 214 | + match = re.search(r"\{.*\}", text, re.S) |
| 215 | + if match: |
| 216 | + try: |
| 217 | + data = json.loads(match.group(0)) |
| 218 | + except Exception: |
| 219 | + data = None |
| 220 | + |
| 221 | + title = "" |
| 222 | + tags: list[str] = [] |
| 223 | + |
| 224 | + if isinstance(data, dict): |
| 225 | + title = str(data.get("title") or data.get("caption") or "").strip() |
| 226 | + tags = data.get("tags") or [] |
| 227 | + if not title: |
| 228 | + title = text.strip().splitlines()[0] if text.strip() else "" |
| 229 | + |
| 230 | + if isinstance(tags, str): |
| 231 | + tags = re.split(r"[,,;/\n]+", tags) |
| 232 | + if isinstance(tags, list): |
| 233 | + tags = [str(t).strip() for t in tags if str(t).strip()] |
| 234 | + else: |
| 235 | + tags = [] |
| 236 | + |
| 237 | + # De-dup and clamp |
| 238 | + seen = set() |
| 239 | + cleaned = [] |
| 240 | + for tag in tags: |
| 241 | + if tag in seen: |
| 242 | + continue |
| 243 | + seen.add(tag) |
| 244 | + cleaned.append(tag) |
| 245 | + if len(cleaned) >= tags_limit: |
| 246 | + break |
| 247 | + |
| 248 | + return {"title": title, "tags": cleaned} |
0 commit comments