Skip to content

Commit b5c40f1

Browse files
ChanMo and claude committed
Release v0.8.0: 新增AI图片标题生成功能
新功能:
- 集成OpenAI API实现智能图片标题和标签生成
- 支持中文第一人称情绪化描述风格
- 图库和图片详情页增加标题生成按钮
- 元数据持久化存储到本地JSON文件
- 支持多种AI服务提供商(OpenAI、OpenRouter等)

技术改进:
- 新增 ImageMetadataStore 元数据管理服务
- 新增 CaptionService AI标题生成服务
- 新增 /api/image/metadata API端点
- 优化前端UI交互体验

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 5dd81ad commit b5c40f1

8 files changed

Lines changed: 1233 additions & 4 deletions

File tree

poetry.lock

Lines changed: 592 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "TikLocal"
3-
version = "0.7.1"
3+
version = "0.8.0"
44
description = "A local media server that combines the features of TikTok and Pinterest"
55
authors = ["ChanMo <chan.mo@outlook.com>"]
66
readme = "README.md"
@@ -17,6 +17,7 @@ python = ">=3.10,<4.0"
1717
flask = "^3.1.0"
1818
waitress = "^3.0.2"
1919
pyyaml = "^6.0"
20+
openai = "^2.16.0"
2021

2122
[[tool.poetry.source]]
2223
name = "aliyun"

tiklocal/app.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# Service Imports
1313
from tiklocal.services import LibraryService, FavoriteService, RecommendService
1414
from tiklocal.services.thumbnail import ThumbnailService
15+
from tiklocal.services.metadata import ImageMetadataStore, CaptionService
16+
from tiklocal.paths import get_metadata_path
1517

1618

1719
def get_app_version():
@@ -63,6 +65,8 @@ def create_app(test_config=None):
6365
favorite_service = FavoriteService(media_root_str)
6466
recommend_service = RecommendService(library_service, favorite_service)
6567
thumbnail_service = ThumbnailService(Path(media_root_str))
68+
metadata_store = ImageMetadataStore(get_metadata_path())
69+
caption_service = CaptionService()
6670

6771
# --- Template Filters ---
6872
@app.template_filter('timestamp_to_date')
@@ -290,6 +294,35 @@ def api_random_images():
290294
'seed': seed
291295
}
292296

297+
@app.route('/api/image/metadata', methods=['GET', 'POST'])
def api_image_metadata():
    """Read or generate AI metadata (title and tags) for one image.

    GET  ?uri=...  returns the stored metadata for ``uri`` (``data`` is
    ``None`` when nothing has been stored yet).

    POST {"uri": ..., "force": bool} returns the stored metadata when it
    exists and ``force`` is falsy (marked with ``skipped: True``); otherwise
    it resolves the file through ``library_service``, asks
    ``caption_service`` for a fresh caption and persists the result.

    Responses are JSON dicts with a ``success`` flag; failures carry an
    ``error`` message and a 400 / 404 / 500 status code.
    """
    if request.method == 'GET':
        uri = request.args.get('uri')
        if not uri:
            return {'success': False, 'error': 'Missing uri'}, 400
        return {'success': True, 'data': metadata_store.get(uri)}

    # A syntactically valid JSON body is not necessarily an object (it may be
    # a list, string, number...); treat anything else as an empty payload so
    # the request is rejected with a 400 instead of crashing on ``.get``.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    uri = payload.get('uri')
    force = bool(payload.get('force'))
    # ``uri`` is used as a dictionary key and as a path, so it must be a
    # non-empty string; unhashable values such as lists would otherwise raise.
    if not uri or not isinstance(uri, str):
        return {'success': False, 'error': 'Missing uri'}, 400

    existing = metadata_store.get(uri)
    if existing and not force:
        return {'success': True, 'data': existing, 'skipped': True}

    target = library_service.resolve_path(uri)
    if not target or not target.exists():
        return {'success': False, 'error': 'File not found'}, 404

    try:
        result = caption_service.generate(target, tags_limit=5)
        metadata_store.set(uri, result, overwrite=True)
        return {'success': True, 'data': result}
    except Exception as e:
        # Request boundary: report configuration / provider / storage errors
        # to the client rather than letting them escape as an HTML 500 page.
        return {'success': False, 'error': str(e)}, 500
325+
293326
@app.route('/api/favorite/<path:name>', methods=['GET', 'POST'])
294327
def api_favorite(name):
295328
if request.method == 'GET':
@@ -363,4 +396,4 @@ def api_library_stats():
363396
'cache_mb': round(thumb_size / (1024 * 1024), 2)
364397
}
365398

366-
return app
399+
return app

tiklocal/paths.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@ def get_thumbnails_dir() -> Path:
2424
def get_thumbs_map_path() -> Path:
2525
return get_data_dir() / 'thumbs.json'
2626

27+
28+
def get_metadata_path() -> Path:
    """Return the path of the ``metadata.json`` file in the data directory."""
    data_dir = get_data_dir()
    return data_dir / 'metadata.json'

tiklocal/services/metadata.py

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
import base64
2+
import datetime
3+
import json
4+
import mimetypes
5+
import os
6+
import re
7+
from pathlib import Path
8+
from typing import Any
9+
10+
try:
11+
from openai import OpenAI
12+
except ImportError: # pragma: no cover - handled at runtime
13+
OpenAI = None
14+
15+
16+
class ImageMetadataStore:
17+
def __init__(self, store_path: Path):
18+
self.store_path = store_path
19+
self.store_path.parent.mkdir(parents=True, exist_ok=True)
20+
21+
def _load(self) -> dict[str, Any]:
22+
if not self.store_path.exists():
23+
return {}
24+
try:
25+
with self.store_path.open('r', encoding='utf-8') as f:
26+
data = json.load(f)
27+
return data if isinstance(data, dict) else {}
28+
except Exception:
29+
return {}
30+
31+
def get(self, key: str) -> dict[str, Any] | None:
32+
return self._load().get(key)
33+
34+
def set(self, key: str, value: dict[str, Any], overwrite: bool = True) -> tuple[dict[str, Any], bool]:
35+
data = self._load()
36+
if not overwrite and key in data:
37+
return data[key], False
38+
data[key] = value
39+
self._write(data)
40+
return value, True
41+
42+
def _write(self, data: dict[str, Any]) -> None:
43+
tmp_path = self.store_path.with_name(self.store_path.name + ".tmp")
44+
with tmp_path.open('w', encoding='utf-8') as f:
45+
json.dump(data, f, ensure_ascii=False, indent=2)
46+
os.replace(tmp_path, self.store_path)
47+
48+
49+
class CaptionService:
50+
def __init__(
51+
self,
52+
model: str | None = None,
53+
base_url: str | None = None,
54+
api_key: str | None = None,
55+
api_mode: str | None = None,
56+
):
57+
self.model = model or os.environ.get('TIKLOCAL_LLM_MODEL')
58+
self.base_url = base_url or os.environ.get('TIKLOCAL_LLM_BASE_URL')
59+
self.api_key = api_key or os.environ.get('OPENAI_API_KEY')
60+
self.api_mode = (api_mode or os.environ.get('TIKLOCAL_LLM_API') or 'auto').lower()
61+
self._client = None
62+
63+
def _get_client(self):
64+
if OpenAI is None:
65+
raise RuntimeError("OpenAI 客户端未安装,请先安装 openai 依赖。")
66+
if not self.api_key:
67+
raise RuntimeError("未配置 OPENAI_API_KEY。")
68+
if not self.model:
69+
raise RuntimeError("未配置 TIKLOCAL_LLM_MODEL。")
70+
if self.base_url and "openrouter.ai" in self.base_url and "/api/v1" not in self.base_url:
71+
raise RuntimeError("OpenRouter base_url 需要包含 /api/v1,例如 https://openrouter.ai/api/v1")
72+
if self._client is None:
73+
kwargs = {"api_key": self.api_key}
74+
if self.base_url:
75+
kwargs["base_url"] = self.base_url
76+
self._client = OpenAI(**kwargs)
77+
return self._client
78+
79+
def generate(self, image_path: Path, tags_limit: int = 5) -> dict[str, Any]:
80+
data_url = self._to_data_url(image_path)
81+
client = self._get_client()
82+
83+
system_prompt = (
84+
"你是我的私人媒体库助手。"
85+
"请仅基于图片可见信息,不要臆测地点、人物或事件。"
86+
"输出必须是严格 JSON。"
87+
)
88+
user_prompt = (
89+
"这是一张我从社交媒体保存的图片。"
90+
"请用中文、第一人称、带情绪的一句话给出图片标题,"
91+
f"并给出 1 到 {tags_limit} 个标签。"
92+
"标签用简短词语,不要带 #。"
93+
"输出格式:{\"title\": \"...\", \"tags\": [\"...\", \"...\"]}。"
94+
)
95+
96+
api_mode = self._resolve_api_mode()
97+
text = ""
98+
if api_mode == "chat":
99+
response = client.chat.completions.create(
100+
model=self.model,
101+
messages=[
102+
{"role": "system", "content": system_prompt},
103+
{
104+
"role": "user",
105+
"content": [
106+
{"type": "text", "text": user_prompt},
107+
{"type": "image_url", "image_url": {"url": data_url}},
108+
],
109+
},
110+
],
111+
temperature=0.6,
112+
)
113+
text = self._extract_text(response)
114+
else:
115+
try:
116+
response = client.responses.create(
117+
model=self.model,
118+
instructions=system_prompt,
119+
input=[
120+
{
121+
"role": "user",
122+
"content": [
123+
{"type": "input_text", "text": user_prompt},
124+
{"type": "input_image", "image_url": data_url},
125+
],
126+
}
127+
],
128+
temperature=0.6,
129+
)
130+
text = self._extract_text(response)
131+
except Exception:
132+
# Fallback for OpenAI-compatible providers without Responses API
133+
response = client.chat.completions.create(
134+
model=self.model,
135+
messages=[
136+
{"role": "system", "content": system_prompt},
137+
{
138+
"role": "user",
139+
"content": [
140+
{"type": "text", "text": user_prompt},
141+
{"type": "image_url", "image_url": {"url": data_url}},
142+
],
143+
},
144+
],
145+
temperature=0.6,
146+
)
147+
text = self._extract_text(response)
148+
if self._looks_like_html(text):
149+
raise RuntimeError("模型返回了 HTML 页面,请检查 base_url 或 model 是否正确。")
150+
151+
parsed = self._parse_output(text, tags_limit)
152+
153+
return {
154+
"title": parsed.get("title", ""),
155+
"tags": parsed.get("tags", []),
156+
"style": "first_person_emotion_zh",
157+
"model": self.model,
158+
"provider": "openai",
159+
"base_url": self.base_url or "",
160+
"created_at": datetime.datetime.utcnow().isoformat() + "Z",
161+
"prompt_version": 1,
162+
}
163+
164+
def _to_data_url(self, image_path: Path) -> str:
165+
mime, _ = mimetypes.guess_type(image_path.name)
166+
mime = mime or "image/jpeg"
167+
with image_path.open("rb") as f:
168+
encoded = base64.b64encode(f.read()).decode("ascii")
169+
return f"data:{mime};base64,{encoded}"
170+
171+
def _extract_text(self, response: Any) -> str:
172+
if isinstance(response, str):
173+
return response
174+
if hasattr(response, "output_text"):
175+
return response.output_text or ""
176+
if hasattr(response, "choices"):
177+
try:
178+
message = response.choices[0].message
179+
return message.content or ""
180+
except Exception:
181+
return ""
182+
if isinstance(response, dict):
183+
if response.get("output_text"):
184+
return response.get("output_text") or ""
185+
if response.get("choices"):
186+
message = response["choices"][0].get("message", {})
187+
return message.get("content") or ""
188+
return ""
189+
190+
def _resolve_api_mode(self) -> str:
191+
if self.api_mode in ("chat", "responses"):
192+
return self.api_mode
193+
if not self.base_url:
194+
return "responses"
195+
base = self.base_url.lower()
196+
if "openai.com" in base:
197+
return "responses"
198+
return "chat"
199+
200+
def _looks_like_html(self, text: str) -> bool:
201+
if not text:
202+
return False
203+
lowered = text.lstrip().lower()
204+
head = lowered[:400]
205+
if lowered.startswith("<!doctype") or lowered.startswith("<html"):
206+
return True
207+
return "<html" in head or "<head" in head or "<body" in head
208+
209+
def _parse_output(self, text: str, tags_limit: int) -> dict[str, Any]:
210+
data = None
211+
try:
212+
data = json.loads(text)
213+
except Exception:
214+
match = re.search(r"\{.*\}", text, re.S)
215+
if match:
216+
try:
217+
data = json.loads(match.group(0))
218+
except Exception:
219+
data = None
220+
221+
title = ""
222+
tags: list[str] = []
223+
224+
if isinstance(data, dict):
225+
title = str(data.get("title") or data.get("caption") or "").strip()
226+
tags = data.get("tags") or []
227+
if not title:
228+
title = text.strip().splitlines()[0] if text.strip() else ""
229+
230+
if isinstance(tags, str):
231+
tags = re.split(r"[,,;/\n]+", tags)
232+
if isinstance(tags, list):
233+
tags = [str(t).strip() for t in tags if str(t).strip()]
234+
else:
235+
tags = []
236+
237+
# De-dup and clamp
238+
seen = set()
239+
cleaned = []
240+
for tag in tags:
241+
if tag in seen:
242+
continue
243+
seen.add(tag)
244+
cleaned.append(tag)
245+
if len(cleaned) >= tags_limit:
246+
break
247+
248+
return {"title": title, "tags": cleaned}

0 commit comments

Comments
 (0)