44核心工作流:PDF → LLM 概念提取 → SQLite/Neo4j 图谱 → Obsidian 导出
55"""
66
7- import typer
8- import os
97from pathlib import Path
8+
9+ import typer
1010from rich .console import Console
1111from rich .table import Table
12- from rich .panel import Panel
13- from rich .tree import Tree
1412
1513from mkg .database import Database
16- from mkg .pdf_parser import PDFParser , LLMConceptExtractor
17- from mkg .llm import init_llm_from_db , generate
1814from mkg .graph import KnowledgeGraph
15+ from mkg .llm import init_llm_from_db
1916from mkg .neo4j_graph import Neo4jGraph
2017from mkg .obsidian_exporter import ObsidianExporter
18+ from mkg .pdf_parser import LLMConceptExtractor , PDFParser
2119
2220app = typer .Typer (help = "OpenClaw - 学术知识图谱引擎" )
2321console = Console ()
@@ -65,18 +63,19 @@ def get_extractor() -> LLMConceptExtractor:
6563
6664# ========== 核心命令 ==========
6765
66+
@app.command(help="初始化数据库")
def init():
    """Obtain the database handle via ``get_db()`` and report its location.

    The user-facing help text is passed through ``help=`` so the CLI output
    stays in the project's language while this docstring documents the code.
    """
    database = get_db()
    location = database.db_path.absolute()
    console.print("[green]✓ 数据库初始化完成[/green]")
    console.print(f" 路径: {location}")
7473
7574
7675@app .command ()
7776def process (
7877 pdf_path : str = typer .Argument (..., help = "PDF 文件路径" ),
79- verbose : bool = typer .Option (False , "--verbose" , "-v" , help = "显示详细信息" )
78+ verbose : bool = typer .Option (False , "--verbose" , "-v" , help = "显示详细信息" ),
8079):
8180 """
8281 处理 PDF 论文:解析 + LLM 概念提取 + 构建图谱
@@ -126,11 +125,11 @@ def process(
126125 # 3. 存储到数据库
127126 console .print (" [dim]→ 存储到数据库...[/dim]" )
128127 paper_data = {
129- ' doi' : pdf_file .stem , # 用文件名作为 ID
130- ' title' : extracted .title or content .title ,
131- ' abstract' : extracted .abstract or content .abstract ,
132- ' authors' : extracted .authors or content .authors ,
133- ' pdf_path' : str (pdf_file ),
128+ " doi" : pdf_file .stem , # 用文件名作为 ID
129+ " title" : extracted .title or content .title ,
130+ " abstract" : extracted .abstract or content .abstract ,
131+ " authors" : extracted .authors or content .authors ,
132+ " pdf_path" : str (pdf_file ),
134133 }
135134 doi = db .add_paper (paper_data )
136135
@@ -140,7 +139,7 @@ def process(
140139 db .save_concept_extraction (doi , concept_tree , extracted .raw_response )
141140
142141 # 显示结果
143- console .print (f "\n [green]✓ 处理完成[/green]" )
142+ console .print ("\n [green]✓ 处理完成[/green]" )
144143 console .print (f" 根概念: { extracted .concept_tree .concept } " )
145144 console .print (f" 研究问题: { len (extracted .research_questions )} 个" )
146145 console .print (f" 贡献: { len (extracted .contributions )} 个" )
@@ -154,7 +153,7 @@ def process(
154153@app .command ()
155154def batch (
156155 folder : str = typer .Argument (..., help = "PDF 文件夹路径" ),
157- recursive : bool = typer .Option (True , "--recursive/--no-recursive" , help = "递归扫描子目录" )
156+ recursive : bool = typer .Option (True , "--recursive/--no-recursive" , help = "递归扫描子目录" ),
158157):
159158 """
160159 批量处理文件夹中的 PDF
@@ -172,7 +171,7 @@ def batch(
172171 pattern = "**/*.pdf" if recursive else "*.pdf"
173172 pdf_files = list (pdf_dir .glob (pattern ))
174173 if not pdf_files :
175- console .print (f "[yellow]未找到 PDF 文件[/yellow]" )
174+ console .print ("[yellow]未找到 PDF 文件[/yellow]" )
176175 return
177176
178177 console .print (f"\n [bold]发现 { len (pdf_files )} 个 PDF 文件[/bold]\n " )
@@ -192,10 +191,11 @@ def batch(
192191
193192# ========== 图谱浏览 ==========
194193
194+
195195@app .command ()
196196def tree (
197197 root : str = typer .Option (None , "--root" , "-r" , help = "根概念名称" ),
198- view : str = typer .Option ("knowledge" , "--view" , "-v" , help = "视角: knowledge/paper" )
198+ view : str = typer .Option ("knowledge" , "--view" , "-v" , help = "视角: knowledge/paper" ),
199199):
200200 """查看知识图谱树"""
201201 graph = get_graph ()
@@ -205,9 +205,7 @@ def tree(
205205
206206
207207@app .command ()
208- def ls (
209- concept : str = typer .Argument (None , help = "父概念名称" )
210- ):
208+ def ls (concept : str = typer .Argument (None , help = "父概念名称" )):
211209 """列出概念(类似 ls 命令)"""
212210 graph = get_graph ()
213211
@@ -227,7 +225,7 @@ def ls(
227225 table .add_column ("论文数" )
228226
229227 for c in concepts :
230- table .add_row (c [' text' ], c .get (' category' , '-' ), str (c [' paper_count' ]))
228+ table .add_row (c [" text" ], c .get (" category" , "-" ), str (c [" paper_count" ]))
231229
232230 console .print (table )
233231
@@ -245,21 +243,21 @@ def cd(concept: str = typer.Argument(..., help="概念名称")):
245243 console .print (f"\n [bold]📍 { result ['concept' ]['text' ]} [/bold]\n " )
246244
247245 # 父概念
248- if result [' parents' ]:
246+ if result [" parents" ]:
249247 console .print ("[bold]父概念:[/bold]" )
250- for p in result [' parents' ]:
248+ for p in result [" parents" ]:
251249 console .print (f" ← { p ['text' ]} " )
252250
253251 # 子概念
254- if result [' children' ]:
252+ if result [" children" ]:
255253 console .print (f"\n [bold]子概念 ({ len (result ['children' ])} 个):[/bold]" )
256- for c in result [' children' ][:10 ]:
254+ for c in result [" children" ][:10 ]:
257255 console .print (f" → { c ['text' ]} ({ c ['paper_count' ]} 篇)" )
258256
259257 # 论文
260- if result [' papers' ]:
258+ if result [" papers" ]:
261259 console .print (f"\n [bold]论文 ({ len (result ['papers' ])} 篇):[/bold]" )
262- for p in result [' papers' ][:5 ]:
260+ for p in result [" papers" ][:5 ]:
263261 console .print (f" 📄 { p ['title' ][:60 ]} ..." )
264262
265263
@@ -281,7 +279,7 @@ def search(query: str = typer.Argument(..., help="搜索关键词")):
281279 table .add_column ("论文数" )
282280
283281 for c in matched :
284- table .add_row (c [' text' ], c .get (' category' , '-' ), str (c [' paper_count' ]))
282+ table .add_row (c [" text" ], c .get (" category" , "-" ), str (c [" paper_count" ]))
285283
286284 console .print (table )
287285
@@ -294,10 +292,10 @@ def stats():
294292
295293 console .print ("\n [bold]图谱统计[/bold]\n " )
296294
297- papers = stats .get (' papers' , {})
295+ papers = stats .get (" papers" , {})
298296 console .print (f" 论文总数: { papers .get ('total' , 0 )} " )
299297 for status , count in papers .items ():
300- if status != ' total' :
298+ if status != " total" :
301299 console .print (f" - { status } : { count } " )
302300
303301 console .print (f" 概念总数: { stats .get ('concepts' , {}).get ('total' , 0 )} " )
@@ -307,10 +305,11 @@ def stats():
307305
308306# ========== 导出 ==========
309307
308+
310309@app .command ()
311310def export (
312311 vault : str = typer .Argument ("obsidian_vault" , help = "Obsidian Vault 路径" ),
313- neo4j : bool = typer .Option (False , "--neo4j" , help = "从 Neo4j 导出" )
312+ neo4j : bool = typer .Option (False , "--neo4j" , help = "从 Neo4j 导出" ),
314313):
315314 """导出到 Obsidian Vault"""
316315 exporter = ObsidianExporter (vault )
@@ -333,56 +332,67 @@ def export(
@app.command(help="测试 Neo4j 连接")
def neo4j_test():
    """Probe the Neo4j connection and print concept/relation totals.

    Builds a ``Neo4jStore``, checks its ``connected`` flag, and either prints
    the totals returned by ``get_stats()`` or a short troubleshooting list.
    The store is closed in a ``finally`` clause so it is released even if
    ``get_stats()`` raises.
    """
    # Imported lazily so the rest of the CLI does not require the Neo4j store.
    from mkg.neo4j_store import Neo4jStore

    console.print("\n[bold]测试 Neo4j 连接...[/bold]\n")

    store = Neo4jStore()
    try:
        if not store.connected:
            console.print("[red]✗ Neo4j 连接失败[/red]")
            console.print("\n请确保:")
            console.print(" 1. Neo4j 已启动")
            console.print(" 2. .env 配置正确")
            return

        console.print("[green]✓ Neo4j 连接成功[/green]")
        stats = store.get_stats()
        # Missing keys fall back to 0 rather than raising KeyError.
        console.print(f" 概念总数: {stats.get('total_concepts', 0)}")
        console.print(f" 关系总数: {stats.get('total_relations', 0)}")
    finally:
        store.close()
352+
@app.command(help="查看 Neo4j 连接状态和图谱统计")
def neo4j_status():
    """Show whether Neo4j is reachable and print graph statistics.

    When the store reports ``connected``, prints the concept, relation and
    root-concept totals from ``get_stats()``; otherwise prints configuration
    hints. The store is closed in a ``finally`` clause so it is released even
    if ``get_stats()`` raises.
    """
    # Imported lazily so the rest of the CLI does not require the Neo4j store.
    from mkg.neo4j_store import Neo4jStore

    console.print("\n[bold]Neo4j 状态[/bold]\n")

    store = Neo4jStore()
    try:
        if not store.connected:
            console.print("[red]✗ Neo4j 未连接[/red]")
            console.print("\n请确保:")
            console.print(" 1. Neo4j 服务已启动")
            console.print(" 2. .env 中 USE_NEO4J=true 且配置正确")
            return

        console.print("[green]✓ Neo4j 已连接[/green]")
        stats = store.get_stats()
        # Missing keys fall back to 0 rather than raising KeyError.
        console.print(f" 概念总数: {stats.get('total_concepts', 0)}")
        console.print(f" 关系总数: {stats.get('total_relations', 0)}")
        console.print(f" 根概念数: {stats.get('root_concepts', 0)}")
    finally:
        store.close()
351373
352374
@app.command(help="从 SQLite 全量同步到 Neo4j")
def neo4j_sync():
    """Run a full SQLite -> Neo4j sync and print the resulting counts.

    Delegates the work to ``Neo4jStore.sync_all_from_sqlite(db)`` and reads
    ``concepts_synced`` / ``relations_synced`` from its result. The store is
    closed in a ``finally`` clause, which also covers the early return taken
    when Neo4j is not connected and any exception raised during the sync.
    """
    # Imported lazily so the rest of the CLI does not require the Neo4j store.
    from mkg.neo4j_store import Neo4jStore

    console.print("\n[bold]从 SQLite 同步到 Neo4j...[/bold]\n")

    store = Neo4jStore()
    try:
        if not store.connected:
            console.print("[red]Neo4j 未连接[/red]")
            return

        db = get_db()
        result = store.sync_all_from_sqlite(db)
        console.print("[green]✓ 同步完成[/green]")
        console.print(f" 概念同步: {result['concepts_synced']}")
        console.print(f" 关系统计: {result['relations_synced']}")
    finally:
        store.close()
383393
384394
385395# ========== 入口 ==========
386396
if __name__ == "__main__":
    # Dispatch to the Typer application only when run as a script.
    app()
0 commit comments