Skip to content

Commit 21cede4

Browse files
authored
feat(solver): add hybrid index description (#599)
* add graph
* fix bug for None
* add knowledge unit extra
* fix_prompt
* extract common function into benchmark component
* format code
* format code
* format code
* fix benchmark knowledge unit
* fix node
* add common component
* Revert "remove local bge model and dependency scikit-learn" This reverts commit ebb068f.
* remove unused pkg
* add decompose
* change hybrid default config
* add trace log
* fix some bugs
* with prompt
* support benchmark unit
* support kag thinker model
* support kag thinker model config
* remove default host addr and project id
* add kag model infer script
* add kag model infer script
* modify prompt for kag model
* modify hybrid index docs
* use ttl cache
1 parent 984c2f4 commit 21cede4

2 files changed

Lines changed: 39 additions & 11 deletions

File tree

kag/common/conf.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
from pathlib import Path
2121
from typing import Union, Optional, Dict
2222

23+
import knext.common.cache
2324
from knext.project.client import ProjectClient
2425

2526
logger = logging.getLogger()
@@ -206,13 +207,13 @@ def update_conf(self, configs: dict):
206207
"""
207208
KAG_QA_TASK_CONFIG stores per-task configuration and should be cleaned up after use.
208209
"""
209-
KAG_QA_TASK_CONFIG: Dict[str, KAGConfigMgr] = {}
210+
KAG_QA_TASK_CONFIG = knext.common.cache.LinkCache(maxsize=100, ttl=300)
210211
KAG_QA_TASK_CONFIG_LOCK = threading.Lock()
211212

212213

213214
class KAGConfigAccessor:
214215
@staticmethod
215-
def get_config(task_with_kb_id=None):
216+
def get_config(task_with_kb_id=None) -> KAGConfigMgr:
216217
"""
217218
Get the configuration information.
218219

kag/indexer/kag_index_manager.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def description(self) -> str:
9191
@property
9292
def schema(self) -> str:
9393
return """
94-
AtomicQuery(原子问): EntityType
94+
AtomicQuery(原子问): IndexType
9595
properties:
9696
content(内容): Text
9797
index: TextAndVector
@@ -200,7 +200,7 @@ def description(self) -> str:
200200
@property
201201
def schema(self) -> str:
202202
return """
203-
Chunk(文本块): EntityType
203+
Chunk(文本块): IndexType
204204
properties:
205205
content(内容): Text
206206
index: TextAndVector
@@ -288,7 +288,7 @@ def description(self) -> str:
288288
@property
289289
def schema(self) -> str:
290290
return """
291-
Table(表格): EntityType
291+
Table(表格): IndexType
292292
properties:
293293
content(内容): Text
294294
index: TextAndVector
@@ -370,7 +370,7 @@ def description(self) -> str:
370370
@property
371371
def schema(self) -> str:
372372
return """
373-
Summary(文本摘要): EntityType
373+
Summary(文本摘要): IndexType
374374
properties:
375375
content(内容): Text
376376
index: TextAndVector
@@ -449,7 +449,7 @@ def description(self) -> str:
449449
@property
450450
def schema(self) -> str:
451451
return """
452-
Outline(标题大纲): EntityType
452+
Outline(标题大纲): IndexType
453453
properties:
454454
content(内容): Text
455455
index: TextAndVector
@@ -525,15 +525,42 @@ def description(self) -> str:
525525
@property
526526
def schema(self) -> str:
527527
return """
528-
Chunk(文本块): EntityType
528+
Chunk(文本块): IndexType
529529
properties:
530530
content(内容): Text
531-
index: TextAndVector
531+
index: TextAndVector
532+
533+
KnowledgeUnit(知识点): IndexType
534+
properties:
535+
structedContent(结构化文本): Text
536+
index: TextAndVector
537+
ontology(本体): Text
538+
desc(描述): Text
539+
index: TextAndVector
540+
relatedQuery(关联问): AtomicQuery
541+
extendedKnowledge(关联外扩知识点):Text
542+
content(内容): Text
543+
index: TextAndVector
544+
knowledgeType(知识类型): Text
545+
546+
AtomicQuery(原子问): IndexType
547+
properties:
548+
title(标题): Text
549+
index: TextAndVector
550+
relations:
551+
relateTo(关联文本块): Chunk
552+
similar(相似问题): AtomicQuery
553+
relatedTo(相关): KnowledgeUnit
532554
"""
533555

534556
@property
535557
def index_cost(self) -> str:
536558
msg = """
559+
索引构建的成本:
560+
561+
1、抽取模型消耗:7B 4634332 tokens
562+
2、耗时:1425 秒
563+
3、文件字数:10万字
537564
"""
538565
return msg
539566

@@ -552,7 +579,7 @@ def applicable_scenarios(self) -> str:
552579

553580
@property
554581
def retrieval_method(self) -> str:
555-
return "通过构建chunk 与 图谱的关联,实现chunk 的检索,一般用于检索与图谱相关的chunk"
582+
return "通过构建chunk 与 图谱的关联,实现图谱、chunk 的检索,一般用于检索与图谱相关的chunk"
556583

557584
@classmethod
558585
def build_extractor_config(
@@ -616,7 +643,7 @@ def build_retriever_config(
616643
"kag_qa_task_config_key": kb_task_project_id,
617644
},
618645
{
619-
"type": "kg_fr_open_spg",
646+
"type": "kg_fr_knowledge_unit",
620647
"top_k": 20,
621648
"path_select": {
622649
"type": "fuzzy_one_hop_select",

0 commit comments

Comments
 (0)