diff --git a/kag/common/conf.py b/kag/common/conf.py index e417105c3..72b96d688 100644 --- a/kag/common/conf.py +++ b/kag/common/conf.py @@ -261,8 +261,12 @@ def init_env(config_file: str = None): else: msg = "Done init config from local file" logger.debug(msg) - os.environ[KAGConstants.ENV_KAG_PROJECT_ID] = str(KAG_PROJECT_CONF.project_id) - os.environ[KAGConstants.ENV_KAG_PROJECT_HOST_ADDR] = str(KAG_PROJECT_CONF.host_addr) + if KAG_PROJECT_CONF.project_id: + os.environ[KAGConstants.ENV_KAG_PROJECT_ID] = str(KAG_PROJECT_CONF.project_id) + if KAG_PROJECT_CONF.host_addr: + os.environ[KAGConstants.ENV_KAG_PROJECT_HOST_ADDR] = str( + KAG_PROJECT_CONF.host_addr + ) if len(KAG_CONFIG.all_config) > 0: dump_flag = os.getenv(KAGConstants.ENV_KAG_DEBUG_DUMP_CONFIG) if dump_flag is not None and dump_flag.strip() == "1": diff --git a/kag/examples/FinAlibaba/kag_config.yaml b/kag/examples/FinAlibaba/kag_config.yaml index 07019f01c..2946d78bd 100644 --- a/kag/examples/FinAlibaba/kag_config.yaml +++ b/kag/examples/FinAlibaba/kag_config.yaml @@ -179,7 +179,7 @@ kag_solver_pipeline: executors: - *retriever generator: - type: llm_generator + type: llm_index_generator llm_client: *chat_llm generated_prompt: type: default_multi_hop_generator diff --git a/kag/examples/baike/kag_config.yaml b/kag/examples/baike/kag_config.yaml index 10887f7ef..0f8e0a6e6 100644 --- a/kag/examples/baike/kag_config.yaml +++ b/kag/examples/baike/kag_config.yaml @@ -74,8 +74,10 @@ graph_api: &graph_api type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi + kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -87,9 +89,14 @@ kg_cs: &kg_cs recognition_threshold: 0.9 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc kg_fr: &kg_fr - type: kg_fr_open_spg + type: kg_fr_knowledge_unit top_k: 20 graph_api: *graph_api search_api: *search_api @@ -111,12 +118,18 @@ kg_fr: &kg_fr recognition_threshold: 0.8 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api @@ -135,12 +148,16 @@ kag_hybrid_executor: &kag_hybrid_executor_conf kag_output_executor: &kag_output_executor_conf type: kag_output_executor + llm_module: *chat_llm + kag_deduce_executor: &kag_deduce_executor_conf type: kag_deduce_executor + llm_module: *chat_llm py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor - llm: *openie_llm + llm: *chat_llm + kag_solver_pipeline: type: kag_static_pipeline @@ -157,7 +174,7 @@ kag_solver_pipeline: - *kag_deduce_executor_conf - *kag_output_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *chat_llm generated_prompt: type: default_refer_generator_prompt diff --git a/kag/examples/baike/schema/BaiKe.schema b/kag/examples/baike/schema/BaiKe.schema index 756f44cf3..fc0e0c6cb 100644 --- a/kag/examples/baike/schema/BaiKe.schema +++ b/kag/examples/baike/schema/BaiKe.schema @@ -1,110 +1,122 @@ namespace BaiKe +SemanticConcept(语义概念): EntityType + properties: + desc(内容): Text + index: Text + isA(上位): SemanticConcept + ArtificialObject(人造物体): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): ArtificialObject + officialName(标准名): ArtificialObject Astronomy(天文学): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Astronomy + officialName(标准名): Astronomy Building(建筑): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Building + officialName(标准名): Building Creature(生物): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Creature + officialName(标准名): Creature Concept(概念): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Concept + officialName(标准名): Concept Date(日期): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Date + officialName(标准名): Date GeographicLocation(地理位置): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): GeographicLocation + officialName(标准名): GeographicLocation Keyword(关键词): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Keyword + officialName(标准名): Keyword Medicine(药物): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text - + semanticType(语义类型): SemanticConcept + synonyms(同义词): Medicine + officialName(标准名): Medicine NaturalScience(自然科学): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): NaturalScience + officialName(标准名): NaturalScience Organization(组织机构): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Organization + officialName(标准名): Organization Person(人物): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Person + officialName(标准名): Person Transport(运输): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Transport + officialName(标准名): Transport Works(作品): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text - -Others(其他): EntityType - properties: - desc(描述): Text - index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Works + officialName(标准名): Works Event(事件): EventType properties: @@ -118,8 +130,10 @@ Event(事件): EventType type(事件类型): Text index: Text - -SemanticConcept(语义概念): EntityType +Others(其他): EntityType properties: - desc(内容): Text - index: Text \ No newline at end of file + desc(描述): Text + index: TextAndVector + semanticType(语义类型): SemanticConcept + synonyms(同义词): Others + officialName(标准名): Others \ No newline at end of file diff --git a/kag/examples/csqa/kag_config.yaml b/kag/examples/csqa/kag_config.yaml index 8c3c69654..de482b81e 100644 --- a/kag/examples/csqa/kag_config.yaml +++ b/kag/examples/csqa/kag_config.yaml @@ -79,6 +79,7 @@ chain_vectorizer: kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -90,6 +91,11 @@ kg_cs: &kg_cs recognition_threshold: 0.9 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc kg_fr: &kg_fr type: kg_fr_open_spg @@ -114,28 +120,24 @@ kg_fr: &kg_fr recognition_threshold: 0.8 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *chat_llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -144,19 +146,21 @@ kag_hybrid_executor: &kag_hybrid_executor_conf - *rc merger: type: kag_merger - enable_summary: true + enable_summary: true kag_output_executor: &kag_output_executor_conf type: kag_output_executor + llm_module: *chat_llm kag_deduce_executor: &kag_deduce_executor_conf type: kag_deduce_executor - + llm_module: *chat_llm py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor llm: *chat_llm + kag_solver_pipeline: type: kag_static_pipeline planner: @@ -172,7 +176,7 @@ kag_solver_pipeline: - *kag_deduce_executor_conf - *kag_output_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *chat_llm generated_prompt: type: default_refer_generator_prompt diff --git a/kag/examples/domain_kg/kag_config.yaml b/kag/examples/domain_kg/kag_config.yaml index f98f55215..fabb96dc3 100644 --- a/kag/examples/domain_kg/kag_config.yaml +++ b/kag/examples/domain_kg/kag_config.yaml @@ -100,6 +100,7 @@ chain_vectorizer: kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -111,6 +112,11 @@ kg_cs: &kg_cs recognition_threshold: 0.9 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc kg_fr: &kg_fr type: kg_fr_open_spg @@ -135,28 +141,24 @@ kg_fr: &kg_fr recognition_threshold: 0.8 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *chat_llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -165,14 +167,15 @@ kag_hybrid_executor: &kag_hybrid_executor_conf - *rc merger: type: kag_merger - enable_summary: true + enable_summary: true kag_output_executor: &kag_output_executor_conf type: kag_output_executor + llm_module: *chat_llm kag_deduce_executor: &kag_deduce_executor_conf type: kag_deduce_executor - + llm_module: *chat_llm py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor @@ -193,7 +196,7 @@ kag_solver_pipeline: - *kag_deduce_executor_conf - *kag_output_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *chat_llm generated_prompt: type: default_refer_generator_prompt diff --git a/kag/examples/example_config.yaml b/kag/examples/example_config.yaml index 5d0469722..80527c48e 100644 --- a/kag/examples/example_config.yaml +++ b/kag/examples/example_config.yaml @@ -30,7 +30,7 @@ project: host_addr: http://127.0.0.1:8887 id: "1" language: en - namespace: HotpotQA + namespace: HotpotQATest #------------project configuration end----------------# #------------kag-builder configuration start----------------# @@ -38,14 +38,14 @@ kag_builder_pipeline: chain: type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain extractor: - type: schema_free_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor + type: knowledge_unit_extractor llm: *openie_llm ner_prompt: - type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt - std_prompt: - type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt + type: knowledge_unit_ner triple_prompt: - type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt + type: knowledge_unit_triple + kn_prompt: + type: knowledge_unit reader: type: dict_reader # kag.builder.component.reader.dict_reader.DictReader post_processor: @@ -75,6 +75,7 @@ graph_api: &graph_api kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -85,14 +86,19 @@ kg_cs: &kg_cs search_api: *search_api recognition_threshold: 0.9 exclude_types: - - "Chunk" + - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc kg_fr: &kg_fr - type: kg_fr_open_spg + type: kg_fr_knowledge_unit top_k: 20 graph_api: *graph_api search_api: *search_api - vectorize_model: *vectorize_model + vectorize_model: *vectorize_model path_select: type: fuzzy_one_hop_select llm_client: *openie_llm @@ -109,29 +115,25 @@ kg_fr: &kg_fr search_api: *search_api recognition_threshold: 0.8 exclude_types: - - "Chunk" + - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *chat_llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -142,13 +144,13 @@ kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_merger enable_summary: true - kag_output_executor: &kag_output_executor_conf type: kag_output_executor + llm_module: *chat_llm kag_deduce_executor: &kag_deduce_executor_conf type: kag_deduce_executor - + llm_module: *chat_llm py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor diff --git a/kag/examples/medicine/kag_config.yaml b/kag/examples/medicine/kag_config.yaml index 622e35908..915e6f738 100644 --- a/kag/examples/medicine/kag_config.yaml +++ b/kag/examples/medicine/kag_config.yaml @@ -105,6 +105,7 @@ chain_vectorizer: kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -116,6 +117,11 @@ kg_cs: &kg_cs recognition_threshold: 0.9 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc kg_fr: &kg_fr type: kg_fr_open_spg @@ -140,28 +146,24 @@ kg_fr: &kg_fr recognition_threshold: 0.8 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *chat_llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -170,20 +172,19 @@ kag_hybrid_executor: &kag_hybrid_executor_conf - *rc merger: type: kag_merger - enable_summary: true - + enable_summary: true kag_output_executor: &kag_output_executor_conf type: kag_output_executor + llm_module: *chat_llm kag_deduce_executor: &kag_deduce_executor_conf type: kag_deduce_executor - + llm_module: *chat_llm py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor llm: *chat_llm - kag_solver_pipeline: type: kag_static_pipeline planner: @@ -199,7 +200,7 @@ kag_solver_pipeline: - *kag_deduce_executor_conf - *kag_output_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *chat_llm generated_prompt: type: default_refer_generator_prompt diff --git a/kag/examples/riskmining/kag_config.yaml b/kag/examples/riskmining/kag_config.yaml index c343ed558..1acb0867e 100644 --- a/kag/examples/riskmining/kag_config.yaml +++ b/kag/examples/riskmining/kag_config.yaml @@ -48,6 +48,7 @@ chain_vectorizer: kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -59,6 +60,11 @@ kg_cs: &kg_cs recognition_threshold: 0.9 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc kg_fr: &kg_fr type: kg_fr_open_spg @@ -83,28 +89,24 @@ kg_fr: &kg_fr recognition_threshold: 0.8 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *chat_llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -113,15 +115,15 @@ kag_hybrid_executor: &kag_hybrid_executor_conf - *rc merger: type: kag_merger - enable_summary: true - + enable_summary: true kag_output_executor: &kag_output_executor_conf type: kag_output_executor + llm_module: *chat_llm kag_deduce_executor: &kag_deduce_executor_conf type: kag_deduce_executor - + llm_module: *chat_llm py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor @@ -142,7 +144,7 @@ kag_solver_pipeline: - *kag_deduce_executor_conf - *kag_output_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *chat_llm generated_prompt: type: resp_riskmining diff --git a/kag/examples/supplychain/kag_config.yaml b/kag/examples/supplychain/kag_config.yaml index ddef0b1c9..a7a00df32 100644 --- a/kag/examples/supplychain/kag_config.yaml +++ b/kag/examples/supplychain/kag_config.yaml @@ -47,6 +47,7 @@ chain_vectorizer: kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -58,6 +59,11 @@ kg_cs: &kg_cs recognition_threshold: 0.9 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc kg_fr: &kg_fr type: kg_fr_open_spg @@ -82,28 +88,24 @@ kg_fr: &kg_fr recognition_threshold: 0.8 exclude_types: - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *chat_llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -112,15 +114,15 @@ kag_hybrid_executor: &kag_hybrid_executor_conf - *rc merger: type: kag_merger - enable_summary: true - + enable_summary: true kag_output_executor: &kag_output_executor_conf type: kag_output_executor + llm_module: *chat_llm kag_deduce_executor: &kag_deduce_executor_conf type: kag_deduce_executor - + llm_module: *chat_llm py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor @@ -141,7 +143,7 @@ kag_solver_pipeline: - *kag_deduce_executor_conf - *kag_output_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *chat_llm generated_prompt: type: resp_supplychain diff --git a/kag/open_benchmark/2wiki/kag_config_graph.yaml b/kag/open_benchmark/2wiki/kag_config_graph.yaml index 78d81dbde..48838bec8 100644 --- a/kag/open_benchmark/2wiki/kag_config_graph.yaml +++ b/kag/open_benchmark/2wiki/kag_config_graph.yaml @@ -4,6 +4,11 @@ llm: &llm api_key: !ENV LLM_API_KEY model: qwen2.5-72b-instruct +ner_llm: &ner_llm + type: maas + base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/ + api_key: !ENV LLM_API_KEY + model: qwen2.5-7b-instruct vectorize_model: &vectorize_model api_key: !ENV VECTORIZE_MODEL_API_KEY @@ -71,8 +76,10 @@ graph_api: &graph_api type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi -kg_cs: + +kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -83,18 +90,26 @@ kg_cs: search_api: *search_api recognition_threshold: 0.9 exclude_types: - - "Chunk" - -kg_fr: + - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc + +kg_fr: &kg_fr type: kg_fr_open_spg top_k: 20 + graph_api: *graph_api + search_api: *search_api + vectorize_model: *vectorize_model path_select: type: fuzzy_one_hop_select - llm_client: *llm + llm_client: *ner_llm graph_api: *graph_api search_api: *search_api ppr_chunk_retriever_tool: - type: ppr_chunk_retriever_legacy + type: ppr_chunk_retriever llm_client: *llm graph_api: *graph_api search_api: *search_api @@ -104,40 +119,34 @@ kg_fr: search_api: *search_api recognition_threshold: 0.8 exclude_types: - - "Chunk" - -rc: + - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc + +rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf - type: kag_hybrid_executor - lf_rewriter: - type: kag_spo_lf - llm_client: *llm - lf_trans_prompt: - type: default_spo_retriever_decompose - vectorize_model: *vectorize_model - flow: | - kg_cs->kg_fr->kag_merger;rc->kag_merger - + type: kag_hybrid_retrieval_executor + retrievers: + - *kg_cs + - *kg_fr + - *rc + merger: + type: kag_merger + enable_summary: true kag_solver_pipeline: type: kag_static_pipeline @@ -151,8 +160,10 @@ kag_solver_pipeline: executors: - *kag_hybrid_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *llm generated_prompt: type: default_multi_hop_generator + chunk_reranker: + type: rerank_by_vector #------------kag-solver configuration end----------------# diff --git a/kag/open_benchmark/2wiki/kag_config_knowledge_unit.yaml b/kag/open_benchmark/2wiki/kag_config_knowledge_unit.yaml index 1e282ea49..335e69523 100644 --- a/kag/open_benchmark/2wiki/kag_config_knowledge_unit.yaml +++ b/kag/open_benchmark/2wiki/kag_config_knowledge_unit.yaml @@ -106,7 +106,7 @@ kg_fr: &kg_fr top_k: 20 graph_api: *graph_api search_api: *search_api - vectorize_model: *vectorize_model + vectorize_model: *vectorize_model path_select: type: fuzzy_one_hop_select llm_client: *ner_llm @@ -133,9 +133,11 @@ kg_fr: &kg_fr rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api + score_threshold: 0.65 graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model @@ -149,16 +151,6 @@ r3: &r3 score_threshold: 0.85 top_k: 10 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -182,7 +174,6 @@ py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor llm: *llm - kag_solver_pipeline: type: kag_static_pipeline max_iteration: 3 diff --git a/kag/open_benchmark/2wiki/src/eval.py b/kag/open_benchmark/2wiki/src/eval.py index 5c7cc62eb..9570e06da 100644 --- a/kag/open_benchmark/2wiki/src/eval.py +++ b/kag/open_benchmark/2wiki/src/eval.py @@ -11,7 +11,6 @@ from kag.examples.utils import delay_run from kag.open_benchmark.utils.eval_qa import EvalQa, running_paras, do_main from kag.common.conf import KAG_CONFIG -from kag.common.registry import import_modules_from_path from kag.interface import SolverPipelineABC from kag.solver.reporter.trace_log_reporter import TraceLogReporter @@ -93,10 +92,8 @@ def eval(qa_file_path, thread_num=10, upper_limit=1000, collect_file="benchmark. if __name__ == "__main__": # benchmark common component - common_component = os.path.join( - os.path.abspath(os.path.dirname(__file__)), "../../common_component" - ) - import_modules_from_path(common_component) + import kag.open_benchmark.common_component # noqa: F401 + delay_run(hours=0) # 解析命令行参数 parser = running_paras() diff --git a/kag/open_benchmark/hotpotqa/kag_config_graph.yaml b/kag/open_benchmark/hotpotqa/kag_config_graph.yaml index 6e4bafe25..bc727a0a4 100644 --- a/kag/open_benchmark/hotpotqa/kag_config_graph.yaml +++ b/kag/open_benchmark/hotpotqa/kag_config_graph.yaml @@ -4,6 +4,11 @@ llm: &llm api_key: !ENV LLM_API_KEY model: qwen2.5-72b-instruct +ner_llm: &ner_llm + type: maas + base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/ + api_key: !ENV LLM_API_KEY + model: qwen2.5-7b-instruct vectorize_model: &vectorize_model api_key: !ENV VECTORIZE_MODEL_API_KEY @@ -70,9 +75,9 @@ search_api: &search_api graph_api: &graph_api type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi - -kg_cs: +kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -83,18 +88,26 @@ kg_cs: search_api: *search_api recognition_threshold: 0.9 exclude_types: - - "Chunk" - -kg_fr: + - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc + +kg_fr: &kg_fr type: kg_fr_open_spg top_k: 20 + graph_api: *graph_api + search_api: *search_api + vectorize_model: *vectorize_model path_select: type: fuzzy_one_hop_select - llm_client: *llm + llm_client: *ner_llm graph_api: *graph_api search_api: *search_api ppr_chunk_retriever_tool: - type: ppr_chunk_retriever_legacy + type: ppr_chunk_retriever llm_client: *llm graph_api: *graph_api search_api: *search_api @@ -104,40 +117,34 @@ kg_fr: search_api: *search_api recognition_threshold: 0.8 exclude_types: - - "Chunk" - -rc: + - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc + +rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf - type: kag_hybrid_executor - lf_rewriter: - type: kag_spo_lf - llm_client: *llm - lf_trans_prompt: - type: default_spo_retriever_decompose - vectorize_model: *vectorize_model - flow: | - kg_cs->kg_fr->kag_merger;rc->kag_merger - + type: kag_hybrid_retrieval_executor + retrievers: + - *kg_cs + - *kg_fr + - *rc + merger: + type: kag_merger + enable_summary: true kag_solver_pipeline: type: kag_static_pipeline @@ -151,8 +158,10 @@ kag_solver_pipeline: executors: - *kag_hybrid_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *llm generated_prompt: type: default_multi_hop_generator + chunk_reranker: + type: rerank_by_vector #------------kag-solver configuration end----------------# diff --git a/kag/open_benchmark/hotpotqa/kag_config_knowledge_unit.yaml b/kag/open_benchmark/hotpotqa/kag_config_knowledge_unit.yaml index 7426195a5..3a5fe057a 100644 --- a/kag/open_benchmark/hotpotqa/kag_config_knowledge_unit.yaml +++ b/kag/open_benchmark/hotpotqa/kag_config_knowledge_unit.yaml @@ -106,7 +106,7 @@ kg_fr: &kg_fr top_k: 20 graph_api: *graph_api search_api: *search_api - vectorize_model: *vectorize_model + vectorize_model: *vectorize_model path_select: type: fuzzy_one_hop_select llm_client: *ner_llm @@ -133,9 +133,11 @@ kg_fr: &kg_fr rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api + score_threshold: 0.65 graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model @@ -149,16 +151,6 @@ r3: &r3 score_threshold: 0.85 top_k: 10 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -182,7 +174,6 @@ py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor llm: *llm - kag_solver_pipeline: type: kag_static_pipeline max_iteration: 3 diff --git a/kag/open_benchmark/hotpotqa/src/eval.py b/kag/open_benchmark/hotpotqa/src/eval.py index 8e572cdcd..7a71f9be9 100644 --- a/kag/open_benchmark/hotpotqa/src/eval.py +++ b/kag/open_benchmark/hotpotqa/src/eval.py @@ -6,7 +6,7 @@ from kag.common.utils import processing_phrases from kag.interface import LLMClient -from kag.common.registry import Functor, import_modules_from_path +from kag.common.registry import Functor from kag.common.benchmarks.evaluate import Evaluate from kag.examples.utils import delay_run from kag.open_benchmark.utils.eval_qa import EvalQa, running_paras, do_main @@ -78,10 +78,8 @@ def eval(qa_file_path, thread_num=10, upper_limit=1000, collect_file="benchmark. if __name__ == "__main__": # benchmark common component - common_component = os.path.join( - os.path.abspath(os.path.dirname(__file__)), "../../common_component" - ) - import_modules_from_path(common_component) + import kag.open_benchmark.common_component # noqa: F401 + delay_run(hours=0) # 解析命令行参数 parser = running_paras() diff --git a/kag/open_benchmark/musique/kag_config_graph.yaml b/kag/open_benchmark/musique/kag_config_graph.yaml index 6c8ecba1c..39430bac4 100644 --- a/kag/open_benchmark/musique/kag_config_graph.yaml +++ b/kag/open_benchmark/musique/kag_config_graph.yaml @@ -4,6 +4,11 @@ llm: &llm api_key: !ENV LLM_API_KEY model: qwen2.5-72b-instruct +ner_llm: &ner_llm + type: maas + base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/ + api_key: !ENV LLM_API_KEY + model: qwen2.5-7b-instruct vectorize_model: &vectorize_model api_key: !ENV VECTORIZE_MODEL_API_KEY @@ -70,9 +75,9 @@ search_api: &search_api graph_api: &graph_api type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi - -kg_cs: +kg_cs: &kg_cs type: kg_cs_open_spg + priority: 0 path_select: type: exact_one_hop_select graph_api: *graph_api @@ -83,18 +88,26 @@ kg_cs: search_api: *search_api recognition_threshold: 0.9 exclude_types: - - "Chunk" - -kg_fr: + - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc + +kg_fr: &kg_fr type: kg_fr_open_spg top_k: 20 + graph_api: *graph_api + search_api: *search_api + vectorize_model: *vectorize_model path_select: type: fuzzy_one_hop_select - llm_client: *llm + llm_client: *ner_llm graph_api: *graph_api search_api: *search_api ppr_chunk_retriever_tool: - type: ppr_chunk_retriever_legacy + type: ppr_chunk_retriever llm_client: *llm graph_api: *graph_api search_api: *search_api @@ -104,40 +117,34 @@ kg_fr: search_api: *search_api recognition_threshold: 0.8 exclude_types: - - "Chunk" - -rc: + - Chunk + - AtomicQuery + - KnowledgeUnit + - Summary + - Outline + - Doc + +rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model top_k: 20 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf - type: kag_hybrid_executor - lf_rewriter: - type: kag_spo_lf - llm_client: *llm - lf_trans_prompt: - type: default_spo_retriever_decompose - vectorize_model: *vectorize_model - flow: | - kg_cs->kg_fr->kag_merger;rc->kag_merger - + type: kag_hybrid_retrieval_executor + retrievers: + - *kg_cs + - *kg_fr + - *rc + merger: + type: kag_merger + enable_summary: true kag_solver_pipeline: type: kag_static_pipeline @@ -151,8 +158,10 @@ kag_solver_pipeline: executors: - *kag_hybrid_executor_conf generator: - type: llm_generator + type: llm_index_generator llm_client: *llm generated_prompt: type: default_multi_hop_generator + chunk_reranker: + type: rerank_by_vector #------------kag-solver configuration end----------------# diff --git a/kag/open_benchmark/musique/kag_config_knowledge_unit.yaml b/kag/open_benchmark/musique/kag_config_knowledge_unit.yaml index 3c1f14b5a..38c67ec80 100644 --- a/kag/open_benchmark/musique/kag_config_knowledge_unit.yaml +++ b/kag/open_benchmark/musique/kag_config_knowledge_unit.yaml @@ -106,7 +106,7 @@ kg_fr: &kg_fr top_k: 20 graph_api: *graph_api search_api: *search_api - vectorize_model: *vectorize_model + vectorize_model: *vectorize_model path_select: type: fuzzy_one_hop_select llm_client: *ner_llm @@ -133,9 +133,11 @@ kg_fr: &kg_fr rc: &rc type: rc_open_spg vector_chunk_retriever: - type: vector_chunk_retriever_legacy + type: vector_chunk_retriever vectorize_model: *vectorize_model + score_threshold: 0.65 search_api: *search_api + score_threshold: 0.65 graph_api: *graph_api search_api: *search_api vectorize_model: *vectorize_model @@ -149,16 +151,6 @@ r3: &r3 score_threshold: 0.85 top_k: 10 -kag_merger: - type: kg_merger - top_k: 20 - llm_module: *llm - summary_prompt: - type: default_thought_then_answer - vectorize_model: *vectorize_model - graph_api: *graph_api - search_api: *search_api - kag_hybrid_executor: &kag_hybrid_executor_conf type: kag_hybrid_retrieval_executor retrievers: @@ -182,7 +174,6 @@ py_code_based_math_executor: &py_code_based_math_executor_conf type: py_code_based_math_executor llm: *llm - kag_solver_pipeline: type: kag_static_pipeline max_iteration: 3 diff --git a/kag/templates/schema/{{default}}.schema.tmpl b/kag/templates/schema/{{default}}.schema.tmpl index 4f7b2523a..5e911bc9f 100644 --- a/kag/templates/schema/{{default}}.schema.tmpl +++ b/kag/templates/schema/{{default}}.schema.tmpl @@ -1,110 +1,122 @@ namespace {{namespace}} +SemanticConcept(语义概念): EntityType + properties: + desc(内容): Text + index: Text + isA(上位): SemanticConcept + ArtificialObject(人造物体): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): ArtificialObject + officialName(标准名): ArtificialObject Astronomy(天文学): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Astronomy + officialName(标准名): Astronomy Building(建筑): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Building + officialName(标准名): Building Creature(生物): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Creature + officialName(标准名): Creature Concept(概念): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Concept + officialName(标准名): Concept Date(日期): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Date + officialName(标准名): Date GeographicLocation(地理位置): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): GeographicLocation + officialName(标准名): GeographicLocation Keyword(关键词): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Keyword + officialName(标准名): Keyword Medicine(药物): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text - + semanticType(语义类型): SemanticConcept + synonyms(同义词): Medicine + officialName(标准名): Medicine NaturalScience(自然科学): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): NaturalScience + officialName(标准名): NaturalScience Organization(组织机构): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Organization + officialName(标准名): Organization Person(人物): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Person + officialName(标准名): Person Transport(运输): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Transport + officialName(标准名): Transport Works(作品): EntityType properties: desc(描述): Text index: TextAndVector - semanticType(语义类型): Text - index: Text - -Others(其他): EntityType - properties: - desc(描述): Text - index: TextAndVector - semanticType(语义类型): Text - index: Text + semanticType(语义类型): SemanticConcept + synonyms(同义词): Works + officialName(标准名): Works Event(事件): EventType properties: @@ -118,8 +130,10 @@ Event(事件): EventType type(事件类型): Text index: Text - -SemanticConcept(语义概念): EntityType +Others(其他): EntityType properties: - desc(内容): Text - index: Text \ No newline at end of file + desc(描述): Text + index: TextAndVector + semanticType(语义类型): SemanticConcept + synonyms(同义词): Others + officialName(标准名): Others \ No newline at end of file diff --git a/knext/command/sub_command/schema.py b/knext/command/sub_command/schema.py index 535aa9639..c1dfc5271 100644 --- a/knext/command/sub_command/schema.py +++ b/knext/command/sub_command/schema.py @@ -78,5 +78,5 @@ def reg_concept_rule(file): """ Register a concept rule according to DSL file. """ - SPGConceptRuleMarkLang(file) + SPGConceptRuleMarkLang(file, host_addr=env.host_addr, project_id=env.project_id) click.secho(f"Concept rule is successfully registered", fg="bright_green") diff --git a/knext/schema/marklang/concept_rule_ml.py b/knext/schema/marklang/concept_rule_ml.py index c55062fac..fa30c5f0e 100644 --- a/knext/schema/marklang/concept_rule_ml.py +++ b/knext/schema/marklang/concept_rule_ml.py @@ -44,9 +44,9 @@ class SPGConceptRuleMarkLang: is_reasoning = False is_priority = False - def __init__(self, filename): + def __init__(self, filename, host_addr=None, project_id=None,): self.current_line_num = 0 - self.session = SchemaClient().create_session() + self.session = SchemaClient(host_addr=host_addr, project_id=project_id).create_session() self.concept_client = rest.ConceptApi() self.load_script(filename)