Skip to content

Commit 54181d5

Browse files
Merge branch 'main' of https://github.com/open-sciencelab/GraphGen into feature/map-and-all-reduce
2 parents 6112d83 + 2024c9d commit 54181d5

16 files changed

Lines changed: 332 additions & 284 deletions

File tree

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ source-roots=
100100

101101
# When enabled, pylint would attempt to guess common misconfiguration and emit
102102
# user-friendly hints instead of false-positive error messages.
103-
suggestion-mode=yes
103+
# suggestion-mode=yes
104104

105105
# Allow loading of arbitrary C extensions. Extensions are imported into the
106106
# active Python interpreter and may run arbitrary code.

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
[![Hugging Face](https://img.shields.io/badge/Demo-on%20HF-blue?logo=huggingface&logoColor=yellow)](https://huggingface.co/spaces/chenzihong/GraphGen)
1818
[![Model Scope](https://img.shields.io/badge/%F0%9F%A4%96%20Demo-on%20MS-green)](https://modelscope.cn/studios/chenzihong/GraphGen)
19-
[![OpenXLab](https://img.shields.io/badge/Demo-on%20OpenXLab-blue?logo=openxlab&logoColor=yellow)](https://g-app-center-120612-6433-jpdvmvp.openxlab.space)
2019

2120

2221
GraphGen: Enhancing Supervised Fine-Tuning for LLMs with Knowledge-Driven Synthetic Data Generation
@@ -107,7 +106,7 @@ Users can flexibly configure according to the needs of synthetic data.
107106

108107
## 🚀 Quick Start
109108

110-
Experience GraphGen through [Web](https://g-app-center-120612-6433-jpdvmvp.openxlab.space) or [Backup Web Entrance](https://openxlab.org.cn/apps/detail/chenzihonga/GraphGen)
109+
Experience GraphGen Demo through [Huggingface](https://huggingface.co/spaces/chenzihong/GraphGen) or [Modelscope](https://modelscope.cn/studios/chenzihong/GraphGen).
111110

112111
For any questions, please check [FAQ](https://github.com/open-sciencelab/GraphGen/issues/10), open new [issue](https://github.com/open-sciencelab/GraphGen/issues) or join our [wechat group](https://cdn.vansin.top/internlm/dou.jpg) and ask.
113112

README_zh.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
[![Hugging Face](https://img.shields.io/badge/Demo-on%20HF-blue?logo=huggingface&logoColor=yellow)](https://huggingface.co/spaces/chenzihong/GraphGen)
1818
[![Model Scope](https://img.shields.io/badge/%F0%9F%A4%96%20Demo-on%20MS-green)](https://modelscope.cn/studios/chenzihong/GraphGen)
19-
[![OpenXLab](https://img.shields.io/badge/Demo-on%20OpenXLab-blue?logo=openxlab&logoColor=yellow)](https://g-app-center-120612-6433-jpdvmvp.openxlab.space)
2019

2120
GraphGen: Enhancing Supervised Fine-Tuning for LLMs with Knowledge-Driven Synthetic Data Generation
2221

@@ -105,7 +104,7 @@ GraphGen 首先根据源文本构建细粒度的知识图谱,然后利用期
105104

106105
## 🚀 快速开始
107106

108-
通过 [Web](https://g-app-center-120612-6433-jpdvmvp.openxlab.space) 或 [备用 Web 入口](https://openxlab.org.cn/apps/detail/chenzihonga/GraphGen) 体验 GraphGen。
107+
通过 [Huggingface](https://huggingface.co/spaces/chenzihong/GraphGen) 或 [Modelscope](https://modelscope.cn/studios/chenzihong/GraphGen) 体验 GraphGen。
109108

110109
如有任何问题,请查看 [FAQ](https://github.com/open-sciencelab/GraphGen/issues/10)、提交新的 [issue](https://github.com/open-sciencelab/GraphGen/issues) 或加入我们的[微信群](https://cdn.vansin.top/internlm/dou.jpg)咨询。
111110

graphgen/graphgen.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ async def quiz_and_judge(self, quiz_and_judge_config: Dict):
255255
self.graph_storage,
256256
self.rephrase_storage,
257257
max_samples,
258+
progress_bar=self.progress_bar,
258259
)
259260

260261
# TODO: assert trainee_llm_client is valid before judge
@@ -270,6 +271,7 @@ async def quiz_and_judge(self, quiz_and_judge_config: Dict):
270271
self.graph_storage,
271272
self.rephrase_storage,
272273
re_judge,
274+
progress_bar=self.progress_bar,
273275
)
274276

275277
await self.rephrase_storage.index_done_callback()

graphgen/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
AtomicGenerator,
55
CoTGenerator,
66
MultiHopGenerator,
7+
QuizGenerator,
78
VQAGenerator,
89
)
910
from .kg_builder import LightRAGKGBuilder, MMKGBuilder

graphgen/models/generator/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
from .atomic_generator import AtomicGenerator
33
from .cot_generator import CoTGenerator
44
from .multi_hop_generator import MultiHopGenerator
5+
from .quiz_generator import QuizGenerator
56
from .vqa_generator import VQAGenerator
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from typing import Any
2+
3+
from graphgen.bases import BaseGenerator
4+
from graphgen.templates import DESCRIPTION_REPHRASING_PROMPT
5+
from graphgen.utils import detect_main_language, logger
6+
7+
8+
class QuizGenerator(BaseGenerator):
    """
    Generator that produces quiz statements by asking the LLM to rephrase
    a given description.
    """

    @staticmethod
    def build_prompt(
        batch: tuple[list[tuple[str, dict]], list[tuple[Any, Any, dict]]]
    ) -> str:
        """
        Build the rephrasing prompt from a (nodes, edges) batch.

        Only one element is consulted: the first node when any node is
        present, otherwise the first edge. Its data dict supplies the
        "description" (default "") and "template_type" (default "TEMPLATE").
        :param batch: A tuple (nodes, edges); each node is (id, data) and
            each edge is (source, target, data)
        :return: Prompt string
        :raises ValueError: If both nodes and edges are empty
        """
        nodes, edges = batch

        # Pick the single data dict that carries the description.
        if nodes:
            payload = nodes[0][1]
        elif edges:
            payload = edges[0][2]
        else:
            raise ValueError("Batch must contain at least one node or edge with description")

        return QuizGenerator.build_prompt_for_description(
            payload.get("description", ""),
            payload.get("template_type", "TEMPLATE"),
        )

    @staticmethod
    def build_prompt_for_description(description: str, template_type: str = "TEMPLATE") -> str:
        """
        Build the rephrasing prompt for one description.

        The template is looked up first by the language detected from the
        description, then by template type.
        :param description: The description to rephrase
        :param template_type: Either "TEMPLATE" (same meaning) or "ANTI_TEMPLATE" (opposite meaning)
        :return: Prompt string
        """
        language = detect_main_language(description)
        templates_for_language = DESCRIPTION_REPHRASING_PROMPT[language]
        return templates_for_language[template_type].format(input_sentence=description)

    @staticmethod
    def parse_rephrased_text(response: str) -> str:
        """
        Extract the rephrased text from a raw response.

        :param response: Raw response text
        :return: The response with surrounding whitespace removed and then
            any leading/trailing double quotes removed
        """
        cleaned = response.strip()
        cleaned = cleaned.strip('"')
        logger.debug("Rephrased Text: %s", cleaned)
        return cleaned

    @staticmethod
    def parse_response(response: str) -> Any:
        """
        Parse the LLM response; for the quiz generator this is simply the
        rephrased text.

        :param response: LLM response
        :return: Rephrased text
        """
        return QuizGenerator.parse_rephrased_text(response)

graphgen/operators/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
from .extract import extract_info
44
from .generate import generate_qas
55
from .init import init_llm
6-
from .judge import judge_statement
76
from .partition import partition_kg
8-
from .quiz import quiz
7+
from .quiz_and_judge import judge_statement, quiz
98
from .read import read_files
109
from .search import search_all

graphgen/operators/generate/generate_qas.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Any
22

3+
import gradio as gr
4+
35
from graphgen.bases import BaseLLMWrapper
46
from graphgen.models import (
57
AggregatedGenerator,
@@ -19,7 +21,7 @@ async def generate_qas(
1921
]
2022
],
2123
generation_config: dict,
22-
progress_bar=None,
24+
progress_bar: gr.Progress = None,
2325
) -> list[dict[str, Any]]:
2426
"""
2527
Generate question-answer pairs based on nodes and edges.

graphgen/operators/judge.py

Lines changed: 0 additions & 150 deletions
This file was deleted.

0 commit comments

Comments
 (0)