-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Expand file tree
/
Copy pathI_base_chat_pipeline.py
More file actions
157 lines (132 loc) · 5.43 KB
/
I_base_chat_pipeline.py
File metadata and controls
157 lines (132 loc) · 5.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# coding=utf-8
"""
@project: maxkb
@Author:虎
@file: I_base_chat_pipeline.py
@date:2024/1/9 17:25
@desc:
"""
import time
from abc import abstractmethod
from typing import Type
from rest_framework import serializers
from knowledge.models import Paragraph
class ParagraphPipelineModel:
def __init__(self, _id: str, document_id: str, knowledge_id: str, content: str, title: str, status: str,
is_active: bool, comprehensive_score: float, similarity: float, dataset_name: str, document_name: str,
hit_handling_method: str, directly_return_similarity: float, meta: dict = None):
self.id = _id
self.document_id = document_id
self.knowledge_id = knowledge_id
self.content = content
self.title = title
self.status = status,
self.is_active = is_active
self.comprehensive_score = comprehensive_score
self.similarity = similarity
self.dataset_name = dataset_name
self.document_name = document_name
self.hit_handling_method = hit_handling_method
self.directly_return_similarity = directly_return_similarity
self.meta = meta
def to_dict(self):
return {
'id': self.id,
'document_id': self.document_id,
'knowledge_id': self.knowledge_id,
'content': self.content,
'title': self.title,
'status': self.status,
'is_active': self.is_active,
'comprehensive_score': self.comprehensive_score,
'similarity': self.similarity,
'dataset_name': self.dataset_name,
'document_name': self.document_name,
'meta': self.meta,
}
class builder:
def __init__(self):
self.similarity = None
self.paragraph = {}
self.comprehensive_score = None
self.document_name = None
self.dataset_name = None
self.hit_handling_method = None
self.directly_return_similarity = 0.9
self.meta = {}
def add_paragraph(self, paragraph):
if isinstance(paragraph, Paragraph):
self.paragraph = {'id': paragraph.id,
'document_id': paragraph.document_id,
'knowledge_id': paragraph.knowledge_id,
'content': paragraph.content,
'title': paragraph.title,
'status': paragraph.status,
'is_active': paragraph.is_active,
}
else:
self.paragraph = paragraph
return self
def add_dataset_name(self, dataset_name):
self.dataset_name = dataset_name
return self
def add_document_name(self, document_name):
self.document_name = document_name
return self
def add_hit_handling_method(self, hit_handling_method):
self.hit_handling_method = hit_handling_method
return self
def add_directly_return_similarity(self, directly_return_similarity):
self.directly_return_similarity = directly_return_similarity
return self
def add_comprehensive_score(self, comprehensive_score: float):
self.comprehensive_score = comprehensive_score
return self
def add_similarity(self, similarity: float):
self.similarity = similarity
return self
def add_meta(self, meta: dict):
self.meta = meta
return self
def build(self):
return ParagraphPipelineModel(str(self.paragraph.get('id')), str(self.paragraph.get('document_id')),
str(self.paragraph.get('knowledge_id')),
self.paragraph.get('content'), self.paragraph.get('title'),
self.paragraph.get('status'),
self.paragraph.get('is_active'),
self.comprehensive_score, self.similarity, self.dataset_name,
self.document_name, self.hit_handling_method, self.directly_return_similarity,
self.meta)
class IBaseChatPipelineStep:
def __init__(self):
# 当前步骤上下文,用于存储当前步骤信息
self.context = {}
@abstractmethod
def get_step_serializer(self, manage) -> Type[serializers.Serializer]:
pass
def valid_args(self, manage):
step_serializer_clazz = self.get_step_serializer(manage)
step_serializer = step_serializer_clazz(data=manage.context)
step_serializer.is_valid(raise_exception=True)
self.context['step_args'] = step_serializer.data
def run(self, manage):
"""
:param manage: 步骤管理器
:return: 执行结果
"""
start_time = time.time()
self.context['start_time'] = start_time
# 校验参数,
self.valid_args(manage)
self._run(manage)
self.context['run_time'] = time.time() - start_time
def _run(self, manage):
pass
def execute(self, **kwargs):
pass
def get_details(self, manage, **kwargs):
"""
运行详情
:return: 步骤详情
"""
return None