Skip to content

Commit afdfe54

Browse files
committed
feat: add termbase management functionality with CRUD operations
1 parent 4ca1790 commit afdfe54

27 files changed

Lines changed: 1693 additions & 38 deletions

File tree

apps/common/constants/permission_constants.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ class Group(Enum):
4949
SYSTEM_KNOWLEDGE_TAG = "SYSTEM_KNOWLEDGE_TAG"
5050

5151
KNOWLEDGE_PROBLEM = "KNOWLEDGE_PROBLEM"
52+
KNOWLEDGE_TERMBASE = "KNOWLEDGE_TERMBASE"
5253
SYSTEM_KNOWLEDGE_PROBLEM = "SYSTEM_KNOWLEDGE_PROBLEM"
54+
SYSTEM_KNOWLEDGE_TERMBASE = "SYSTEM_KNOWLEDGE_TERMBASE"
5355
SYSTEM_RES_KNOWLEDGE_PROBLEM = "SYSTEM_RESOURCE_KNOWLEDGE_PROBLEM"
56+
SYSTEM_RES_KNOWLEDGE_TERMBASE = "SYSTEM_RESOURCE_KNOWLEDGE_TERMBASE"
5457

5558
SYSTEM_KNOWLEDGE_HIT_TEST = "SYSTEM_KNOWLEDGE_HIT_TEST"
5659
SYSTEM_RES_KNOWLEDGE_HIT_TEST = "SYSTEM_RESOURCE_KNOWLEDGE_HIT_TEST"
@@ -882,6 +885,28 @@ class PermissionConstants(Enum):
882885
resource_permission_group_list=[ResourcePermissionConst.KNOWLEDGE_MANGE],
883886
parent_group=[WorkspaceGroup.KNOWLEDGE, UserGroup.KNOWLEDGE]
884887
)
888+
KNOWLEDGE_TERMBASE_READ = Permission(
889+
group=Group.KNOWLEDGE_TERMBASE, operate=Operate.READ,
890+
role_list=[RoleConstants.ADMIN, RoleConstants.USER],
891+
resource_permission_group_list=[ResourcePermissionConst.KNOWLEDGE_VIEW],
892+
parent_group=[WorkspaceGroup.KNOWLEDGE, UserGroup.KNOWLEDGE]
893+
)
894+
KNOWLEDGE_TERMBASE_CREATE = Permission(
895+
group=Group.KNOWLEDGE_TERMBASE, operate=Operate.CREATE,
896+
role_list=[RoleConstants.ADMIN, RoleConstants.USER],
897+
resource_permission_group_list=[ResourcePermissionConst.KNOWLEDGE_MANGE],
898+
parent_group=[WorkspaceGroup.KNOWLEDGE, UserGroup.KNOWLEDGE]
899+
)
900+
KNOWLEDGE_TERMBASE_EDIT = Permission(
901+
group=Group.KNOWLEDGE_TERMBASE, operate=Operate.EDIT, role_list=[RoleConstants.ADMIN, RoleConstants.USER],
902+
resource_permission_group_list=[ResourcePermissionConst.KNOWLEDGE_MANGE],
903+
parent_group=[WorkspaceGroup.KNOWLEDGE, UserGroup.KNOWLEDGE]
904+
)
905+
KNOWLEDGE_TERMBASE_DELETE = Permission(
906+
group=Group.KNOWLEDGE_TERMBASE, operate=Operate.DELETE, role_list=[RoleConstants.ADMIN, RoleConstants.USER],
907+
resource_permission_group_list=[ResourcePermissionConst.KNOWLEDGE_MANGE],
908+
parent_group=[WorkspaceGroup.KNOWLEDGE, UserGroup.KNOWLEDGE]
909+
)
885910
KNOWLEDGE_TAG_READ = Permission(
886911
group=Group.KNOWLEDGE_TAG, operate=Operate.READ,
887912
role_list=[RoleConstants.ADMIN, RoleConstants.USER],
@@ -1553,6 +1578,26 @@ class PermissionConstants(Enum):
15531578
group=Group.SYSTEM_KNOWLEDGE_PROBLEM, operate=Operate.RELATE, role_list=[RoleConstants.ADMIN],
15541579
parent_group=[SystemGroup.SHARED_KNOWLEDGE], is_ee=settings.edition == "EE"
15551580
)
1581+
SHARED_KNOWLEDGE_TERMBASE_READ = Permission(
1582+
group=Group.SYSTEM_KNOWLEDGE_TERMBASE, operate=Operate.READ, role_list=[RoleConstants.ADMIN],
1583+
parent_group=[SystemGroup.SHARED_KNOWLEDGE], is_ee=settings.edition == "EE"
1584+
)
1585+
SHARED_KNOWLEDGE_TERMBASE_CREATE = Permission(
1586+
group=Group.SYSTEM_KNOWLEDGE_TERMBASE, operate=Operate.CREATE, role_list=[RoleConstants.ADMIN],
1587+
parent_group=[SystemGroup.SHARED_KNOWLEDGE], is_ee=settings.edition == "EE"
1588+
)
1589+
SHARED_KNOWLEDGE_TERMBASE_EDIT = Permission(
1590+
group=Group.SYSTEM_KNOWLEDGE_TERMBASE, operate=Operate.EDIT, role_list=[RoleConstants.ADMIN],
1591+
parent_group=[SystemGroup.SHARED_KNOWLEDGE], is_ee=settings.edition == "EE"
1592+
)
1593+
SHARED_KNOWLEDGE_TERMBASE_DELETE = Permission(
1594+
group=Group.SYSTEM_KNOWLEDGE_TERMBASE, operate=Operate.DELETE, role_list=[RoleConstants.ADMIN],
1595+
parent_group=[SystemGroup.SHARED_KNOWLEDGE], is_ee=settings.edition == "EE"
1596+
)
1597+
SHARED_KNOWLEDGE_TERMBASE_EXPORT = Permission(
1598+
group=Group.SYSTEM_KNOWLEDGE_TERMBASE, operate=Operate.EXPORT, role_list=[RoleConstants.ADMIN],
1599+
parent_group=[SystemGroup.SHARED_KNOWLEDGE], is_ee=settings.edition == "EE"
1600+
)
15561601
SHARED_KNOWLEDGE_HIT_TEST = Permission(
15571602
group=Group.SYSTEM_KNOWLEDGE_HIT_TEST, operate=Operate.READ, role_list=[RoleConstants.ADMIN],
15581603
parent_group=[SystemGroup.SHARED_KNOWLEDGE], is_ee=settings.edition == "EE"
@@ -1821,6 +1866,26 @@ class PermissionConstants(Enum):
18211866
group=Group.SYSTEM_RES_KNOWLEDGE_PROBLEM, operate=Operate.RELATE, role_list=[RoleConstants.ADMIN],
18221867
parent_group=[SystemGroup.RESOURCE_KNOWLEDGE], is_ee=settings.edition == "EE"
18231868
)
1869+
RESOURCE_KNOWLEDGE_TERMBASE_READ = Permission(
1870+
group=Group.SYSTEM_RES_KNOWLEDGE_TERMBASE, operate=Operate.READ, role_list=[RoleConstants.ADMIN],
1871+
parent_group=[SystemGroup.RESOURCE_KNOWLEDGE], is_ee=settings.edition == "EE"
1872+
)
1873+
RESOURCE_KNOWLEDGE_TERMBASE_CREATE = Permission(
1874+
group=Group.SYSTEM_RES_KNOWLEDGE_TERMBASE, operate=Operate.CREATE, role_list=[RoleConstants.ADMIN],
1875+
parent_group=[SystemGroup.RESOURCE_KNOWLEDGE], is_ee=settings.edition == "EE"
1876+
)
1877+
RESOURCE_KNOWLEDGE_TERMBASE_EDIT = Permission(
1878+
group=Group.SYSTEM_RES_KNOWLEDGE_TERMBASE, operate=Operate.EDIT, role_list=[RoleConstants.ADMIN],
1879+
parent_group=[SystemGroup.RESOURCE_KNOWLEDGE], is_ee=settings.edition == "EE"
1880+
)
1881+
RESOURCE_KNOWLEDGE_TERMBASE_DELETE = Permission(
1882+
group=Group.SYSTEM_RES_KNOWLEDGE_TERMBASE, operate=Operate.DELETE, role_list=[RoleConstants.ADMIN],
1883+
parent_group=[SystemGroup.RESOURCE_KNOWLEDGE], is_ee=settings.edition == "EE"
1884+
)
1885+
RESOURCE_KNOWLEDGE_TERMBASE_EXPORT = Permission(
1886+
group=Group.SYSTEM_RES_KNOWLEDGE_TERMBASE, operate=Operate.EXPORT, role_list=[RoleConstants.ADMIN],
1887+
parent_group=[SystemGroup.RESOURCE_KNOWLEDGE], is_ee=settings.edition == "EE"
1888+
)
18241889
RESOURCE_KNOWLEDGE_TAG_READ = Permission(
18251890
group=Group.SYSTEM_RES_KNOWLEDGE_TAG, operate=Operate.READ, role_list=[RoleConstants.ADMIN],
18261891
parent_group=[SystemGroup.RESOURCE_KNOWLEDGE], is_ee=settings.edition == "EE"

apps/common/event/listener_manage.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
@date:2023/10/20 14:01
77
@desc:
88
"""
9-
import datetime
109
import os
1110
import threading
1211
import traceback
@@ -15,8 +14,8 @@
1514
import django.db.models
1615
from django.db.models import QuerySet
1716
from django.db.models.functions import Substr, Reverse
18-
from django.utils.translation import gettext_lazy as _
1917
from django.utils import timezone
18+
from django.utils.translation import gettext_lazy as _
2019
from langchain_core.embeddings import Embeddings
2120

2221
from common.config.embedding_config import VectorStore
@@ -132,11 +131,15 @@ def embedding_by_paragraph(paragraph_id, embedding_model: Embeddings):
132131
ListenerManagement.update_status(QuerySet(Paragraph).filter(id=paragraph_id), TaskType.EMBEDDING, State.STARTED)
133132
try:
134133
data_list = native_search(
135-
{'problem': QuerySet(get_dynamics_model({'paragraph.id': django.db.models.CharField()})).filter(
136-
**{'paragraph.id': paragraph_id}),
134+
{
135+
'problem': QuerySet(
136+
get_dynamics_model({'paragraph.id': django.db.models.CharField()})
137+
).filter(**{'paragraph.id': paragraph_id}),
137138
'paragraph': QuerySet(Paragraph).filter(id=paragraph_id)},
138139
select_string=get_file_content(
139-
os.path.join(PROJECT_DIR, "apps", "common", 'sql', 'list_embedding_text.sql')))
140+
os.path.join(PROJECT_DIR, "apps", "common", 'sql', 'list_embedding_text.sql')
141+
)
142+
)
140143
# 删除段落
141144
VectorStore.get_embedding_vector().delete_by_paragraph_id(paragraph_id)
142145

@@ -149,8 +152,9 @@ def is_the_task_interrupted():
149152
# 批量向量化
150153
VectorStore.get_embedding_vector().batch_save(data_list, embedding_model, is_the_task_interrupted)
151154
# 更新到开始状态
152-
ListenerManagement.update_status(QuerySet(Paragraph).filter(id=paragraph_id), TaskType.EMBEDDING,
153-
State.SUCCESS)
155+
ListenerManagement.update_status(
156+
QuerySet(Paragraph).filter(id=paragraph_id), TaskType.EMBEDDING, State.SUCCESS
157+
)
154158
except Exception as e:
155159
maxkb_logger.error(_('Vectorized paragraph: {paragraph_id} error {error} {traceback}').format(
156160
paragraph_id=paragraph_id, error=str(e), traceback=traceback.format_exc()))
@@ -280,17 +284,23 @@ def is_the_task_interrupted():
280284
State.STARTED)
281285

282286
# 根据段落进行向量化处理
283-
page_desc(QuerySet(Paragraph)
284-
.annotate(
285-
reversed_status=Reverse('status'),
286-
task_type_status=Substr('reversed_status', TaskType.EMBEDDING.value,
287-
1),
288-
).filter(task_type_status__in=state_list, document_id=document_id)
289-
.values('id'), 5,
290-
ListenerManagement.get_embedding_paragraph_apply(embedding_model, is_the_task_interrupted,
291-
ListenerManagement.get_aggregation_document_status(
292-
document_id)),
293-
is_the_task_interrupted)
287+
page_desc(
288+
QuerySet(
289+
Paragraph
290+
).annotate(
291+
reversed_status=Reverse('status'),
292+
task_type_status=Substr('reversed_status', TaskType.EMBEDDING.value, 1),
293+
).filter(
294+
task_type_status__in=state_list, document_id=document_id
295+
).values('id'),
296+
5,
297+
ListenerManagement.get_embedding_paragraph_apply(
298+
embedding_model,
299+
is_the_task_interrupted,
300+
ListenerManagement.get_aggregation_document_status(document_id)
301+
),
302+
is_the_task_interrupted
303+
)
294304
# 检查是否存在索引
295305
create_knowledge_index(document_id=document_id)
296306
except Exception as e:

apps/common/utils/ts_vecto_util.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
@desc:
88
"""
99
import re
10-
import uuid_utils.compat as uuid
1110
from typing import List
1211

1312
import jieba
1413
import jieba.posseg
14+
import uuid_utils.compat as uuid
1515

1616
jieba_word_list_cache = [chr(item) for item in range(38, 84)]
1717

@@ -76,13 +76,27 @@ def get_key_by_word_dict(key, word_dict):
7676
return v
7777

7878

79-
def to_ts_vector(text: str):
79+
def _build_tokenizer(user_words: List[str] = None, user_dict_path: str = None):
    """Create a new ``jieba.Tokenizer`` configured for a single call.

    A brand-new tokenizer object is constructed on every invocation; the
    original author's note describes it as isolated per call.

    :param user_words: optional extra words registered through ``add_word``;
        falsy entries (``None``, empty strings) are skipped.
    :param user_dict_path: optional path passed to ``load_userdict``.
    :return: the configured tokenizer instance.
    """
    # NOTE(review): a fresh Tokenizer presumably loads its dictionary again on
    # first use -- confirm the cost is acceptable on hot paths.
    isolated = jieba.Tokenizer()
    if user_dict_path:
        isolated.load_userdict(user_dict_path)
    # The dictionary file is applied first, then the individual words.
    for candidate in user_words or ():
        if not candidate:
            continue
        isolated.add_word(candidate)
    return isolated
89+
90+
91+
def to_ts_vector(text: str, user_words: List[str] = None, user_dict_path: str = None):
    """Tokenize ``text`` and return the tokens joined by single spaces.

    :param text: the text to segment.
    :param user_words: optional extra words; when given, a dedicated
        tokenizer from ``_build_tokenizer`` is used.
    :param user_dict_path: optional user dictionary path; also triggers the
        dedicated tokenizer.
    :return: the tokens produced by ``lcut(text, cut_all=True)``, space-separated.
    """
    if user_words or user_dict_path:
        segmenter = _build_tokenizer(user_words, user_dict_path)
    else:
        # No customisation requested: use the module-level jieba API.
        segmenter = jieba
    # Tokenize
    return " ".join(segmenter.lcut(text, cut_all=True))
8396

8497

85-
def to_query(text: str):
86-
extract_tags = jieba.lcut(text, cut_all=True)
98+
def to_query(text: str, user_words: List[str] = None, user_dict_path: str = None):
    """Tokenize a query string and return the tokens joined by single spaces.

    :param text: the query text to segment.
    :param user_words: optional extra words for a dedicated tokenizer.
    :param user_dict_path: optional user dictionary path for a dedicated tokenizer.
    :return: the tokens produced by ``lcut(text, cut_all=True)``, space-separated.
    """
    needs_custom_tokenizer = bool(user_words or user_dict_path)
    # Fall back to the module-level jieba API when nothing custom was supplied.
    segmenter = _build_tokenizer(user_words, user_dict_path) if needs_custom_tokenizer else jieba
    pieces = segmenter.lcut(text, cut_all=True)
    return " ".join(pieces)

apps/knowledge/api/termbase.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
from django.utils.translation import gettext_lazy as _
2+
from drf_spectacular.types import OpenApiTypes
3+
from drf_spectacular.utils import OpenApiParameter
4+
from rest_framework import serializers
5+
6+
from common.mixins.api_mixin import APIMixin
7+
from common.result import DefaultResultSerializer
8+
9+
10+
class TermbaseReadAPI(APIMixin):
    """OpenAPI metadata for termbase endpoints addressed by workspace and knowledge base."""

    @staticmethod
    def get_parameters():
        """Return the required string path parameters, in declaration order."""
        # (name, description) pairs; both are required path strings.
        path_fields = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
        )
        return [
            OpenApiParameter(
                name=field_name,
                description=field_desc,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for field_name, field_desc in path_fields
        ]

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response."""
        return DefaultResultSerializer
33+
34+
35+
class TermbaseBatchCreateAPI(TermbaseReadAPI):
    """OpenAPI metadata for the termbase batch-create request body."""

    @staticmethod
    def get_request():
        """Return the list field that documents the request body.

        The labels previously read 'problem list' / 'problem', which were
        carried over from the problem API and mislabelled this termbase
        endpoint in the generated schema.
        """
        # NOTE(review): the child is a UUIDField, which is unusual for a
        # *create* payload -- confirm against the serializer whether this
        # should be a CharField carrying the term text.
        return serializers.ListField(
            required=True,
            label=_('term list'),
            child=serializers.UUIDField(required=True, label=_('term')),
        )
40+
41+
42+
class BatchDeleteAPI(TermbaseReadAPI):
    """OpenAPI metadata for the termbase batch-delete request body."""

    @staticmethod
    def get_request():
        """Return the list-of-UUIDs field that documents the request body.

        The labels previously read 'problem list' / 'problem', which were
        carried over from the problem API and mislabelled this termbase
        endpoint in the generated schema.
        """
        return serializers.ListField(
            required=True,
            label=_('term id list'),
            child=serializers.UUIDField(required=True, label=_('term id')),
        )
47+
48+
49+
class TermbasePageAPI(APIMixin):
    """OpenAPI metadata for an endpoint taking workspace, knowledge base and paging path parameters."""

    @staticmethod
    def get_parameters():
        """Return the four required path parameters, in declaration order."""
        # (name, description, OpenAPI type)
        path_specs = (
            ("workspace_id", "工作空间id", OpenApiTypes.STR),
            ("knowledge_id", "知识库id", OpenApiTypes.STR),
            ("current_page", "当前页码", OpenApiTypes.INT),
            ("page_size", "每页条数", OpenApiTypes.INT),
        )
        documented = []
        for param_name, param_desc, param_type in path_specs:
            documented.append(
                OpenApiParameter(
                    name=param_name,
                    description=param_desc,
                    type=param_type,
                    location='path',
                    required=True,
                )
            )
        return documented

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response."""
        return DefaultResultSerializer
86+
87+
88+
class TermbaseDeleteAPI(APIMixin):
    """OpenAPI metadata for endpoints addressing a single entry by id."""

    @staticmethod
    def get_parameters():
        """Return the three required string path parameters, in declaration order."""

        def path_string(param_name, param_desc):
            # Every parameter here is a required string taken from the URL path.
            return OpenApiParameter(
                name=param_name,
                description=param_desc,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )

        # NOTE(review): "problem_id" / "问题id" looks carried over from the
        # problem API -- confirm the name matches the URL kwarg of the
        # termbase route before renaming.
        return [
            path_string("workspace_id", "工作空间id"),
            path_string("knowledge_id", "知识库id"),
            path_string("problem_id", "问题id"),
        ]

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response."""
        return DefaultResultSerializer
118+
119+
120+
class TermbaseEditAPI(TermbaseDeleteAPI):
    """OpenAPI metadata for the edit endpoint; path parameters are inherited."""

    @staticmethod
    def get_request():
        """Return an empty dict as the request description."""
        # NOTE(review): presumably a placeholder until a request serializer exists.
        return dict()
124+
125+
126+
class TermbaseParagraphAPI(TermbaseDeleteAPI):
    """Reuses the parameters and response of ``TermbaseDeleteAPI`` unchanged."""
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Generated by Django 5.2.14 on 2026-05-15 03:08
2+
3+
import django.db.models.deletion
4+
import uuid_utils.compat
5+
from django.db import migrations, models
6+
7+
8+
class Migration(migrations.Migration):
    """Schema migration that creates the ``Termbase`` model backed by table ``termbase``."""

    # Must run after the knowledge app's 0007 migration.
    dependencies = [
        ('knowledge', '0007_remove_knowledgeworkflowversion_workflow_and_more'),
    ]

    operations = [
        migrations.CreateModel(
            name='Termbase',
            fields=[
                # Indexed creation / modification timestamps maintained by Django.
                ('create_time', models.DateTimeField(auto_now_add=True, db_index=True, verbose_name='创建时间')),
                ('update_time', models.DateTimeField(auto_now=True, db_index=True, verbose_name='修改时间')),
                # UUID primary key generated by uuid_utils.compat.uuid7.
                ('id', models.UUIDField(default=uuid_utils.compat.uuid7, editable=False, primary_key=True, serialize=False, verbose_name='主键id')),
                # Indexed term text, at most 256 characters.
                ('content', models.CharField(db_index=True, max_length=256, verbose_name='术语内容')),
                # Reference to the owning knowledge base; db_constraint=False and
                # DO_NOTHING mean no database-level constraint or cascade is set up.
                ('knowledge', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.knowledge')),
            ],
            options={
                'db_table': 'termbase',
            },
        ),
    ]

apps/knowledge/models/knowledge.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,17 @@ class ProblemParagraphMapping(AppModelMixin):
270270
class Meta:
271271
db_table = "problem_paragraph_mapping"
272272

273+
class Termbase(AppModelMixin):
    """
    Termbase entry: a single term that belongs to a knowledge base.
    """
    # UUID primary key. ``max_length`` was dropped because it has no effect on
    # UUIDField: the migration generated for this model records no max_length
    # for ``id``, so the schema is unchanged.
    id = models.UUIDField(primary_key=True, default=uuid.uuid7, editable=False, verbose_name="主键id")
    # Owning knowledge base. db_constraint=False together with DO_NOTHING means
    # no database-level constraint or cascade is set up for this reference.
    # NOTE(review): presumably rows are removed explicitly when a knowledge
    # base is deleted -- confirm against the delete path.
    knowledge = models.ForeignKey(Knowledge, on_delete=models.DO_NOTHING, db_constraint=False)
    # Term text, indexed, at most 256 characters.
    content = models.CharField(max_length=256, verbose_name="术语内容", db_index=True)

    class Meta:
        db_table = "termbase"
283+
273284

274285
class SourceType(models.IntegerChoices):
275286
"""订单类型"""

0 commit comments

Comments
 (0)