Skip to content

Commit 9e5951b

Browse files
committed
feature: isolate annotation, collection, evaluation and synthesis data by creator; operators are shared and not isolated.
1 parent a6a72aa commit 9e5951b

17 files changed

Lines changed: 252 additions & 154 deletions

runtime/datamate-python/.env.example

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@ LOG_FILE_DIR=./logs
88
RAG_STORAGE_DIR=./rag_storage
99

1010
# DataBase
11-
MYSQL_HOST=localhost
12-
MYSQL_PORT=3306
13-
MYSQL_USER=root
14-
MYSQL_PASSWORD=password
15-
MYSQL_DATABASE=datamate
11+
PGSQL_HOST=localhost
12+
PGSQL_PORT=5432
13+
PGSQL_USER=postgres
14+
PGSQL_PASSWORD=password
15+
PGSQL_DATABASE=datamate
1616

1717
# Label Studio settings
18-
LABEL_STUDIO_BASE_URL=http://localhost:8080
18+
LABEL_STUDIO_BASE_URL=http://localhost:30001
1919

2020
LABEL_STUDIO_USER_TOKEN="demo_dev_token"
21+
22+
DATAMATE_JWT_ENABLE=false

runtime/datamate-python/app/core/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,5 +75,7 @@ def build_database_url(self):
7575
# DataMate
7676
dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀
7777

78+
datamate_jwt_enable: bool = False
79+
7880
# 全局设置实例
7981
settings = Settings()
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from contextvars import ContextVar
2+
from typing import List, Optional
3+
4+
_current_user: ContextVar[Optional[str]] = ContextVar("_current_user", default=None)
SYSTEM_USER = "system"


class DataScopeHandle:
    """Keep the current user in a ContextVar and expose data-scope helpers.

    The stored value is either a non-empty user identifier or ``None``.
    The helpers are meant to feed SQLAlchemy filters.
    """

    @staticmethod
    def set_user_info(user: Optional[str]) -> None:
        """Store ``user`` as the current user.

        ``None`` and the empty string are both stored as ``None``.
        """
        has_user = user is not None and user != ""
        _current_user.set(user if has_user else None)

    @staticmethod
    def remove_user_info() -> None:
        """Clear the current user by storing ``None``."""
        _current_user.set(None)

    @staticmethod
    def get_user_info() -> Optional[str]:
        """Return the current user, or ``None`` when no user is set."""
        return _current_user.get()

    @staticmethod
    def allowed_users() -> List[str]:
        """Return the creators whose records the current user may access.

        Returns:
            ``[current_user, SYSTEM_USER]`` when a user is set, otherwise
            an empty list.
        """
        current = DataScopeHandle.get_user_info()
        return [current, SYSTEM_USER] if current else []

runtime/datamate-python/app/db/models/annotation_management.py

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey
55
from sqlalchemy.sql import func
66

7-
from app.db.session import Base
7+
from app.db.models.base_entity import BaseEntity
88

9-
class AnnotationTemplate(Base):
9+
class AnnotationTemplate(BaseEntity):
1010
"""标注配置模板模型"""
11-
11+
1212
__tablename__ = "t_dm_annotation_templates"
13-
13+
__ignore_data_scope__ = True
14+
1415
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
1516
name = Column(String(100), nullable=False, comment="模板名称")
1617
description = Column(String(500), nullable=True, comment="模板描述")
@@ -21,44 +22,40 @@ class AnnotationTemplate(Base):
2122
category = Column(String(50), default='custom', comment="模板分类: medical/general/custom/system")
2223
built_in = Column(Boolean, default=False, comment="是否系统内置模板")
2324
version = Column(String(20), default='1.0', comment="模板版本")
24-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
25-
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
2625
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
27-
26+
2827
def __repr__(self):
2928
return f"<AnnotationTemplate(id={self.id}, name={self.name}, data_type={self.data_type})>"
30-
29+
3130
@property
3231
def is_deleted(self) -> bool:
3332
"""检查是否已被软删除"""
3433
return self.deleted_at is not None
35-
36-
class LabelingProject(Base):
34+
35+
class LabelingProject(BaseEntity):
3736
"""标注项目模型"""
38-
37+
3938
__tablename__ = "t_dm_labeling_projects"
40-
39+
4140
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
4241
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
4342
name = Column(String(100), nullable=False, comment="项目名称")
4443
labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
4544
template_id = Column(String(36), ForeignKey('t_dm_annotation_templates.id', ondelete='SET NULL'), nullable=True, comment="使用的模板ID")
4645
configuration = Column(JSON, nullable=True, comment="项目配置(可能包含对模板的自定义修改)")
4746
progress = Column(JSON, nullable=True, comment="项目进度信息")
48-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
49-
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
5047
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
51-
48+
5249
def __repr__(self):
5350
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
54-
51+
5552
@property
5653
def is_deleted(self) -> bool:
5754
"""检查是否已被软删除"""
5855
return self.deleted_at is not None
5956

6057

61-
class AutoAnnotationTask(Base):
58+
class AutoAnnotationTask(BaseEntity):
6259
"""自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
6360

6461
__tablename__ = "t_dm_auto_annotation_tasks"
@@ -76,13 +73,6 @@ class AutoAnnotationTask(Base):
7673
detected_objects = Column(Integer, default=0, comment="检测到的对象总数")
7774
output_path = Column(String(500), nullable=True, comment="输出路径")
7875
error_message = Column(Text, nullable=True, comment="错误信息")
79-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
80-
updated_at = Column(
81-
TIMESTAMP,
82-
server_default=func.current_timestamp(),
83-
onupdate=func.current_timestamp(),
84-
comment="更新时间",
85-
)
8676
completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
8777
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
8878

@@ -92,4 +82,4 @@ def __repr__(self) -> str: # pragma: no cover - repr 简单返回
9282
@property
9383
def is_deleted(self) -> bool:
9484
"""检查是否已被软删除"""
95-
return self.deleted_at is not None
85+
return self.deleted_at is not None
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from sqlalchemy import Column, String, TIMESTAMP
2+
from sqlalchemy.orm import declarative_base
3+
from sqlalchemy.sql import func
4+
5+
Base = declarative_base()


class BaseEntity(Base):
    """Abstract declarative base that carries the shared audit columns.

    Supplies ``created_at`` / ``updated_at`` timestamps and
    ``created_by`` / ``updated_by`` columns to every subclass.
    A subclass may set ``__ignore_data_scope__ = True`` to opt out of
    data-scope filtering.
    """

    __abstract__ = True

    # Data-scope filtering is enforced by default; a subclass overrides
    # this with True to opt out.
    __ignore_data_scope__ = False

    created_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        comment="创建时间",
    )
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
        comment="更新时间",
    )
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

runtime/datamate-python/app/db/models/data_collection.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
from sqlalchemy import Column, String, Text, TIMESTAMP, Integer, BigInteger, Numeric, JSON, Boolean
33
from sqlalchemy.sql import func
44

5-
from app.db.session import Base
5+
from app.db.models.base_entity import BaseEntity
66

7-
class CollectionTemplate(Base):
7+
8+
class CollectionTemplate(BaseEntity):
89
"""归集模板表(UUID 主键) -> t_dc_collection_templates"""
910

1011
__tablename__ = "t_dc_collection_templates"
12+
__ignore_data_scope__ = True
1113

1214
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="模板ID(UUID)")
1315
name = Column(String(255), nullable=False, comment="模板名称")
@@ -18,12 +20,8 @@ class CollectionTemplate(Base):
1820
target_name = Column(String(64), nullable=False, comment="目标数据源名称")
1921
template_content = Column(JSON, nullable=False, comment="模板内容")
2022
built_in = Column(Boolean, default=False, comment="是否系统内置模板")
21-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
22-
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
23-
created_by = Column(String(255), nullable=True, comment="创建者")
24-
updated_by = Column(String(255), nullable=True, comment="更新者")
2523

26-
class CollectionTask(Base):
24+
class CollectionTask(BaseEntity):
2725
"""归集任务表(UUID 主键) -> t_dc_collection_tasks"""
2826

2927
__tablename__ = "t_dc_collection_tasks"
@@ -41,12 +39,8 @@ class CollectionTask(Base):
4139
retry_count = Column(Integer, nullable=True, server_default="3", comment="重试次数")
4240
timeout_seconds = Column(Integer, nullable=True, server_default="3600", comment="超时时间(秒)")
4341
last_execution_id = Column(String(36), nullable=True, comment="最后执行ID(UUID)")
44-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
45-
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
46-
created_by = Column(String(255), nullable=True, comment="创建者")
47-
updated_by = Column(String(255), nullable=True, comment="更新者")
4842

49-
class TaskExecution(Base):
43+
class TaskExecution(BaseEntity):
5044
"""任务执行记录表(UUID 主键) -> t_dc_task_executions"""
5145

5246
__tablename__ = "t_dc_task_executions"
@@ -60,7 +54,3 @@ class TaskExecution(Base):
6054
completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
6155
duration_seconds = Column(Integer, nullable=True, server_default="0", comment="执行时长(秒)")
6256
error_message = Column(Text, nullable=True, comment="错误信息")
63-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
64-
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
65-
created_by = Column(String(255), nullable=True, comment="创建者")
66-
updated_by = Column(String(255), nullable=True, comment="更新者")

runtime/datamate-python/app/db/models/data_evaluation.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
from sqlalchemy import Column, String, Text, Float, TIMESTAMP, ForeignKey, Integer
1111
from sqlalchemy.sql import func
1212

13-
from app.db.session import Base
13+
from app.db.models.base_entity import BaseEntity
1414

1515

16-
class EvaluationTask(Base):
16+
class EvaluationTask(BaseEntity):
1717
"""评估任务表(UUID 主键) -> t_de_eval_task
1818
1919
Columns per data-evaluation-init.sql:
@@ -36,16 +36,13 @@ class EvaluationTask(Base):
3636
eval_process = Column(Float, nullable=False, server_default="0", comment="评估进度")
3737
eval_prompt = Column(Text, nullable=True, comment="评估提示词")
3838
eval_config = Column(Text, nullable=True, comment="评估配置")
39-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
40-
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
41-
created_by = Column(String(255), nullable=True, comment="创建者")
42-
updated_by = Column(String(255), nullable=True, comment="更新者")
4339

4440

45-
class EvaluationFile(Base):
41+
class EvaluationFile(BaseEntity):
4642
"""评估条目表(UUID 主键) -> t_de_eval_file"""
4743

4844
__tablename__ = "t_de_eval_file"
45+
__ignore_data_scope__ = True
4946

5047
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
5148
task_id = Column(String(36), ForeignKey('t_de_eval_task.id'), nullable=False, comment="评估任务ID")
@@ -54,20 +51,17 @@ class EvaluationFile(Base):
5451
error_message = Column(Text, nullable=True, comment="错误信息")
5552
total_count = Column(Integer, nullable=False, default=0, comment="总数")
5653
evaluated_count = Column(Integer, nullable=False, default=0, comment="已评估数")
57-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
58-
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
59-
created_by = Column(String(255), nullable=True, comment="创建者")
60-
updated_by = Column(String(255), nullable=True, comment="更新者")
6154

6255

63-
class EvaluationItem(Base):
56+
class EvaluationItem(BaseEntity):
6457
"""评估条目表(UUID 主键) -> t_de_eval_item
6558
6659
Columns per data-evaluation-init.sql:
6760
id, task_id, item_id, eval_score, eval_result, status
6861
"""
6962

7063
__tablename__ = "t_de_eval_item"
64+
__ignore_data_scope__ = True
7165

7266
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
7367
task_id = Column(String(36), ForeignKey('t_de_eval_task.id'), nullable=False, comment="评估任务ID")
@@ -77,7 +71,3 @@ class EvaluationItem(Base):
7771
eval_score = Column(Float, nullable=False, server_default="0", comment="评估分数")
7872
eval_result = Column(Text, nullable=True, comment="评估结果")
7973
status = Column(String(50), server_default="PENDING", nullable=False, comment="状态:PENDING/EVALUATED")
80-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
81-
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
82-
created_by = Column(String(255), nullable=True, comment="创建者")
83-
updated_by = Column(String(255), nullable=True, comment="更新者")

runtime/datamate-python/app/db/models/data_synthesis.py

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from sqlalchemy import Column, String, Text, Integer, JSON, TIMESTAMP, func
44

5-
from app.db.session import Base
5+
from app.db.models.base_entity import Base, BaseEntity
66
from app.module.generation.schema.generation import CreateSynthesisTaskRequest
77

88

@@ -43,7 +43,7 @@ async def save_synthesis_task(db_session, synthesis_task: CreateSynthesisTaskReq
4343
return synth_task_instance
4444

4545

46-
class DataSynthInstance(Base):
46+
class DataSynthInstance(BaseEntity):
4747
"""数据合成任务表,对应表 t_data_synth_instances
4848
4949
create table if not exists t_data_synth_instances
@@ -82,19 +82,9 @@ class DataSynthInstance(Base):
8282
total_chunks = Column(Integer, nullable=False, default=0, comment="总文本块数")
8383
processed_chunks = Column(Integer, nullable=False, default=0, comment="已处理文本块数")
8484
total_synth_data = Column(Integer, nullable=False, default=0, comment="总合成数据量")
85-
created_at = Column(TIMESTAMP, nullable=False, default=func.now(), comment="创建时间")
86-
updated_at = Column(
87-
TIMESTAMP,
88-
nullable=False,
89-
default=func.now(),
90-
onupdate=func.now(),
91-
comment="更新时间",
92-
)
93-
created_by = Column(String(255), nullable=True, comment="创建者")
94-
updated_by = Column(String(255), nullable=True, comment="更新者")
9585

9686

97-
class DataSynthesisFileInstance(Base):
87+
class DataSynthesisFileInstance(BaseEntity):
9888
"""数据合成文件任务表,对应表 t_data_synthesis_file_instances
9989
10090
create table if not exists t_data_synthesis_file_instances (
@@ -129,17 +119,6 @@ class DataSynthesisFileInstance(Base):
129119
total_chunks = Column(Integer, nullable=False, default=0, comment="总文本块数")
130120
processed_chunks = Column(Integer, nullable=False, default=0, comment="已处理文本块数")
131121

132-
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), nullable=True, comment="创建时间")
133-
updated_at = Column(
134-
TIMESTAMP,
135-
server_default=func.current_timestamp(),
136-
onupdate=func.current_timestamp(),
137-
nullable=True,
138-
comment="更新时间",
139-
)
140-
created_by = Column(String(255), nullable=True, comment="创建者")
141-
updated_by = Column(String(255), nullable=True, comment="更新者")
142-
143122

144123
class DataSynthesisChunkInstance(Base):
145124
"""数据合成分块任务表,对应表 t_data_synthesis_chunk_instances

0 commit comments

Comments
 (0)