-
Notifications
You must be signed in to change notification settings - Fork 2.8k
fix: Workflow tool import and export #4976
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,129 +90,6 @@ def get_tool_list(self): | |
|
|
||
|
|
||
class ToolWorkflowSerializer(serializers.Serializer):
    """Serializer namespace for importing and exporting workflow-based tools."""

    class Import(serializers.Serializer):
        """Validates the parameters accompanying a tool-import request."""

        # Owner of everything created by the import.
        user_id = serializers.UUIDField(required=True, label=_('user id'))
        # Optional target workspace; absent means shared/global scope.
        workspace_id = serializers.CharField(required=False, label=_('workspace id'))
        knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
|
|
||
@transaction.atomic
def import_(self, instance: dict, is_import_tool, with_valid=True):
    """Import a workflow tool from an uploaded ``.tool`` file.

    Args:
        instance: request payload; ``instance['file']`` is the uploaded file object.
        is_import_tool: when truthy, also persist the bundled dependency tools
            and grant the importing user permission on them.
        with_valid: run serializer validation first (default True).

    Raises:
        AppApiException: code 1001 when the uploaded file cannot be deserialized.
    """
    if with_valid:
        self.is_valid()
        ToolWorkflowSerializer(data=instance).is_valid(raise_exception=True)
    user_id = self.data.get('user_id')
    workspace_id = self.data.get('workspace_id')
    # NOTE(review): 'tool_id' is not a declared field on Import (only
    # knowledge_id is) — self.data.get('tool_id') may be None; confirm intent.
    tool_id = self.data.get('tool_id')
    tool_instance_bytes = instance.get('file').read()
    # SECURITY: the payload is unpickled; restricted_loads must whitelist the
    # allowed classes — pickle on untrusted input is otherwise arbitrary code
    # execution. Flagged for review, not replaced here.
    try:
        tool_instance = restricted_loads(tool_instance_bytes)
    except Exception as e:
        # FIX: chain the original error so the real cause is not lost.
        raise AppApiException(1001, _("Unsupported file format")) from e
    tool_workflow = tool_instance.work_flow
    tool_list = tool_instance.get_tool_list()
    update_tool_map = {}
    if len(tool_list) > 0:
        # FIX: original wrote generate_uuid((tool.get('id') + workspace_id or '')),
        # which parses as (id + workspace_id) or '' and raises TypeError when
        # workspace_id is None. The intended fallback is on workspace_id alone.
        suffix = workspace_id or ''
        # Both the exported ids and their workspace-scoped remappings,
        # flattened (replaces the original reduce(lambda ...) flatten).
        tool_id_list = [candidate
                        for tool in tool_list
                        for candidate in (tool.get('id'), generate_uuid(tool.get('id') + suffix))]
        # Tools already present in this workspace (set for O(1) membership).
        exits_tool_id_list = {str(tool.id) for tool in
                              QuerySet(Tool).filter(id__in=tool_id_list, workspace_id=workspace_id)}
        # old id -> new workspace-scoped id, for tools that still need creating.
        update_tool_map = {tool.get('id'): generate_uuid(tool.get('id') + suffix)
                           for tool in tool_list
                           if tool.get('id') not in exits_tool_id_list}

        # Keep only tools whose original AND remapped ids are both absent.
        tool_list = [{**tool, 'id': update_tool_map.get(tool.get('id'))}
                     for tool in tool_list
                     if tool.get('id') not in exits_tool_id_list
                     and generate_uuid(tool.get('id') + suffix) not in exits_tool_id_list]

    # Rewrite tool references inside the workflow to the remapped ids.
    work_flow = self.to_tool_workflow(tool_workflow, update_tool_map)
    tool_model_list = [self.to_tool(tool, workspace_id, user_id) for tool in tool_list]
    # FIX: .filter(...) before update_or_create was redundant — the same
    # lookup kwargs are already passed to update_or_create itself.
    QuerySet(ToolWorkflow).update_or_create(
        tool_id=tool_id,
        workspace_id=workspace_id,
        defaults={'work_flow': work_flow}
    )

    if is_import_tool:
        if len(tool_model_list) > 0:
            QuerySet(Tool).bulk_create(tool_model_list)
            # Grant the importer access to the tools just created; skipped
            # when nothing was created so no empty batch call is issued.
            UserResourcePermissionSerializer(data={
                'workspace_id': workspace_id,
                'user_id': user_id,
                'auth_target_type': AuthTargetType.TOOL.value
            }).auth_resource_batch([t.id for t in tool_model_list])
|
|
||
@staticmethod
def to_tool_workflow(knowledge_workflow, update_tool_map):
    """Remap tool ids referenced by every node of the workflow (mutates and
    returns the embedded 'work_flow' dict)."""
    work_flow = knowledge_workflow.get("work_flow")
    for outer in work_flow.get('nodes', []):
        hand_node(outer, update_tool_map)
        if outer.get('type') != 'loop_node':
            continue
        # Loop nodes carry a nested sub-workflow whose nodes need the same treatment.
        loop_body = outer.get('properties', {}).get('node_data', {}).get('loop_body', {})
        for inner in loop_body.get('nodes', []):
            hand_node(inner, update_tool_map)
    return work_flow
|
|
||
@staticmethod
def to_tool(tool, workspace_id, user_id):
    """Build an (unsaved) Tool model instance from an exported tool dict."""
    # Tools that require init parameters start disabled until configured.
    init_fields = tool.get('init_field_list') or []
    active = False if len(init_fields) > 0 else tool.get('is_active')
    # The string 'None' (not the None object) marks the shared scope here.
    shared = workspace_id == 'None'
    return Tool(
        id=tool.get('id'),
        user_id=user_id,
        name=tool.get('name'),
        code=tool.get('code'),
        template_id=tool.get('template_id'),
        input_field_list=tool.get('input_field_list'),
        init_field_list=tool.get('init_field_list'),
        is_active=active,
        tool_type=tool.get('tool_type', 'CUSTOM') or 'CUSTOM',
        scope=ToolScope.SHARED if shared else ToolScope.WORKSPACE,
        folder_id='default' if shared else workspace_id,
        workspace_id=workspace_id,
    )
|
|
||
class Export(serializers.Serializer):
    """Validates the parameters of a tool-export request."""

    user_id = serializers.UUIDField(required=True, label=_('user id'))
    workspace_id = serializers.CharField(required=False, label=_('workspace id'))
    # FIX: label previously read 'knowledge id' (copy-paste from Import);
    # this field identifies the tool being exported.
    tool_id = serializers.UUIDField(required=True, label=_('tool id'))
|
|
||
def export(self, with_valid=True):
    """Serialize a tool, its workflow, and its non-shared dependency tools
    into a pickled ``.tool`` file attachment.

    Returns:
        HttpResponse carrying the pickled payload on success, otherwise an
        error result (HTTP 500) with the failure message.
    """
    try:
        if with_valid:
            self.is_valid()
        tool_id = self.data.get('tool_id')
        tool_workflow = QuerySet(ToolWorkflow).filter(tool_id=tool_id).first()
        tool = QuerySet(Tool).filter(id=tool_id).first()
        # FIX: previously a missing tool/workflow fell through to an
        # AttributeError on None, swallowed into an opaque 500. Fail with a
        # clear message instead.
        if tool is None or tool_workflow is None:
            return result.error(_('Tool not found'),
                                response_status=status.HTTP_500_INTERNAL_SERVER_ERROR)
        from application.flow.tools import get_tool_id_list
        tool_id_list = get_tool_id_list(tool_workflow.work_flow)
        tool_list = []
        if len(tool_id_list) > 0:
            # Shared tools exist in every deployment; only workspace-scoped
            # dependencies travel with the export bundle.
            tool_list = QuerySet(Tool).filter(id__in=tool_id_list).exclude(scope=ToolScope.SHARED)
        tool_workflow_dict = {'id': tool.id,
                              'work_flow': tool_workflow.work_flow,
                              'workspace_id': tool.workspace_id,
                              'name': tool.name,
                              'desc': tool.desc,
                              'tool_type': tool.tool_type}

        tool_workflow_instance = ToolWorkflowInstance(
            tool_workflow_dict,
            'v2',
            # FIX: loop variable renamed so it no longer shadows `tool` above.
            [ToolExportModelSerializer(t).data for t in tool_list]
        )
        # SECURITY: pickle output must only ever be re-read through the
        # restricted loader on import — never plain pickle.loads.
        tool_workflow_pickle = pickle.dumps(tool_workflow_instance)
        response = HttpResponse(content_type='text/plain', content=tool_workflow_pickle)
        response['Content-Disposition'] = f'attachment; filename="{tool.name}.tool"'
        return response
    except Exception as e:
        # Boundary catch: surface the message without leaking a stack trace.
        return result.error(str(e), response_status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
|
||
| class Operate(serializers.Serializer): | ||
| user_id = serializers.UUIDField(required=True, label=_('user id')) | ||
| workspace_id = serializers.CharField(required=True, label=_('workspace id')) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

The code contains numerous optimizations and improvements that can be made.
Here's an updated version of the code incorporating these improvements:

from django.db import transaction, IntegrityError
from rest_framework.response import Response
from rest_framework.decorators import api_view
from .models import ToolWorkflow, ToolInstance, ToolExportModelSerializer
from .serializers import (OperateSerializer,
ImportToolSerializer,
ExportToolSerializer)
from .services.tool_service import (
handle_import_work_flow,
retrieve_export_instance
)
@api_view(['POST'])
def import_tool(request):
    """Endpoint: import a workflow tool from an uploaded file."""
    serializer = ImportToolSerializer(data=request.data)
    if serializer.is_valid(raise_exception=True):
        payload = serializer.validated_data
        uploaded_bytes = payload['file'].read()
        response = handle_import_work_flow(
            payload['workspace_id'],
            request.user.id,
            payload['knowledge_id'],
            uploaded_bytes,
        )
        return Response(response, status=response.status_code)
@api_view(['GET'])
def export_tool(request):
    """Endpoint: export a tool by id as a downloadable workflow instance."""
    serializer = ExportToolSerializer(data=request.query_params)
    if serializer.is_valid(raise_exception=True):
        params = serializer.validated_data
        response = retrieve_export_instance(
            params['tool_id'],
            params['workspace_id'],
            request.user.id,
        )
        return Response(response, status=response.status_code)
class OperateSerializer(serializers.Serializer):
    """Validates the acting user and target workspace for tool operations."""

    user_id = serializers.UUIDField(required=True, label=_('user id'))
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
# Ensure that services.py includes necessary imports and functions

Key Changes Made:
This refactoring should make the code cleaner, more modular, and robust while maintaining its functionality.
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are several issues, potential issues, and areas for optimization in the provided
`ToolWorkflowSerializer` code.

Issues:
Transaction Management: The use of
`@transaction.atomic` on an entire method does not make much sense, because it will roll back if any validation fails or another exception occurs during execution. It's better to handle transactions at higher levels.

Static Methods:
to_toolMethod: This method seems duplicated with some logic withinImport.export. Consider reducing redundancy.to_tool_workflowMethod: Similar to the above, this method has repeated logic that can be optimized by handling more cases inside a single function.Pickle Serialization/Deserialization: The
`pickle` module is used for serialization/deserialization, which can introduce vulnerabilities if not properly handled. Using safer alternatives like JSON could improve security.

Response Handling: The
`result.error(...)` call within an exception block doesn't seem to match the expected usage, suggesting there might be confusion about how responses should be formatted.

File Upload Validation and Loading:
`import_` method: error handling when reading the file content using `instance.get('file').read()` could prevent unexpected behavior if the file is invalid or corrupted.

Database Operations:
Tool Workflow Instance Creation:
code,folder_id) have default values set to non-string literals (e.g.,'default'). Ensure these default literals fit their intended types (str).UUID Generation Logic:
workspace_id.Potential Improvements:
Atomic Transactions: Use
atomicdecorator judiciously where necessary and avoid wrapping multiple actions into a single atomic transaction.Avoid Redundancy: Factor out common logic between methods wherever possible. For example, separate validation logic into its own method.
Secure Data Transmission: If file uploads are involved, ensure secure transmission by using HTTPS and validating uploaded files before processing them with
pickle.Optimize Database Queries: Where appropriate, optimize database queries and batch sizes to minimize load times.
Use JSON for File Content Storage: Prefer using JSON over pickling binary data to enhance security and reliability.
Here’s an improved version focusing on key points highlighted:
By making these improvements, you should observe overall better maintainability, safety, and performance.