Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions apps/dataset/serializers/dataset_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,11 +455,13 @@ def save(self, instance: Dict, with_valid=True):
# 批量插入关联问题
QuerySet(ProblemParagraphMapping).bulk_create(problem_paragraph_mapping_list) if len(
problem_paragraph_mapping_list) > 0 else None

# 响应数据
return {**DataSetSerializers(dataset).data,
'document_list': DocumentSerializers.Query(data={'dataset_id': dataset_id}).list(
with_valid=True)}, dataset_id
'user_id': user_id,
'document_list': document_model_list,
"document_count": len(document_model_list),
"char_length": reduce(lambda x, y: x + y, [d.char_length for d in document_model_list],
0)}, dataset_id

@staticmethod
def get_last_url_path(url):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Certainly! Here are some observations and improvements for the provided code:

  1. Code Structure: The function save has multiple responsibilities—saving, handling associations, and constructing the response—but it's split into two separate functions save_with_association and get_response. This can be refactored to merge these functionalities where appropriate.

  2. Bulk Insertion Logic: You're correctly using QuerySet.bulk_create, but consider adding a try-except block around it to handle any potential errors during bulk insertion, such as database constraint violations or other exceptions.

  3. Response Data:

    • Add checks to ensure that document_model_list is not empty before accessing its attributes like char_length.
    • Use type hints (Dict) explicitly where possible to clarify the expected types of inputs and outputs.
  4. User ID Handling: Ensure that user_id is properly set when returning the response.

Here’s an improved version of the code based on the above points:

from functools import reduce

def save(self, instance: Dict, with_valid=True):
    # Assume DataSetSerializers and ProblemParagraphMapping exist elsewhere in the code
    dataset_serializers = DataSetSerializers(instance)
    dataset_responses = {
        'id': instance.get('id')
    }

    problems = None
    if with_valid:
        problems = self._create_problems_with_relation(instance)

    user_id = None  # Define this variable somewhere in your class or pass it via arguments
    document_mappings = self._generate_document_mappings(instance, with_valid)

    if problems:
        dataset_responses.update({
            'problems': problems,
            'document_relations': document_mappings,
            'dataset_list': DocumentSerializers.Query(data={'dataset_id': instance['id']}).list(with_valid=True)}
        )
    
    return dataset_responses

@staticmethod
def _create_problems_with_relation(dataset_data: dict) -> list:
    problems = []
    # Implement logic to create new problems and their mappings here
    return problems

@staticmethod
def _generate_document_mappings(dataset_data: dict, with_valid=True) -> List[dict]:
    document_models = []  # Fetch all document models related to the dataset
    
    if with_valid:
        mapping_list = []
        for doc in document_models:
            mapping_dict = {'doc_id': doc.id, 'dataset_id': doc.dataset_id}
            mapping_list.append(mapping_dict)
        
        QuerySet(ProblemParagraphMapping).bulk_create(mapping_list)
    
    for doc in document_models:
        document_responses = {
            'id': str(doc.id),
            'name': doc.name,
            'description': doc.description,
            'user_id': user_id,  # Assuming user_id is defined somewhere else
            'paragraphs': doc.paragraphs
        }
        document_responses['size'] = sum(len(x) for x in [resp["description"] for resp in (document_responses)] * len([x.split("\n") for x in document_responses]))
        document_requests = ResponseDocumentsSerializer.create_instances(
                data=[docResponses]
                )[0]  
        document_responses.pop("paragraphs")
        return document_requests

@staticmethod
def save_with_association(instance: Dict, with_valid=True):
    problem_paragraph_mapping_list = [
        ProblemParagraphMapping(...) for ... in ...
    ]
    if len(problem_paragraph_mapping_list) > 0:
        BulkCreateManager.objects.bulk_create(problem_paragraph_mapping_list)

@staticmethod
def get_response(data: dict, dataset_id, with_valid=True) -> Dict[str, Any]:
    return {
        **DataSetSerializers(data).data,
        'document_list': DocumentListSerializer.data(data),
        "document_count": len(DocumentListSerializer.data(data)),
        "char_length": sum(reduce(lambda x, y: x + y, [d.char_length for d in Data]), 0)}, dataset_id

These changes aim to improve clarity, maintainability, and robustness of the original code. Adjustments might depend heavily on actual context and classes used within the surrounding environment.

Expand Down