diff --git a/.gitignore b/.gitignore index e5e8139..17a7739 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,4 @@ test.txt.tmp # VSCode .vscode +test_queue/* diff --git a/docs/source/decisions/0002-add-submission-file-model.rst b/docs/source/decisions/0002-add-submission-file-model.rst new file mode 100644 index 0000000..bb6cade --- /dev/null +++ b/docs/source/decisions/0002-add-submission-file-model.rst @@ -0,0 +1,77 @@ +2. File Handling Implementation for Submission System +##################################################### + +Status +****** + +**Provisional** *2025-02-14* + +Implemented by https://github.com/openedx/edx-submissions/pull/286 + +Context +******* + +As part of the XQueue migration effort detailed in ADR 0001, we need to implement a file handling system within edx-submissions. Currently, XQueue manages file submissions through a tightly coupled approach. + +### Current Limitations + +1. **Inadequate File Management**: XQueue's approach relies on JSON strings in character fields, with size constraints and manual URL manipulation for file handling. + +2. **Process Inefficiencies**: The current system uses synchronous HTTP for file retrieval, lacks proper validation, and tightly couples submission processing with file handling. + +3. **Integration Challenges**: External graders depend on specific URL formats with HTTP-based retrieval, embedding file information directly in submission payloads. + +Decision +******** + +We will implement a dedicated file management system for the assessment submission process, focusing on workflow and educational needs: + +1. **Centralized Storage**: Create a unified repository for student-submitted files, ensuring they are properly associated with their assessments and accessible throughout the grading process. + +2. **Streamlined Workflow**: Design a clear process where files are automatically processed during submission creation, securely stored, and efficiently delivered to grading systems. + +3. **Consistent Experience**: Maintain compatibility with existing systems to ensure a smooth transition, allowing instructors and external graders to access files without changes to their established workflows. + +Consequences +************ + +Positive: +--------- + +1. **Architecture**: Clean separation of concerns, improved maintainability, better error handling, optimized database access + +2. **Integration**: Seamless xqueue-watcher compatibility, support for existing workflows, minimal client code changes + +3. **Operations**: Robust file validation, improved tracking, better error visibility, simplified lifecycle management + +Negative: +--------- + +1. **Technical**: Additional database structures + +2. **Migration**: Temporary system complexity, additional testing needs + +3. **Performance**: File processing overhead + +References +********** + +Current System Documentation: + * XQueue Repository: https://github.com/openedx/xqueue + * XQueue Watcher Repository: https://github.com/openedx/xqueue-watcher + +Related Repositories: + * edx-submissions: https://github.com/openedx/edx-submissions + * edx-platform: https://github.com/openedx/edx-platform + * XQueue Repository: https://github.com/openedx/xqueue + +Related Documentation: + * ADR 0001: Creation of ExternalGraderDetail Model for XQueue Migration + +Future Event Integration: + * Open edX Events Framework: https://github.com/openedx/openedx-events + * Event Bus Documentation: https://openedx.atlassian.net/wiki/spaces/AC/pages/124125264/Event+Bus + +Related Architecture Documents: + * Open edX Architecture Guidelines: https://openedx.atlassian.net/wiki/spaces/AC/pages/124125264/Architecture+Guidelines + diff --git a/settings.py b/settings.py index d718f05..07ec8b7 100644 --- a/settings.py +++ b/settings.py @@ -82,3 +82,11 @@ ) TEST_APPS = ('submissions',) + +EDX_SUBMISSIONS = { + 'MEDIA': { + 'BACKEND': 'django.core.files.storage.InMemoryStorage', + 'OPTIONS': { + } + } +} diff --git a/submissions/__init__.py b/submissions/__init__.py index 9f433d6..9ac2b64 100644 --- a/submissions/__init__.py +++ b/submissions/__init__.py @@ -1,2 +1,2 @@ """ API for creating submissions and scores. """ -__version__ = '3.10.1' +__version__ = '3.11.0' diff --git a/submissions/api.py b/submissions/api.py index d5eb4cd..40f4698 100644 --- a/submissions/api.py +++ b/submissions/api.py @@ -15,7 +15,6 @@ # SubmissionError imported so that code importing this api has access from submissions.errors import ( # pylint: disable=unused-import ExternalGraderQueueEmptyError, - SubmissionError, SubmissionInternalError, SubmissionNotFoundError, SubmissionRequestError @@ -28,6 +27,7 @@ ScoreSummary, StudentItem, Submission, + SubmissionFile, score_reset, score_set ) @@ -49,7 +49,6 @@ TOP_SUBMISSIONS_CACHE_TIMEOUT = 300 -# pylint: disable=unused-argument def create_external_grader_detail(student_item_dict, answer, queue_name: str, @@ -58,7 +57,7 @@ def create_external_grader_detail(student_item_dict, **external_grader_additional_data ): """ - Creates a submission and an associated ExternalGraderDetail record. + Creates a submission and an associated ExternalGraderDetail record with optional file attachments. Args: student_item_dict (dict): The student_item this submission is associated with. @@ -73,13 +72,28 @@ def create_external_grader_detail(student_item_dict, points_possible (int, optional): The maximum possible points for this submission. Defaults to 1. external_grader_additional_data: Additional keyword arguments that may be used for the external grader. + If a 'files' dictionary is included, files will be processed and stored as SubmissionFile objects. + The 'files' dictionary should map filenames to file objects (typically FileObjForWebobFiles). Returns: ExternalGraderDetail: The created external grader detail record that references the submission. + Any processed files are accessible through the `files` related_name on this object. Raises: ExternalGraderQueueEmptyError: If queue_name is empty. SubmissionInternalError: If there's an error creating the submission or external grader detail. + + Example: + >>> files = {'assignment.py': file_obj} + >>> external_grader = create_external_grader_detail( + ... student_item_dict, + ... answer, + ... 'python_grader', + ... files=files + ... ) + >>> # Access files through external_grader.files.all() + >>> # Get URLs for external systems: + >>> urls = {f.original_filename: f.xqueue_url for f in external_grader.files.all()} """ submission = create_submission(student_item_dict, answer) @@ -93,9 +107,20 @@ def create_external_grader_detail(student_item_dict, submission_uuid=submission_uuid, queue_name=queue_name, grader_file_name=grader_file_name, - points_possible=points_possible, + points_possible=points_possible) + + files_dict = external_grader_additional_data.get('files') + if files_dict: + + files_urls = {} + for filename, file_obj in files_dict.items(): + submission_file = SubmissionFile.objects.create( + external_grader=instance, + file=file_obj.file, + original_filename=filename + ) + files_urls[filename] = submission_file.xqueue_url - ) return instance except DatabaseError as error: @@ -106,6 +131,16 @@ def create_external_grader_detail(student_item_dict, raise SubmissionInternalError(error_message) from error +def get_files_for_grader(external_grader): + """ + Returns files in format expected by xqueue-watcher when is calling by get_submission endpoint. + """ + return { + file.original_filename: file.file.url + for file in external_grader.files.all() + } + + def create_submission( student_item_dict, answer, diff --git a/submissions/migrations/0005_submissionfile.py b/submissions/migrations/0005_submissionfile.py new file mode 100644 index 0000000..ddb5e70 --- /dev/null +++ b/submissions/migrations/0005_submissionfile.py @@ -0,0 +1,33 @@ +# Generated by Django 4.2.19 on 2025-05-15 14:14 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone +import submissions.models +import uuid + + +class Migration(migrations.Migration): + + dependencies = [ + ('submissions', '0004_externalgraderdetail'), + ] + + operations = [ + migrations.CreateModel( + name='SubmissionFile', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('uuid', models.UUIDField(default=uuid.uuid4, editable=False)), + ('file', models.FileField(max_length=512, storage=submissions.models.get_storage, + upload_to=submissions.models.submission_file_path)), + ('original_filename', models.CharField(max_length=255)), + ('created_at', models.DateTimeField(default=django.utils.timezone.now)), + ('external_grader', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, + related_name='files', to='submissions.externalgraderdetail')), + ], + options={ + 'indexes': [models.Index(fields=['external_grader', 'uuid'], name='submissions_externa_ff8089_idx')], + }, + ), + ] diff --git a/submissions/models.py b/submissions/models.py index 25bbfa0..78d3cdd 100644 --- a/submissions/models.py +++ b/submissions/models.py @@ -9,15 +9,19 @@ ./manage.py makemigrations submissions """ +import functools import logging +import os from datetime import timedelta from uuid import uuid4 from django.conf import settings from django.contrib import auth +from django.core.files.storage import default_storage from django.db import DatabaseError, models, transaction from django.db.models.signals import post_save, pre_save from django.dispatch import Signal, receiver +from django.utils.module_loading import import_string from django.utils.timezone import now from jsonfield import JSONField from model_utils.models import TimeStampedModel @@ -693,3 +697,113 @@ def update_status(self, new_status): def create_from_uuid(cls, submission_uuid, **kwargs): submission = Submission.objects.get(uuid=submission_uuid) return cls.objects.create(submission=submission, **kwargs) + + +def submission_file_path(instance, _): + """ + Generate file path for submission files. + Format: queue_name/uuid + The filename is replaced with the UUID to ensure uniqueness without preserving extension. + """ + return os.path.join( + instance.external_grader.queue_name, + f"{instance.uuid}" + ) + + +@functools.cache +def _get_storage_cached(): + """ + Cached implementation to get the configured storage backend. + + This private function loads storage configuration from settings and + dynamically instantiates the specified storage backend. It expects + EDX_SUBMISSIONS['MEDIA'] to be a dict with 'BACKEND' (string path to + storage class) and optional 'OPTIONS' (dict of parameters). + + This function is for internal use only and is cached to improve performance. + """ + edx_submissions_config = getattr(settings, 'EDX_SUBMISSIONS', {}) + storage_config = edx_submissions_config.get('MEDIA') + + if storage_config: + storage_cls = import_string(storage_config['BACKEND']) + options = storage_config.get('OPTIONS', {}) + return storage_cls(**options) + + return default_storage + + +def get_storage(): + """ + Get the configured storage backend or fallback to default storage. + + This function checks for a storage configuration in the Django settings. + It first looks for 'MEDIA' in the 'EDX_SUBMISSIONS' configuration dictionary. + + The function uses an internal cached implementation while remaining + serializable for Django migrations, avoiding "Cannot serialize" errors. + + Returns: + Storage instance: Returns the configured storage if found in EDX_SUBMISSIONS['MEDIA'], + otherwise returns Django's default_storage. + + Example: + # In settings.py + EDX_SUBMISSIONS = { + 'MEDIA': { + 'BACKEND': 'storages.backends.s3boto3.S3Boto3Storage', + 'OPTIONS': { + 'bucket_name': 'my-bucket' + } + } + } + + # Then get_storage() will return an S3Boto3Storage instance + """ + return _get_storage_cached() # For performance while keeping this function serializable for migrations + + +class SubmissionFile(models.Model): + """ + Model to handle files associated with submissions + """ + uuid = models.UUIDField(default=uuid4, editable=False) # legacy S3 key + external_grader = models.ForeignKey( + 'submissions.ExternalGraderDetail', + on_delete=models.SET_NULL, + related_name='files', + null=True, + ) + file = models.FileField( + upload_to=submission_file_path, + max_length=512, + storage=get_storage + ) + original_filename = models.CharField(max_length=255) # This is necessary to send file name to xqueue-watcher + created_at = models.DateTimeField(default=now) + + class Meta: + indexes = [ + models.Index(fields=['external_grader', 'uuid']), + ] + + @property + def xqueue_url(self): + """ + Returns a URL in the XQueue-compatible format: /queue_name/uuid + + This format is used for file references in both the legacy XQueue system + and the new integrated standard. It maintains backward compatibility + while supporting the migration from the external XQueue API to the + integrated Open edX solution. + + The URL follows the pattern: /{queue_name}/{submission_uuid} + where: + - queue_name: identifies the external grader queue + - uuid: uniquely identifies this submission (legacy S3 key) + + Returns: + str: Formatted URL path following XQueue conventions + """ + return f"/{self.external_grader.queue_name}/{self.uuid}" diff --git a/submissions/tests/test_api.py b/submissions/tests/test_api.py index a64b659..58a2b44 100644 --- a/submissions/tests/test_api.py +++ b/submissions/tests/test_api.py @@ -10,6 +10,8 @@ import pytz # Django imports from django.core.cache import cache +from django.core.files.base import ContentFile +from django.core.files.uploadedfile import SimpleUploadedFile from django.db import DatabaseError, IntegrityError, connection, transaction from django.test import TestCase from django.utils.timezone import now @@ -17,8 +19,17 @@ # Local imports from submissions import api +from submissions.api import create_external_grader_detail, get_files_for_grader from submissions.errors import ExternalGraderQueueEmptyError, SubmissionInternalError -from submissions.models import ExternalGraderDetail, ScoreAnnotation, ScoreSummary, StudentItem, Submission, score_set +from submissions.models import ( + ExternalGraderDetail, + ScoreAnnotation, + ScoreSummary, + StudentItem, + Submission, + SubmissionFile, + score_set +) from submissions.serializers import StudentItemSerializer STUDENT_ITEM = { @@ -923,3 +934,188 @@ def test_create_multiple_submission_external_grader_details(self): self.assertEqual(submission2.answer, ANSWER_TWO) self.assertNotEqual(external_grader_detail1.submission.uuid, external_grader_detail2.submission.uuid) + + def test_create_external_grader_detail_with_files(self): + """Test creating a queue record with file handling.""" + + class MockFileObj: + def __init__(self, file_content): + self.file = SimpleUploadedFile( + "test.txt", + file_content, + content_type="text/plain" + ) + + test_file = MockFileObj(b"test content") + + event_data = { + 'queue_name': 'test_queue', + 'files': {'test.txt': test_file} + } + + external_grader_detail = api.create_external_grader_detail(STUDENT_ITEM, ANSWER_ONE, **event_data) + + self.assertEqual(external_grader_detail.queue_name, 'test_queue') + self.assertEqual(external_grader_detail.files.count(), 1) + submission_file = external_grader_detail.files.first() + self.assertEqual(submission_file.original_filename, 'test.txt') + + def test_create_external_grader_detail_with_multiple_files(self): + """Test creating a queue record with multiple files.""" + + class MockFileObj: + def __init__(self, filename, content): + self.file = SimpleUploadedFile( + filename, + content, + content_type="text/plain" + ) + + test_files = { + 'test1.txt': MockFileObj("test1.txt", b"test content 1"), + 'test2.txt': MockFileObj("test2.txt", b"test content 2") + } + + external_grader_detail = { + 'queue_name': 'test_queue', + 'files': test_files + } + external_grader_detail = api.create_external_grader_detail(STUDENT_ITEM, ANSWER_ONE, **external_grader_detail) + + self.assertEqual(external_grader_detail.files.count(), 2) + filenames = set(external_grader_detail.files.values_list('original_filename', flat=True)) + self.assertEqual(filenames, {'test1.txt', 'test2.txt'}) + + def test_create_external_grader_detail_without_files(self): + """Test creating a queue record without any files still works.""" + external_grader_instance = api.create_external_grader_detail(STUDENT_ITEM, ANSWER_ONE, queue_name="test_queue") + self.assertEqual(external_grader_instance.queue_name, 'test_queue') + self.assertEqual(external_grader_instance.files.count(), 0) + + +class TestExternalGraderFileProcessing(TestCase): + """ + Test the file processing functionality for external graders. + """ + + def setUp(self): + """Set up common test data.""" + self.student_item_dict = { + "student_id": "test_student", + "course_id": "test_course", + "item_id": "test_item", + "item_type": "test_type" + } + self.answer = "test answer" + self.queue_name = "test_queue" + + def test_create_external_grader_with_files(self): + """Test processing files within create_external_grader_detail.""" + + class FileObjWithFileAttr: + def __init__(self): + self.file = SimpleUploadedFile("test.txt", b"test content") + + file_obj = FileObjWithFileAttr() + files_dict = {"test.txt": file_obj} + + # Create external grader with files + instance = create_external_grader_detail( + student_item_dict=self.student_item_dict, + answer=self.answer, + queue_name=self.queue_name, + files=files_dict + ) + + # Verify SubmissionFile was created correctly + submission_file = SubmissionFile.objects.get( + external_grader=instance, + original_filename="test.txt" + ) + self.assertTrue(submission_file.xqueue_url.startswith(f"/{self.queue_name}/")) + + def test_get_files_for_grader(self): + """Test the standalone get_files_for_grader function.""" + + class FileObjWithFileAttr: + def __init__(self): + self.file = SimpleUploadedFile("test.txt", b"test content") + + # Create external grader with a file + instance = create_external_grader_detail( + student_item_dict=self.student_item_dict, + answer=self.answer, + queue_name=self.queue_name, + files={"test.txt": FileObjWithFileAttr()} + ) + + # Test get_files_for_grader function + grader_files = get_files_for_grader(instance) + self.assertEqual(len(grader_files), 1) + self.assertIn("test.txt", grader_files) + self.assertTrue(isinstance(grader_files["test.txt"], str)) + + def test_multiple_files_processing(self): + """Test processing multiple files through create_external_grader_detail.""" + + class FileObjWithFileAttr1: + def __init__(self): + self.file = SimpleUploadedFile("test1.txt", b"content1") + + class FileObjWithFileAttr2: + def __init__(self): + self.file = SimpleUploadedFile("test2.txt", b"content2") + + files_dict = { + "test1.txt": FileObjWithFileAttr1(), + "test2.txt": FileObjWithFileAttr2() + } + + # Create external grader with multiple files + instance = create_external_grader_detail( + student_item_dict=self.student_item_dict, + answer=self.answer, + queue_name=self.queue_name, + files=files_dict + ) + + # Verify both files were created + submission_files = SubmissionFile.objects.filter(external_grader=instance) + self.assertEqual(submission_files.count(), 2) + + # Test get_files_for_grader + grader_files = get_files_for_grader(instance) + self.assertEqual(len(grader_files), 2) + self.assertIn("test1.txt", grader_files) + self.assertIn("test2.txt", grader_files) + + def test_complete_file_processing_flow(self): + """ + Test the complete flow of processing a file through to SubmissionFile creation. + """ + + class FileObjWithFileAttr: + def __init__(self): + self.file = ContentFile(b"test binary content", name="complete_test.txt") + + files_dict = {"complete_test.txt": FileObjWithFileAttr()} + + # Create external grader with file + instance = create_external_grader_detail( + student_item_dict=self.student_item_dict, + answer=self.answer, + queue_name=self.queue_name, + files=files_dict + ) + + # Verify SubmissionFile was created correctly + submission_file = SubmissionFile.objects.get( + external_grader=instance, + original_filename="complete_test.txt" + ) + + # Verify URL format + self.assertTrue(submission_file.xqueue_url.startswith(f"/{self.queue_name}/")) + + # Verify file content is accessible + self.assertEqual(submission_file.file.read(), b"test binary content") diff --git a/submissions/tests/test_models.py b/submissions/tests/test_models.py index ddce5a0..30ff51c 100644 --- a/submissions/tests/test_models.py +++ b/submissions/tests/test_models.py @@ -4,9 +4,12 @@ import time from datetime import datetime, timedelta from unittest import mock +from unittest.mock import patch import pytest from django.contrib import auth +from django.core.files.storage import default_storage +from django.core.files.uploadedfile import SimpleUploadedFile from django.test import TestCase from django.utils.timezone import now from pytz import UTC @@ -20,7 +23,10 @@ ScoreSummary, StudentItem, Submission, - TeamSubmission + SubmissionFile, + TeamSubmission, + _get_storage_cached, + get_storage ) User = auth.get_user_model() @@ -687,3 +693,133 @@ def test_submission_mutability(self): self.assertEqual(re_fetched.answer, new_answer) self.assertEqual(re_fetched.attempt_number, new_attempt) self.assertEqual(re_fetched.submitted_at, new_time) + + +class TestSubmissionFile(TestCase): + """ + Test the SubmissionFile model functionality. + """ + + def setUp(self): + """Set up common test data.""" + + self.student_item = StudentItem.objects.create( + student_id="test_student", + course_id="test_course", + item_id="test_item" + ) + self.submission = Submission.objects.create( + student_item=self.student_item, + answer="test answer", + attempt_number=1 + ) + self.external_grader_detail = ExternalGraderDetail.objects.create( + submission=self.submission, + queue_name="test_queue" + ) + + self.test_file = SimpleUploadedFile( + "test_file.txt", + b"test content", + content_type="text/plain" + ) + + self.submission_file = SubmissionFile.objects.create( + external_grader=self.external_grader_detail, + file=self.test_file, + original_filename="test_file.txt" + ) + + def test_create_submission_file(self): + """Test basic submission file creation.""" + self.assertIsNotNone(self.submission_file.uuid) + self.assertEqual(self.submission_file.original_filename, "test_file.txt") + self.assertEqual(self.submission_file.external_grader, self.external_grader_detail) + self.assertIsNotNone(self.submission_file.created_at) + + def test_xqueue_url_format(self): + """Test that xqueue_url property returns the correct format.""" + expected_url = f"/{self.external_grader_detail.queue_name}/{self.submission_file.uuid}" + self.assertEqual(self.submission_file.xqueue_url, expected_url) + + def test_multiple_files_per_submission(self): + """Test that multiple files can be associated with a single submission.""" + second_file = SimpleUploadedFile( + "second_file.txt", + b"more content", + content_type="text/plain" + ) + + second_submission_file = SubmissionFile.objects.create( + external_grader=self.external_grader_detail, + file=second_file, + original_filename="second_file.txt" + ) + + files = self.external_grader_detail.files.all() + self.assertEqual(files.count(), 2) + self.assertIn(self.submission_file, files) + self.assertIn(second_submission_file, files) + + def test_file_path_generation(self): + """Test that files are stored with the correct path structure.""" + file_path = self.submission_file.file.name + expected_parts = [ + self.external_grader_detail.queue_name, + str(self.submission_file.uuid) + ] + + for part in expected_parts: + self.assertIn(part, file_path) + + def test_unique_uids(self): + """Test that each file gets a unique UUID.""" + second_file = SimpleUploadedFile( + "another_file.txt", + b"content", + content_type="text/plain" + ) + + second_submission_file = SubmissionFile.objects.create( + external_grader=self.external_grader_detail, + file=second_file, + original_filename="another_file.txt" + ) + + self.assertNotEqual( + self.submission_file.uuid, + second_submission_file.uuid + ) + + def test_related_name_access(self): + """Test accessing files through the submission queue record.""" + files = self.external_grader_detail.files.all() + self.assertEqual(files.count(), 1) + self.assertEqual(files.first(), self.submission_file) + + def test_ordering(self): + """Test that files are ordered by created_at.""" + past_time = now() - timedelta(hours=1) + + older_file = SimpleUploadedFile( + "older_file.txt", + b"old content", + content_type="text/plain" + ) + older_submission_file = SubmissionFile.objects.create( + external_grader=self.external_grader_detail, + file=older_file, + original_filename="older_file.txt", + created_at=past_time + ) + + files = self.external_grader_detail.files.all() + self.assertEqual(files[0], self.submission_file) + self.assertEqual(files[1], older_submission_file) + + def test_get_storage_default_with_mock(self): + """Test that get_storage returns default_storage when no custom configuration exists.""" + _get_storage_cached.cache_clear() + + with patch('submissions.models.getattr', return_value={}): + self.assertEqual(get_storage(), default_storage)