suitenumerique · lunika · Apr 27, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,8 +6,14 @@ and this project adheres to
 
 ## [Unreleased]
 
+### Added
+
+- ✨(backend) create a dedicated endpoint to update document content 
+- ⚡️(backend) stream s3 file content with a dedicated endpoint
+
 ### Changed
 
+- ♻️(backend) rename documents content endpoint to `formatted-content` (BC)
 - 🚸(frontend) show Crisp from the help menu #2222
 - ♿️(frontend) structure correctly 5xx error alerts #2128
 - ♿️(frontend) make doc search result labels uniquely identifiable #2212
@@ -26,6 +32,7 @@ and this project adheres to
 ### Removed
 
 - 🔥(backend) remove deprecated descendants endpoint #2243
+- 🔥(backend) remove content in document responses
 
 ## [v4.8.6] - 2026-04-08
 

diff --git a/UPGRADE.md b/UPGRADE.md
@@ -16,6 +16,20 @@ the following command inside your docker container:
 
 ## [Unreleased]
 
+We made several changes around document content management leading to several breaking changes in the API.
+
+- The endpoint `/api/v1.0/documents/{document_id}/content/` has been renamed to `/api/v1.0/documents/{document_id}/formatted-content/`
+- The response of `/api/v1.0/documents/{document_id}/` no longer contains a `content` attribute; two new endpoints have been added to retrieve or update the document content.
+- A new `GET /api/v1.0/documents/{document_id}/content/` endpoint has been implemented to fetch the document content; this endpoint streams the whole content with a `text/plain` content-type response.
+- A new `PATCH /api/v1.0/documents/{document_id}/content/` endpoint has been added to update the document content; the expected payload is:
+```json
+{
+  "content": "document content in base64"
+}
+```
+
+Other changes:
+
 - The deprecated endpoint `/api/v1.0/documents/<document_id>/descendants` is removed. The search endpoint should be used instead.
 - Upgrade docspec dependency to version >= 3.0.0
   The docspec service has changed since version 3.0.0, we ware now compatible with this version and not with version 2.x.x anymore

diff --git a/src/backend/core/api/permissions.py b/src/backend/core/api/permissions.py
@@ -12,6 +12,7 @@
 ACTION_FOR_METHOD_TO_PERMISSION = {
     "versions_detail": {"DELETE": "versions_destroy", "GET": "versions_retrieve"},
     "children": {"GET": "children_list", "POST": "children_create"},
+    "content": {"PATCH": "content_patch", "GET": "content_retrieve"},
 }
 
 

diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py
@@ -16,7 +16,7 @@
 import magic
 from rest_framework import serializers
 
-from core import choices, enums, models, utils, validators
+from core import choices, enums, models, validators
 from core.services import mime_types
 from core.services.ai_services import AI_ACTIONS
 from core.services.converter_services import (
@@ -178,7 +178,6 @@ class Meta:
 class DocumentSerializer(ListDocumentSerializer):
     """Serialize documents with all fields for display in detail views."""
 
-    content = serializers.CharField(required=False)
     websocket = serializers.BooleanField(required=False, write_only=True)
     file = serializers.FileField(
         required=False, write_only=True, allow_null=True, max_length=255
@@ -193,7 +192,6 @@ class Meta:
             "ancestors_link_role",
             "computed_link_reach",
             "computed_link_role",
-            "content",
             "created_at",
             "creator",
             "deleted_at",
@@ -242,13 +240,6 @@ def get_fields(self):
         if request:
             if request.method == "POST":
                 fields["id"].read_only = False
-            if (
-                serializers.BooleanField().to_internal_value(
-                    request.query_params.get("without_content", False)
-                )
-                is True
-            ):
-                del fields["content"]
 
         return fields
 
@@ -265,18 +256,6 @@ def validate_id(self, value):
 
         return value
 
-    def validate_content(self, value):
-        """Validate the content field."""
-        if not value:
-            return None
-
-        try:
-            b64decode(value, validate=True)
-        except binascii.Error as err:
-            raise serializers.ValidationError("Invalid base64 content.") from err
-
-        return value
-
     def validate_file(self, file):
         """Add file size and type constraints as defined in settings."""
         if not file:
@@ -310,52 +289,33 @@ def update(self, instance, validated_data):
             return instance  # No data provided, skip the update
         return super().update(instance, validated_data)
 
-    def save(self, **kwargs):
-        """
-        Process the content field to extract attachment keys and update the document's
-        "attachments" field for access control.
-        """
-        content = self.validated_data.get("content", "")
-        extracted_attachments = set(utils.extract_attachments(content))
 
-        existing_attachments = (
-            set(self.instance.attachments or []) if self.instance else set()
-        )
-        new_attachments = extracted_attachments - existing_attachments
+class DocumentContentSerializer(serializers.Serializer):
+    """Serializer for updating only the raw content of a document stored in S3."""
 
-        if new_attachments:
-            attachments_documents = (
-                models.Document.objects.filter(
-                    attachments__overlap=list(new_attachments)
-                )
-                .only("path", "attachments")
-                .order_by("path")
-            )
+    content = serializers.CharField(required=True)
+    websocket = serializers.BooleanField(required=False)
 
-            user = self.context["request"].user
-            readable_per_se_paths = (
-                models.Document.objects.readable_per_se(user)
-                .order_by("path")
-                .values_list("path", flat=True)
-            )
-            readable_attachments_paths = utils.filter_descendants(
-                [doc.path for doc in attachments_documents],
-                readable_per_se_paths,
-                skip_sorting=True,
-            )
+    def validate_content(self, value):
+        """Validate the content field."""
+        try:
+            b64decode(value, validate=True)
+        except binascii.Error as err:
+            raise serializers.ValidationError("Invalid base64 content.") from err
 
-            readable_attachments = set()
-            for document in attachments_documents:
-                if document.path not in readable_attachments_paths:
-                    continue
-                readable_attachments.update(set(document.attachments) & new_attachments)
+        return value
 
-            # Update attachments with readable keys
-            self.validated_data["attachments"] = list(
-                existing_attachments | readable_attachments
-            )
+    def update(self, instance, validated_data):
+        """
+        This serializer does not support updates.
+        """
+        raise NotImplementedError("Update is not supported for this serializer.")
 
-        return super().save(**kwargs)
+    def create(self, validated_data):
+        """
+        This serializer does not support create.
+        """
+        raise NotImplementedError("Create is not supported for this serializer.")
 
 
 class DocumentAccessSerializer(serializers.ModelSerializer):

diff --git a/src/backend/core/api/utils.py b/src/backend/core/api/utils.py
@@ -194,3 +194,8 @@ def get_ident(self, request):
             if x_forwarded_for
             else request.META.get("REMOTE_ADDR")
         )
+
+
+def get_content_metadata_cache_key(document_id):
+    """Return the cache key used to store content metadata."""
+    return f"docs:content-metadata:{document_id!s}"