-
Notifications
You must be signed in to change notification settings - Fork 583
Refactor document content management #2171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
207f214
d7a186a
51d4746
6b3d197
1c2bafb
68f1600
b6c6fc8
6f2cd8a
4d250a7
ff2c61a
100817b
a00c512
5e31eb0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,20 @@ the following command inside your docker container: | |
|
|
||
| ## [Unreleased] | ||
|
|
||
| We made several changes around document content management leading to several breaking changes in the API. | ||
|
|
||
| - The endpoint `/api/v1.0/documents/{document_id}/content/` has been renamed to `/api/v1.0/documents/{document_id}/formatted-content/`. | ||
| - The response of `/api/v1.0/documents/{document_id}/` no longer includes a `content` attribute; two new endpoints have been added to retrieve or update the document content. | ||
| - A new `GET /api/v1.0/documents/{document_id}/content/` endpoint has been implemented to fetch the document content; this endpoint streams the whole content with a `text/plain` content-type response. | ||
| - A new `PATCH /api/v1.0/documents/{document_id}/content/` endpoint has been added to update the document content; the expected payload is: | ||
| ```json | ||
| { | ||
| "content": "document content in base64" | ||
| } | ||
| ``` | ||
|
Comment on lines
+24
to
+29
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix invalid JSON in the payload example and add the missing blank line before the fence. The example payload contains a trailing comma after the last property, which is not valid JSON and will mislead integrators copy-pasting from the upgrade guide. Also, per markdownlint MD031 (flagged by static analysis), fenced code blocks should be surrounded by blank lines. 📝 Proposed fix - A new `PATCH /api/v1.0/documents/{document_id}/content/` endpoint has been added to update the document content ; expected payload is:
+
```json
{
- "content": "document content in base64",
+ "content": "document content in base64"
}🧰 Tools🪛 markdownlint-cli2 (0.22.1)[warning] 25-25: Fenced code blocks should be surrounded by blank lines (MD031, blanks-around-fences) 🤖 Prompt for AI Agents |
||
|
|
||
| Other changes: | ||
|
|
||
| - The deprecated endpoint `/api/v1.0/documents/<document_id>/descendants` is removed. The search endpoint should be used instead. | ||
| - Upgrade docspec dependency to version >= 3.0.0 | ||
| The docspec service changed in version 3.0.0; we are now compatible with this version and no longer with version 2.x.x. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,7 +16,7 @@ | |
| import magic | ||
| from rest_framework import serializers | ||
|
|
||
| from core import choices, enums, models, utils, validators | ||
| from core import choices, enums, models, validators | ||
| from core.services import mime_types | ||
| from core.services.ai_services import AI_ACTIONS | ||
| from core.services.converter_services import ( | ||
|
|
@@ -178,7 +178,6 @@ class Meta: | |
| class DocumentSerializer(ListDocumentSerializer): | ||
| """Serialize documents with all fields for display in detail views.""" | ||
|
|
||
| content = serializers.CharField(required=False) | ||
| websocket = serializers.BooleanField(required=False, write_only=True) | ||
| file = serializers.FileField( | ||
| required=False, write_only=True, allow_null=True, max_length=255 | ||
|
|
@@ -193,7 +192,6 @@ class Meta: | |
| "ancestors_link_role", | ||
| "computed_link_reach", | ||
| "computed_link_role", | ||
| "content", | ||
| "created_at", | ||
| "creator", | ||
| "deleted_at", | ||
|
|
@@ -242,13 +240,6 @@ def get_fields(self): | |
| if request: | ||
| if request.method == "POST": | ||
| fields["id"].read_only = False | ||
| if ( | ||
| serializers.BooleanField().to_internal_value( | ||
| request.query_params.get("without_content", False) | ||
| ) | ||
| is True | ||
| ): | ||
| del fields["content"] | ||
|
|
||
| return fields | ||
|
|
||
|
|
@@ -265,18 +256,6 @@ def validate_id(self, value): | |
|
|
||
| return value | ||
|
|
||
| def validate_content(self, value): | ||
| """Validate the content field.""" | ||
| if not value: | ||
| return None | ||
|
|
||
| try: | ||
| b64decode(value, validate=True) | ||
| except binascii.Error as err: | ||
| raise serializers.ValidationError("Invalid base64 content.") from err | ||
|
|
||
| return value | ||
|
|
||
| def validate_file(self, file): | ||
| """Add file size and type constraints as defined in settings.""" | ||
| if not file: | ||
|
|
@@ -310,52 +289,33 @@ def update(self, instance, validated_data): | |
| return instance # No data provided, skip the update | ||
| return super().update(instance, validated_data) | ||
|
|
||
| def save(self, **kwargs): | ||
|
lunika marked this conversation as resolved.
|
||
| """ | ||
| Process the content field to extract attachment keys and update the document's | ||
| "attachments" field for access control. | ||
| """ | ||
| content = self.validated_data.get("content", "") | ||
| extracted_attachments = set(utils.extract_attachments(content)) | ||
|
|
||
| existing_attachments = ( | ||
| set(self.instance.attachments or []) if self.instance else set() | ||
| ) | ||
| new_attachments = extracted_attachments - existing_attachments | ||
| class DocumentContentSerializer(serializers.Serializer): | ||
|
lunika marked this conversation as resolved.
|
||
| """Serializer for updating only the raw content of a document stored in S3.""" | ||
|
|
||
| if new_attachments: | ||
| attachments_documents = ( | ||
| models.Document.objects.filter( | ||
| attachments__overlap=list(new_attachments) | ||
| ) | ||
| .only("path", "attachments") | ||
| .order_by("path") | ||
| ) | ||
| content = serializers.CharField(required=True) | ||
| websocket = serializers.BooleanField(required=False) | ||
|
|
||
| user = self.context["request"].user | ||
| readable_per_se_paths = ( | ||
| models.Document.objects.readable_per_se(user) | ||
| .order_by("path") | ||
| .values_list("path", flat=True) | ||
| ) | ||
| readable_attachments_paths = utils.filter_descendants( | ||
| [doc.path for doc in attachments_documents], | ||
| readable_per_se_paths, | ||
| skip_sorting=True, | ||
| ) | ||
| def validate_content(self, value): | ||
| """Validate the content field.""" | ||
| try: | ||
| b64decode(value, validate=True) | ||
| except binascii.Error as err: | ||
| raise serializers.ValidationError("Invalid base64 content.") from err | ||
|
Comment on lines
+301
to
+304
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Throttle the content PATCH endpoint independently. This serializer powers the new 🤖 Prompt for AI Agents |
||
|
|
||
| readable_attachments = set() | ||
| for document in attachments_documents: | ||
| if document.path not in readable_attachments_paths: | ||
| continue | ||
| readable_attachments.update(set(document.attachments) & new_attachments) | ||
| return value | ||
|
lunika marked this conversation as resolved.
|
||
|
|
||
| # Update attachments with readable keys | ||
| self.validated_data["attachments"] = list( | ||
| existing_attachments | readable_attachments | ||
| ) | ||
| def update(self, instance, validated_data): | ||
| """ | ||
| This serializer does not support updates. | ||
| """ | ||
| raise NotImplementedError("Update is not supported for this serializer.") | ||
|
|
||
| return super().save(**kwargs) | ||
| def create(self, validated_data): | ||
| """ | ||
| This serializer does not support create. | ||
| """ | ||
| raise NotImplementedError("Create is not supported for this serializer.") | ||
|
|
||
|
|
||
| class DocumentAccessSerializer(serializers.ModelSerializer): | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.