-
Notifications
You must be signed in to change notification settings - Fork 4.3k
feat: support added to export content libraries to git #38026
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
35e36a3
28b039d
acdac16
4cc8a51
02e70c5
1e7a97e
0651d60
6bfad43
c0d1ad4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,10 +13,12 @@ | |
| from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user | ||
| from django.utils import timezone | ||
| from django.utils.translation import gettext_lazy as _ | ||
| from opaque_keys.edx.locator import LibraryLocator, LibraryLocatorV2 | ||
|
|
||
| from openedx.core.djangoapps.content_libraries.api import extract_library_v2_zip_to_dir | ||
| from xmodule.contentstore.django import contentstore | ||
| from xmodule.modulestore.django import modulestore | ||
| from xmodule.modulestore.xml_exporter import export_course_to_xml | ||
| from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml | ||
|
|
||
| log = logging.getLogger(__name__) | ||
|
|
||
|
|
@@ -66,10 +68,33 @@ def cmd_log(cmd, cwd): | |
| return output | ||
|
|
||
|
|
||
| def export_to_git(course_id, repo, user='', rdir=None): | ||
| """Export a course to git.""" | ||
| def export_to_git(content_key, repo, user='', rdir=None): | ||
| """ | ||
| Export a course or library to git. | ||
|
|
||
| Args: | ||
| content_key: CourseKey or LibraryLocator for the content to export | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But also |
||
| repo (str): Git repository URL | ||
| user (str): Optional username for git commit identity | ||
| rdir (str): Optional custom directory name for the repository | ||
|
|
||
| Raises: | ||
| GitExportError: For various git operation failures | ||
| """ | ||
| # pylint: disable=too-many-statements | ||
|
|
||
| # Detect content type and select appropriate export function | ||
| content_type_label = "library" | ||
| is_library_v2 = isinstance(content_key, LibraryLocatorV2) | ||
| if is_library_v2: | ||
| # V2 libraries use backup API with zip extraction | ||
| content_export_func = extract_library_v2_zip_to_dir | ||
| elif isinstance(content_key, LibraryLocator): | ||
| content_export_func = export_library_to_xml | ||
| else: | ||
| content_export_func = export_course_to_xml | ||
| content_type_label = "course" | ||
|
Comment on lines
+88
to
+96
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: we could use default dict here.
Comment on lines
+88
to
+96
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit (optional): You this setup work up here, but don't seem to use it until 65 lines later. You could do a simpler check to make sure it's a recognized key type up here (to error out early if necessary), and defer the rest of the logic until line 161. Then you don't need the indirection with the content_export_func. So the first part could be like: if not isinstance(context_key, (LibraryLocatorV2, LibraryLocator, CourseLocator)):
raise TypeError(
f"{context_key!r} for git export must be LibraryLocatorV2, LibraryLocator, "
f"or CourseLocator, not {type(context_key}"
)And then your logic later on could look something like: # We must check in this order, because LibraryLocator is a subclass of CourseLocator,
# so isinstance(context_key, CourseLocator) on a LibraryLocator would return True.
if isinstance(context_key, (LibraryLocatorV2, LibraryLocator)):
content_type_label = "library"
else:
content_type_label = "course"
if isinstance(context_key, LibraryLocatorV2):
# V2 Library export
export_library_to_xml(context_key, root_dir, content_dir, user)
else:
# V1 Libraries and Courses both use the same modulestore-based export
export_course_to_xml(modulestore(), contentstore(), context_key, root_dir, content_dir)That way, each block stands on its own better, and you don't have to scan up or down the function to understand what's going on. Also, the (Disclaimer: This is off the cuff and I haven't tried actually running this code, so there might be something obviously wrong with it. Please just take it as a broad suggestion. )
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It just occurred to me that we explicitly made the locators involved here implement content_type_label = "course" if context_key.is_course else "library" |
||
|
|
||
| if not GIT_REPO_EXPORT_DIR: | ||
| raise GitExportError(GitExportError.NO_EXPORT_DIR) | ||
|
|
||
|
|
@@ -128,14 +153,19 @@ def export_to_git(course_id, repo, user='', rdir=None): | |
| log.exception('Failed to pull git repository: %r', ex.output) | ||
| raise GitExportError(GitExportError.CANNOT_PULL) from ex | ||
|
|
||
| # export course as xml before commiting and pushing | ||
| # export content as xml (or zip for v2 libraries) before commiting and pushing | ||
| root_dir = os.path.dirname(rdirp) | ||
| course_dir = os.path.basename(rdirp).rsplit('.git', 1)[0] | ||
| content_dir = os.path.basename(rdirp).rsplit('.git', 1)[0] | ||
|
|
||
| try: | ||
| export_course_to_xml(modulestore(), contentstore(), course_id, | ||
| root_dir, course_dir) | ||
| if is_library_v2: | ||
| content_export_func(content_key, root_dir, content_dir, user) | ||
| else: | ||
| # V1 libraries and courses: use XML export (no user parameter) | ||
| content_export_func(modulestore(), contentstore(), content_key, | ||
| root_dir, content_dir) | ||
| except (OSError, AttributeError): | ||
| log.exception('Failed export to xml') | ||
| log.exception('Failed to export %s', content_type_label) | ||
| raise GitExportError(GitExportError.XML_EXPORT_FAIL) # lint-amnesty, pylint: disable=raise-missing-from # noqa: B904 | ||
|
|
||
| # Get current branch if not already set | ||
|
|
@@ -160,9 +190,7 @@ def export_to_git(course_id, repo, user='', rdir=None): | |
| ident = GIT_EXPORT_DEFAULT_IDENT | ||
| time_stamp = timezone.now() | ||
| cwd = os.path.abspath(rdirp) | ||
| commit_msg = "Export from Studio at {time_stamp}".format( # noqa: UP032 | ||
| time_stamp=time_stamp, | ||
| ) | ||
| commit_msg = f"Export {content_type_label} from Studio at {time_stamp}" | ||
| try: | ||
| cmd_log(['git', 'config', 'user.email', ident['email']], cwd) | ||
| cmd_log(['git', 'config', 'user.name', ident['name']], cwd) | ||
|
|
@@ -180,3 +208,10 @@ def export_to_git(course_id, repo, user='', rdir=None): | |
| except subprocess.CalledProcessError as ex: | ||
| log.exception('Error running git push command: %r', ex.output) | ||
| raise GitExportError(GitExportError.CANNOT_PUSH) from ex | ||
|
|
||
| log.info( | ||
| '%s %s exported to git repository %s successfully', | ||
| content_type_label.capitalize(), | ||
| content_key, | ||
| repo, | ||
| ) | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,76 @@ | ||||||
| """ | ||||||
| Public API for content library backup (zip export) utilities. | ||||||
| """ | ||||||
| from __future__ import annotations | ||||||
|
|
||||||
| import os | ||||||
| import shutil | ||||||
| import zipfile | ||||||
| from datetime import datetime | ||||||
| from tempfile import mkdtemp | ||||||
|
|
||||||
| from django.conf import settings | ||||||
| from django.contrib.auth import get_user_model | ||||||
| from django.utils.text import slugify | ||||||
| from opaque_keys.edx.locator import LibraryLocatorV2, log | ||||||
| from openedx_content.api import create_zip_file as create_lib_zip_file | ||||||
| from path import Path | ||||||
|
|
||||||
| __all__ = ["create_library_v2_zip", "extract_library_v2_zip_to_dir"] | ||||||
|
|
||||||
|
|
||||||
| def create_library_v2_zip(library_key: LibraryLocatorV2, user) -> tuple: | ||||||
| """ | ||||||
| Create a zip backup of a v2 library and return ``(temp_dir, zip_file_path)``. | ||||||
|
|
||||||
| The caller is responsible for cleaning up ``temp_dir`` when done. | ||||||
|
|
||||||
| Args: | ||||||
| library_key: LibraryLocatorV2 identifying the library to export. | ||||||
| user: User object passed to the backup API. | ||||||
|
|
||||||
| Returns: | ||||||
| A tuple of ``(temp_dir as Path, zip_file_path as str)``. | ||||||
| """ | ||||||
| root_dir = Path(mkdtemp()) | ||||||
| sanitized_lib_key = str(library_key).replace(":", "-") | ||||||
| sanitized_lib_key = slugify(sanitized_lib_key, allow_unicode=True) | ||||||
| timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S") | ||||||
| filename = f'{sanitized_lib_key}-{timestamp}.zip' | ||||||
| file_path = os.path.join(root_dir, filename) | ||||||
| origin_server = getattr(settings, 'CMS_BASE', None) | ||||||
|
Comment on lines
+35
to
+41
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please try to refactor this so that it shares the same code path as the celery task for backup uses, instead of being a copy that might drift as conventions change. |
||||||
| create_lib_zip_file(lp_key=str(library_key), path=file_path, user=user, origin_server=origin_server) | ||||||
| return root_dir, file_path | ||||||
|
|
||||||
|
|
||||||
| def extract_library_v2_zip_to_dir(library_key, root_dir, library_dir, user=None): | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Otherwise, this could be mistaken for a User obj, when it's being used as a username here. |
||||||
| """ | ||||||
| Export a v2 library to a directory by creating a zip backup and extracting it. | ||||||
|
|
||||||
| V2 libraries are stored in Learning Core and use a zip-based backup mechanism. | ||||||
| This function creates a temporary zip backup, extracts its contents into | ||||||
| ``library_dir`` under ``root_dir``, then cleans up the temporary zip. | ||||||
|
|
||||||
| Args: | ||||||
| library_key: LibraryLocatorV2 for the library to export | ||||||
| root_dir: Root directory where library_dir will be created | ||||||
| library_dir: Directory name under root_dir to extract the library into | ||||||
| user: Username string for the backup API (optional) | ||||||
|
|
||||||
| Raises: | ||||||
| Exception: If backup creation or extraction fails | ||||||
| """ | ||||||
| # Get user object for backup API | ||||||
| user_obj = get_user_model().objects.filter(username=user).first() | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If someone doesn't want to attach user information, that's one thing. But if someone is explicitly passing a user and that user is not found, it should be an error, and not simply silently fall back to |
||||||
| temp_dir, zip_path = create_library_v2_zip(library_key, user_obj) | ||||||
|
|
||||||
| try: | ||||||
| target_dir = os.path.join(root_dir, library_dir) | ||||||
| os.makedirs(target_dir, exist_ok=True) | ||||||
| # Extract zip contents (will overwrite existing files) | ||||||
| with zipfile.ZipFile(zip_path, 'r') as zip_ref: | ||||||
| zip_ref.extractall(target_dir) | ||||||
| log.info('Extracted library v2 backup to %s', target_dir) | ||||||
| finally: | ||||||
| if temp_dir.exists(): | ||||||
|
Comment on lines
+63
to
+75
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some of the code comments are not needed here as the code is simple and readable that we don't really need. Also, there are some unnecessary blank lines as well. |
||||||
| shutil.rmtree(temp_dir) | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| """ | ||
| Tests for content library backup (zip export) utilities. | ||
| """ | ||
| from __future__ import annotations | ||
|
|
||
| import shutil | ||
| import tempfile | ||
| import zipfile | ||
| from unittest.mock import MagicMock, patch | ||
|
|
||
| import pytest | ||
| from django.test import TestCase | ||
| from opaque_keys.edx.locator import LibraryLocatorV2 | ||
| from path import Path | ||
|
|
||
| from openedx.core.djangoapps.content_libraries.api.backup import extract_library_v2_zip_to_dir | ||
|
|
||
| LIBRARY_KEY = LibraryLocatorV2(org='TestOrg', slug='test-lib') | ||
|
|
||
|
|
||
| class TestExtractLibraryV2ZipToDir(TestCase): | ||
| """ | ||
| Tests for ``extract_library_v2_zip_to_dir``. | ||
| """ | ||
|
|
||
| def _make_zip_in_temp_dir(self, contents=None): | ||
| """ | ||
| Helper: create a real temp dir + zip file and return (temp_dir_path, zip_path). | ||
| ``contents`` is a dict of {filename: bytes} to write into the zip. | ||
| """ | ||
| temp_dir = Path(tempfile.mkdtemp()) | ||
| zip_path = str(temp_dir / 'library.zip') | ||
| with zipfile.ZipFile(zip_path, 'w') as zf: | ||
| for name, data in (contents or {'data.xml': b'<library/>'}).items(): | ||
| zf.writestr(name, data) | ||
| return temp_dir, zip_path | ||
|
|
||
| @patch('openedx.core.djangoapps.content_libraries.api.backup.get_user_model') | ||
| @patch('openedx.core.djangoapps.content_libraries.api.backup.create_library_v2_zip') | ||
| def test_successful_extraction(self, mock_create_zip, mock_get_user_model): | ||
| """ | ||
| On a successful call the function should: | ||
| - resolve the username to a user object via the user model, | ||
| - pass that user object to ``create_library_v2_zip``, | ||
| - create the target directory if it does not already exist, | ||
| - extract the zip contents into <root_dir>/<library_dir>, | ||
| - clean up the temporary zip directory. | ||
| """ | ||
| root_dir = Path(tempfile.mkdtemp()) | ||
| temp_zip_dir, zip_path = self._make_zip_in_temp_dir({'content.xml': b'<lib/>'}) | ||
| mock_create_zip.return_value = (temp_zip_dir, zip_path) | ||
| mock_user = MagicMock() | ||
| mock_get_user_model.return_value.objects.filter.return_value.first.return_value = mock_user | ||
|
|
||
| try: | ||
| target = root_dir / 'my-library' | ||
| assert not target.exists(), "Target dir should not exist before the call" | ||
|
|
||
| extract_library_v2_zip_to_dir(LIBRARY_KEY, str(root_dir), 'my-library', user='testuser') | ||
|
|
||
| mock_get_user_model.return_value.objects.filter.assert_called_once_with(username='testuser') | ||
| mock_create_zip.assert_called_once_with(LIBRARY_KEY, mock_user) | ||
| assert target.isdir(), "Target dir should have been created" | ||
| assert (target / 'content.xml').exists(), "Zip content should be extracted" | ||
| assert not temp_zip_dir.exists(), "Temp zip dir should have been removed" | ||
| finally: | ||
| shutil.rmtree(root_dir, ignore_errors=True) | ||
| shutil.rmtree(temp_zip_dir, ignore_errors=True) | ||
|
|
||
| @patch('openedx.core.djangoapps.content_libraries.api.backup.get_user_model') | ||
| @patch('openedx.core.djangoapps.content_libraries.api.backup.create_library_v2_zip') | ||
| def test_temp_dir_cleaned_up_even_on_extraction_error(self, mock_create_zip, mock_get_user_model): | ||
| """ | ||
| The temporary directory must be cleaned up even when extraction raises. | ||
| """ | ||
| root_dir = Path(tempfile.mkdtemp()) | ||
| temp_zip_dir, zip_path = self._make_zip_in_temp_dir() | ||
| mock_create_zip.return_value = (temp_zip_dir, zip_path) | ||
| mock_get_user_model.return_value.objects.filter.return_value.first.return_value = None | ||
|
|
||
| try: | ||
| with patch('zipfile.ZipFile.extractall', side_effect=OSError('disk full')): | ||
| with pytest.raises(OSError, match='disk full'): | ||
| extract_library_v2_zip_to_dir(LIBRARY_KEY, str(root_dir), 'my-library', user=None) | ||
| assert not temp_zip_dir.exists(), "Temp dir should be cleaned up on error" | ||
| finally: | ||
| shutil.rmtree(root_dir, ignore_errors=True) | ||
| shutil.rmtree(temp_zip_dir, ignore_errors=True) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Our terminology for a key that can be a Library or Course is a LearningContextKey, so the var should be named
context_key. But I'm a bit confused how this is working, since my understanding is that you'd have to modify thegit_export.pycommand parsing to go fromCourseKey.from_string()toLearningContextKey.from_string(), and that doesn't seem to be a part of this PR? For unfortunate historical reasons,LibraryLocatoris a kind ofCourseKey, but the new v2 library keys aren't.