Skip to content
57 changes: 46 additions & 11 deletions cms/djangoapps/contentstore/git_export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from opaque_keys.edx.locator import LibraryLocator, LibraryLocatorV2

from openedx.core.djangoapps.content_libraries.api import extract_library_v2_zip_to_dir
from xmodule.contentstore.django import contentstore
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.xml_exporter import export_course_to_xml
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -66,10 +68,33 @@ def cmd_log(cmd, cwd):
return output


def export_to_git(course_id, repo, user='', rdir=None):
"""Export a course to git."""
def export_to_git(content_key, repo, user='', rdir=None):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def export_to_git(content_key, repo, user='', rdir=None):
def export_to_git(context_key, repo, user='', rdir=None):

Our terminology for a key that can be a Library or Course is a LearningContextKey, so the var should be named context_key. But I'm a bit confused how this is working, since my understanding is that you'd have to modify the git_export.py command parsing to go from CourseKey.from_string() to LearningContextKey.from_string(), and that doesn't seem to be a part of this PR? For unfortunate historical reasons, LibraryLocator is a kind of CourseKey, but the new v2 library keys aren't.

"""
Export a course or library to git.

Args:
content_key: CourseKey or LibraryLocator for the content to export
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But also LibraryLocatorV2 now, right?

repo (str): Git repository URL
user (str): Optional username for git commit identity
rdir (str): Optional custom directory name for the repository

Raises:
GitExportError: For various git operation failures
"""
# pylint: disable=too-many-statements

# Detect content type and select appropriate export function
content_type_label = "library"
is_library_v2 = isinstance(content_key, LibraryLocatorV2)
if is_library_v2:
# V2 libraries use backup API with zip extraction
content_export_func = extract_library_v2_zip_to_dir
elif isinstance(content_key, LibraryLocator):
content_export_func = export_library_to_xml
else:
content_export_func = export_course_to_xml
content_type_label = "course"
Comment on lines +88 to +96
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we could use default dict here.

Comment on lines +88 to +96
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit (optional): You this setup work up here, but don't seem to use it until 65 lines later. You could do a simpler check to make sure it's a recognized key type up here (to error out early if necessary), and defer the rest of the logic until line 161. Then you don't need the indirection with the content_export_func.

So the first part could be like:

if not isinstance(context_key, (LibraryLocatorV2, LibraryLocator, CourseLocator)):
    raise TypeError(
        f"{context_key!r} for git export must be LibraryLocatorV2, LibraryLocator, "
        f"or CourseLocator, not {type(context_key}"
    )

And then your logic later on could look something like:

# We must check in this order, because LibraryLocator is a subclass of CourseLocator,
# so isinstance(context_key, CourseLocator) on a LibraryLocator would return True.
if isinstance(context_key, (LibraryLocatorV2, LibraryLocator)):
    content_type_label = "library"
else:
    content_type_label = "course"

if isinstance(context_key, LibraryLocatorV2):
    # V2 Library export
    export_library_to_xml(context_key, root_dir, content_dir, user)
else:
    # V1 Libraries and Courses both use the same modulestore-based export
    export_course_to_xml(modulestore(), contentstore(), context_key, root_dir, content_dir)

That way, each block stands on its own better, and you don't have to scan up or down the function to understand what's going on. Also, the content_type_label gets written once, rather than written with a default and then overwritten afterwards, making it just a little more obvious.

(Disclaimer: This is off the cuff and I haven't tried actually running this code, so there might be something obviously wrong with it. Please just take it as a broad suggestion. )

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It just occurred to me that we explicitly made the locators involved here implement is_course to make this simpler to deal with. So the "library" vs. "course" logic can be:

content_type_label = "course" if context_key.is_course else "library"


if not GIT_REPO_EXPORT_DIR:
raise GitExportError(GitExportError.NO_EXPORT_DIR)

Expand Down Expand Up @@ -128,14 +153,19 @@ def export_to_git(course_id, repo, user='', rdir=None):
log.exception('Failed to pull git repository: %r', ex.output)
raise GitExportError(GitExportError.CANNOT_PULL) from ex

# export course as xml before commiting and pushing
# export content as xml (or zip for v2 libraries) before commiting and pushing
root_dir = os.path.dirname(rdirp)
course_dir = os.path.basename(rdirp).rsplit('.git', 1)[0]
content_dir = os.path.basename(rdirp).rsplit('.git', 1)[0]

try:
export_course_to_xml(modulestore(), contentstore(), course_id,
root_dir, course_dir)
if is_library_v2:
content_export_func(content_key, root_dir, content_dir, user)
else:
# V1 libraries and courses: use XML export (no user parameter)
content_export_func(modulestore(), contentstore(), content_key,
root_dir, content_dir)
except (OSError, AttributeError):
log.exception('Failed export to xml')
log.exception('Failed to export %s', content_type_label)
raise GitExportError(GitExportError.XML_EXPORT_FAIL) # lint-amnesty, pylint: disable=raise-missing-from # noqa: B904

# Get current branch if not already set
Expand All @@ -160,9 +190,7 @@ def export_to_git(course_id, repo, user='', rdir=None):
ident = GIT_EXPORT_DEFAULT_IDENT
time_stamp = timezone.now()
cwd = os.path.abspath(rdirp)
commit_msg = "Export from Studio at {time_stamp}".format( # noqa: UP032
time_stamp=time_stamp,
)
commit_msg = f"Export {content_type_label} from Studio at {time_stamp}"
try:
cmd_log(['git', 'config', 'user.email', ident['email']], cwd)
cmd_log(['git', 'config', 'user.name', ident['name']], cwd)
Expand All @@ -180,3 +208,10 @@ def export_to_git(course_id, repo, user='', rdir=None):
except subprocess.CalledProcessError as ex:
log.exception('Error running git push command: %r', ex.output)
raise GitExportError(GitExportError.CANNOT_PUSH) from ex

log.info(
'%s %s exported to git repository %s successfully',
content_type_label.capitalize(),
content_key,
repo,
)
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
import subprocess
import unittest
from io import StringIO
from unittest.mock import patch
from uuid import uuid4

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import CommandError
from django.test.utils import override_settings
from opaque_keys.edx.locator import CourseLocator
from opaque_keys.edx.locator import CourseLocator, LibraryLocator, LibraryLocatorV2

import cms.djangoapps.contentstore.git_export_utils as git_export_utils
from cms.djangoapps.contentstore.git_export_utils import GitExportError
Expand All @@ -34,6 +35,9 @@ class TestGitExport(CourseTestCase):
Excercise the git_export django management command with various inputs.
"""

LIBRARY_V2_KEY = LibraryLocatorV2(org='TestOrg', slug='test-lib')
LIBRARY_V1_KEY = LibraryLocator(org='TestOrg', library='test-lib')

def setUp(self):
"""
Create/reinitialize bare repo and folders needed
Expand Down Expand Up @@ -182,3 +186,52 @@ def test_no_change(self):
with self.assertRaisesRegex(GitExportError, str(GitExportError.CANNOT_COMMIT)): # noqa: PT027
git_export_utils.export_to_git(
self.course.id, f'file://{self.bare_repo_dir}')

@patch('cms.djangoapps.contentstore.git_export_utils.cmd_log', return_value=b'main')
@patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir')
def test_library_v2_export_selects_correct_function(self, mock_extract, mock_cmd_log):
"""
When ``export_to_git`` is given a LibraryLocatorV2 key it must call
``extract_library_v2_zip_to_dir`` and must not call the v1 XML export
functions (``export_library_to_xml`` or ``export_course_to_xml``).
cmd_log is mocked so no real git subprocess or repo state is needed.
"""
mock_extract.return_value = None
repo_url = f'file://{self.bare_repo_dir}'

with patch('cms.djangoapps.contentstore.git_export_utils.export_course_to_xml') as mock_course_xml, \
patch('cms.djangoapps.contentstore.git_export_utils.export_library_to_xml') as mock_lib_xml:
git_export_utils.export_to_git(self.LIBRARY_V2_KEY, repo_url, self.user.username)

assert mock_extract.call_args[0][0] == self.LIBRARY_V2_KEY
mock_course_xml.assert_not_called()
mock_lib_xml.assert_not_called()

@patch('cms.djangoapps.contentstore.git_export_utils.cmd_log', return_value=b'main')
@patch('cms.djangoapps.contentstore.git_export_utils.export_library_to_xml')
def test_library_v1_export_selects_correct_function(self, mock_lib_xml, mock_cmd_log):
"""
When ``export_to_git`` is given a LibraryLocator (v1) key it must call
``export_library_to_xml`` and must not call ``extract_library_v2_zip_to_dir``.
cmd_log is mocked so no real git subprocess or repo state is needed.
"""
mock_lib_xml.return_value = None
repo_url = f'file://{self.bare_repo_dir}'

with patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir') as mock_v2, \
patch('cms.djangoapps.contentstore.git_export_utils.export_course_to_xml') as mock_course_xml:
git_export_utils.export_to_git(self.LIBRARY_V1_KEY, repo_url, self.user.username)

assert mock_lib_xml.called
mock_v2.assert_not_called()
mock_course_xml.assert_not_called()

@patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir',
side_effect=OSError('disk full'))
def test_library_v2_export_failure_raises_xml_export_fail(self, mock_extract):
"""
If ``extract_library_v2_zip_to_dir`` raises, ``export_to_git`` should
wrap it in ``GitExportError.XML_EXPORT_FAIL``.
"""
with self.assertRaisesRegex(GitExportError, str(GitExportError.XML_EXPORT_FAIL)): # noqa: PT027
git_export_utils.export_to_git(self.LIBRARY_V2_KEY, f'file://{self.bare_repo_dir}')
1 change: 1 addition & 0 deletions openedx/core/djangoapps/content_libraries/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Python API for working with content libraries
"""
from . import permissions # noqa: F401
from .backup import * # noqa: F403
from .block_metadata import * # noqa: F403
from .blocks import * # noqa: F403
from .collections import * # noqa: F403
Expand Down
76 changes: 76 additions & 0 deletions openedx/core/djangoapps/content_libraries/api/backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
Public API for content library backup (zip export) utilities.
"""
from __future__ import annotations

import os
import shutil
import zipfile
from datetime import datetime
from tempfile import mkdtemp

from django.conf import settings
from django.contrib.auth import get_user_model
from django.utils.text import slugify
from opaque_keys.edx.locator import LibraryLocatorV2, log
from openedx_content.api import create_zip_file as create_lib_zip_file
from path import Path

__all__ = ["create_library_v2_zip", "extract_library_v2_zip_to_dir"]


def create_library_v2_zip(library_key: LibraryLocatorV2, user) -> tuple:
"""
Create a zip backup of a v2 library and return ``(temp_dir, zip_file_path)``.

The caller is responsible for cleaning up ``temp_dir`` when done.

Args:
library_key: LibraryLocatorV2 identifying the library to export.
user: User object passed to the backup API.

Returns:
A tuple of ``(temp_dir as Path, zip_file_path as str)``.
"""
root_dir = Path(mkdtemp())
sanitized_lib_key = str(library_key).replace(":", "-")
sanitized_lib_key = slugify(sanitized_lib_key, allow_unicode=True)
timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
filename = f'{sanitized_lib_key}-{timestamp}.zip'
file_path = os.path.join(root_dir, filename)
origin_server = getattr(settings, 'CMS_BASE', None)
Comment on lines +35 to +41
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please try to refactor this so that it shares the same code path as the celery task for backup uses, instead of being a copy that might drift as conventions change.

create_lib_zip_file(lp_key=str(library_key), path=file_path, user=user, origin_server=origin_server)
return root_dir, file_path


def extract_library_v2_zip_to_dir(library_key, root_dir, library_dir, user=None):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def extract_library_v2_zip_to_dir(library_key, root_dir, library_dir, user=None):
def extract_library_v2_zip_to_dir(library_key, root_dir, library_dir, username=None):

Otherwise, this could be mistaken for a User obj, when it's being used as a username here.

"""
Export a v2 library to a directory by creating a zip backup and extracting it.

V2 libraries are stored in Learning Core and use a zip-based backup mechanism.
This function creates a temporary zip backup, extracts its contents into
``library_dir`` under ``root_dir``, then cleans up the temporary zip.

Args:
library_key: LibraryLocatorV2 for the library to export
root_dir: Root directory where library_dir will be created
library_dir: Directory name under root_dir to extract the library into
user: Username string for the backup API (optional)

Raises:
Exception: If backup creation or extraction fails
"""
# Get user object for backup API
user_obj = get_user_model().objects.filter(username=user).first()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If someone doesn't want to attach user information, that's one thing. But if someone is explicitly passing a user and that user is not found, it should be an error, and not simply silently fall back to None.

temp_dir, zip_path = create_library_v2_zip(library_key, user_obj)

try:
target_dir = os.path.join(root_dir, library_dir)
os.makedirs(target_dir, exist_ok=True)
# Extract zip contents (will overwrite existing files)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(target_dir)
log.info('Extracted library v2 backup to %s', target_dir)
finally:
if temp_dir.exists():
Comment on lines +63 to +75
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of the code comments are not needed here as the code is simple and readable that we don't really need. Also, there are some unnecessary blank lines as well.

shutil.rmtree(temp_dir)
Empty file.
88 changes: 88 additions & 0 deletions openedx/core/djangoapps/content_libraries/api/tests/test_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
Tests for content library backup (zip export) utilities.
"""
from __future__ import annotations

import shutil
import tempfile
import zipfile
from unittest.mock import MagicMock, patch

import pytest
from django.test import TestCase
from opaque_keys.edx.locator import LibraryLocatorV2
from path import Path

from openedx.core.djangoapps.content_libraries.api.backup import extract_library_v2_zip_to_dir

LIBRARY_KEY = LibraryLocatorV2(org='TestOrg', slug='test-lib')


class TestExtractLibraryV2ZipToDir(TestCase):
"""
Tests for ``extract_library_v2_zip_to_dir``.
"""

def _make_zip_in_temp_dir(self, contents=None):
"""
Helper: create a real temp dir + zip file and return (temp_dir_path, zip_path).
``contents`` is a dict of {filename: bytes} to write into the zip.
"""
temp_dir = Path(tempfile.mkdtemp())
zip_path = str(temp_dir / 'library.zip')
with zipfile.ZipFile(zip_path, 'w') as zf:
for name, data in (contents or {'data.xml': b'<library/>'}).items():
zf.writestr(name, data)
return temp_dir, zip_path

@patch('openedx.core.djangoapps.content_libraries.api.backup.get_user_model')
@patch('openedx.core.djangoapps.content_libraries.api.backup.create_library_v2_zip')
def test_successful_extraction(self, mock_create_zip, mock_get_user_model):
"""
On a successful call the function should:
- resolve the username to a user object via the user model,
- pass that user object to ``create_library_v2_zip``,
- create the target directory if it does not already exist,
- extract the zip contents into <root_dir>/<library_dir>,
- clean up the temporary zip directory.
"""
root_dir = Path(tempfile.mkdtemp())
temp_zip_dir, zip_path = self._make_zip_in_temp_dir({'content.xml': b'<lib/>'})
mock_create_zip.return_value = (temp_zip_dir, zip_path)
mock_user = MagicMock()
mock_get_user_model.return_value.objects.filter.return_value.first.return_value = mock_user

try:
target = root_dir / 'my-library'
assert not target.exists(), "Target dir should not exist before the call"

extract_library_v2_zip_to_dir(LIBRARY_KEY, str(root_dir), 'my-library', user='testuser')

mock_get_user_model.return_value.objects.filter.assert_called_once_with(username='testuser')
mock_create_zip.assert_called_once_with(LIBRARY_KEY, mock_user)
assert target.isdir(), "Target dir should have been created"
assert (target / 'content.xml').exists(), "Zip content should be extracted"
assert not temp_zip_dir.exists(), "Temp zip dir should have been removed"
finally:
shutil.rmtree(root_dir, ignore_errors=True)
shutil.rmtree(temp_zip_dir, ignore_errors=True)

@patch('openedx.core.djangoapps.content_libraries.api.backup.get_user_model')
@patch('openedx.core.djangoapps.content_libraries.api.backup.create_library_v2_zip')
def test_temp_dir_cleaned_up_even_on_extraction_error(self, mock_create_zip, mock_get_user_model):
"""
The temporary directory must be cleaned up even when extraction raises.
"""
root_dir = Path(tempfile.mkdtemp())
temp_zip_dir, zip_path = self._make_zip_in_temp_dir()
mock_create_zip.return_value = (temp_zip_dir, zip_path)
mock_get_user_model.return_value.objects.filter.return_value.first.return_value = None

try:
with patch('zipfile.ZipFile.extractall', side_effect=OSError('disk full')):
with pytest.raises(OSError, match='disk full'):
extract_library_v2_zip_to_dir(LIBRARY_KEY, str(root_dir), 'my-library', user=None)
assert not temp_zip_dir.exists(), "Temp dir should be cleaned up on error"
finally:
shutil.rmtree(root_dir, ignore_errors=True)
shutil.rmtree(temp_zip_dir, ignore_errors=True)
Loading
Loading