Skip to content

Commit 0915c49

Browse files
committed
Refactor perseus exercise generation into common base class and perseus class.
Add fixes for formula and image processing.
1 parent 273e174 commit 0915c49

7 files changed

Lines changed: 753 additions & 378 deletions

File tree

contentcuration/contentcuration/tests/test_exportchannel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ def test_inherited_language(self):
400400
parent_id=first_topic_node_id
401401
)[1:]:
402402
if child.kind == "topic":
403-
self.assertIsNone(child.lang_id)
403+
self.assertEqual(child.lang_id, self.content_channel.language_id)
404404
self.assertEqual(child.children.first().lang_id, "fr")
405405
else:
406406
self.assertEqual(child.lang_id, "fr")

contentcuration/contentcuration/tests/testdata.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,9 @@ def tree(parent=None):
217217

218218
def channel(name="testchannel"):
219219
channel_creator = user()
220-
channel = cc.Channel.objects.create(name=name, actor_id=channel_creator.id)
220+
channel = cc.Channel.objects.create(
221+
name=name, actor_id=channel_creator.id, language_id="en"
222+
)
221223
channel.save()
222224

223225
channel.main_tree = tree()

contentcuration/contentcuration/tests/utils/test_exercise_creation.py

Lines changed: 212 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from contentcuration.tests.base import StudioTestCase
1717
from contentcuration.tests.testdata import fileobj_exercise_graphie
1818
from contentcuration.tests.testdata import fileobj_exercise_image
19-
from contentcuration.utils.publish import create_perseus_exercise
19+
from contentcuration.utils.assessment.perseus import PerseusExerciseGenerator
2020

2121

2222
class TestPerseusExerciseCreation(StudioTestCase):
@@ -37,8 +37,8 @@ def setUp(self):
3737
# Create an exercise node
3838
self.exercise_node = ContentNode.objects.create(
3939
title="Test Exercise",
40-
node_id="exercise-node-id",
41-
content_id="exercise-content-id",
40+
node_id="1234567890abcdef1234567890abcded",
41+
content_id="fedcba0987654321fedcba0987654321",
4242
kind_id=content_kinds.EXERCISE,
4343
parent=self.channel.main_tree,
4444
extra_fields=json.dumps(
@@ -58,9 +58,6 @@ def setUp(self):
5858
),
5959
)
6060

61-
# Create a kolibri node representation (only needs id for testing)
62-
self.kolibri_node = type("KolibriNode", (), {"id": "kolibri-node-id"})
63-
6461
def _create_assessment_item(
6562
self, item_type, question_text, answers, hints=None, assessment_id=None
6663
):
@@ -81,6 +78,16 @@ def _create_assessment_item(
8178
)
8279
return item
8380

81+
def _create_perseus_zip(self, exercise_data):
82+
generator = PerseusExerciseGenerator(
83+
self.exercise_node,
84+
exercise_data,
85+
self.channel.id,
86+
"en-US",
87+
user_id=self.user.id,
88+
)
89+
return generator.create_exercise_archive()
90+
8491
def _validate_perseus_zip(self, exercise_file):
8592
"""Helper to validate the structure of the Perseus zip file"""
8693
# Use Django's storage backend to read the file
@@ -145,9 +152,7 @@ def test_basic_exercise_creation(self):
145152
}
146153

147154
# Call the function to create the Perseus exercise
148-
create_perseus_exercise(
149-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
150-
)
155+
self._create_perseus_zip(exercise_data)
151156

152157
# Verify that a file was created for the node
153158
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -179,6 +184,101 @@ def test_basic_exercise_creation(self):
179184
# we are deliberately changing the archive generation algorithm for perseus files.
180185
self.assertEqual(exercise_file.checksum, "0ec7e964b466ebc76e81e175570e97f1")
181186

187+
def test_multiple_images_index_mismatch_regression(self):
188+
"""Regression test for index mismatch bug in process_image_strings method.
189+
190+
When content is modified inside the re.finditer loop, subsequent matches
191+
point to invalid positions due to string length changes, resulting in
192+
malformed image processing.
193+
"""
194+
# Create three image files - use mix of resized and non-resized images
195+
# to trigger different replacement lengths
196+
image1 = fileobj_exercise_image(size=(100, 100), color="red")
197+
image2 = fileobj_exercise_image(size=(200, 200), color="blue")
198+
image3 = fileobj_exercise_image(size=(300, 300), color="green")
199+
200+
# Create URLs for all images
201+
image1_url = exercises.CONTENT_STORAGE_FORMAT.format(image1.filename())
202+
image2_url = exercises.CONTENT_STORAGE_FORMAT.format(image2.filename())
203+
image3_url = exercises.CONTENT_STORAGE_FORMAT.format(image3.filename())
204+
205+
# Create question with multiple images - mix of resized and original
206+
# This should create different length replacements
207+
question_text = (
208+
f"First image (resized): ![img1]({image1_url} =50x50)\n"
209+
f"Second image (original): ![img2]({image2_url})\n"
210+
f"Third image (resized): ![img3]({image3_url} =70x70)"
211+
)
212+
213+
item = self._create_assessment_item(
214+
exercises.SINGLE_SELECTION,
215+
question_text,
216+
[{"answer": "Answer", "correct": True, "order": 1}],
217+
)
218+
219+
# Associate all images with the assessment item
220+
for img in [image1, image2, image3]:
221+
img.assessment_item = item
222+
img.save()
223+
224+
exercise_data = {
225+
"mastery_model": exercises.M_OF_N,
226+
"randomize": True,
227+
"n": 1,
228+
"m": 1,
229+
"all_assessment_items": [item.assessment_id],
230+
"assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
231+
}
232+
233+
# Create the Perseus exercise
234+
self._create_perseus_zip(exercise_data)
235+
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
236+
zip_file, _ = self._validate_perseus_zip(exercise_file)
237+
238+
# Get the Perseus item JSON content
239+
item_json = json.loads(
240+
zip_file.read(f"{item.assessment_id}.json").decode("utf-8")
241+
)
242+
question_content = item_json["question"]["content"]
243+
244+
# Extract all markdown image references using the same pattern as the code
245+
markdown_pattern = r"!\[([^\]]*)\]\(([^)]+)\)"
246+
matches = re.findall(markdown_pattern, question_content)
247+
248+
# Check that we have exactly 3 well-formed image references
249+
# If the bug exists, we might get malformed content due to index mismatch
250+
self.assertEqual(
251+
len(matches),
252+
3,
253+
f"Expected 3 image references, found {len(matches)} in content: {question_content}",
254+
)
255+
256+
# Verify each match has proper structure
257+
for i, (alt_text, _) in enumerate(matches):
258+
expected_alt = f"img{i+1}"
259+
self.assertEqual(
260+
alt_text,
261+
expected_alt,
262+
f"Image {i+1} alt text malformed: got '{alt_text}', expected '{expected_alt}'",
263+
)
264+
265+
# Verify that width and height are properly included in the question images
266+
question_images = item_json["question"]["images"]
267+
268+
self.assertEqual(
269+
len(question_images),
270+
2,
271+
f"Expected 2 image entries with dimensions, found {len(question_images)}: {list(question_images.keys())}",
272+
)
273+
274+
# Verify that we have images with the expected dimensions
275+
for image_name, image_data in question_images.items():
276+
width, height = image_data["width"], image_data["height"]
277+
if width == 50 and height != 50:
278+
self.fail("Should find image with 50x50 dimensions")
279+
elif width == 70 and height != 70:
280+
self.fail("Should find image with 70x70 dimensions")
281+
182282
def test_exercise_with_image(self):
183283
image_file = fileobj_exercise_image()
184284

@@ -209,9 +309,7 @@ def test_exercise_with_image(self):
209309
}
210310

211311
# Create the Perseus exercise
212-
create_perseus_exercise(
213-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
214-
)
312+
self._create_perseus_zip(exercise_data)
215313

216314
# Verify that a file was created
217315
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -259,9 +357,7 @@ def test_exercise_with_image_no_attached_file(self):
259357
}
260358

261359
# Create the Perseus exercise
262-
create_perseus_exercise(
263-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
264-
)
360+
self._create_perseus_zip(exercise_data)
265361

266362
# Verify that a file was created
267363
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -311,9 +407,7 @@ def test_exercise_with_image_deleted_file_object(self):
311407
image_file.delete()
312408

313409
# Create the Perseus exercise
314-
create_perseus_exercise(
315-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
316-
)
410+
self._create_perseus_zip(exercise_data)
317411

318412
# Verify that a file was created
319413
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -392,9 +486,7 @@ def test_exercise_with_graphie(self):
392486
}
393487

394488
# Create the Perseus exercise
395-
create_perseus_exercise(
396-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
397-
)
489+
self._create_perseus_zip(exercise_data)
398490

399491
# Verify that a file was created
400492
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -431,7 +523,7 @@ def test_exercise_with_graphie(self):
431523
def test_formula_processing(self):
432524
"""Test that formulas are properly processed in exercises"""
433525
# Create a question with LaTeX formulas
434-
question_text = "Solve: $\\frac{x}{2} = 3$"
526+
question_text = "Solve: $$\\frac{x}{2} = 3$$"
435527
item = self._create_assessment_item(
436528
exercises.INPUT_QUESTION,
437529
question_text,
@@ -449,9 +541,42 @@ def test_formula_processing(self):
449541
}
450542

451543
# Create the Perseus exercise
452-
create_perseus_exercise(
453-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
544+
self._create_perseus_zip(exercise_data)
545+
546+
# Verify that a file was created
547+
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
548+
549+
# Validate the zip file
550+
zip_file, _ = self._validate_perseus_zip(exercise_file)
551+
552+
# Check that the formula was properly processed
553+
item_json = json.loads(
554+
zip_file.read(f"{item.assessment_id}.json").decode("utf-8")
454555
)
556+
self.assertIn("$\\frac{x}{2} = 3$", item_json["question"]["content"])
557+
558+
def test_multiple_formula_processing(self):
559+
"""Test that multiple formulas in a single question are each properly processed"""
560+
# Create a question with LaTeX formulas
561+
question_text = "Solve: $$\\frac{x}{2} = 3$$ or maybe $$\\frac{y}{2} = 7$$"
562+
item = self._create_assessment_item(
563+
exercises.INPUT_QUESTION,
564+
question_text,
565+
[{"answer": "6", "correct": True, "order": 1}],
566+
)
567+
568+
# Create the exercise data
569+
exercise_data = {
570+
"mastery_model": exercises.M_OF_N,
571+
"randomize": True,
572+
"n": 1,
573+
"m": 1,
574+
"all_assessment_items": [item.assessment_id],
575+
"assessment_mapping": {item.assessment_id: exercises.INPUT_QUESTION},
576+
}
577+
578+
# Create the Perseus exercise
579+
self._create_perseus_zip(exercise_data)
455580

456581
# Verify that a file was created
457582
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -463,7 +588,10 @@ def test_formula_processing(self):
463588
item_json = json.loads(
464589
zip_file.read(f"{item.assessment_id}.json").decode("utf-8")
465590
)
466-
self.assertIn("\\frac{x}{2} = 3", item_json["question"]["content"])
591+
self.assertIn(
592+
"Solve: $\\frac{x}{2} = 3$ or maybe $\\frac{y}{2} = 7$",
593+
item_json["question"]["content"],
594+
)
467595

468596
def test_multiple_question_types(self):
469597
"""Test creating an exercise with multiple question types"""
@@ -526,9 +654,7 @@ def test_multiple_question_types(self):
526654
}
527655

528656
# Create the Perseus exercise
529-
create_perseus_exercise(
530-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
531-
)
657+
self._create_perseus_zip(exercise_data)
532658

533659
# Verify that a file was created
534660
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -661,6 +787,7 @@ def _test_image_resizing_in_field(self, field_type):
661787

662788
# Create the assessment item
663789
item_type = exercises.SINGLE_SELECTION
790+
664791
item = self._create_assessment_item(item_type, question_text, answers, hints)
665792

666793
# Associate the image with the assessment item
@@ -678,9 +805,7 @@ def _test_image_resizing_in_field(self, field_type):
678805
}
679806

680807
# Create the Perseus exercise
681-
create_perseus_exercise(
682-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
683-
)
808+
self._create_perseus_zip(exercise_data)
684809

685810
# Get the exercise file
686811
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -820,9 +945,7 @@ def test_image_with_same_resize_dimensions(self):
820945
}
821946

822947
# Create the Perseus exercise
823-
create_perseus_exercise(
824-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
825-
)
948+
self._create_perseus_zip(exercise_data)
826949

827950
# Get the exercise file
828951
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -901,9 +1024,7 @@ def test_image_with_similar_dimensions(self):
9011024
}
9021025

9031026
# Create the Perseus exercise
904-
create_perseus_exercise(
905-
self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
906-
)
1027+
self._create_perseus_zip(exercise_data)
9071028

9081029
# Get the exercise file
9091030
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -949,3 +1070,57 @@ def test_image_with_similar_dimensions(self):
9491070
third_image,
9501071
"Images with dimensions outside 1% threshold should use different files",
9511072
)
1073+
1074+
def test_image_with_zero_width(self):
1075+
# Create a base image file
1076+
base_image = fileobj_exercise_image(size=(400, 300), color="red")
1077+
base_image_url = exercises.CONTENT_STORAGE_FORMAT.format(base_image.filename())
1078+
1079+
# Create a question that references the same image twice: once with a
1080+
# zero width (=0x150) and once with valid resize dimensions (=200x151)
1081+
question_text = (
1082+
f"First image: ![shape1]({base_image_url} =0x150)\n"
1083+
f"Second image: ![shape2]({base_image_url} =200x151)"
1084+
)
1085+
1086+
# Create the assessment item
1087+
item = self._create_assessment_item(
1088+
exercises.SINGLE_SELECTION,
1089+
question_text,
1090+
[{"answer": "Answer", "correct": True, "order": 1}],
1091+
)
1092+
1093+
# Associate the image with the assessment item
1094+
base_image.assessment_item = item
1095+
base_image.save()
1096+
1097+
# Create exercise data
1098+
exercise_data = {
1099+
"mastery_model": exercises.M_OF_N,
1100+
"randomize": True,
1101+
"n": 1,
1102+
"m": 1,
1103+
"all_assessment_items": [item.assessment_id],
1104+
"assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
1105+
}
1106+
1107+
# Create the Perseus exercise
1108+
self._create_perseus_zip(exercise_data)
1109+
1110+
# Get the exercise file
1111+
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
1112+
1113+
# Validate the zip file
1114+
zip_file, _ = self._validate_perseus_zip(exercise_file)
1115+
1116+
# Get all image files in the zip
1117+
image_files = [
1118+
name for name in zip_file.namelist() if name.startswith("images/")
1119+
]
1120+
1121+
# Verify we have exactly 1 image file
1122+
self.assertEqual(
1123+
len(image_files),
1124+
1,
1125+
f"Expected 1 image file, found {len(image_files)}: {image_files}",
1126+
)

contentcuration/contentcuration/utils/assessment/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)