Skip to content

Commit a22d826

Browse files
committed
Add MathML pydantic objects.
Generalize tree generation code from HTML for reuse in MathML.
1 parent 5cff1da commit a22d826

11 files changed

Lines changed: 2207 additions & 104 deletions

File tree

contentcuration/contentcuration/tests/utils/qti/test_mathml.py

Lines changed: 1613 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from .base import ElementTreeBase
2+
3+
4+
__all__ = [
5+
"ElementTreeBase",
6+
]

contentcuration/contentcuration/utils/assessment/qti/base.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import List
88
from typing import Optional
99
from typing import Set
10+
from typing import Type
1011
from typing import Union
1112

1213
from pydantic import BaseModel
@@ -166,3 +167,108 @@ class BaseSequence(XMLElement):
166167
label: Optional[str] = None
167168
# We explicitly do not set the base value.
168169
dir_: Optional[Dir] = None
170+
171+
172+
# Pydantic's BaseModel metaclass is only importable from an internal module,
# so we inspect the BaseSequence class to get its metaclass instead.
# RegistryMeta derives from it so that it can be used as the metaclass of
# BaseSequence subclasses.
BaseSequenceMetaclass = type(BaseSequence)
175+
176+
177+
class RegistryMeta(BaseSequenceMetaclass):
    """Metaclass that records every class it creates under its element name.

    The registry is a plain dict stored as the ``_registry`` attribute of the
    metaclass and is created the first time a class is built. The key is
    whatever the new class's ``element_name()`` returns.

    NOTE(review): the registry is created only when ``hasattr`` finds none,
    and ``hasattr`` also sees attributes inherited from a parent metaclass.
    A metaclass deriving from this one would therefore share this registry
    rather than get a separate one - confirm that is the intended behaviour.
    """

    def __new__(mcs, name, bases, attrs):
        """Create the class, then register it under its element name.

        Raises:
            ValueError: if a different class is already registered under the
                same element name.
        """
        new_class = super().__new__(mcs, name, bases, attrs)

        # Lazily create the registry on first use.
        if not hasattr(mcs, "_registry"):
            mcs._registry = {}
        registry = mcs._registry

        element_name = new_class.element_name()
        already_taken = (
            element_name in registry and registry[element_name] is not new_class
        )
        if already_taken:
            raise ValueError(
                f"Element name '{element_name}' already registered in {mcs.__name__}"
            )

        registry[element_name] = new_class
        return new_class

    @classmethod
    def _ensure_registry_complete(mcs):
        """Import the HTML and MathML packages once so their classes register."""
        if hasattr(mcs, "_registry_initialized"):
            return

        # Imported here rather than at module level; importing the packages
        # defines their element classes, which registers them via __new__.
        from contentcuration.utils.assessment.qti import html, mathml  # noqa: F401

        mcs._registry_initialized = True

    @classmethod
    def get_class_for_tag(mcs, tag_name: str) -> Optional[Type]:
        """Return the class registered for ``tag_name``, or None if there is none."""
        mcs._ensure_registry_complete()
        registry = getattr(mcs, "_registry", {})
        return registry.get(tag_name)
210+
211+
212+
class ElementTreeBase(BaseSequence, metaclass=RegistryMeta):
    """Base for element models that can be built from an ElementTree tree.

    The concrete class for each element is looked up by tag name in the
    registry maintained by this class's metaclass.
    """

    @classmethod
    def from_element(cls, element: ET.Element) -> "ElementTreeBase":
        """Build the model registered for ``element``, including its children.

        The class that gets instantiated is chosen from the element's tag,
        not from ``cls``. Attributes are passed through as strings, so any
        type coercion is left to Pydantic.

        Raises:
            ValueError: if no class is registered for the element's tag.
        """
        # ElementTree reports namespaced elements as "{uri}local", while the
        # registry is keyed by the bare element name, so look up the local
        # part. Without this, markup carrying a default xmlns never matches.
        tag_name = cls._local_tag_name(element.tag)
        target_class = type(cls).get_class_for_tag(tag_name)
        if target_class is None:
            raise ValueError(f"No registered class found for tag: {element.tag}")

        # Convert attributes to field data - Pydantic will handle type coercion
        # NOTE(review): namespaced attributes arrive from ElementTree as
        # "{uri}name" and are not translated here - confirm whether any
        # field is expected to be populated from such an attribute.
        field_data = {
            cls._attr_name_to_field_name(attr_name): attr_value
            for attr_name, attr_value in element.attrib.items()
        }

        # Only pass children when there are some, so the model's own default
        # applies otherwise.
        children = cls._extract_children(element)
        if children:
            field_data["children"] = children

        return target_class(**field_data)

    @classmethod
    def _local_tag_name(cls, tag):
        """Return ``tag`` without a leading ``{namespace-uri}`` prefix.

        Anything that is not a string starting with ``{`` is returned
        unchanged, so unusual tag values still reach the registry lookup
        and produce the normal "no registered class" error.
        """
        if isinstance(tag, str) and tag.startswith("{"):
            return tag.rpartition("}")[2]
        return tag

    @classmethod
    def _attr_name_to_field_name(cls, attr_name: str) -> str:
        """Convert a markup attribute name to the matching Python field name."""
        # kebab-case -> snake_case, : -> __
        field_name = attr_name.replace(":", "__").replace("-", "_")

        # Names that would collide with a Python keyword ("class", "for") or
        # builtin ("type", "id", "dir") get a trailing underscore, e.g. "dir_".
        if field_name in {"class", "for", "type", "id", "dir"}:
            field_name += "_"

        return field_name

    @classmethod
    def _extract_children(
        cls, element: ET.Element
    ) -> List[Union["ElementTreeBase", TextNode]]:
        """Return the element's child models and text nodes in document order.

        Text that is empty or consists only of whitespace is skipped; other
        text is kept exactly as it appears, without trimming.
        """
        children = []

        # Text before the first child element
        if element.text and element.text.strip():
            children.append(TextNode(text=element.text))

        for child_elem in element:
            children.append(cls.from_element(child_elem))
            # ElementTree stores the text that follows a child on that child
            # as its "tail".
            if child_elem.tail and child_elem.tail.strip():
                children.append(TextNode(text=child_elem.tail))

        return children

    @classmethod
    def from_string(cls, string: str) -> List["ElementTreeBase"]:
        """Parse a markup string into a list of element models.

        Only top-level elements are returned; text that sits directly at the
        top level, outside any element, is not part of the result.

        Raises:
            ValueError: if the markup is not well-formed, or if an element
                has no registered class.
        """
        # Wrap in a root element to handle multiple top-level elements
        wrapped_markup = f"<root>{string}</root>"
        try:
            root = ET.fromstring(wrapped_markup)
        except ET.ParseError as e:
            raise ValueError(f"Invalid Markup: {e}") from e

        return [cls.from_element(child) for child in root]

contentcuration/contentcuration/utils/assessment/qti/html/base.py

Lines changed: 3 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,118 +1,22 @@
1-
import xml.etree.ElementTree as ET
2-
from typing import Dict
31
from typing import List
42
from typing import Optional
5-
from typing import Type
6-
from typing import Union
73

84
from pydantic import model_validator
95

10-
from contentcuration.utils.assessment.qti.base import BaseSequence
11-
from contentcuration.utils.assessment.qti.base import TextNode
6+
from contentcuration.utils.assessment.qti.base import ElementTreeBase
127
from contentcuration.utils.assessment.qti.fields import LocalSrcPath
138
from contentcuration.utils.assessment.qti.fields import LocalSrcSet
149

1510

16-
# Pydantic's BaseModel Metaclass is only importable from an internal module,
17-
# so we inspect the BaseSequence class to get its metaclass.
18-
BaseSequenceMetaclass = type(BaseSequence)
19-
20-
21-
class HTMLElementMeta(BaseSequenceMetaclass):
22-
"""Metaclass that auto-registers HTML element classes by their tag name"""
23-
24-
# Class registry mapping tag names to classes
25-
_registry: Dict[str, Type["HTMLElement"]] = {}
26-
27-
def __new__(mcs, name, bases, attrs):
28-
cls = super().__new__(mcs, name, bases, attrs)
29-
element_name = cls.element_name()
30-
mcs._registry[element_name] = cls
31-
return cls
32-
33-
@classmethod
34-
def get_class_for_tag(mcs, tag_name: str) -> Optional[Type["HTMLElement"]]:
35-
"""Get the registered class for a given tag name"""
36-
return mcs._registry.get(tag_name)
37-
38-
@classmethod
39-
def register_class(mcs, tag_name: str, cls: Type["HTMLElement"]):
40-
"""Manually register a class for a tag name"""
41-
mcs._registry[tag_name] = cls
42-
43-
44-
class HTMLElement(BaseSequence, metaclass=HTMLElementMeta):
11+
class HTMLElement(ElementTreeBase):
4512
"""
4613
Represents an HTML element within QTI.
4714
"""
4815

49-
@classmethod
50-
def element_name(cls):
51-
return cls.__name__.lower()
52-
53-
@classmethod
54-
def from_element(cls, element: ET.Element) -> "HTMLElement":
55-
"""Create HTMLElement instance from ET.Element"""
56-
# Get the appropriate class for this tag
57-
target_class = HTMLElementMeta.get_class_for_tag(element.tag)
58-
if target_class is None:
59-
raise ValueError(f"No registered class found for tag: {element.tag}")
60-
61-
# Convert attributes to field data - Pydantic will handle type coercion
62-
field_data = {}
63-
for attr_name, attr_value in element.attrib.items():
64-
field_name = cls._attr_name_to_field_name(attr_name)
65-
field_data[field_name] = attr_value
66-
67-
# Convert children and text
68-
children = cls._extract_children(element)
69-
if children:
70-
field_data["children"] = children
71-
72-
return target_class(**field_data)
73-
74-
@classmethod
75-
def _attr_name_to_field_name(cls, attr_name: str) -> str:
76-
"""Convert HTML attribute name to Python field name"""
77-
# kebab-case -> snake_case, : -> __
78-
field_name = attr_name.replace(":", "__").replace("-", "_")
79-
80-
# Add trailing underscore for Python keywords
81-
if field_name in {"class", "for", "type", "id", "dir"}:
82-
field_name += "_"
83-
84-
return field_name
85-
86-
@classmethod
87-
def _extract_children(
88-
cls, element: ET.Element
89-
) -> List[Union["HTMLElement", TextNode]]:
90-
"""Extract child elements and text nodes from XML element"""
91-
children = []
92-
93-
# Add initial text if present
94-
if element.text and element.text.strip():
95-
children.append(TextNode(text=element.text))
96-
97-
# Process child elements
98-
for child_elem in element:
99-
children.append(cls.from_element(child_elem))
100-
# Add tail text after child element
101-
if child_elem.tail and child_elem.tail.strip():
102-
children.append(TextNode(text=child_elem.tail))
103-
104-
return children
105-
10616
@classmethod
10717
def from_html_string(cls, html_string: str) -> List["HTMLElement"]:
10818
"""Parse HTML string and return list of HTMLElement instances"""
109-
try:
110-
# Wrap in a root element to handle multiple top-level elements
111-
wrapped_html = f"<root>{html_string}</root>"
112-
root = ET.fromstring(wrapped_html)
113-
return [cls.from_element(child) for child in root]
114-
except ET.ParseError as e:
115-
raise ValueError(f"Invalid HTML: {e}") from e
19+
return cls.from_string(html_string)
11620

11721

11822
class FlowContentElement(HTMLElement):

contentcuration/contentcuration/utils/assessment/qti/html/content_types.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
from contentcuration.utils.assessment.qti.base import TextType
55
from contentcuration.utils.assessment.qti.html.base import FlowContentElement
66
from contentcuration.utils.assessment.qti.html.base import InlineContentElement
7+
from contentcuration.utils.assessment.qti.interaction_types.base import BlockInteraction
78
from contentcuration.utils.assessment.qti.interaction_types.base import (
89
InlineInteraction,
910
)
11+
from contentcuration.utils.assessment.qti.mathml import Math
1012

1113

1214
FlowContent = Union[FlowContentElement, TextType]
@@ -26,8 +28,17 @@
2628
# InlineChoiceInteraction,
2729
# EndAttemptInteraction,
2830
# CustomInteraction,
29-
# Math,
31+
Math,
3032
# Include,
3133
]
3234

3335
InlineGroupList = List[Union[InlineGroup, TextType]]
36+
37+
# Non-text members allowed in a flow group: flow content elements,
# block and inline interactions, and MathML <math> elements.
FlowGroup = Union[
    FlowContentElement,
    BlockInteraction,
    InlineInteraction,
    Math,
]

# A list whose items are each either a FlowGroup member or a TextType.
FlowGroupList = List[Union[FlowGroup, TextType]]

contentcuration/contentcuration/utils/assessment/qti/html/flow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pydantic import HttpUrl
55

66
from contentcuration.utils.assessment.qti.html.base import BlockContentElement
7-
from contentcuration.utils.assessment.qti.html.content_types import FlowContentList
7+
from contentcuration.utils.assessment.qti.html.content_types import FlowGroupList
88

99

1010
class HTMLFlowContainer(BlockContentElement):
@@ -14,7 +14,7 @@ class HTMLFlowContainer(BlockContentElement):
1414
Corresponds to HTML "Flow Content" category.
1515
"""
1616

17-
children: FlowContentList = Field(default_factory=list)
17+
children: FlowGroupList = Field(default_factory=list)
1818

1919

2020
class Blockquote(HTMLFlowContainer):
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from .base import MathMLElement
2+
from .core import Annotation
3+
from .core import AnnotationXml
4+
from .core import Math
5+
from .core import Merror
6+
from .core import Mfrac
7+
from .core import Mi
8+
from .core import Mmultiscripts
9+
from .core import Mn
10+
from .core import Mo
11+
from .core import Mover
12+
from .core import Mpadded
13+
from .core import Mphantom
14+
from .core import Mprescripts
15+
from .core import Mroot
16+
from .core import Mrow
17+
from .core import Ms
18+
from .core import Mspace
19+
from .core import Msqrt
20+
from .core import Mstyle
21+
from .core import Msub
22+
from .core import Msubsup
23+
from .core import Msup
24+
from .core import Mtable
25+
from .core import Mtd
26+
from .core import Mtext
27+
from .core import Mtr
28+
from .core import Munder
29+
from .core import Munderover
30+
from .core import Semantics
31+
from .fields import MathMLDisplay
32+
from .fields import MathMLForm
33+
34+
__all__ = [
35+
"MathMLElement",
36+
# Root element
37+
"Math",
38+
# Token elements
39+
"Mi",
40+
"Mn",
41+
"Mo",
42+
"Mtext",
43+
"Ms",
44+
"Mspace",
45+
# Layout elements
46+
"Mrow",
47+
"Mfrac",
48+
"Msqrt",
49+
"Mroot",
50+
"Mpadded",
51+
# Script elements
52+
"Msub",
53+
"Msup",
54+
"Msubsup",
55+
"Munder",
56+
"Mover",
57+
"Munderover",
58+
"Mmultiscripts",
59+
"Mprescripts",
60+
# Table elements
61+
"Mtd",
62+
"Mtr",
63+
"Mtable",
64+
# Grouping elements
65+
"Mstyle",
66+
"Merror",
67+
"Mphantom",
68+
# Semantic elements
69+
"Annotation",
70+
"AnnotationXml",
71+
"Semantics",
72+
# enums
73+
"MathMLForm",
74+
"MathMLDisplay",
75+
]

0 commit comments

Comments
 (0)