Skip to content

Commit ce0513f

Browse files
committed
Add MathML pydantic objects.
Generalize tree generation code from HTML for reuse in MathML.
1 parent 498ba3a commit ce0513f

11 files changed

Lines changed: 2114 additions & 104 deletions

File tree

contentcuration/contentcuration/tests/utils/qti/test_mathml.py

Lines changed: 1554 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from .base import ElementTreeBase
2+
3+
4+
__all__ = [
5+
"ElementTreeBase",
6+
]

contentcuration/contentcuration/utils/assessment/qti/base.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import List
88
from typing import Optional
99
from typing import Set
10+
from typing import Type
1011
from typing import Union
1112

1213
from pydantic import BaseModel
@@ -174,3 +175,108 @@ class BaseSequence(XMLElement):
174175
label: Optional[str] = None
175176
# We explicitly do not set the base value.
176177
dir_: Optional[Dir] = None
178+
179+
180+
# Pydantic's BaseModel Metaclass is only importable from an internal module,
181+
# so we inspect the BaseSequence class to get its metaclass.
182+
BaseSequenceMetaclass = type(BaseSequence)
183+
184+
185+
class RegistryMeta(BaseSequenceMetaclass):
    """Metaclass that records every class it creates, keyed by element name.

    The registry is stored on the metaclass itself, so all classes that share
    a metaclass share one ``element name -> class`` mapping, while a metaclass
    derived from this one gets its own independent mapping.
    """

    def __new__(mcs, name, bases, attrs, **kwargs):
        """Create the class and register it under ``cls.element_name()``.

        Keyword arguments given in the class statement are forwarded to the
        base metaclass unchanged.

        Raises:
            ValueError: if a different class is already registered under the
                same element name in this metaclass's registry.
        """
        cls = super().__new__(mcs, name, bases, attrs, **kwargs)

        # Each metaclass gets its own registry. Check mcs.__dict__ instead of
        # hasattr(): hasattr() follows the MRO, so a derived metaclass would
        # otherwise silently reuse its parent's registry.
        if "_registry" not in mcs.__dict__:
            mcs._registry = {}

        element_name = cls.element_name()
        if element_name in mcs._registry and mcs._registry[element_name] is not cls:
            raise ValueError(
                f"Element name '{element_name}' already registered in {mcs.__name__}"
            )
        mcs._registry[element_name] = cls

        return cls

    @classmethod
    def _ensure_registry_complete(cls):
        """Import the html and mathml packages once so their classes register."""
        if not hasattr(cls, "_registry_initialized"):
            # Imported lazily, and only for the side effect of class creation
            # (which runs __new__ above and fills the registry).
            from contentcuration.utils.assessment.qti import html, mathml  # noqa: F401

            cls._registry_initialized = True

    @classmethod
    def get_class_for_tag(cls, tag_name: str) -> Optional[Type]:
        """Return the class registered for ``tag_name``, or None if there is none."""
        cls._ensure_registry_complete()
        # Read this metaclass's own registry only, mirroring __new__.
        return cls.__dict__.get("_registry", {}).get(tag_name)
218+
219+
220+
class ElementTreeBase(BaseSequence, metaclass=RegistryMeta):
    """Base for element classes that can be built from parsed XML markup."""

    @classmethod
    def from_element(cls, element: ET.Element) -> "ElementTreeBase":
        """Build the registered class for ``element.tag`` from an ET element.

        Attributes become keyword arguments (after name conversion) and any
        child elements / text become the ``children`` argument.

        Raises:
            ValueError: if no class is registered for the element's tag.
        """
        registry_owner = type(cls)
        target_class = registry_owner.get_class_for_tag(element.tag)
        if target_class is None:
            raise ValueError(f"No registered class found for tag: {element.tag}")

        # Attribute values are passed through as strings; the target class is
        # responsible for validating / coercing them.
        field_data = {
            cls._attr_name_to_field_name(name): value
            for name, value in element.attrib.items()
        }

        # Only pass "children" when there is something to pass, so the target
        # class's own default applies otherwise.
        children = cls._extract_children(element)
        if children:
            field_data["children"] = children

        return target_class(**field_data)

    @classmethod
    def _attr_name_to_field_name(cls, attr_name: str) -> str:
        """Map a markup attribute name onto the corresponding field name."""
        # ":" becomes "__" and "-" becomes "_" (kebab-case -> snake_case).
        normalized = attr_name.replace(":", "__").replace("-", "_")

        # These names collide with Python keywords or builtins, so the
        # matching fields carry a trailing underscore.
        reserved = {"class", "for", "type", "id", "dir"}
        return f"{normalized}_" if normalized in reserved else normalized

    @classmethod
    def _extract_children(
        cls, element: ET.Element
    ) -> List[Union["ElementTreeBase", TextNode]]:
        """Return the element's children and text nodes in document order.

        Text that is empty or whitespace-only is skipped.
        """
        nodes = []

        # Text that appears before the first child element.
        leading = element.text
        if leading and leading.strip():
            nodes.append(TextNode(text=leading))

        for child in element:
            nodes.append(cls.from_element(child))
            # Text that follows this child, up to the next sibling.
            trailing = child.tail
            if trailing and trailing.strip():
                nodes.append(TextNode(text=trailing))

        return nodes

    @classmethod
    def from_string(cls, string: str) -> List["ElementTreeBase"]:
        """Parse a markup string into a list of top-level element instances.

        Raises:
            ValueError: if the markup cannot be parsed as XML.
        """
        try:
            # A synthetic root lets the input hold several top-level elements.
            root = ET.fromstring(f"<root>{string}</root>")
            return [cls.from_element(top_level) for top_level in root]
        except ET.ParseError as e:
            raise ValueError(f"Invalid Markup: {e}") from e

contentcuration/contentcuration/utils/assessment/qti/html/base.py

Lines changed: 3 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,118 +1,22 @@
1-
import xml.etree.ElementTree as ET
2-
from typing import Dict
31
from typing import List
42
from typing import Optional
5-
from typing import Type
6-
from typing import Union
73

84
from pydantic import model_validator
95

10-
from contentcuration.utils.assessment.qti.base import BaseSequence
11-
from contentcuration.utils.assessment.qti.base import TextNode
6+
from contentcuration.utils.assessment.qti.base import ElementTreeBase
127
from contentcuration.utils.assessment.qti.fields import LocalSrcPath
138
from contentcuration.utils.assessment.qti.fields import LocalSrcSet
149

1510

16-
# Pydantic's BaseModel Metaclass is only importable from an internal module,
17-
# so we inspect the BaseSequence class to get its metaclass.
18-
BaseSequenceMetaclass = type(BaseSequence)
19-
20-
21-
class HTMLElementMeta(BaseSequenceMetaclass):
22-
"""Metaclass that auto-registers HTML element classes by their tag name"""
23-
24-
# Class registry mapping tag names to classes
25-
_registry: Dict[str, Type["HTMLElement"]] = {}
26-
27-
def __new__(mcs, name, bases, attrs):
28-
cls = super().__new__(mcs, name, bases, attrs)
29-
element_name = cls.element_name()
30-
mcs._registry[element_name] = cls
31-
return cls
32-
33-
@classmethod
34-
def get_class_for_tag(mcs, tag_name: str) -> Optional[Type["HTMLElement"]]:
35-
"""Get the registered class for a given tag name"""
36-
return mcs._registry.get(tag_name)
37-
38-
@classmethod
39-
def register_class(mcs, tag_name: str, cls: Type["HTMLElement"]):
40-
"""Manually register a class for a tag name"""
41-
mcs._registry[tag_name] = cls
42-
43-
44-
class HTMLElement(BaseSequence, metaclass=HTMLElementMeta):
11+
class HTMLElement(ElementTreeBase):
4512
"""
4613
Represents an HTML element within QTI.
4714
"""
4815

49-
@classmethod
50-
def element_name(cls):
51-
return cls.__name__.lower()
52-
53-
@classmethod
54-
def from_element(cls, element: ET.Element) -> "HTMLElement":
55-
"""Create HTMLElement instance from ET.Element"""
56-
# Get the appropriate class for this tag
57-
target_class = HTMLElementMeta.get_class_for_tag(element.tag)
58-
if target_class is None:
59-
raise ValueError(f"No registered class found for tag: {element.tag}")
60-
61-
# Convert attributes to field data - Pydantic will handle type coercion
62-
field_data = {}
63-
for attr_name, attr_value in element.attrib.items():
64-
field_name = cls._attr_name_to_field_name(attr_name)
65-
field_data[field_name] = attr_value
66-
67-
# Convert children and text
68-
children = cls._extract_children(element)
69-
if children:
70-
field_data["children"] = children
71-
72-
return target_class(**field_data)
73-
74-
@classmethod
75-
def _attr_name_to_field_name(cls, attr_name: str) -> str:
76-
"""Convert HTML attribute name to Python field name"""
77-
# kebab-case -> snake_case, : -> __
78-
field_name = attr_name.replace(":", "__").replace("-", "_")
79-
80-
# Add trailing underscore for Python keywords
81-
if field_name in {"class", "for", "type", "id", "dir"}:
82-
field_name += "_"
83-
84-
return field_name
85-
86-
@classmethod
87-
def _extract_children(
88-
cls, element: ET.Element
89-
) -> List[Union["HTMLElement", TextNode]]:
90-
"""Extract child elements and text nodes from XML element"""
91-
children = []
92-
93-
# Add initial text if present
94-
if element.text and element.text.strip():
95-
children.append(TextNode(text=element.text))
96-
97-
# Process child elements
98-
for child_elem in element:
99-
children.append(cls.from_element(child_elem))
100-
# Add tail text after child element
101-
if child_elem.tail and child_elem.tail.strip():
102-
children.append(TextNode(text=child_elem.tail))
103-
104-
return children
105-
10616
@classmethod
10717
def from_html_string(cls, html_string: str) -> List["HTMLElement"]:
10818
"""Parse HTML string and return list of HTMLElement instances"""
109-
try:
110-
# Wrap in a root element to handle multiple top-level elements
111-
wrapped_html = f"<root>{html_string}</root>"
112-
root = ET.fromstring(wrapped_html)
113-
return [cls.from_element(child) for child in root]
114-
except ET.ParseError as e:
115-
raise ValueError(f"Invalid HTML: {e}") from e
19+
return cls.from_string(html_string)
11620

11721

11822
class FlowContentElement(HTMLElement):

contentcuration/contentcuration/utils/assessment/qti/html/content_types.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
from contentcuration.utils.assessment.qti.base import TextType
55
from contentcuration.utils.assessment.qti.html.base import FlowContentElement
66
from contentcuration.utils.assessment.qti.html.base import InlineContentElement
7+
from contentcuration.utils.assessment.qti.interaction_types.base import BlockInteraction
78
from contentcuration.utils.assessment.qti.interaction_types.base import (
89
InlineInteraction,
910
)
11+
from contentcuration.utils.assessment.qti.mathml import Math
1012

1113

1214
FlowContent = Union[FlowContentElement, TextType]
@@ -26,8 +28,17 @@
2628
# InlineChoiceInteraction,
2729
# EndAttemptInteraction,
2830
# CustomInteraction,
29-
# Math,
31+
Math,
3032
# Include,
3133
]
3234

3335
InlineGroupList = List[Union[InlineGroup, TextType]]
36+
37+
FlowGroup = Union[
38+
FlowContentElement,
39+
BlockInteraction,
40+
InlineInteraction,
41+
Math,
42+
]
43+
44+
FlowGroupList = List[Union[FlowGroup, TextType]]

contentcuration/contentcuration/utils/assessment/qti/html/flow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pydantic import HttpUrl
55

66
from contentcuration.utils.assessment.qti.html.base import BlockContentElement
7-
from contentcuration.utils.assessment.qti.html.content_types import FlowContentList
7+
from contentcuration.utils.assessment.qti.html.content_types import FlowGroupList
88

99

1010
class HTMLFlowContainer(BlockContentElement):
@@ -14,7 +14,7 @@ class HTMLFlowContainer(BlockContentElement):
1414
Corresponds to HTML "Flow Content" category.
1515
"""
1616

17-
children: FlowContentList = Field(default_factory=list)
17+
children: FlowGroupList = Field(default_factory=list)
1818

1919

2020
class Blockquote(HTMLFlowContainer):
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from .base import MathMLElement
2+
from .core import Annotation
3+
from .core import AnnotationXml
4+
from .core import Maction
5+
from .core import Math
6+
from .core import Merror
7+
from .core import Mfrac
8+
from .core import Mi
9+
from .core import Mmultiscripts
10+
from .core import Mn
11+
from .core import Mo
12+
from .core import Mover
13+
from .core import Mpadded
14+
from .core import Mphantom
15+
from .core import Mprescripts
16+
from .core import Mroot
17+
from .core import Mrow
18+
from .core import Ms
19+
from .core import Mspace
20+
from .core import Msqrt
21+
from .core import Mstyle
22+
from .core import Msub
23+
from .core import Msubsup
24+
from .core import Msup
25+
from .core import Mtable
26+
from .core import Mtd
27+
from .core import Mtext
28+
from .core import Mtr
29+
from .core import Munder
30+
from .core import Munderover
31+
from .core import Semantics
32+
from .fields import MathMLDisplay
33+
from .fields import MathMLForm
34+
35+
__all__ = [
36+
"MathMLElement",
37+
# Root element
38+
"Math",
39+
# Token elements
40+
"Mi",
41+
"Mn",
42+
"Mo",
43+
"Mtext",
44+
"Ms",
45+
"Mspace",
46+
# Layout elements
47+
"Mrow",
48+
"Mfrac",
49+
"Msqrt",
50+
"Mroot",
51+
"Mpadded",
52+
# Script elements
53+
"Msub",
54+
"Msup",
55+
"Msubsup",
56+
"Munder",
57+
"Mover",
58+
"Munderover",
59+
"Mmultiscripts",
60+
"Mprescripts",
61+
# Table elements
62+
"Mtd",
63+
"Mtr",
64+
"Mtable",
65+
# Grouping elements
66+
"Mstyle",
67+
"Merror",
68+
"Mphantom",
69+
"Maction",
70+
# Semantic elements
71+
"Annotation",
72+
"AnnotationXml",
73+
"Semantics",
74+
# enums
75+
"MathMLForm",
76+
"MathMLDisplay",
77+
]

0 commit comments

Comments
 (0)