|
| 1 | +import xml.etree.ElementTree as ET |
| 2 | + |
| 3 | +from django.core.management.base import BaseCommand |
| 4 | +from django.db import transaction |
| 5 | + |
| 6 | +from problem.models import FracasPremise, FracasProblem |
| 7 | +from problem.utils import progress |
| 8 | + |
| 9 | + |
class Command(BaseCommand):
    """Import FraCaS problems and their premises from an XML file.

    The XML root is expected to contain ``<problem>`` elements interleaved
    with ``<comment class="section|subsection|subsubsection">`` heading
    elements. Each problem is tagged with the headings that precede it and
    then stored as a ``FracasProblem`` with one ``FracasPremise`` per ``<p>``
    child. Problems whose ``fracas_id`` already exists are skipped.
    """

    help = "Import FraCaS problems from fracas.xml."

    # Heading levels, ordered from outermost to innermost.
    _HEADING_LEVELS = ("section", "subsection", "subsubsection")

    def add_arguments(self, parser):
        """Register the ``--fracas_path`` command-line option."""
        parser.add_argument(
            "--fracas_path",
            type=str,
            default="problem/data/fracas.xml",
            help="Path to the fracas.xml file.",
        )

    def handle(self, *args, **options):
        """Run the import using the path given by ``--fracas_path``."""
        self.import_fracas_problems(options["fracas_path"])

    @staticmethod
    def _text_from_element(element) -> str:
        """Return the stripped text of *element*.

        Returns an empty string when *element* is ``None`` or has no text.
        """
        if element is None or not element.text:
            return ""
        return element.text.strip()

    def annotate_section_subsections(self, tree: ET.ElementTree) -> None:
        """Copy the enclosing heading names onto each ``<problem>`` element.

        Walks the direct children of the root in document order. Every
        ``<comment>`` whose ``class`` is ``section``, ``subsection`` or
        ``subsubsection`` updates the current heading for that level; every
        ``<problem>`` receives the non-empty current headings as attributes
        of the same names. The tree is modified in place.

        Starting a new heading clears all deeper levels, so a problem never
        inherits a subsection or subsubsection from an earlier section.
        """
        levels = self._HEADING_LEVELS
        current = dict.fromkeys(levels)

        for element in tree.getroot():
            if element.tag == "comment":
                level = element.attrib.get("class")
                if level not in levels:
                    continue
                # An empty heading comment has ``text is None``; treat it as "".
                current[level] = (element.text or "").strip()
                # A new heading invalidates everything nested below it.
                for deeper in levels[levels.index(level) + 1:]:
                    current[deeper] = None
            elif element.tag == "problem":
                for level in levels:
                    if current[level]:
                        element.set(level, current[level])

    def import_fracas_problems(self, fracas_path: str) -> None:
        """Parse *fracas_path* and store every problem not yet in the database.

        Each problem and its premises are written inside a single
        transaction, so a failure while creating a premise does not leave a
        problem without its premises.

        Raises:
            ValueError: if a ``<problem>`` has no ``id`` attribute or one of
                its ``<p>`` premises has no ``idx`` attribute.
        """
        tree = ET.parse(fracas_path)
        self.annotate_section_subsections(tree)

        problems = tree.getroot().findall("problem")
        total = len(problems)
        skipped = 0

        for position, problem in enumerate(problems, start=1):
            problem_id = problem.get("id")
            if problem_id is None:
                raise ValueError(
                    "Problem ID is missing in the XML file for problem "
                    f"number {position} of {total}"
                )

            progress(position, total)

            if FracasProblem.objects.filter(fracas_id=problem_id).exists():
                skipped += 1
                continue

            # NOTE: the "subsubsection" attribute set by
            # annotate_section_subsections is not persisted here.
            with transaction.atomic():
                fracas_problem = FracasProblem.objects.create(
                    fracas_id=int(problem_id),
                    question=self._text_from_element(problem.find("q")),
                    hypothesis=self._text_from_element(problem.find("h")),
                    answer=self._text_from_element(problem.find("a")),
                    fracas_answer=problem.get("fracas_answer"),
                    fracas_non_standard=problem.get("fracas_nonstandard") == "true",
                    note=self._text_from_element(problem.find("note")),
                    section_name=problem.get("section"),
                    subsection_name=problem.get("subsection"),
                )

                for premise in problem.findall("p"):
                    premise_index = premise.get("idx")
                    if premise_index is None:
                        raise ValueError(
                            "Premise index is missing in the XML file for "
                            f"problem: {problem_id}"
                        )
                    FracasPremise.objects.create(
                        fracas_problem=fracas_problem,
                        premise_index=int(premise_index),
                        premise=self._text_from_element(premise),
                    )

        # Write through the command's stdout wrapper so callers of
        # call_command() can capture or redirect the summary.
        self.stdout.write(
            f"FraCaS problems import complete! Total: {total} | Skipped: {skipped}"
        )
0 commit comments