Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backend/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ venv/
ENV/
env.bak/
venv.bak/

# Data files
problem/data/*
5 changes: 0 additions & 5 deletions backend/example/apps.py

This file was deleted.

3 changes: 0 additions & 3 deletions backend/example/models.py

This file was deleted.

11 changes: 0 additions & 11 deletions backend/example/views.py

This file was deleted.

39 changes: 36 additions & 3 deletions backend/langpro_annotator/common_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@
# cf. https://github.com/iMerica/dj-rest-auth/pull/110.
'allauth.socialaccount',
'user',

'revproxy',
'example'
'problem',
]

MIDDLEWARE = [
Expand Down Expand Up @@ -67,4 +66,38 @@

# REST auth settings: user details are serialized with the project's own
# serializer class, referenced here by its dotted import path.
REST_AUTH = {
    "USER_DETAILS_SERIALIZER": "user.serializers.CustomUserDetailsSerializer",
}

# Logging configuration dictionary (schema version 1).
# A single console handler emits INFO-and-above records in the "simple"
# format. The "django" logger and the project's "LangProAnnotator" logger are
# configured identically: console handler, INFO level, no propagation.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        # "style": "{" selects str.format-style placeholders.
        "verbose": {
            "format": "{levelname} {asctime} {module} {message}",
            "style": "{",
        },
        "simple": {"format": "{levelname} {message}", "style": "{"},
    },
    "handlers": {
        "console": {
            "level": "INFO",
            "class": "logging.StreamHandler",
            "formatter": "simple",
        },
    },
    "loggers": {
        # A fresh dict is built per logger so the entries never alias.
        logger_name: {
            "handlers": ["console"],
            "level": "INFO",
            "propagate": False,
        }
        for logger_name in ("django", "LangProAnnotator")
    },
}
3 changes: 3 additions & 0 deletions backend/langpro_annotator/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Shared logger for the LangPro Annotator backend."""

import logging

# Importers of this module all receive the same named logger instance.
logger = logging.getLogger("LangProAnnotator")
4 changes: 3 additions & 1 deletion backend/langpro_annotator/proxy_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
from django.views.decorators.csrf import ensure_csrf_cookie

from revproxy.views import ProxyView

# Built once at import time: a proxy view whose upstream is the address
# configured in settings.PROXY_FRONTEND.
view = ProxyView.as_view(upstream=settings.PROXY_FRONTEND)


@ensure_csrf_cookie
def proxy_frontend(*args, **kwargs):
    """Wrapper for calls to the SPA ensuring the presence of a CSRF cookie.

    All positional and keyword arguments are passed through unchanged to the
    module-level proxy ``view``; its result is returned as-is.
    """
    # ``view`` is only read here, so no ``global`` declaration is needed.
    return view(*args, **kwargs)
33 changes: 17 additions & 16 deletions backend/langpro_annotator/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
1. Import the include() function: from django.conf.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""

from django.conf import settings
from django.urls import path, re_path, include
from django.contrib import admin
Expand All @@ -24,28 +25,28 @@
from .proxy_frontend import proxy_frontend
from .i18n import i18n

# Register API viewsets with this router; its URLs are mounted under "api/".
api_router = routers.DefaultRouter()


# The SPA is served either through the frontend proxy (when PROXY_FRONTEND is
# set) or by the local index view.
if settings.PROXY_FRONTEND:
    spa_url = re_path(r"^(?P<path>.*)$", proxy_frontend)
else:
    spa_url = re_path(r"", index)

urlpatterns = [
    # Permanently redirect the slash-less prefixes to their canonical form.
    path("admin", RedirectView.as_view(url="/admin/", permanent=True)),
    path("api", RedirectView.as_view(url="/api/", permanent=True)),
    path("api-auth", RedirectView.as_view(url="/api-auth/", permanent=True)),
    path("admin/", admin.site.urls),
    path("api/", include(api_router.urls)),
    path(
        "api-auth/",
        include(
            "rest_framework.urls",
            namespace="rest_framework",
        ),
    ),
    path("api/i18n/", i18n),
    path("users/", include("user.urls")),
    spa_url,  # catch-all; unknown paths to be handled by a SPA
]
File renamed without changes.
File renamed without changes.
6 changes: 6 additions & 0 deletions backend/problem/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class ProblemConfig(AppConfig):
    """Application configuration for the ``problem`` app."""

    name = "problem"
    # Models without an explicit primary key get a BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"
125 changes: 125 additions & 0 deletions backend/problem/management/commands/import_fracas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import json
import xml.etree.ElementTree as ET

from django.core.management.base import BaseCommand
from django.db import transaction
from tqdm import tqdm

from langpro_annotator.logger import logger
from problem.services import get_fracas_problems
from problem.models import Problem


class Command(BaseCommand):
    """Management command that imports FraCaS problems from an XML file.

    Every ``<problem>`` element is stored as a ``Problem`` of type FRACAS.
    Problems whose ID is already present among the existing FraCaS problems
    are skipped, so the command can be re-run safely.
    """

    help = "Import FraCaS problems from fracas.xml."

    # Heading levels found in ``<comment class="...">`` elements, outermost first.
    _HEADING_LEVELS = ("section", "subsection", "subsubsection")

    def add_arguments(self, parser):
        """Register the optional ``--fracas_path`` argument."""
        parser.add_argument(
            "--fracas_path",
            type=str,
            default="problem/data/fracas.xml",
            help="Path to the fracas.xml file.",
        )

    def handle(self, *args, **options):
        """Run the import for the file given by ``--fracas_path``."""
        self.import_fracas_problems(options["fracas_path"])

    @staticmethod
    def _text_from_element(element: "ET.Element | None") -> str:
        """
        Extracts stripped text from an XML element, returning an empty string if the element is None or has no text.
        """
        if element is None or not element.text:
            return ""
        return element.text.strip()

    @staticmethod
    def _annotate_section_subsections(tree: ET.ElementTree) -> None:
        """
        Annotates each problem in the XML tree with its corresponding section, subsection, and subsubsection.

        The root's children are walked in document order. A ``<comment>``
        whose ``class`` is one of the heading levels updates the current
        heading for that level and clears all deeper levels, so a problem
        never inherits a subsection (or subsubsection) from a previous
        section. Each ``<problem>`` gets one attribute per non-empty current
        heading. The tree is modified in place.
        """
        levels = Command._HEADING_LEVELS
        current = dict.fromkeys(levels)

        for element in tree.getroot():
            if element.tag == "comment":
                level = element.attrib.get("class")
                if level not in levels:
                    continue
                # An empty heading comment yields "" rather than raising.
                current[level] = (element.text or "").strip()
                # A new heading invalidates everything nested below it.
                for deeper in levels[levels.index(level) + 1:]:
                    current[deeper] = None
            elif element.tag == "problem":
                for level in levels:
                    if current[level]:
                        element.set(level, current[level])

    def import_fracas_problems(self, fracas_path: str) -> None:
        """Parse ``fracas_path`` and create a ``Problem`` for every new FraCaS problem.

        All rows are created inside a single transaction, so a failure
        part-way through (for example a problem without an ID) leaves the
        database unchanged.

        Raises:
            ValueError: if a ``<problem>`` element has no ``id`` attribute
                (or, via ``int()``, if the ID is not an integer).
        """
        tree = ET.parse(fracas_path)
        self._annotate_section_subsections(tree)
        all_problems = tree.getroot().findall("problem")

        created = 0
        skipped = 0

        existing_fracas_ids = {p.fracas_id for p in get_fracas_problems()}

        with transaction.atomic():
            for problem in tqdm(all_problems, desc="Importing FraCaS problems"):
                problem_id = problem.get("id")
                if problem_id is None:
                    raise ValueError(
                        f"Problem ID is missing in the XML file for problem: {problem}"
                    )

                fracas_id = int(problem_id)
                if fracas_id in existing_fracas_ids:
                    skipped += 1
                    continue

                premises = [
                    node.text.strip() for node in problem.findall("p") if node.text
                ]

                # NOTE(review): ``content`` is a JSONField but receives an
                # already-serialized string; confirm that the readers in
                # problem.services expect a string rather than a dict.
                Problem.objects.create(
                    type=Problem.ProblemType.FRACAS,
                    content=json.dumps(
                        {
                            "fracas_id": fracas_id,
                            "question": self._text_from_element(problem.find("q")),
                            "hypothesis": self._text_from_element(problem.find("h")),
                            "answer": self._text_from_element(problem.find("a")),
                            "fracas_answer": problem.get("fracas_answer"),
                            # The attribute is the literal string "true" when set.
                            "fracas_non_standard": (
                                problem.get("fracas_nonstandard") == "true"
                            ),
                            "note": self._text_from_element(problem.find("note")),
                            "section_name": problem.get("section"),
                            "subsection_name": problem.get("subsection"),
                            "premises": premises,
                        }
                    ),
                )
                created += 1

        # "Created" (not "Total"): the count excludes skipped problems.
        logger.info(
            "FraCaS problems import complete! Created: %d | Skipped: %d",
            created,
            skipped,
        )
55 changes: 55 additions & 0 deletions backend/problem/management/commands/import_sick.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import csv
import json

from django.core.management.base import BaseCommand
from tqdm import tqdm

from langpro_annotator.logger import logger
from problem.models import Problem
from problem.services import get_sick_problems


class Command(BaseCommand):
    """Management command that imports SICK problems from a tab-separated file.

    Every data row is stored as a ``Problem`` of type SICK. Rows whose
    ``pair_ID`` is already present among the existing SICK problems are
    skipped, so the command can be re-run safely.
    """

    help = "Import SICK problems from SICK.txt (a TSV file)."

    def add_arguments(self, parser):
        """Register the optional ``--sick_path`` argument."""
        parser.add_argument(
            "--sick_path",
            type=str,
            default="problem/data/SICK.txt",
            help="Path to the SICK.txt file.",
        )

    def handle(self, *args, **options):
        """Run the import for the file given by ``--sick_path``."""
        self.import_sick_problems(options["sick_path"])

    def import_sick_problems(self, sick_path: str) -> None:
        """
        Import SICK problems from SICK.txt (a TSV file) and enter them into the database.

        The header row supplies the keys; each row is stored as the JSON
        serialization of its column-name -> value mapping.
        """
        skipped = 0
        created = 0

        # Compare IDs as strings: the TSV reader always yields strings, while
        # the type of ``pair_id`` on stored problems is decided by
        # problem.services (not visible here).
        existing_pair_ids = {str(p.pair_id) for p in get_sick_problems()}

        # newline="" lets the csv module do its own newline handling, as its
        # documentation requires for file objects.
        with open(sick_path, "r", encoding="utf-8", newline="") as file:
            # Materialized so the progress bar knows the total row count.
            problem_list = list(csv.DictReader(file, delimiter="\t"))

        for problem in tqdm(problem_list, desc="Importing SICK problems"):
            if str(problem["pair_ID"]) in existing_pair_ids:
                skipped += 1
                continue

            # NOTE(review): ``content`` is a JSONField but receives an
            # already-serialized string; confirm that the readers in
            # problem.services expect a string rather than a dict.
            Problem.objects.create(
                type=Problem.ProblemType.SICK,
                content=json.dumps(problem),
            )
            created += 1

        logger.info(
            f"SICK problems import complete! Created: {created} | Skipped: {skipped}"
        )
34 changes: 34 additions & 0 deletions backend/problem/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 4.2.20 on 2025-05-22 13:40

from django.db import migrations, models


class Migration(migrations.Migration):
    """Initial migration: creates the ``Problem`` model."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Problem",
            fields=[
                # Auto-created 64-bit primary key.
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Dataset type, limited to the listed (value, label) choices.
                (
                    "type",
                    models.CharField(
                        choices=[
                            ("sick", "Sick"),
                            ("fracas", "FraCaS"),
                        ],
                        max_length=255,
                    ),
                ),
                ("content", models.JSONField()),
            ],
        ),
    ]
Empty file.
14 changes: 14 additions & 0 deletions backend/problem/models.py
Comment thread
XanderVertegaal marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from django.db import models


class Problem(models.Model):
    """A problem from one of the supported datasets (SICK or FraCaS)."""

    class ProblemType(models.TextChoices):
        # Each member is (stored value, human-readable label).
        SICK = "sick", "Sick"
        FRACAS = "fracas", "FraCaS"

    # Dataset the problem belongs to; restricted to the ProblemType values.
    type = models.CharField(max_length=255, choices=ProblemType.choices)

    # Problem payload; the model itself imposes no structure on it.
    content = models.JSONField()
Loading