Skip to content

Commit 0803857

Browse files
committed
Sanitize metadata input
1 parent a0f50e5 commit 0803857

6 files changed

Lines changed: 236 additions & 4 deletions

File tree

geonode/metadata/handlers/abstract.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def _set_error(errors: dict, path: list, msg: str):
160160

161161
@staticmethod
162162
def localize_message(context: dict, msg_code: str, msg_info: dict):
163-
msg_loc: str = labelResolver.gettext(msg_code)
163+
msg_loc: str = labelResolver.gettext(msg_code, lang=context.get("lang", None))
164164
if msg_loc:
165165
tokens = defaultdict(lambda: "N/A", msg_info or {})
166166
return msg_loc.format_map(tokens)

geonode/metadata/handlers/meta.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#########################################################################
2+
#
3+
# Copyright (C) 2026 OSGeo
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
#
18+
#########################################################################
19+
20+
import logging
21+
from html import unescape
22+
import re
23+
24+
from bs4 import BeautifulSoup
25+
26+
from geonode.base.models import ResourceBase
27+
from geonode.metadata.handlers.abstract import MetadataHandler
28+
29+
logger = logging.getLogger(__name__)
30+
31+
32+
class CleanupHandler(MetadataHandler):
    """Metadata handler that strips HTML markup from incoming instance values.

    The handler does not contribute to the schema nor read/write any resource
    field; its only job is to walk the submitted JSON instance during
    ``pre_deserialization`` and replace, in place, every string that contains
    HTML-like markup with its plain-text content. Each replacement is logged
    and reported as an error on the top-level field it belongs to.
    """

    # Anything that looks like an opening or closing HTML tag.
    _HTML_LIKE_PATTERN = re.compile(r"<\s*/?\s*[a-zA-Z][^>]*>")
    # Tags removed together with their content (not just unwrapped).
    _DANGEROUS_TAGS = ("script", "style", "noscript", "iframe", "object", "embed")

    @staticmethod
    def _preview(value, max_len=120):
        """Return ``repr(value)``, truncated to ``max_len`` chars with a trailing ellipsis."""
        text = repr(value)
        return text if len(text) <= max_len else f"{text[: max_len - 1]}…"

    @classmethod
    def _sanitize_string(cls, value: str):
        """Strip HTML markup from ``value``.

        Returns a ``(sanitized, changed)`` tuple. When ``value`` holds no
        HTML-like markup (even after unescaping entities) it is returned
        untouched and ``changed`` is ``False``.

        The unescape/parse/extract pass is repeated: ``get_text()`` decodes
        entities, so doubly-escaped markup nested inside a tag (e.g.
        ``<b>&amp;lt;script&amp;gt;...</b>``) would otherwise come out of a
        single pass as a literal ``<script>`` element.
        """
        current = value
        while True:
            normalized = unescape(current)
            if not cls._HTML_LIKE_PATTERN.search(normalized):
                break

            soup = BeautifulSoup(normalized, "html.parser")
            for tag in soup(cls._DANGEROUS_TAGS):
                tag.decompose()
            stripped = soup.get_text()

            # Only keep iterating while the text keeps shrinking: this
            # guarantees termination, and stops on tag-like text the parser
            # does not treat as markup (e.g. "< b>").
            progressed = len(stripped) < len(current)
            current = stripped
            if not progressed:
                break

        return current, current != value

    def _sanitize_instance(self, value, context, errors, path=None):
        """Recursively sanitize ``value`` in place and return it.

        Dicts and lists are traversed and their items replaced by the
        sanitized version; strings are cleaned via ``_sanitize_string``; any
        other type is returned unchanged. ``path`` is the list of keys/indexes
        leading to ``value`` and is used for logging and error reporting.
        """
        path = path or []

        if isinstance(value, dict):
            for key, nested_value in list(value.items()):
                nested_path = path + [str(key)]
                value[key] = self._sanitize_instance(nested_value, context, errors, nested_path)
            return value

        if isinstance(value, list):
            # Items are replaced by index, so the list size never changes while iterating.
            for idx, nested_value in enumerate(value):
                nested_path = path + [f"[{idx}]"]
                value[idx] = self._sanitize_instance(nested_value, context, errors, nested_path)
            return value

        if isinstance(value, str):
            sanitized, changed = self._sanitize_string(value)
            if changed:
                logger.warning(
                    "Sanitized potentially unsafe metadata field '%s': %s -> %s",
                    ".".join(path) if path else "ROOT",
                    self._preview(value),
                    self._preview(sanitized),
                )
                self._set_error(
                    errors,
                    path[0:1],  # set error on root field
                    self.localize_message(context, "metadata_error_sanitized", {}),
                )
            return sanitized

        return value

    def update_schema(self, jsonschema: dict, context: dict, lang=None):
        """Return the schema unchanged: this handler defines no fields."""
        return jsonschema

    def get_jsonschema_instance(
        self, resource: ResourceBase, field_name: str, context: dict, errors: dict, lang: str = None
    ):
        """No-op: this handler does not serialize any field."""
        pass

    def update_resource(
        self, resource: ResourceBase, field_name: str, json_instance: dict, context: dict, errors: dict, **kwargs
    ):
        """No-op: this handler does not store any field."""
        pass

    def pre_deserialization(self, resource, jsonschema: dict, instance: dict, partial: set, context: dict):
        """Sanitize every string in ``instance`` in place.

        Sanitization errors are recorded in ``context["errors"]``; the entry
        is created when the caller did not provide one.
        """
        errors = context.setdefault("errors", {})
        self._sanitize_instance(instance, context, errors)

geonode/metadata/manager.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def post_init(self):
5959
handler.post_init()
6060

6161
def _init_schema_context(self, lang):
62-
return {}
62+
return {"lang": lang} if lang else {}
6363

6464
def build_schema(self, lang=None):
6565
logger.debug(f"build_schema {lang}")
@@ -144,6 +144,7 @@ def update_schema_instance(self, resource, request_obj, lang=None, partial=None)
144144
handler.load_deserialization_context(resource, schema, context)
145145

146146
errors = {}
147+
context["errors"] = errors
147148

148149
for handler in self.handlers.values():
149150
handler.pre_deserialization(resource, schema, json_instance, partial, context)

geonode/metadata/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
JSONSCHEMA_BASE = os.path.join(PROJECT_ROOT, "metadata/schemas/base.json")
1414

1515
METADATA_HANDLERS = {
16+
"metadata_cleaner": "geonode.metadata.handlers.meta.CleanupHandler",
1617
"base": "geonode.metadata.handlers.base.BaseHandler",
1718
"thesaurus": "geonode.metadata.handlers.thesaurus.TKeywordsHandler",
1819
"hkeyword": "geonode.metadata.handlers.hkeyword.HKeywordHandler",

geonode/metadata/tests/tests.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,16 @@
2525

2626
from django.urls import reverse
2727
from django.contrib.auth import get_user_model
28-
from django.test import RequestFactory
28+
from django.test import RequestFactory, TestCase, override_settings
2929
from rest_framework import status
3030
from django.utils.translation import gettext as _
3131

3232
from rest_framework.test import APITestCase
33+
34+
from geonode.metadata.handlers.multilang import MultiLangHandler
3335
from geonode.metadata.settings import MODEL_SCHEMA
3436
from geonode.metadata.manager import metadata_manager, CACHE_KEY_SCHEMA
37+
from geonode.metadata.handlers.meta import CleanupHandler
3538
from geonode.metadata.api.views import (
3639
ProfileAutocomplete,
3740
MetadataLinkedResourcesAutocomplete,
@@ -919,7 +922,7 @@ def test_update_schema_instance_no_errors(self, mock_get_schema):
919922
mock_request.data = {"field1": "new_value1", "new_field2": "new_value2"}
920923
mock_request.user = self.test_user_1
921924

922-
expected_context = {"user": self.test_user_1}
925+
expected_context = {"user": self.test_user_1, "errors": {}}
923926

924927
mock_get_schema.return_value = self.fake_schema
925928

@@ -1151,3 +1154,58 @@ def test_delete_schema_conflict_returns_409(self, mock_get_schema):
11511154
url = self._url(self.resource.pk, "title")
11521155
response = self.client.delete(url)
11531156
self.assertEqual(response.status_code, status.HTTP_409_CONFLICT)
1157+
1158+
1159+
class CleanupHandlerTests(TestCase):
    """Checks that CleanupHandler strips markup from submitted metadata."""

    def setUp(self):
        # A handler plus a minimal resource owned by a throwaway user.
        self.handler = CleanupHandler()
        self.owner = get_user_model().objects.create_user(
            "cleanup_owner",
            "cleanup_owner@fakemail.com",
            "cleanup_owner_password",
            is_active=True,
        )
        self.resource = ResourceBase.objects.create(
            title="Cleanup Test Resource",
            uuid=str(uuid4()),
            owner=self.owner,
        )

    @override_settings(LANGUAGE_CODE="en")
    def test_pre_deserialization_sanitizes_nested_values_and_logs_warnings(self):
        """Top-level, nested-dict and list strings are cleaned, logged and flagged."""
        payload = {
            "title": "<i>xss</i><img src=/ onerror=\"alert('XSS');\" />",
            "details": {
                "summary": "plain text",
                "body": "<script>alert(1)</script>safe",
            },
            "items": ["ok", "<b>bad</b>"],
            "count": 3,
        }
        ctx = {"errors": {}}

        with self.assertLogs("geonode.metadata.handlers.meta", level="WARNING") as captured:
            self.handler.pre_deserialization(self.resource, {}, payload, partial=set(), context=ctx)

        # Values are rewritten in place; non-string values are left alone.
        self.assertEqual(payload["title"], "xss")
        self.assertEqual(payload["details"]["body"], "safe")
        self.assertEqual(payload["items"][1], "bad")
        self.assertEqual(payload["count"], 3)

        # One warning per sanitized field, identified by its dotted path.
        joined_output = "\n".join(captured.output)
        expected_fragments = (
            "Sanitized potentially unsafe metadata field 'title'",
            "Sanitized potentially unsafe metadata field 'details.body'",
            "Sanitized potentially unsafe metadata field 'items.[1]'",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, joined_output)

        # The error is attached to the root field.
        reported = ctx["errors"]
        self.assertIn("title", reported)
        self.assertIn("__errors", reported["title"])
        self.assertIn("metadata_error_sanitized", reported["title"]["__errors"])

    @override_settings(LANGUAGE_CODE="en", MULTILANG_FIELDS=["title"])
    def test_pre_deserialization_copies_sanitized_default_lang_value(self):
        """The multilang handler, run afterwards, copies the already-cleaned value."""
        payload = {
            "title_multilang_en": '<span>Hello</span><img src=x onerror="alert(1)" />',
        }
        ctx = {"errors": {}}
        multilang = MultiLangHandler()

        with self.assertLogs("geonode.metadata.handlers.meta", level="WARNING") as captured:
            self.handler.pre_deserialization(self.resource, {}, payload, partial=set(), context=ctx)
            multilang.pre_deserialization(self.resource, {}, payload, partial=set(), context=ctx)

        self.assertEqual(payload["title_multilang_en"], "Hello")
        self.assertEqual(payload["title"], "Hello")

        joined_output = "\n".join(captured.output)
        self.assertIn("Sanitized potentially unsafe metadata field 'title_multilang_en'", joined_output)
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<?xml version='1.0' encoding='UTF-8'?>
2+
<rdf:RDF xmlns="http://www.w3.org/2004/02/skos/core#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/">
3+
<ConceptScheme rdf:about="https://i18n.geonode.org">
4+
<dc:title>Localizzazione labels</dc:title>
5+
<dc:title xml:lang="en">Labels localization</dc:title>
6+
<dc:title xml:lang="it">Localizzazione labels</dc:title>
7+
<dcterms:issued>2026-06-10T16:54:03</dcterms:issued>
8+
<dcterms:modified>2026-06-10T16:54:03</dcterms:modified>
9+
</ConceptScheme>
10+
<Concept rdf:about="metadata_error_empty_field">
11+
<inScheme rdf:resource="https://i18n.geonode.org"/>
12+
<altLabel>metadata_error_empty_field fieldname:{fieldname}</altLabel>
13+
<prefLabel xml:lang="en">Missing value</prefLabel>
14+
<prefLabel xml:lang="it">Valore richiesto</prefLabel>
15+
</Concept>
16+
<Concept rdf:about="metadata_error_indexing">
17+
<inScheme rdf:resource="https://i18n.geonode.org"/>
18+
<altLabel>metadata_error_indexing exc:{exc}</altLabel>
19+
<prefLabel xml:lang="en">Error while indexing metadata: {exc}</prefLabel>
20+
<prefLabel xml:lang="it">Errore nell'indicizzazione dei metadati: {exc}</prefLabel>
21+
</Concept>
22+
<Concept rdf:about="metadata_error_post_save">
23+
<inScheme rdf:resource="https://i18n.geonode.org"/>
24+
<altLabel>metadata_error_post_save handler:{handler} exc:{exc}</altLabel>
25+
<prefLabel xml:lang="en">Error in post-save procedure: {exc}</prefLabel>
26+
<prefLabel xml:lang="it">Errore nella procedura di post-save: {exc}</prefLabel>
27+
</Concept>
28+
<Concept rdf:about="metadata_error_pre_save">
29+
<inScheme rdf:resource="https://i18n.geonode.org"/>
30+
<altLabel>metadata_error_pre_save handler:{handler} exc:{exc}</altLabel>
31+
<prefLabel xml:lang="en">Error in pre-save procedure: {exc}</prefLabel>
32+
<prefLabel xml:lang="it">Errore nella procedura di pre-save: {exc}</prefLabel>
33+
</Concept>
34+
<Concept rdf:about="metadata_error_sanitized">
35+
<inScheme rdf:resource="https://i18n.geonode.org"/>
36+
<altLabel>metadata_error_sanitized</altLabel>
37+
<prefLabel xml:lang="en">WARNING: possible injection attempt, this field has been sanitized. Reload this page.</prefLabel>
38+
<prefLabel xml:lang="it">ATTENZIONE: possibile injection, il campo è stato modificato. Ricaricare la pagina.</prefLabel>
39+
</Concept>
40+
<Concept rdf:about="metadata_error_save">
41+
<inScheme rdf:resource="https://i18n.geonode.org"/>
42+
<altLabel>metadata_error_save: {exc}</altLabel>
43+
<prefLabel xml:lang="en">Error while saving metadata: {exc}</prefLabel>
44+
<prefLabel xml:lang="it">Errore nel salvataggio dei metadati: {exc}</prefLabel>
45+
</Concept>
46+
<Concept rdf:about="metadata_error_store">
47+
<inScheme rdf:resource="https://i18n.geonode.org"/>
48+
<altLabel>metadata_error_store fieldname:{fieldname} exc:{exc}</altLabel>
49+
<prefLabel xml:lang="en">Error while saving metadata: {exc}</prefLabel>
50+
<prefLabel xml:lang="it">Errore nel salvataggio dei metadati: {exc}</prefLabel>
51+
</Concept>
52+
<Concept rdf:about="metadata_error_update">
53+
<inScheme rdf:resource="https://i18n.geonode.org"/>
54+
<altLabel>metadata_error_update fieldname:{fieldname} handler:{handler} exc:{exc}</altLabel>
55+
<prefLabel xml:lang="en">Error while updating metadata: {exc}</prefLabel>
56+
<prefLabel xml:lang="it">Errore nell'aggiornamento dei metadati: {exc}</prefLabel>
57+
</Concept>
58+
<Concept rdf:about="metadata_sparse_error_parse">
59+
<inScheme rdf:resource="https://i18n.geonode.org"/>
60+
<altLabel>metadata_sparse_error_parse fieldname:{fieldname} type:{type} value:{value}</altLabel>
61+
<prefLabel xml:lang="en">Parsing error</prefLabel>
62+
<prefLabel xml:lang="it">Errore nel parsing</prefLabel>
63+
</Concept>
64+
<Concept rdf:about="metadata_sparse_error_type">
65+
<inScheme rdf:resource="https://i18n.geonode.org"/>
66+
<altLabel>metadata_sparse_error_type fieldname:{fieldname} type:{type}</altLabel>
67+
<prefLabel xml:lang="en">Unexpected field type: {type}</prefLabel>
68+
<prefLabel xml:lang="it">Tipo inaspettato: {type}</prefLabel>
69+
</Concept>
70+
</rdf:RDF>

0 commit comments

Comments
 (0)