Skip to content

Commit c78706c

Browse files
[Fixes #14000] Refact extraMetadata (#14286) (#14351)
* Squashed PR #14001 (cherry picked from commit a9ade43) Co-authored-by: Emanuele Tajariol <etj@geo-solutions.it>
1 parent 2f11330 commit c78706c

16 files changed

Lines changed: 688 additions & 315 deletions

File tree

geonode/api/resourcebase_api.py

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#
1818
#########################################################################
1919
from geonode.base.enumerations import LAYER_TYPES
20+
import json
2021
import logging
2122

2223
from django.db.models import Q
@@ -45,6 +46,7 @@
4546
from geonode.groups.models import GroupProfile
4647
from geonode.utils import check_ogc_backend
4748
from geonode.security.utils import get_visible_resources
49+
from geonode.metadata.models import SparseField
4850
from .authentication import OAuthAuthentication
4951
from .authorization import GeoNodeAuthorization, GeonodeApiKeyAuthentication
5052

@@ -133,6 +135,11 @@ class CommonModelApi(ModelResource):
133135
"metadata_only",
134136
]
135137

138+
@staticmethod
139+
def _extract_deprecated_metadata_filters(filters):
140+
"""Extract legacy ``metadata__*`` query params for sparse-field lookup."""
141+
return {key: value for key, value in filters.items() if key.startswith("metadata__")}
142+
136143
def build_filters(self, filters=None, ignore_bad_filters=False, **kwargs):
137144
if filters is None:
138145
filters = {}
@@ -142,7 +149,9 @@ def build_filters(self, filters=None, ignore_bad_filters=False, **kwargs):
142149
if "app_type__in" in filters:
143150
orm_filters.update({"resource_type": filters["app_type__in"].lower()})
144151

145-
_metadata = {f"metadata__{_k}": _v for _k, _v in filters.items() if _k.startswith("metadata__")}
152+
# Deprecated compatibility: keep supporting metadata__* filters
153+
# by mapping them to SparseField lookups in apply_filters.
154+
_metadata = self._extract_deprecated_metadata_filters(filters)
146155
if _metadata:
147156
orm_filters.update({"metadata_filters": _metadata})
148157

@@ -208,7 +217,7 @@ def apply_filters(self, request, applicable_filters):
208217
filtered = self.filter_h_keywords(filtered, keywords)
209218

210219
if metadata_filters:
211-
filtered = filtered.filter(**metadata_filters)
220+
filtered = self.filter_sparse_fields(filtered, metadata_filters)
212221

213222
# return filtered
214223
return get_visible_resources(
@@ -236,6 +245,75 @@ def filter_h_keywords(self, queryset, keywords):
236245
filtered = queryset
237246
return filtered
238247

248+
def filter_sparse_fields(self, queryset, metadata_filters):
    """
    Filter queryset by sparse field values (metadata custom fields).

    Every ``metadata__<key>=<value>`` pair is matched against ``SparseField``
    rows whose name starts with ``extra_`` and whose value is a JSON object.
    When several pairs are given, a single sparse field has to satisfy all
    of them: this mirrors the former ``queryset.filter(**metadata_filters)``
    lookup, where all conditions applied to the same related row.

    Args:
        queryset: ResourceBase queryset to filter
        metadata_filters: dict with keys like "metadata__key" and values to
            match; keys without the "metadata__" prefix are ignored

    Returns:
        ``queryset`` untouched when no ``metadata__*`` filter is supplied,
        otherwise only the resources owning a matching sparse field (an
        empty queryset when nothing matches)
    """
    prefix = "metadata__"
    # Map "fieldname" -> expected value rendered as text, so that the final
    # comparison is type-agnostic (query-string values are always strings).
    expected = {
        key[len(prefix):]: str(value)
        for key, value in (metadata_filters or {}).items()
        if key.startswith(prefix)
    }
    if not expected:
        return queryset

    # Text prefilter to reduce the set to deserialize; the actual semantic
    # match is performed after json.loads.
    candidates = SparseField.objects.filter(name__startswith="extra_")
    for field_name, wanted in expected.items():
        candidates = candidates.filter(value__icontains=field_name).filter(value__icontains=wanted)

    matching_pks = set()
    for sf in candidates:
        raw_value = sf.value
        try:
            # ExtraMetadata was a JSONField: migrated values are serialized objects.
            stored_value = json.loads(raw_value) if raw_value and raw_value.startswith("{") else raw_value
        except (json.JSONDecodeError, TypeError):
            logger.warning(
                "Bad migrated ExtraMetadata into SparseField: %s for resource %s:%s",
                sf.name,
                sf.resource.id,
                sf.resource.title,
            )
            continue

        if not isinstance(stored_value, dict):
            logger.warning(
                "Unexpected non-dict value in SparseField: %s for resource %s:%s",
                sf.name,
                sf.resource.id,
                sf.resource.title,
            )
            continue

        # A key that is absent must never match: comparing str(None) would
        # otherwise make "?metadata__x=None" select rows lacking "x".
        if all(name in stored_value and str(stored_value[name]) == wanted for name, wanted in expected.items()):
            # Use the raw FK column: avoids loading the related resource for
            # every candidate row.
            matching_pks.add(sf.resource_id)

    if not matching_pks:
        return queryset.none()
    return queryset.filter(pk__in=matching_pks)
316+
239317
def get_list(self, request, **kwargs):
240318
"""
241319
Returns a serialized list of resources.
@@ -249,6 +327,7 @@ def get_list(self, request, **kwargs):
249327
# impossible.
250328
base_bundle = self.build_bundle(request=request)
251329
objects = self.obj_get_list(bundle=base_bundle, **self.remove_api_resource_names(kwargs))
330+
252331
sorted_objects = self.apply_sorting(objects, options=request.GET)
253332

254333
paginator = self._meta.paginator_class(

geonode/api/tests.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
from geonode.layers.models import Dataset
3838
from geonode.documents.models import Document
3939
from geonode.base.models import (
40-
ExtraMetadata,
4140
Thesaurus,
4241
ThesaurusLabel,
4342
ThesaurusKeyword,
@@ -52,6 +51,7 @@
5251
from geonode.tests.base import GeoNodeBaseTestSupport
5352
from geonode.base.populate_test_data import all_public, create_models, remove_models
5453
from geonode.security.registry import permissions_registry
54+
from geonode.metadata.models import SparseField
5555
from geonode.assets.models import Asset
5656
from django.core.files.uploadedfile import SimpleUploadedFile
5757
from geonode.base.models import Link
@@ -536,35 +536,49 @@ def test_category_filters(self):
536536
self.assertEqual(len(self.deserialize(resp)["objects"]), 5)
537537

538538
def test_metadata_filters(self):
539-
"""Test category filtering"""
539+
"""Test metadata filtering against sparse fields."""
540540
_r = Dataset.objects.first()
541-
_m = ExtraMetadata.objects.create(
541+
542+
# Create sparse field using migrated ExtraMetadata format.
543+
SparseField.objects.update_or_create(
542544
resource=_r,
543-
metadata={
544-
"name": "metadata-updated",
545-
"slug": "metadata-slug-updated",
546-
"help_text": "this is the help text-updated",
547-
"field_type": "str-updated",
548-
"value": "my value-updated",
549-
"category": "category",
550-
},
545+
name="extra_1",
546+
defaults={"value": json.dumps({"category": "category"})},
551547
)
552548

549+
# Verify sparse field was created
550+
_sf_check = SparseField.objects.filter(resource=_r, name="extra_1")
551+
self.assertTrue(_sf_check.exists(), "SparseField should have been created")
552+
self.assertEqual(_sf_check.first().value, json.dumps({"category": "category"}))
553+
553554
list_url = reverse("api_dispatch_list", kwargs={"api_name": "api", "resource_name": "datasets"})
554-
_r.metadata.add(_m)
555-
# check we get the correct layers number returnered filtering on one
556-
# and then two different categories
557-
filter_url = f"{list_url}?metadata__category=category"
558555

556+
# Test 1: Filter for existing category value
557+
filter_url = f"{list_url}?metadata__category=category"
559558
resp = self.api_client.get(filter_url)
560559
self.assertValidJSONResponse(resp)
561-
self.assertEqual(len(self.deserialize(resp)["objects"]), 1)
562560

563-
filter_url = f"{list_url}?metadata__category=not-existing-category"
561+
result = self.deserialize(resp)
562+
result_count = len(result["objects"])
563+
logger.debug(f"Test 1 - Filter metadata__category=category: Got {result_count} results (expected 1)")
564+
logger.debug(f"Result PKs: {[obj['id'] for obj in result['objects']]}")
565+
logger.debug(f"Created resource PK: {_r.pk}")
564566

567+
self.assertEqual(result_count, 1, f"Expected 1 result, got {result_count}")
568+
self.assertEqual(result["objects"][0]["id"], _r.pk)
569+
570+
# Test 2: Filter for non-existing category value
571+
filter_url = f"{list_url}?metadata__category=not-existing-category"
565572
resp = self.api_client.get(filter_url)
566573
self.assertValidJSONResponse(resp)
567-
self.assertEqual(len(self.deserialize(resp)["objects"]), 0)
574+
575+
result = self.deserialize(resp)
576+
result_count = len(result["objects"])
577+
logger.debug(
578+
f"Test 2 - Filter metadata__category=not-existing-category: Got {result_count} results (expected 0)"
579+
)
580+
581+
self.assertEqual(result_count, 0, f"Expected 0 results, got {result_count}")
568582

569583
def test_tag_filters(self):
570584
"""Test keywords filtering"""

0 commit comments

Comments
 (0)