Skip to content

Commit a73c0ea

Browse files
committed
Add resource matching to a dataset for a resource
1 parent 8237e40 commit a73c0ea

2 files changed

Lines changed: 41 additions & 5 deletions

File tree

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ ply==3.11
175175
# libhxl
176176
pockets==0.9.1
177177
# via sphinxcontrib-napoleon
178-
pre-commit==4.5.0
178+
pre-commit==4.5.1
179179
# via hdx-python-api (pyproject.toml)
180180
pyasn1==0.6.1
181181
# via

src/hdx/data/resource.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import Any, Dict, List, Optional, Tuple, Union
99

1010
import hdx.data.dataset
11+
import hdx.data.resource_matcher
1112
from hdx.api.configuration import Configuration
1213
from hdx.api.utilities.date_helper import DateHelper
1314
from hdx.api.utilities.size_hash import get_size_and_hash
@@ -460,6 +461,36 @@ def _resource_merge_hdx_update(
460461
self._merge_hdx_update("resource", "id", files, True, **kwargs)
461462
return status
462463

464+
def _get_resource_id(self, **kwargs: Any) -> Optional[str]:
465+
"""Helper function to get resource id if available from given resource or by
466+
comparing ot a given dataset's resources.
467+
468+
Args:
469+
**kwargs: See below
470+
dataset (Dataset): Existing dataset if available to obtain resource id
471+
472+
Returns:
473+
Optional[str]: Resource id or None
474+
"""
475+
loadedid = self.data.get("id")
476+
if loadedid is None:
477+
dataset = kwargs.get("dataset")
478+
if dataset:
479+
dataset_resources = dataset.get_resources()
480+
matching_index = (
481+
hdx.data.resource_matcher.ResourceMatcher.match_resource_list(
482+
dataset_resources, self
483+
)
484+
)
485+
if matching_index:
486+
matching_resource = dataset_resources[matching_index]
487+
loadedid = matching_resource.get("id")
488+
if loadedid:
489+
self.data["id"] = loadedid
490+
else:
491+
loadedid = None
492+
return loadedid
493+
463494
def update_in_hdx(self, **kwargs: Any) -> int:
464495
"""Check if resource exists in HDX and if so, update it. To indicate
465496
that the data in an external resource (given by a URL) has been
@@ -482,11 +513,13 @@ def update_in_hdx(self, **kwargs: Any) -> int:
482513
data_updated (bool): If True, set last_modified to now. Defaults to False.
483514
date_data_updated (datetime): Date to use for last_modified. Default to None.
484515
force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False.
516+
dataset (Dataset): Existing dataset if available to obtain resource id
485517
486518
Returns:
487519
int: Status code
488520
"""
489521
self.check_both_url_filetoupload()
522+
_ = self._get_resource_id(**kwargs)
490523
self._check_load_existing_object("resource", "id")
491524
return self._resource_merge_hdx_update(**kwargs)
492525

@@ -513,15 +546,18 @@ def create_in_hdx(self, **kwargs: Any) -> int:
513546
data_updated (bool): If True, set last_modified to now. Defaults to False.
514547
date_data_updated (datetime): Date to use for last_modified. Default to None.
515548
force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False.
549+
dataset (Dataset): Existing dataset if available to obtain resource id
516550
517551
Returns:
518552
int: Status code
519553
"""
520554
self.check_both_url_filetoupload()
521-
id = self.data.get("id")
522-
if id and self._load_from_hdx("resource", id):
523-
logger.warning(f"{'resource'} exists. Updating {id}")
524-
return self._resource_merge_hdx_update(**kwargs)
555+
loadedid = self._get_resource_id(**kwargs)
556+
if loadedid:
557+
if self._load_from_hdx("resource", loadedid):
558+
logger.warning(f"{'resource'} exists. Updating {loadedid}")
559+
return self._resource_merge_hdx_update(**kwargs)
560+
logger.warning(f"Failed to load resource with id {loadedid}")
525561

526562
self.set_types()
527563
self.correct_format(self.data)

0 commit comments

Comments
 (0)