11"""Resource class containing all logic for creating, checking, and updating resources."""
22
3- import hashlib
43import logging
54import warnings
65from datetime import datetime
1211from hdx .api .configuration import Configuration
1312from hdx .api .utilities .date_helper import DateHelper
1413from hdx .api .utilities .filestore_helper import FilestoreHelper
14+ from hdx .api .utilities .size_hash import get_size_and_hash
1515from hdx .data .hdxobject import HDXError , HDXObject
1616from hdx .data .resource_view import ResourceView
1717from hdx .utilities .dateparse import now_utc , now_utc_notz , parse_date
@@ -359,18 +359,6 @@ def check_required_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None
359359 self .check_url_filetoupload ()
360360 self ._check_required_fields ("resource" , ignore_fields )
361361
def _get_hash(self) -> str:
    """Return the MD5 hash of the file to upload.

    The file named by ``self.file_to_upload`` is opened in binary mode and
    read in 4096-byte chunks, so the whole file is never held in memory.

    Returns:
        str: Hexadecimal MD5 digest of the file to upload
    """
    md5 = hashlib.md5()
    # Use a context manager so the file handle is always released, including
    # when a read fails part-way through; previously the handle was opened
    # and never closed.
    with open(self.file_to_upload, "rb") as f:
        while chunk := f.read(4096):
            md5.update(chunk)
    return md5.hexdigest()
373-
374362 def _resource_merge_hdx_update (
375363 self ,
376364 ** kwargs : Any ,
@@ -389,9 +377,16 @@ def _resource_merge_hdx_update(
389377 data_updated = kwargs .pop ("data_updated" , self .data_updated )
390378 files = {}
391379 if self .file_to_upload :
392- hash = self ._get_hash ()
393- if hash != self .data .get ("hash" ): # update file if hash has changed
380+ file_format = self .old_data .get ("format" , "" ).lower ()
381+ size , hash = get_size_and_hash (self .file_to_upload , file_format )
382+ if size == self .data .get ("size" ) and hash == self .data .get ("hash" ):
383+ # ensure last_modified is not updated if file hasn't changed
384+ if "last_modified" in self .data :
385+ del self .data ["last_modified" ]
386+ else :
387+ # update file if size or hash has changed
394388 files ["upload" ] = self .file_to_upload
389+ self .old_data ["size" ] = size
395390 self .old_data ["hash" ] = hash
396391 elif data_updated :
397392 # Should not output timezone info here
@@ -403,7 +398,7 @@ def _resource_merge_hdx_update(
403398 # old_data will be merged into data in the next step
404399 self ._merge_hdx_update ("resource" , "id" , files , True , ** kwargs )
405400
406- def update_in_hdx (self , ** kwargs : Any ) -> None :
401+ def update_in_hdx (self , ** kwargs : Any ) -> int :
407402 """Check if resource exists in HDX and if so, update it. To indicate
408403 that the data in an external resource (given by a URL) has been
409404 updated, set data_updated to True, which will result in the resource
@@ -418,7 +413,7 @@ def update_in_hdx(self, **kwargs: Any) -> None:
418413 date_data_updated (datetime): Date to use for last_modified. Default to None.
419414
420415 Returns:
421- None
416+ int: Return status code
422417 """
423418 self ._check_load_existing_object ("resource" , "id" )
424419 if self .file_to_upload and "url" in self .data :
@@ -454,7 +449,9 @@ def create_in_hdx(self, **kwargs: Any) -> None:
454449 files = {}
455450 if self .file_to_upload :
456451 files ["upload" ] = self .file_to_upload
457- self .data ["hash" ] = self ._get_hash ()
452+ self .data ["size" ], self .data ["hash" ] = get_size_and_hash (
453+ self .file_to_upload , self .get_format ()
454+ )
458455 self ._save_to_hdx ("create" , "name" , files , True )
459456
460457 def delete_from_hdx (self ) -> None :
0 commit comments