11"""Resource class containing all logic for creating, checking, and updating resources."""
22
3+ import hashlib
34import logging
45import warnings
56from datetime import datetime
@@ -358,15 +359,17 @@ def check_required_fields(self, ignore_fields: ListTuple[str] = tuple()) -> None
358359 self .check_url_filetoupload ()
359360 self ._check_required_fields ("resource" , ignore_fields )
360361
361- def _get_files (self ) -> Dict :
362- """Return the files parameter for CKANAPI
362+ def _get_hash (self ) -> str :
363+ """Return the hash of file to upload
363364
364365 Returns:
365- Dict: files parameter for CKANAPI
366+ str: Hash of file to upload
366367 """
367- if self .file_to_upload is None :
368- return {}
369- return {"upload" : self .file_to_upload }
368+ md5 = hashlib .md5 ()
369+ f = open (self .file_to_upload , "rb" )
370+ while chunk := f .read (4096 ):
371+ md5 .update (chunk )
372+ return md5 .hexdigest ()
370373
371374 def _resource_merge_hdx_update (
372375 self ,
@@ -384,14 +387,21 @@ def _resource_merge_hdx_update(
384387 None
385388 """
386389 data_updated = kwargs .pop ("data_updated" , self .data_updated )
387- if data_updated and not self .file_to_upload :
390+ files = {}
391+ if self .file_to_upload :
392+ hash = self ._get_hash ()
393+ if hash != self .data .get ("hash" ): # update file if hash has changed
394+ files ["upload" ] = self .file_to_upload
395+ self .old_data ["hash" ] = hash
396+ elif data_updated :
388397 # Should not output timezone info here
389398 self .old_data ["last_modified" ] = now_utc_notz ().isoformat (
390399 timespec = "microseconds"
391400 )
392401 self .data_updated = False
393- # old_data will be merged into data in the next step
394- self ._merge_hdx_update ("resource" , "id" , self ._get_files (), True , ** kwargs )
402+
403+ # old_data will be merged into data in the next step
404+ self ._merge_hdx_update ("resource" , "id" , files , True , ** kwargs )
395405
396406 def update_in_hdx (self , ** kwargs : Any ) -> None :
397407 """Check if resource exists in HDX and if so, update it. To indicate
@@ -441,7 +451,11 @@ def create_in_hdx(self, **kwargs: Any) -> None:
441451 del self .data ["url" ]
442452 self ._resource_merge_hdx_update (** kwargs )
443453 else :
444- self ._save_to_hdx ("create" , "name" , self ._get_files (), True )
454+ files = {}
455+ if self .file_to_upload :
456+ files ["upload" ] = self .file_to_upload
457+ self .data ["hash" ] = self ._get_hash ()
458+ self ._save_to_hdx ("create" , "name" , files , True )
445459
446460 def delete_from_hdx (self ) -> None :
447461 """Deletes a resource from HDX
0 commit comments