3838except ImportError as import_error :
3939 gcs_import_error = import_error
4040
41+ # Import azure packages if available
42+ # Defer raising any ImportError until the resource is instantiated
43+ try :
44+ from azure .core import credentials , utils
45+
46+ # pylint: disable=redefined-outer-name,unused-import
47+ from azure .storage import blob , fileshare
48+
49+ azure_import_error = None
50+ except ImportError as import_error :
51+ azure_import_error = import_error
52+
4153# pylint: enable=invalid-name
4254
4355######################
@@ -76,6 +88,7 @@ def create_resource(uri_or_path):
7688 - ``file`` or empty: Local filesystem resource
7789 - ``s3``: Amazon S3 resource
7890 - ``gs``: Google Cloud Storage resource
91+ - ``https``: Azure Storage resource (files or blobs)
7992
8093 Returns
8194 -------
@@ -94,6 +107,14 @@ def create_resource(uri_or_path):
94107 return AmazonS3Resource (uri_or_path )
95108 elif uri_info .scheme == "gs" :
96109 return GoogleCloudStorageResource (uri_or_path )
110+ elif uri_info .scheme == "https" :
111+ # Create the corresponding instance of Azure storage resource
112+ # based on the well-known name pattern
113+ if AzureStorageResourceMixin .is_netloc_of_file_share (uri_info .netloc ):
114+ return AzureStorageFileResource (uri_or_path )
115+ else :
116+ # Assume we have a netloc of a blob
117+ return AzureStorageBlobResource (uri_or_path )
97118 elif scheme == "file" :
98119 # Reject URI if authority is not empty
99120 if uri_info .netloc :
@@ -503,7 +524,7 @@ def create_parent(self):
503524class GoogleCloudStorageResource (FilesystemResource ):
504525 """Google Cloud Storage Resource
505526
506- By default it reads the configuration from standard location.
527+ By default, it reads the configuration from standard location.
507528 """
508529
509530 def __init__ (self , uri ):
@@ -733,3 +754,313 @@ def create_parent(self):
733754
734755
735756# pylint: enable=no-member
757+
758+
class AzureStorageResourceMixin:
    """Azure compatible Storage Resource Mixin

    Among all the Azure storages, Khiops supports only (via its specific driver):
    - Files
    - Blobs (Binary Large Objects)

    For shared Files, the URI pattern of a resource is the following:
        https://<storage-account-name>.file.core.windows.net/<share name>/...
        <folders>/<file name>
    One may confuse the "share name" with a "folder name"
    because nothing helps distinguish one from the other.

    For blobs, the URI pattern of a resource is the following:
        https://<storage-account-name>.blob.core.windows.net/<container name>/...
        <folders>/<file name>
    One may confuse the "container name" with a "folder name"
    because nothing helps distinguish one from the other.

    By default, this resource reads the configuration from standard location
    (the ``AZURE_STORAGE_CONNECTION_STRING`` environment variable for the moment).

    Notes:
        This mixin reads ``self.uri_info`` right after calling
        ``super().__init__(uri)``; it presumably relies on the next class in the
        MRO to set that attribute (to be confirmed against the base resource).
    """

    def __init__(self, uri):
        """
        Azure Storage Resource initializer common to Files and Blobs

        Parameters
        ----------
        uri : str
            URI of the resource. Its path must start with the share name
            (file shares) or the container name (blobs).

        Raises
        ------
        `ImportError`
            If the Azure packages could not be imported.
        `ValueError`
            If the connection string environment variable is not set or if the
            URI does not contain a share/container name.
        """

        # Stop initialization if Azure modules are not available
        if azure_import_error is not None:
            warnings.warn(
                "Could not import azure modules. "
                "Make sure you have installed the azure.core, "
                "azure.storage.blob and azure.storage.fileshare packages to "
                "access Azure Storage files and blobs."
            )
            raise azure_import_error

        super().__init__(uri)

        # Create the authentication object using the connection string.
        # Fail early with an explicit message instead of letting the SDK parser
        # choke on a `None` value.
        connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
        if not connection_string:
            raise ValueError(
                "The 'AZURE_STORAGE_CONNECTION_STRING' environment variable "
                "must be set to access Azure Storage resources."
            )
        mappings = utils.parse_connection_string(connection_string)
        creds = credentials.AzureNamedKeyCredential(
            mappings.get("accountname"), mappings.get("accountkey")
        )

        # Allow the extraction from the URL, the "share name" (for file shares)
        # or the "container name" (for blobs)
        # assuming it is the first part of the path.
        # Warning : parts[0] contains '/'
        parts = self._splitall(self.uri_info.path)
        if len(parts) < 2 or not parts[1]:
            raise ValueError(
                f"Azure Storage URI '{uri}' must contain a share name "
                "or a container name as the first part of its path."
            )
        root_url = f"{self.uri_info.scheme}://{self.uri_info.netloc}/{parts[1]}"

        if self.is_netloc_of_file_share(self.uri_info.netloc):
            # Instantiate a ShareClient and attach it to the current instance.
            # Later, instances of ShareFileClient or ShareDirectoryClient
            # will be created.
            self.azure_share_client = fileshare.ShareClient.from_share_url(
                share_url=root_url, credential=creds
            )
        else:
            # For blobs : most of the time, there is no need to distinguish
            # between the container URL and the remaining parts of the path.
            # The `BlobClient` knows how to handle the absolute path.
            # But for blobs listing, there is an exception
            # as we need a `ContainerClient`

            # Instantiate a `ContainerClient` and attach it to the current instance.
            self.azure_blob_container_client = blob.ContainerClient.from_container_url(
                container_url=root_url, credential=creds
            )

            # Instantiate a `BlobClient` and attach it to the current instance.
            self.azure_blob_client = blob.BlobClient.from_blob_url(
                blob_url=self.uri_info.geturl(), credential=creds
            )

        # Path of the resource relative to the share/container.
        # It is used to address "Shared Files" and as a listing prefix for blobs;
        # otherwise the blob client manages with absolute paths
        self.relative_remaining_path = "/".join(parts[2:])

    @staticmethod
    def _splitall(path):
        """
        Build a list of path parts from a path given as a str

        The path is always split on '/' (POSIX rules) because it comes from a
        URL, whatever the platform the code runs on. For an absolute path the
        first part is the root (``"/"``).
        """
        # Local import: URL paths must not depend on the platform flavor of
        # `os.path`
        import posixpath  # pylint: disable=import-outside-toplevel

        allparts = []
        while True:
            head, tail = posixpath.split(path)
            if head == path:  # sentinel for absolute paths
                allparts.insert(0, head)
                break
            if tail == path:  # sentinel for relative paths
                allparts.insert(0, tail)
                break
            path = head
            allparts.insert(0, tail)
        return allparts

    @staticmethod
    def is_netloc_of_file_share(netloc):
        """
        Return ``True`` if `netloc` matches the well-known host name pattern
        of an Azure file share (``*.file.core.windows.net``)
        """
        return netloc.endswith(".file.core.windows.net")
867+
868+
class AzureStorageFileResource(AzureStorageResourceMixin, FilesystemResource):
    """Azure compatible Storage File Resource

    The remote "file share" on Azure MUST exist before any call

    """

    def _get_file_client(self):
        """Return a file client targeting the path of this resource"""
        return self.azure_share_client.get_file_client(self.relative_remaining_path)

    def _get_directory_client(self):
        """Return a directory client targeting the path of this resource"""
        return self.azure_share_client.get_directory_client(
            self.relative_remaining_path
        )

    def write(self, data):
        """
        Upload `data` as the content of the target file

        Notes:
            FIXME :
            In order to be consistent with all the other filesystems,
            if the directory hierarchy does not exist,
            it must be created
        """
        self._get_file_client().upload_file(data)

    def exists(self):
        """
        Check if the target resource exists (file or directory)
        """
        # Both clients are required because
        # - the check against the file client is ``False``
        #   if the target is a directory.
        # - the check against the directory client is ``False``
        #   if the target is a file.
        return self._get_file_client().exists() or self._get_directory_client().exists()

    def remove(self):
        """
        Remove the target resource whether it is a file or a directory.

        The deletion is first attempted as a file, then as a directory.
        If both attempts fail, the error of the file deletion is raised,
        chained to the error of the directory deletion.
        """
        try:
            self._get_file_client().delete_file()
            # If the deletion of the file succeeds then return immediately
            return
        except Exception as delete_error:  # pylint: disable=broad-exception-caught
            delete_file_error = delete_error

        try:
            self._get_directory_client().delete_directory()
            # If the deletion of the directory succeeds then return immediately
            return
        except Exception as delete_error:  # pylint: disable=broad-exception-caught
            delete_directory_error = delete_error

        # Keep both failures visible in the traceback
        raise delete_file_error from delete_directory_error

    def copy_from_local(self, local_path):
        """
        Upload the local file at `local_path` to the target file

        Notes:
            Compared to other storages implementations,
            if the parent directories are missing they are NOT created.
            The caller must ensure the whole hierarchy exist otherwise an error
            will occur.
        """
        # Binary mode: the content must be uploaded byte-for-byte, without any
        # decoding or newline translation
        with open(local_path, "rb") as input_file:
            self._get_file_client().upload_file(input_file)

    def copy_to_local(self, local_path):
        """
        Download the target file to the local file at `local_path`
        """
        # No `encoding` here: it is not allowed in binary mode
        with open(local_path, "wb") as output_file:
            self._get_file_client().download_file().readinto(output_file)

    def list_dir(self):
        """
        List the files (not the directories) of the current directory

        Notes:
            This is not a recursive listing operation
        """
        return [
            item["name"]
            for item in self._get_directory_client().list_directories_and_files()
            if not item["is_directory"]
        ]

    def make_dir(self):
        """
        Create the target directory if it does not exist

        Notes:
            To avoid any exception while attempting to create an existing directory,
            that would occur otherwise, its existence is first checked
        """
        if not self._get_directory_client().exists():
            self.azure_share_client.create_directory(self.relative_remaining_path)

    def read(self, size=None):
        """
        Read the content of the target file

        Parameters
        ----------
        size : int, optional
            Number of bytes to read from the start of the file.
            By default the whole file is read.
        """
        # The SDK requires an offset whenever a length is provided
        offset = None if size is None else 0
        downloader = self._get_file_client().download_file(offset=offset, length=size)
        return downloader.readall()

    def create_child(self, file_name):
        """Create the resource of the child `file_name` of this resource"""
        return create_resource(child_uri_info(self.uri_info, file_name).geturl())

    def create_parent(self):
        """Create the resource of the parent of this resource"""
        return create_resource(parent_uri_info(self.uri_info).geturl())
994+
995+
class AzureStorageBlobResource(AzureStorageResourceMixin, FilesystemResource):
    """Azure compatible Storage Blob Resource

    The remote "container" on Azure MUST exist before any call.

    """

    def read(self, size=None):
        """
        Read the content of the target blob

        Parameters
        ----------
        size : int, optional
            Number of bytes to read from the start of the blob.
            By default the whole blob is read.
        """
        # The SDK requires an offset whenever a length is provided
        offset = None if size is None else 0
        downloader = self.azure_blob_client.download_blob(offset=offset, length=size)
        return downloader.readall()

    def write(self, data):
        """
        Upload `data` as the content of the target blob

        Notes:
            in order to be consistent with the other drivers,
            this method will overwrite the destination blob if it exists.
            This is not the default behavior.
        """
        self.azure_blob_client.upload_blob(data, overwrite=True)

    def exists(self):
        """Check if the target blob exists"""
        return self.azure_blob_client.exists()

    def remove(self):
        """Delete the target blob"""
        self.azure_blob_client.delete_blob()

    def copy_from_local(self, local_path):
        """
        Upload the local file at `local_path` to the target blob

        Notes:
            in order to be consistent with the other drivers,
            this method will overwrite the destination blob if it exists.
            This is not the default behavior.
        """
        # Binary mode: the content must be uploaded byte-for-byte, without any
        # decoding or newline translation
        with open(local_path, "rb") as input_file:
            self.azure_blob_client.upload_blob(input_file, overwrite=True)

    def copy_to_local(self, local_path):
        """
        Download the target blob to the local file at `local_path`
        """
        # No `encoding` here: it is not allowed in binary mode
        with open(local_path, "wb") as output_file:
            self.azure_blob_client.download_blob().readinto(output_file)

    def list_dir(self):
        """
        List the names of the blobs located under the current "virtual directory"

        Notes:
            by default, the sdk will list all the blobs belonging to the container.
            An extra filter is then required to simulate a directory listing.
            FIXME : suppress the virtual parent directory
        """
        return list(
            self.azure_blob_container_client.list_blob_names(
                # Keep only the blobs belonging to the "virtual directory"
                name_starts_with=self.relative_remaining_path,
            )
        )

    def make_dir(self):
        """
        Do nothing but warn: directories are only virtual for blobs
        """
        warnings.warn(
            "'make_dir' is a non-operation on Azure Storage for Blobs. "
            "See the documentation at "
            "https://learn.microsoft.com/en-us/rest/api/storageservices/"
            "operations-on-containers and "
            "https://learn.microsoft.com/en-us/rest/api/storageservices/"
            "naming-and-referencing-containers--blobs--and-metadata#blob-names "
            "(a virtual hierarchy can be created in naming blobs)"
        )

    def create_child(self, file_name):
        """Create the resource of the child `file_name` of this resource"""
        return create_resource(child_uri_info(self.uri_info, file_name).geturl())

    def create_parent(self, file_name=None):
        """
        Create the resource of the parent of this resource

        Parameters
        ----------
        file_name : optional
            Ignored. Kept only for backward compatibility: the other resources
            in this file define ``create_parent`` without any argument.
        """
        # pylint: disable=unused-argument
        return create_resource(parent_uri_info(self.uri_info).geturl())
0 commit comments