@@ -738,6 +738,28 @@ def touch(self, path: str, truncate: bool = True, **kwargs) -> Dict[str, Any]:
738738 def cp_file (
739739 self , path1 : str , path2 : str , recursive = False , maxdepth = None , on_error = None , ** kwargs
740740 ):
741+ """Copy an S3 object to another S3 location.
742+
743+ Performs server-side copy of S3 objects, which is more efficient than
744+ downloading and re-uploading. Automatically chooses between simple copy
745+ and multipart copy based on object size.
746+
747+ Args:
748+ path1: Source S3 path (s3://bucket/key).
749+ path2: Destination S3 path (s3://bucket/key).
750+ recursive: Unused parameter for fsspec compatibility.
751+ maxdepth: Unused parameter for fsspec compatibility.
752+ on_error: Unused parameter for fsspec compatibility.
753+ **kwargs: Additional S3 copy parameters (e.g., metadata, storage class).
754+
755+ Raises:
756+ ValueError: If trying to copy to a versioned file or copy buckets.
757+
758+ Note:
759+ Uses multipart copy for objects larger than the maximum part size
760+ to optimize performance for large files. The copy operation is
761+ performed entirely on the S3 service without data transfer.
762+ """
741763 # TODO: Delete the value that seems to be a typo, onerror=false.
742764 # https://github.com/fsspec/filesystem_spec/commit/346a589fef9308550ffa3d0d510f2db67281bb05
743765 # https://github.com/fsspec/filesystem_spec/blob/2024.10.0/fsspec/spec.py#L1185
@@ -972,13 +994,59 @@ def get_file(self, rpath: str, lpath: str, callback=_DEFAULT_CALLBACK, outfile=N
972994 callback .relative_update (len (data ))
973995
def checksum(self, path: str, **kwargs):
    """Return an integer checksum for the S3 object or directory at ``path``.

    The entry is looked up through ``self.info``. For anything that is not
    a directory, the checksum is the entry's ETag parsed as a hexadecimal
    integer. For a directory, the checksum is the hexadecimal token that
    ``tokenize`` produces for the info record.

    Args:
        path: S3 path to compute the checksum for.
        **kwargs: Optional arguments. ``refresh`` (default ``False``) is
            removed from ``kwargs`` and forwarded to ``self.info``.

    Returns:
        Integer checksum derived from the ETag or the directory token.

    Note:
        Surrounding double quotes are stripped from the ETag, and only the
        text before the first dash is used. ETags carrying a dash suffix
        (the form S3 reports for multipart uploads) are therefore reduced
        to their leading hexadecimal part.
    """
    force_refresh = kwargs.pop("refresh", False)
    entry = self.info(path, refresh=force_refresh)

    # Directories are hashed from their tokenized info record.
    if entry.get("type") == S3ObjectType.S3_OBJECT_TYPE_DIRECTORY:
        return int(tokenize(entry), 16)

    # Drop the quoting around the ETag, then keep only the part that
    # precedes the first dash before parsing it as base-16.
    etag_head, _, _ = entry.get("etag").strip('"').partition("-")
    return int(etag_head, 16)
9801020
9811021 def sign (self , path : str , expiration : int = 3600 , ** kwargs ):
1022+ """Generate a presigned URL for S3 object access.
1023+
1024+ Creates a presigned URL that allows temporary access to an S3 object
1025+ without requiring AWS credentials. Useful for sharing files or providing
1026+ time-limited access to resources.
1027+
1028+ Args:
1029+ path: S3 path (s3://bucket/key) to generate URL for.
1030+ expiration: URL expiration time in seconds. Defaults to 3600 (1 hour).
1031+ **kwargs: Additional parameters including:
1032+ client_method: S3 operation ('get_object', 'put_object', etc.).
1033+ Defaults to 'get_object'.
1034+ Additional parameters passed to the S3 operation.
1035+
1036+ Returns:
1037+ Presigned URL string that provides temporary access to the S3 object.
1038+
1039+ Example:
1040+ >>> fs = S3FileSystem()
1041+ >>> url = fs.sign("s3://my-bucket/file.txt", expiration=7200)
1042+ >>> # URL valid for 2 hours
1043+ >>>
1044+ >>> # Generate upload URL
1045+ >>> upload_url = fs.sign(
1046+ ... "s3://my-bucket/upload.txt",
1047+ ... client_method="put_object"
1048+ ... )
1049+ """
9821050 bucket , key , version_id = self .parse_path (path )
9831051 client_method = kwargs .pop ("client_method" , "get_object" )
9841052 params = {"Bucket" : bucket , "Key" : key }
0 commit comments