Skip to content

Commit 0444230

Browse files
Add docstrings for remaining high-priority methods
- Add docstrings to S3FileSystem.checksum(), sign(), cp_file() methods
- Add docstring to S3Object.to_dict() method
- Add docstrings to AthenaPandasResultSet.is_unload and dtypes properties
- Add docstring to BaseCursor.get_default_converter() static method
- Fix indentation issues in all added docstrings

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 0812946 commit 0444230

4 files changed

Lines changed: 94 additions & 0 deletions

File tree

pyathena/common.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -183,6 +183,15 @@ def __init__(
183183

184184
@staticmethod
185185
def get_default_converter(unload: bool = False) -> Union[DefaultTypeConverter, Any]:
186+
"""Get the default type converter for this cursor class.
187+
188+
Args:
189+
unload: Whether the converter is for UNLOAD operations. Some cursor
190+
types may return different converters for UNLOAD operations.
191+
192+
Returns:
193+
The default type converter instance for this cursor type.
194+
"""
186195
return DefaultTypeConverter()
187196

188197
@property

pyathena/filesystem/s3.py

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -738,6 +738,28 @@ def touch(self, path: str, truncate: bool = True, **kwargs) -> Dict[str, Any]:
738738
def cp_file(
739739
self, path1: str, path2: str, recursive=False, maxdepth=None, on_error=None, **kwargs
740740
):
741+
"""Copy an S3 object to another S3 location.
742+
743+
Performs server-side copy of S3 objects, which is more efficient than
744+
downloading and re-uploading. Automatically chooses between simple copy
745+
and multipart copy based on object size.
746+
747+
Args:
748+
path1: Source S3 path (s3://bucket/key).
749+
path2: Destination S3 path (s3://bucket/key).
750+
recursive: Unused parameter for fsspec compatibility.
751+
maxdepth: Unused parameter for fsspec compatibility.
752+
on_error: Unused parameter for fsspec compatibility.
753+
**kwargs: Additional S3 copy parameters (e.g., metadata, storage class).
754+
755+
Raises:
756+
ValueError: If trying to copy to a versioned file or copy buckets.
757+
758+
Note:
759+
Uses multipart copy for objects larger than the maximum part size
760+
to optimize performance for large files. The copy operation is
761+
performed entirely on the S3 service, so no object data passes through the client.
762+
"""
741763
# TODO: Delete the value that seems to be a typo, onerror=false.
742764
# https://github.com/fsspec/filesystem_spec/commit/346a589fef9308550ffa3d0d510f2db67281bb05
743765
# https://github.com/fsspec/filesystem_spec/blob/2024.10.0/fsspec/spec.py#L1185
@@ -972,13 +994,59 @@ def get_file(self, rpath: str, lpath: str, callback=_DEFAULT_CALLBACK, outfile=N
972994
callback.relative_update(len(data))
973995

974996
def checksum(self, path: str, **kwargs):
997+
"""Get checksum for S3 object or directory.
998+
999+
Computes a checksum for the specified S3 path. For individual objects,
1000+
returns the ETag converted to an integer. For directories, returns a
1001+
checksum based on the directory's tokenized representation.
1002+
1003+
Args:
1004+
path: S3 path (s3://bucket/key) to get checksum for.
1005+
**kwargs: Additional arguments including:
1006+
refresh: If True, refresh cached info before computing checksum.
1007+
1008+
Returns:
1009+
Integer checksum value derived from S3 ETag or directory token.
1010+
1011+
Note:
1012+
For multipart uploads, the ETag has the form "<hex>-<part count>"; only the
1013+
hexadecimal portion before the dash is used for checksum calculation.
1014+
"""
9751015
refresh = kwargs.pop("refresh", False)
9761016
info = self.info(path, refresh=refresh)
9771017
if info.get("type") != S3ObjectType.S3_OBJECT_TYPE_DIRECTORY:
9781018
return int(info.get("etag").strip('"').split("-")[0], 16)
9791019
return int(tokenize(info), 16)
9801020

9811021
def sign(self, path: str, expiration: int = 3600, **kwargs):
1022+
"""Generate a presigned URL for S3 object access.
1023+
1024+
Creates a presigned URL that allows temporary access to an S3 object
1025+
without requiring AWS credentials. Useful for sharing files or providing
1026+
time-limited access to resources.
1027+
1028+
Args:
1029+
path: S3 path (s3://bucket/key) to generate URL for.
1030+
expiration: URL expiration time in seconds. Defaults to 3600 (1 hour).
1031+
**kwargs: Additional parameters including:
1032+
client_method: S3 operation ('get_object', 'put_object', etc.).
1033+
Defaults to 'get_object'.
1034+
Additional parameters passed to the S3 operation.
1035+
1036+
Returns:
1037+
Presigned URL string that provides temporary access to the S3 object.
1038+
1039+
Example:
1040+
>>> fs = S3FileSystem()
1041+
>>> url = fs.sign("s3://my-bucket/file.txt", expiration=7200)
1042+
>>> # URL valid for 2 hours
1043+
>>>
1044+
>>> # Generate upload URL
1045+
>>> upload_url = fs.sign(
1046+
... "s3://my-bucket/upload.txt",
1047+
... client_method="put_object"
1048+
... )
1049+
"""
9821050
bucket, key, version_id = self.parse_path(path)
9831051
client_method = kwargs.pop("client_method", "get_object")
9841052
params = {"Bucket": bucket, "Key": key}

pyathena/filesystem/s3_object.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,11 @@ def __str__(self):
163163
return str(self.__dict__)
164164

165165
def to_dict(self) -> Dict[str, Any]:
166+
"""Convert S3Object to dictionary representation.
167+
168+
Returns:
169+
Deep copy of the object's attributes as a dictionary.
170+
"""
166171
return copy.deepcopy(self.__dict__)
167172

168173
def to_api_repr(self) -> Dict[str, Any]:

pyathena/pandas/result_set.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -349,10 +349,22 @@ def __s3_file_system(self):
349349

350350
@property
351351
def is_unload(self):
352+
"""Check if this result set comes from an UNLOAD operation.
353+
354+
Returns:
355+
True if unload mode is enabled and the query text, ignoring leading
356+
whitespace and case, starts with "UNLOAD"; a falsy value otherwise.
357+
"""
352358
return self._unload and self.query and self.query.strip().upper().startswith("UNLOAD")
353359

354360
@property
355361
def dtypes(self) -> Dict[str, Type[Any]]:
362+
"""Get pandas-compatible data types for result columns.
363+
364+
Returns:
365+
Dictionary mapping column names to their corresponding Python types
366+
based on the converter's type mapping; columns whose type has no mapping are omitted.
367+
"""
356368
description = self.description if self.description else []
357369
return {
358370
d[0]: self._converter.types[d[1]] for d in description if d[1] in self._converter.types

0 commit comments

Comments
 (0)