Skip to content

Commit 0812946

Browse files
Add docstrings for high-priority methods
- Add comprehensive docstring to Formatter.wrap_unload() method
- Add docstrings to S3FileSystem.put_file() and get_file() methods
- Fix indentation issues in docstring formatting

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 198ed9a commit 0812946

2 files changed

Lines changed: 71 additions & 0 deletions

File tree

pyathena/filesystem/s3.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,23 @@ def cat_file(
902902
)[1]
903903

904904
def put_file(self, lpath: str, rpath: str, callback=_DEFAULT_CALLBACK, **kwargs):
905+
"""Upload a local file to S3.
906+
907+
Uploads a file from the local filesystem to an S3 location. Supports
908+
automatic content type detection based on file extension and provides
909+
progress callback functionality.
910+
911+
Args:
912+
lpath: Local file path to upload.
913+
rpath: S3 destination path (s3://bucket/key).
914+
callback: Progress callback for tracking upload progress.
915+
**kwargs: Additional S3 parameters (e.g., ContentType, StorageClass).
916+
917+
Note:
918+
Directories are not supported for upload. If lpath is a directory,
919+
the method returns without performing any operation. Bucket-only
920+
destinations (without key) are also not supported.
921+
"""
905922
if os.path.isdir(lpath):
906923
# No support for directory uploads.
907924
return
@@ -929,6 +946,22 @@ def put_file(self, lpath: str, rpath: str, callback=_DEFAULT_CALLBACK, **kwargs)
929946
self.invalidate_cache(rpath)
930947

931948
def get_file(self, rpath: str, lpath: str, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs):
949+
"""Download an S3 file to the local filesystem.
950+
951+
Downloads a file from S3 to the local filesystem with progress tracking.
952+
Reads the file in chunks to handle large files efficiently.
953+
954+
Args:
955+
rpath: S3 source path (s3://bucket/key).
956+
lpath: Local destination file path.
957+
callback: Progress callback for tracking download progress.
958+
outfile: Unused parameter for fsspec compatibility.
959+
**kwargs: Additional S3 parameters passed to open().
960+
961+
Note:
962+
If lpath is a directory, the method returns without performing
963+
any operation.
964+
"""
932965
if os.path.isdir(lpath):
933966
return
934967

pyathena/formatter.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,44 @@ def wrap_unload(
8787
format_: str = AthenaFileFormat.FILE_FORMAT_PARQUET,
8888
compression: str = AthenaCompression.COMPRESSION_SNAPPY,
8989
):
90+
"""Wrap a SELECT query with an UNLOAD statement for high-performance result retrieval.
91+
92+
Transforms SELECT or WITH queries into UNLOAD statements that export results
93+
directly to S3 in optimized formats (Parquet, ORC) with compression. This
94+
approach is significantly faster than standard CSV-based result retrieval
95+
for large datasets and preserves data types more accurately.
96+
97+
Args:
98+
operation: SQL query to wrap. Must be a SELECT or WITH statement.
99+
s3_staging_dir: Base S3 directory for storing UNLOAD results.
100+
format_: Output file format. Defaults to Parquet for optimal performance.
101+
compression: Compression algorithm. Defaults to Snappy for balanced
102+
compression ratio and speed.
103+
104+
Returns:
105+
Tuple containing:
106+
- Query string (wrapped in an UNLOAD statement for SELECT/WITH; otherwise unchanged)
107+
- S3 location where results will be stored (None if not SELECT/WITH)
108+
109+
Example:
110+
>>> query = "SELECT * FROM sales WHERE year = 2023"
111+
>>> unload_query, location = Formatter.wrap_unload(
112+
... query, "s3://my-bucket/results/"
113+
... )
114+
>>> print(unload_query)
115+
UNLOAD (
116+
SELECT * FROM sales WHERE year = 2023
117+
)
118+
TO 's3://my-bucket/results/unload/20231215/<uuid>/'
119+
WITH (
120+
format = 'PARQUET',
121+
compression = 'SNAPPY'
122+
)
123+
124+
Note:
125+
Only SELECT and WITH statements are wrapped. Other statement types
126+
are returned unchanged with location=None.
127+
"""
90128
if not operation or not operation.strip():
91129
raise ProgrammingError("Query is none or empty.")
92130

0 commit comments

Comments
 (0)