pyathena-dev
diff --git a/‎docs/cursor.rst‎
Lines changed: 191 additions & 0 deletions b/‎docs/cursor.rst‎
Lines changed: 191 additions & 0 deletions
@@ -298,6 +298,197 @@ If you want to change the dictionary type (e.g., use OrderedDict), you can speci
     cursor = connect(s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
                      region_name="us-west-2").cursor(cursor=AsyncDictCursor, dict_type=OrderedDict)
 
+.. _query-execution-callback:
+
+Query Execution Callback
+-------------------------
+
+PyAthena provides a callback mechanism that gives you the query ID as soon as
+the ``start_query_execution`` API call is made, without waiting for the query to complete.
+This is useful for monitoring, logging, or cancelling long-running queries from another thread.
+
+The ``on_start_query_execution`` callback can be configured at both the connection level and 
+the execute level. When both are set, both callbacks will be invoked.
+
+Connection-level callback
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can set a default callback for all queries executed through a connection:
+
+.. code:: python
+
+    from pyathena import connect
+
+    def query_callback(query_id):
+        print(f"Query started with ID: {query_id}")
+        # You can use query_id for monitoring or cancellation
+
+    cursor = connect(
+        s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
+        region_name="us-west-2",
+        on_start_query_execution=query_callback
+    ).cursor()
+    
+    cursor.execute("SELECT * FROM many_rows")  # Callback will be invoked
+
+Execute-level callback
+~~~~~~~~~~~~~~~~~~~~~~
+
+You can also specify a callback for individual query executions:
+
+.. code:: python
+
+    from pyathena import connect
+
+    def specific_callback(query_id):
+        print(f"Specific query started: {query_id}")
+
+    cursor = connect(
+        s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
+        region_name="us-west-2"
+    ).cursor()
+    
+    cursor.execute(
+        "SELECT * FROM many_rows", 
+        on_start_query_execution=specific_callback
+    )
+
+Query cancellation example
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
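+A common use case is to cancel long-running analytical queries after a timeout.
+The example below calls ``cursor.cancel()`` from a monitor thread. Note that leaving the
+``with ThreadPoolExecutor(...)`` block waits for the monitor thread to finish its sleep,
+so the function returns only after the full timeout has elapsed, even if the query
+completes earlier: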
+
+.. code:: python
+
+    import time
+    from concurrent.futures import ThreadPoolExecutor, TimeoutError
+    from pyathena import connect
+
+    def cancel_long_running_query():
+        """Example: Cancel a complex analytical query after 10 minutes."""
+        
+        def track_query_start(query_id):
+            print(f"Long-running analysis started: {query_id}")
+            return query_id
+
+        def monitor_and_cancel(cursor, timeout_minutes):
+            """Monitor query and cancel if it exceeds timeout."""
+            time.sleep(timeout_minutes * 60)  # Convert to seconds
+            try:
+                cursor.cancel()
+                print(f"Query cancelled after {timeout_minutes} minutes timeout")
+            except Exception as e:
+                print(f"Cancellation failed: {e}")
+
+        cursor = connect(
+            s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
+            region_name="us-west-2",
+            on_start_query_execution=track_query_start
+        ).cursor()
+
+        # Complex analytical query that might run for a long time
+        long_query = """
+        WITH daily_metrics AS (
+            SELECT 
+                date_trunc('day', timestamp_col) as day,
+                user_id,
+                COUNT(*) as events,
+                AVG(duration) as avg_duration
+            FROM large_events_table 
+            WHERE timestamp_col >= current_date - interval '1' year
+            GROUP BY 1, 2
+        ),
+        user_segments AS (
+            SELECT 
+                user_id,
+                CASE 
+                    WHEN AVG(events) > 100 THEN 'high_activity'
+                    WHEN AVG(events) > 10 THEN 'medium_activity' 
+                    ELSE 'low_activity'
+                END as segment
+            FROM daily_metrics
+            GROUP BY user_id
+        )
+        SELECT 
+            segment,
+            COUNT(DISTINCT user_id) as users,
+            AVG(events) as avg_daily_events
+        FROM daily_metrics dm
+        JOIN user_segments us ON dm.user_id = us.user_id
+        GROUP BY segment
+        ORDER BY avg_daily_events DESC
+        """
+
+        # Use ThreadPoolExecutor for timeout management
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            # Start timeout monitor (cancel after 10 minutes)
+            timeout_future = executor.submit(monitor_and_cancel, cursor, 10)
+
+            try:
+                print("Starting complex analytical query (10-minute timeout)...")
+                cursor.execute(long_query)
+                
+                # Process results
+                results = cursor.fetchall()
+                print(f"Analysis completed successfully: {len(results)} segments found")
+                for row in results:
+                    print(f"  {row[0]}: {row[1]} users, {row[2]:.1f} avg events")
+                    
+            except Exception as e:
+                print(f"Query failed or was cancelled: {e}")
+            finally:
+                # Clean up timeout monitor
+                try:
+                    timeout_future.result(timeout=1)
+                except TimeoutError:
+                    pass  # Monitor is still running, which is fine
+
+    # Run the example
+    cancel_long_running_query()
+
+Multiple callbacks
+~~~~~~~~~~~~~~~~~~~
+
+When both connection-level and execute-level callbacks are specified, 
+both callbacks will be invoked:
+
+.. code:: python
+
+    from pyathena import connect
+
+    def connection_callback(query_id):
+        print(f"Connection callback: {query_id}")
+        # Log to monitoring system
+
+    def execute_callback(query_id):
+        print(f"Execute callback: {query_id}")
+        # Store for cancellation if needed
+
+    cursor = connect(
+        s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
+        region_name="us-west-2",
+        on_start_query_execution=connection_callback
+    ).cursor()
+    
+    # This will invoke both connection_callback and execute_callback
+    cursor.execute(
+        "SELECT 1", 
+        on_start_query_execution=execute_callback
+    )
+
+Supported cursor types
+~~~~~~~~~~~~~~~~~~~~~~
+
+The ``on_start_query_execution`` callback is supported by the following cursor types:
+
+* ``Cursor`` (default cursor)
+* ``DictCursor`` 
+* ``ArrowCursor``
+* ``PandasCursor``
+
+Note: ``AsyncCursor`` and its variants do not support this callback, because their
+asynchronous execution model already returns the query ID immediately.
+
 PandasCursor
 ------------