@@ -1194,6 +1194,120 @@ def on_request_error(self, query, consistency, error, retry_num):
11941194 return self .RETHROW , None , None
11951195
11961196
1197+ class LWTRetryPolicy (ExponentialBackoffRetryPolicy ):
1198+ """
1199+ A retry policy tailored for Lightweight Transaction (LWT) queries.
1200+
1201+ LWT queries use Paxos consensus, where the first replica in the token ring
1202+ acts as the Paxos coordinator (leader). Retrying LWT queries on a *different*
1203+ host causes Paxos contention — the new coordinator must compete with the
1204+ original one, potentially causing cascading timeouts.
1205+
1206+ This policy addresses that by:
1207+
1208+ - **CAS write timeouts**: Retrying on the **same host** (the Paxos coordinator)
1209+ with exponential backoff, giving the coordinator time to complete the Paxos round.
1210+ - **CAS read timeouts** (serial consistency): Retrying on the same host.
1211+ - **Unavailable at serial consistency**: Retrying on the **next host**, since the
1212+ Paxos phase failed on this node (not enough replicas alive to form quorum).
1213+ - **Non-CAS operations**: Delegating to the standard :class:`ExponentialBackoffRetryPolicy`
1214+ behavior.
1215+
1216+ This is modeled after gocql's ``LWTRetryPolicy`` interface, which retries LWT
1217+ queries on the same host to avoid Paxos contention.
1218+
1219+ Example usage::
1220+
1221+ from cassandra.cluster import Cluster
1222+ from cassandra.policies import LWTRetryPolicy
1223+
1224+ # Use as the default retry policy for the cluster
1225+ cluster = Cluster(
1226+ default_retry_policy=LWTRetryPolicy(max_num_retries=3)
1227+ )
1228+
1229+ # Or assign to a specific statement
1230+ statement.retry_policy = LWTRetryPolicy(max_num_retries=5)
1231+
1232+ :param max_num_retries: Maximum number of retry attempts (default: 3).
1233+ :param min_interval: Initial backoff delay in seconds (default: 0.1).
1234+ :param max_interval: Maximum backoff delay in seconds (default: 10.0).
1235+ """
1236+
1237+ def __init__ (self , max_num_retries = 3 , min_interval = 0.1 , max_interval = 10.0 ,
1238+ * args , ** kwargs ):
1239+ super (LWTRetryPolicy , self ).__init__ (
1240+ max_num_retries = max_num_retries ,
1241+ min_interval = min_interval ,
1242+ max_interval = max_interval ,
1243+ * args , ** kwargs )
1244+
1245+ def on_write_timeout (self , query , consistency , write_type ,
1246+ required_responses , received_responses , retry_num ):
1247+ """
1248+ For CAS (LWT) write timeouts, retry on the **same host** with exponential
1249+ backoff. Retrying on a different host would cause Paxos contention.
1250+
1251+ For non-CAS writes, delegates to the base ExponentialBackoffRetryPolicy
1252+ behavior (retry BATCH_LOG only, RETHROW otherwise).
1253+ """
1254+ if retry_num >= self .max_num_retries :
1255+ return self .RETHROW , None , None
1256+
1257+ if write_type == WriteType .CAS :
1258+ # Retry on the SAME host — this is the Paxos coordinator.
1259+ # Moving to another host causes contention in the Paxos protocol.
1260+ return self .RETRY , consistency , self ._calculate_backoff (retry_num )
1261+
1262+ # Non-CAS: delegate to parent (retries BATCH_LOG, rethrows others)
1263+ return super (LWTRetryPolicy , self ).on_write_timeout (
1264+ query , consistency , write_type ,
1265+ required_responses , received_responses , retry_num )
1266+
1267+ def on_read_timeout (self , query , consistency , required_responses ,
1268+ received_responses , data_retrieved , retry_num ):
1269+ """
1270+ For reads at serial consistency (CAS reads), retry on the **same host**
1271+ with backoff.
1272+
1273+ For non-serial reads, delegates to the base ExponentialBackoffRetryPolicy
1274+ behavior.
1275+ """
1276+ if retry_num >= self .max_num_retries :
1277+ return self .RETHROW , None , None
1278+
1279+ if ConsistencyLevel .is_serial (consistency ):
1280+ # Serial read = CAS/Paxos read. Retry on same host.
1281+ return self .RETRY , consistency , self ._calculate_backoff (retry_num )
1282+
1283+ # Non-serial: delegate to parent
1284+ return super (LWTRetryPolicy , self ).on_read_timeout (
1285+ query , consistency , required_responses ,
1286+ received_responses , data_retrieved , retry_num )
1287+
1288+ def on_unavailable (self , query , consistency , required_replicas ,
1289+ alive_replicas , retry_num ):
1290+ """
1291+ For serial consistency (CAS/Paxos phase), retry on the **next host** —
1292+ this node couldn't form a Paxos quorum, so a different coordinator
1293+ might see a different set of available replicas.
1294+
1295+ For non-serial consistency, delegates to the base ExponentialBackoffRetryPolicy
1296+ behavior.
1297+ """
1298+ if retry_num >= self .max_num_retries :
1299+ return self .RETHROW , None , None
1300+
1301+ if ConsistencyLevel .is_serial (consistency ):
1302+ # Paxos phase failed — not enough replicas for serial quorum.
1303+ # Try a different coordinator; it might have better connectivity.
1304+ return self .RETRY_NEXT_HOST , None , self ._calculate_backoff (retry_num )
1305+
1306+ # Non-serial: delegate to parent
1307+ return super (LWTRetryPolicy , self ).on_unavailable (
1308+ query , consistency , required_replicas , alive_replicas , retry_num )
1309+
1310+
11971311class AddressTranslator (object ):
11981312 """
11991313 Interface for translating cluster-defined endpoints.
0 commit comments