@@ -31,6 +31,38 @@ use tdbe::types::enums::StreamMsgType;
3131
3232use super :: protocol:: READ_TIMEOUT_MS ;
3333
/// Raw OS error code of the Windows `ERROR_IO_PENDING` condition (Win32
/// error 997).
///
/// The Windows overlapped socket layer reports a read that is still in
/// flight with this code rather than with `WSAEWOULDBLOCK`. Rust `std`
/// classifies 997 as `ErrorKind::Uncategorized`, so matching `kind()`
/// against `WouldBlock | TimedOut` does not catch it and the benign
/// in-flight read ends up looking like a fatal I/O error. Compare
/// `raw_os_error()` against this constant to recognise the condition as
/// transient.
///
/// Reference: <https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--500-999->
pub(crate) const ERROR_IO_PENDING: i32 = 997;
45+
46+ /// Classify a raw `std::io::Error` returned by `read()` as a transient
47+ /// "no data right now, try again" condition.
48+ ///
49+ /// Returns `true` for the three cases the FPSS framing and I/O loops must
50+ /// retry / drain on rather than escalate to a fatal disconnect:
51+ ///
52+ /// - `ErrorKind::WouldBlock` — Linux, macOS `SO_RCVTIMEO` on a non-blocking
53+ /// socket.
54+ /// - `ErrorKind::TimedOut` — macOS `SO_RCVTIMEO` on a blocking socket.
55+ /// - `raw_os_error() == Some(997)` — Windows `ERROR_IO_PENDING` from the
56+ /// overlapped I/O layer (issue #469). Maps to `ErrorKind::Uncategorized`
57+ /// in `std`, so a `kind()` match alone misses it.
58+ #[ must_use]
59+ pub ( crate ) fn is_transient_read ( io_err : & std:: io:: Error ) -> bool {
60+ matches ! (
61+ io_err. kind( ) ,
62+ std:: io:: ErrorKind :: WouldBlock | std:: io:: ErrorKind :: TimedOut
63+ ) || io_err. raw_os_error ( ) == Some ( ERROR_IO_PENDING )
64+ }
65+
3466/// Maximum payload length (single unsigned byte).
3567///
3668/// Source: `PacketStream.java` -- the length field is one byte.
@@ -231,13 +263,7 @@ fn read_header_with_timeout<R: Read>(
231263 } )
232264 }
233265 Err ( e) if e. kind ( ) == std:: io:: ErrorKind :: Interrupted => continue ,
234- Err ( e)
235- if n > 0
236- && matches ! (
237- e. kind( ) ,
238- std:: io:: ErrorKind :: WouldBlock | std:: io:: ErrorKind :: TimedOut
239- ) =>
240- {
266+ Err ( e) if n > 0 && is_transient_read ( & e) => {
241267 // Drain-yield: the aggregate wall-clock cap exists so
242268 // the command drain cannot be starved by a trickling
243269 // sender. The partial header bytes are preserved on
@@ -345,12 +371,7 @@ fn read_exact_payload_with_timeout<R: Read>(
345371 state. payload_read = n + k;
346372 }
347373 Err ( e) if e. kind ( ) == std:: io:: ErrorKind :: Interrupted => continue ,
348- Err ( e)
349- if matches ! (
350- e. kind( ) ,
351- std:: io:: ErrorKind :: WouldBlock | std:: io:: ErrorKind :: TimedOut
352- ) =>
353- {
374+ Err ( e) if is_transient_read ( & e) => {
354375 // Drain-yield: the aggregate wall-clock cap exists so
355376 // the command drain cannot be starved by a trickling
356377 // sender. The partial payload bytes are preserved
@@ -1401,4 +1422,162 @@ mod tests {
14011422 ) ) ;
14021423 assert ! ( !is_drain_yield( & io) ) ;
14031424 }
1425+
/// An error built from raw OS code 997 (Windows `ERROR_IO_PENDING`) has
/// to be classified as a transient read. Because `std` reports 997 as
/// `ErrorKind::Uncategorized`, a check based purely on `kind()` would
/// overlook it and treat the in-flight overlapped read as a fatal
/// disconnect — the behaviour reported by the Python user in issue #469.
#[test]
fn is_transient_read_recognises_windows_error_io_pending() {
    use std::io::{Error as IoError, ErrorKind};

    let pending = IoError::from_raw_os_error(ERROR_IO_PENDING);

    // Precondition behind the fix: the error does not already look like
    // one of the classic retryable kinds, and it keeps its raw code.
    assert_ne!(pending.kind(), ErrorKind::WouldBlock);
    assert_ne!(pending.kind(), ErrorKind::TimedOut);
    assert_eq!(pending.raw_os_error(), Some(997));
    assert!(
        is_transient_read(&pending),
        "ERROR_IO_PENDING (os error 997) must be classified as transient"
    );

    // A different raw OS error must stay fatal: it is a real disconnect.
    // 104 is ECONNRESET on Linux.
    let reset = IoError::from_raw_os_error(104);
    assert!(
        !is_transient_read(&reset),
        "ECONNRESET must not be classified as transient"
    );

    // The two kind-based cases are still recognised.
    for kind in [ErrorKind::WouldBlock, ErrorKind::TimedOut] {
        assert!(is_transient_read(&IoError::new(kind, "x")));
    }
}
1458+
/// Scripted reader for stall tests. It serves `prefix`, then fails
/// `remaining_stalls` times with the raw OS error `os_error`, then serves
/// `suffix`. Used to imitate a Windows TLS socket that reports
/// `ERROR_IO_PENDING` (997) part-way through a frame.
struct PrefixThenOsErrThenResume {
    prefix: Vec<u8>,
    suffix: Vec<u8>,
    prefix_pos: usize,
    suffix_pos: usize,
    remaining_stalls: usize,
    os_error: i32,
}

impl PrefixThenOsErrThenResume {
    /// Copies as many unread bytes of `src` (those from `*cursor` onwards)
    /// as fit into `dst`, advances `*cursor` past them and returns how many
    /// bytes were copied.
    fn copy_pending(src: &[u8], cursor: &mut usize, dst: &mut [u8]) -> usize {
        let pending = &src[*cursor..];
        let count = pending.len().min(dst.len());
        dst[..count].copy_from_slice(&pending[..count]);
        *cursor += count;
        count
    }
}

impl std::io::Read for PrefixThenOsErrThenResume {
    /// Advances the script by one step: prefix bytes first, then the
    /// configured number of stall errors, then suffix bytes. Once all of
    /// that is used up, every call fails with `UnexpectedEof`.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        // Phase 1: bytes delivered before the stall.
        if self.prefix_pos < self.prefix.len() {
            return Ok(Self::copy_pending(&self.prefix, &mut self.prefix_pos, buf));
        }

        // Phase 2: one scripted stall per call until the budget is spent.
        if let Some(stalls_left) = self.remaining_stalls.checked_sub(1) {
            self.remaining_stalls = stalls_left;
            return Err(std::io::Error::from_raw_os_error(self.os_error));
        }

        // Phase 3: bytes delivered after the stall clears.
        if self.suffix_pos < self.suffix.len() {
            return Ok(Self::copy_pending(&self.suffix, &mut self.suffix_pos, buf));
        }

        // Phase 4: script exhausted.
        Err(std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            "reader exhausted",
        ))
    }
}
1497+
/// Reader that hands out `prefix` and from then on fails every read with
/// the raw OS error `os_error`. Used to imitate a Windows socket whose
/// read goes pending and does not complete during the test.
struct AlwaysOsErrAfter {
    prefix: Vec<u8>,
    pos: usize,
    os_error: i32,
}

impl std::io::Read for AlwaysOsErrAfter {
    /// Copies the next unread prefix bytes into `buf`; once the prefix is
    /// fully consumed, returns the configured raw OS error on every call.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        // Guard: with no unread prefix left, only the error remains.
        let unread = match self.prefix.get(self.pos..) {
            Some(rest) if !rest.is_empty() => rest,
            _ => return Err(std::io::Error::from_raw_os_error(self.os_error)),
        };

        let count = unread.len().min(buf.len());
        buf[..count].copy_from_slice(&unread[..count]);
        self.pos += count;
        Ok(count)
    }
}
1520+
/// `ERROR_IO_PENDING` raised before a single header byte has arrived must
/// come back from `read_frame` as `Error::Io` with the raw OS code intact
/// — the same route `WouldBlock` takes — so that
/// `io_loop::is_read_timeout` can drain queued commands and retry on the
/// next poll rather than escalate to a reconnect storm. This is the
/// issue #469 scenario: a Python user on Windows saw
/// `Overlapped I/O operation is in progress. (os error 997)` spam followed
/// by repeated reconnect attempts.
#[test]
fn pre_header_error_io_pending_propagates_as_io() {
    // No prefix at all: the very first read already reports 997.
    let mut reader = AlwaysOsErrAfter {
        os_error: ERROR_IO_PENDING,
        prefix: Vec::new(),
        pos: 0,
    };

    let err = read_frame(&mut reader).unwrap_err();

    match &err {
        crate::error::Error::Io(io_err) => {
            assert_eq!(io_err.raw_os_error(), Some(ERROR_IO_PENDING));
        }
        other => panic!("expected Error::Io(ERROR_IO_PENDING), got {other:?}"),
    }
}
1543+
/// `ERROR_IO_PENDING` in the middle of the header: the first byte is
/// delivered, the next reads stall three times with OS error 997, and the
/// remaining bytes follow. `read_frame` must retry through the stalls and
/// return the complete frame instead of surfacing the error to the
/// caller.
#[test]
fn mid_header_error_io_pending_retries_and_recovers() {
    let ping_code = StreamMsgType::Ping as u8;

    let mut reader = PrefixThenOsErrThenResume {
        os_error: ERROR_IO_PENDING,
        remaining_stalls: 3,
        // Delivered before the stall: a single byte (value 0x01).
        prefix: vec![0x01],
        prefix_pos: 0,
        // Delivered after the stall: message code, then one payload byte.
        suffix: vec![ping_code, 0xAA],
        suffix_pos: 0,
    };

    let outcome = read_frame(&mut reader).unwrap();
    let frame = outcome.unwrap();

    assert_eq!(frame.code, StreamMsgType::Ping);
    assert_eq!(frame.payload, vec![0xAA]);
}
1563+
/// `ERROR_IO_PENDING` in the middle of the payload: the header and the
/// first two payload bytes are delivered, the next reads stall three
/// times with OS error 997, and the last two payload bytes follow.
/// `read_frame` must retry through the stalls and return the full
/// four-byte payload.
#[test]
fn mid_payload_error_io_pending_retries_and_recovers() {
    let ping_code = StreamMsgType::Ping as u8;

    let mut reader = PrefixThenOsErrThenResume {
        os_error: ERROR_IO_PENDING,
        remaining_stalls: 3,
        // Before the stall: 0x04, the message code, two payload bytes.
        prefix: vec![0x04, ping_code, 0x01, 0x02],
        prefix_pos: 0,
        // After the stall: the rest of the payload.
        suffix: vec![0x03, 0x04],
        suffix_pos: 0,
    };

    let outcome = read_frame(&mut reader).unwrap();
    let frame = outcome.unwrap();

    assert_eq!(frame.code, StreamMsgType::Ping);
    assert_eq!(frame.payload, vec![0x01, 0x02, 0x03, 0x04]);
}
14041583}
0 commit comments