@@ -142,8 +142,10 @@ type wgtun struct {
142142 status * core.Volatile [int ] // status of this interface
143143 latestRefresh atomic.Int64 // last refresh time in unix millis
144144 latestPing atomic.Int64 // last ping time in unix millis
145- latestRx atomic.Int64 // last rx time in unix millis
146- latestTx atomic.Int64 // last tx time in unix millis
145+ latestGoodRx atomic.Int64 // last successful rx time in unix millis
146+ latestGoodTx atomic.Int64 // last successful tx time in unix millis
147+ latestRx atomic.Int64 // last (successful or not) rx time in unix millis
148+ latestTx atomic.Int64 // last (successful or not) tx time in unix millis
147149 errRx atomic.Int64 // rx error count
148150 errTx atomic.Int64 // tx error count
149151}
@@ -1141,12 +1143,19 @@ func (w *wgproxy) Stat() (out *x.RouterStats) {
11411143 out .ErrTx = w .errTx .Load ()
11421144 out .LastRx = w .latestRx .Load ()
11431145 out .LastTx = w .latestTx .Load ()
1146+ out .LastGoodRx = w .latestGoodRx .Load ()
1147+ out .LastGoodTx = w .latestGoodTx .Load ()
11441148 out .LastRefresh = w .latestRefresh .Load ()
11451149 out .Since = w .since
11461150 out .Status = pxstatus (w .status .Load ()).String ()
11471151
1148- log .VV ("proxy: wg: %s stats: rx: %d, tx: %d, lastok: %s" ,
1149- w .tag (), out .Rx , out .Tx , core .FmtUnixMillisAsPeriod (out .LastOK ))
1152+ if settings .Debug {
1153+ log .VV ("proxy: wg: %s stats: rx: %d, tx: %d, r: %s (good: %s), w: %s (good: %s), lastok: %s" ,
1154+ w .tag (), out .Rx , out .Tx ,
1155+ core .FmtUnixMillisAsPeriod (out .LastRx ), core .FmtUnixMillisAsPeriod (out .LastGoodRx ),
1156+ core .FmtUnixMillisAsPeriod (out .LastTx ), core .FmtUnixMillisAsPeriod (out .LastGoodTx ),
1157+ core .FmtUnixMillisAsPeriod (out .LastOK ))
1158+ }
11501159 return out
11511160}
11521161
@@ -1531,11 +1540,12 @@ func (h *wgtun) listener(op wg.PktDir, err error) {
15311540 defer func () {
15321541 cur := h .status .Load ()
15331542 stoppedOrPaused := cur == END || cur == TPU
1534- logeif (stoppedOrPaused )("wg: %s listener: %s; status %s => %s; ignoring2? %t, why: %s" ,
1535- h .tag (), op , pxstatus (cur ), pxstatus (s ), stoppedOrPaused , why )
1543+ updated := false
15361544 if ! stoppedOrPaused {
1537- h .status .Cas (cur , s )
1545+ updated = h .status .Cas (cur , s )
15381546 }
1547+ logeif (! updated )("wg: %s listener: %s; status %s => %s; end/pause? %t, ignored? %t, why: %s" ,
1548+ h .tag (), op , pxstatus (cur ), pxstatus (s ), stoppedOrPaused , ! updated , why )
15391549 }()
15401550
15411551 now := now ()
@@ -1546,18 +1556,28 @@ func (h *wgtun) listener(op wg.PktDir, err error) {
15461556 why = "TNT: could not open conn"
15471557 } else if op == wg .Rcv && timedout (err ) {
15481558 s = TZZ // writes and reads have succeeded in the recent past
1549- why = "TZZ: timeout"
1559+ why = "TZZ: read timeout"
15501560 } else {
15511561 s = TKO
15521562 why = "TKO: " + err .Error ()
15531563 }
1564+
1565+ if op == wg .Rcv && ! timedout (err ) { // read error
1566+ h .errRx .Add (1 )
1567+ h .latestRx .Store (now )
1568+ } else if op == wg .Snd { // write error
1569+ h .errTx .Add (1 )
1570+ h .latestTx .Store (now )
1571+ }
15541572 } else { // ok
15551573 s = TOK
15561574 why = "TOK: ok"
15571575 if op == wg .Rcv { // read ok
1576+ h .latestGoodRx .Store (now )
15581577 h .latestRx .Store (now )
15591578 why = "TOK: read ok"
15601579 } else if op == wg .Snd { // write ok
1580+ h .latestGoodTx .Store (now )
15611581 h .latestTx .Store (now )
15621582 why = "TOK: write ok"
15631583 } // else: not a transport message
@@ -1566,30 +1586,36 @@ func (h *wgtun) listener(op wg.PktDir, err error) {
15661586 const tenSecMillis = 10 * 1000
15671587 // s may also be TOK (for successful handshakes but not for transport data)
15681588 if age > tenSecMillis && (s == TOK || s == TKO ) {
1569- lastSuccessfulRead := h .latestRx .Load ()
1570- lastSuccessfulWrite := h .latestTx .Load ()
1571- writeElapsedMs := lastSuccessfulWrite - lastSuccessfulRead // may be negative
1572-
1573- // if no reads since last write, mark as unresponsive
1574- // if status is "up" but writes (Snd) have not yet happened
1589+ lastSuccessfulRead := h .latestGoodRx .Load ()
1590+ lastSuccessfulWrite := h .latestGoodTx .Load ()
1591+ lastRead := h .latestRx .Load ()
1592+ lastWrite := h .latestTx .Load ()
1593+
1594+ deviationMs := (max (lastSuccessfulWrite , lastSuccessfulRead ) -
1595+ min (lastSuccessfulWrite , lastSuccessfulRead ))
1596+ readElapsedMs := lastRead - lastSuccessfulRead // never negative
1597+ writeElapsedMs := lastWrite - lastSuccessfulWrite // never negative
1598+
1599+ hasNewWrites := lastWrite > age
1600+ hasNewReads := lastRead > age
1601+
1602+ // too much time since last good write and good reads
1603+ readWriteDeviation := (hasNewReads || hasNewWrites ) && deviationMs > markTNTAfterMillis
1604+ // too much time since last attempted read was good
1605+ readThres := hasNewReads && readElapsedMs > markTNTAfterMillis
1606+ // too much time since last attempted write was good
1607+ writeThres := hasNewWrites && writeElapsedMs > markTNTAfterMillis
1608+
1609+ // if status is !ok (TKO), no reads since last write, mark as unresponsive
1610+ // if status is ok (TOK) but writes have not yet happened
15751611 // then reads (Rcv) are expected to timeout; so ignore them
1576- if lastSuccessfulRead <= age && lastSuccessfulWrite <= age {
1612+ if ! hasNewReads && ! hasNewWrites {
15771613 why = "TZZ: idling after start/refresh"
15781614 s = TZZ // possibly idling
1579- } else if (s == TKO && lastSuccessfulRead <= age && lastSuccessfulWrite > age ) ||
1580- (s == TOK && writeElapsedMs > markTNTAfterMillis ) {
1581- why = fmt .Sprintf ("TNT: [w ok, r !ok] (if %d == -1) OR [w !ok, no r] (if %d == -2); %s" ,
1582- s , s , pxstatus (s ))
1583- s = TNT // writes succeeded; but reads have never or not in the past 20s
1584- }
1585- }
1586-
1587- if s != TOK {
1588- switch op {
1589- case wg .Rcv :
1590- h .errRx .Add (1 )
1591- case wg .Snd :
1592- h .errTx .Add (1 )
1615+ } else if readThres || writeThres || readWriteDeviation {
1616+ why = fmt .Sprintf ("TNT: r !ok? %t, w !ok? %t, rw apart? %t; overriding: %s" ,
1617+ readThres , writeThres , readWriteDeviation , why )
1618+ s = TNT
15931619 }
15941620 }
15951621
0 commit comments