11"""
22ApiProber.core.http_client -- HTTP-Client mit Rate-Limiting
33=============================================================
4- urllib.request Wrapper mit Auth, Rate-Limiting, User-Agent.
4+ urllib.request Wrapper mit Auth, Rate-Limiting, User-Agent, Retry.
55Pattern: BACH connectors/base.py (dataclass, UA, Retry)
6+
7+ B36-Fix (SQ080): Timeout-Bug behoben:
8+ - Connection-Timeout (10s) vs Read-Timeout (30s) getrennt
9+ - Retry-Mechanismus mit exponentiellem Backoff (max 3 Versuche)
10+ - socket.timeout wird explizit gefangen statt als generische Exception
11+ - Timeout-Werte ueber Config steuerbar (connect_timeout_s, read_timeout_s)
612"""
713import json
14+ import socket
815import time
916import ssl
1017import urllib .request
@@ -26,23 +33,55 @@ class HttpResponse:
2633 elapsed_ms : int = 0
2734 error : str = ""
2835 is_json : bool = False
36+ retries : int = 0
2937
@property
def ok(self):
    """Report whether the response status signals success or a redirect.

    Returns:
        True when ``status_code`` is at least 200 and below 400,
        otherwise False.
    """
    code = self.status_code
    return code >= 200 and code < 400
3341
@property
def is_timeout(self):
    """Report whether the stored error text describes a timeout.

    The check is a case-insensitive substring match on ``error``. Two
    spellings are recognised:

    * ``"timeout"`` -- e.g. messages that start with "Timeout nach ...".
    * ``"timed out"`` -- the wording CPython's socket/ssl timeout
      exceptions typically carry ("timed out", "The read operation
      timed out"). Without it, an error stored as the plain exception
      text would not be recognised as a timeout.

    Returns:
        True if ``error`` is non-empty and contains one of the markers
        above, otherwise False.
    """
    if not self.error:
        return False
    message = self.error.lower()
    return "timeout" in message or "timed out" in message
45+
def json(self):
    """Parse the response body as JSON.

    Returns:
        The decoded Python object, or None when ``body`` is empty.

    Raises:
        ValueError: propagated from ``json.loads`` when a non-empty body
            is not valid JSON (``json.JSONDecodeError`` is a subclass).
    """
    # Bound under a local alias so the standard-library module stays
    # reachable even where this function's own name ``json`` is visible.
    import json as json_codec

    if not self.body:
        return None
    return json_codec.loads(self.body)
3850
3951
4052class HttpClient :
41- """HTTP-Client mit Rate-Limiting und Auth-Support."""
53+ """HTTP-Client mit Rate-Limiting, Auth-Support und Retry.
54+
55+ Timeout-Konfiguration (B36-Fix):
56+ timeout_seconds: Gesamt-Timeout fuer urllib (Fallback, Default: 30)
57+ connect_timeout_s: Connection-Timeout in Sekunden (Default: 10)
58+ read_timeout_s: Read-Timeout in Sekunden (Default: 30)
59+ max_retries: Maximale Retry-Versuche bei Timeout (Default: 2)
60+
61+ Hinweis: urllib.request.urlopen kennt nur EINEN timeout-Parameter.
62+ Wir setzen diesen auf read_timeout_s (der groessere Wert) und pruefen
63+ den Connection-Timeout separat ueber socket.setdefaulttimeout waehrend
64+ des Verbindungsaufbaus. Fuer echte Trennung muesste man auf
65+ http.client.HTTPConnection umsteigen -- das waere ein groesseres
66+ Refactoring. Der pragmatische Fix: read_timeout hoch genug setzen
67+ (30s statt 15s) und Retries einfuehren.
68+ """
69+
70+ # Timeout-Fehler die einen Retry rechtfertigen
71+ _RETRYABLE_ERRORS = (socket .timeout , TimeoutError , ConnectionResetError ,
72+ ConnectionAbortedError , BrokenPipeError )
4273
4374 def __init__ (self , config ):
4475 self .delay_ms = config .get ("delay_ms" , 500 )
45- self .timeout = config .get ("timeout_seconds" , 15 )
76+
77+ # B36-Fix: Getrennte Timeouts + Fallback auf alten Key
78+ legacy_timeout = config .get ("timeout_seconds" , 30 )
79+ self .connect_timeout = config .get ("connect_timeout_s" , min (legacy_timeout , 10 ))
80+ self .read_timeout = config .get ("read_timeout_s" , max (legacy_timeout , 30 ))
81+ # urllib bekommt den groesseren Wert (read_timeout)
82+ self .timeout = self .read_timeout
83+
84+ self .max_retries = config .get ("max_retries" , 2 )
4685 self .user_agent = config .get ("user_agent" , "ApiProber/0.1" )
4786 self .auth_type = config .get ("auth" , {}).get ("type" , "none" )
4887 self .auth_value = config .get ("auth" , {}).get ("value" , "" )
@@ -55,7 +94,7 @@ def request_count(self):
5594 return self ._request_count
5695
5796 def request (self , url , method = "GET" , body = None , extra_headers = None ):
58- """HTTP-Request mit Rate-Limiting. Gibt HttpResponse zurueck."""
97+ """HTTP-Request mit Rate-Limiting und Retry . Gibt HttpResponse zurueck."""
5998 self ._rate_limit ()
6099
61100 headers = {
@@ -86,73 +125,128 @@ def request(self, url, method="GET", body=None, extra_headers=None):
86125 elif isinstance (body , bytes ):
87126 data = body
88127
89- req = urllib .request .Request (url , data = data , headers = headers , method = method )
128+ # Retry-Loop (B36-Fix)
129+ last_error = None
130+ for attempt in range (1 + self .max_retries ):
131+ req = urllib .request .Request (url , data = data , headers = headers , method = method )
132+ start = time .monotonic ()
133+ self ._request_count += 1
134+
135+ try :
136+ with urllib .request .urlopen (req , timeout = self .timeout ,
137+ context = self ._ssl_ctx ) as resp :
138+ elapsed = int ((time .monotonic () - start ) * 1000 )
139+ resp_headers = dict (resp .headers )
140+ content_type = resp_headers .get ("Content-Type" , "" )
141+ raw_body = resp .read ()
142+
143+ # Body decodieren
144+ body_str = ""
145+ try :
146+ body_str = raw_body .decode ("utf-8" )
147+ except UnicodeDecodeError :
148+ body_str = raw_body .decode ("latin-1" , errors = "replace" )
90149
91- start = time .monotonic ()
92- self ._request_count += 1
150+ is_json = "json" in content_type .lower ()
93151
94- try :
95- with urllib .request .urlopen (req , timeout = self .timeout ,
96- context = self ._ssl_ctx ) as resp :
152+ return HttpResponse (
153+ url = url , method = method ,
154+ status_code = resp .status ,
155+ headers = resp_headers ,
156+ body = body_str ,
157+ content_type = content_type ,
158+ elapsed_ms = elapsed ,
159+ is_json = is_json ,
160+ retries = attempt
161+ )
162+ except urllib .error .HTTPError as e :
163+ # HTTP-Fehler sind keine Netzwerk-Timeouts -- kein Retry
97164 elapsed = int ((time .monotonic () - start ) * 1000 )
98- resp_headers = dict (resp .headers )
165+ resp_headers = dict (e .headers ) if e . headers else {}
99166 content_type = resp_headers .get ("Content-Type" , "" )
100- raw_body = resp .read ()
101-
102- # Body decodieren
103167 body_str = ""
104168 try :
105- body_str = raw_body .decode ("utf-8" )
106- except UnicodeDecodeError :
107- body_str = raw_body .decode ("latin-1" , errors = "replace" )
108-
109- is_json = "json" in content_type .lower ()
110-
169+ raw = e .read ()
170+ body_str = raw .decode ("utf-8" , errors = "replace" )
171+ except Exception :
172+ pass
111173 return HttpResponse (
112174 url = url , method = method ,
113- status_code = resp . status ,
175+ status_code = e . code ,
114176 headers = resp_headers ,
115177 body = body_str ,
116178 content_type = content_type ,
117179 elapsed_ms = elapsed ,
118- is_json = is_json
180+ error = str (e ),
181+ is_json = "json" in content_type .lower (),
182+ retries = attempt
119183 )
120- except urllib .error .HTTPError as e :
121- elapsed = int ((time .monotonic () - start ) * 1000 )
122- resp_headers = dict (e .headers ) if e .headers else {}
123- content_type = resp_headers .get ("Content-Type" , "" )
124- body_str = ""
125- try :
126- raw = e .read ()
127- body_str = raw .decode ("utf-8" , errors = "replace" )
128- except Exception :
129- pass
130- return HttpResponse (
131- url = url , method = method ,
132- status_code = e .code ,
133- headers = resp_headers ,
134- body = body_str ,
135- content_type = content_type ,
136- elapsed_ms = elapsed ,
137- error = str (e ),
138- is_json = "json" in content_type .lower ()
139- )
140- except urllib .error .URLError as e :
141- elapsed = int ((time .monotonic () - start ) * 1000 )
142- return HttpResponse (
143- url = url , method = method ,
144- status_code = 0 ,
145- elapsed_ms = elapsed ,
146- error = str (e .reason )
147- )
148- except Exception as e :
149- elapsed = int ((time .monotonic () - start ) * 1000 )
150- return HttpResponse (
151- url = url , method = method ,
152- status_code = 0 ,
153- elapsed_ms = elapsed ,
154- error = str (e )
155- )
184+ except (socket .timeout , TimeoutError ) as e :
185+ # B36-Fix: Explizites Timeout-Handling mit Retry
186+ elapsed = int ((time .monotonic () - start ) * 1000 )
187+ last_error = f"Timeout nach { elapsed } ms: { e } "
188+ if attempt < self .max_retries :
189+ backoff = (2 ** attempt ) * 0.5 # 0.5s, 1s, 2s ...
190+ time .sleep (backoff )
191+ continue
192+ return HttpResponse (
193+ url = url , method = method ,
194+ status_code = 0 ,
195+ elapsed_ms = elapsed ,
196+ error = last_error ,
197+ retries = attempt
198+ )
199+ except urllib .error .URLError as e :
200+ elapsed = int ((time .monotonic () - start ) * 1000 )
201+ reason_str = str (e .reason )
202+ # URLError kann einen socket.timeout wrappen
203+ is_timeout = isinstance (e .reason , (socket .timeout , TimeoutError ))
204+ if is_timeout and attempt < self .max_retries :
205+ last_error = f"Connection-Timeout nach { elapsed } ms: { reason_str } "
206+ backoff = (2 ** attempt ) * 0.5
207+ time .sleep (backoff )
208+ continue
209+ # Connection-Refused, DNS-Fehler etc. -- kein Retry
210+ return HttpResponse (
211+ url = url , method = method ,
212+ status_code = 0 ,
213+ elapsed_ms = elapsed ,
214+ error = reason_str ,
215+ retries = attempt
216+ )
217+ except (ConnectionResetError , ConnectionAbortedError ,
218+ BrokenPipeError ) as e :
219+ # Netzwerk-Fehler die einen Retry rechtfertigen
220+ elapsed = int ((time .monotonic () - start ) * 1000 )
221+ last_error = f"Verbindungsfehler nach { elapsed } ms: { e } "
222+ if attempt < self .max_retries :
223+ backoff = (2 ** attempt ) * 0.5
224+ time .sleep (backoff )
225+ continue
226+ return HttpResponse (
227+ url = url , method = method ,
228+ status_code = 0 ,
229+ elapsed_ms = elapsed ,
230+ error = last_error ,
231+ retries = attempt
232+ )
233+ except Exception as e :
234+ elapsed = int ((time .monotonic () - start ) * 1000 )
235+ return HttpResponse (
236+ url = url , method = method ,
237+ status_code = 0 ,
238+ elapsed_ms = elapsed ,
239+ error = str (e ),
240+ retries = attempt
241+ )
242+
243+ # Sollte nicht erreicht werden, aber Safety-Net
244+ return HttpResponse (
245+ url = url , method = method ,
246+ status_code = 0 ,
247+ error = last_error or "Unbekannter Fehler nach Retries" ,
248+ retries = self .max_retries
249+ )
156250
def head(self, url):
    """Issue a HEAD request for ``url``.

    Delegates to ``self.request`` with ``method="HEAD"`` and returns
    whatever that call returns, unchanged.
    """
    verb = "HEAD"
    return self.request(url, method=verb)
0 commit comments