@@ -338,10 +338,10 @@ def pending_members_from_graphql
338338 def graphql_http_post ( query )
339339 1 . upto ( MAX_GRAPHQL_RETRIES ) do |try_number |
340340 result = graphql_http_post_real ( query )
341- if result [ :code ] < 500
341+ if ! graphql_result_retryable? ( result )
342342 return result
343343 elsif try_number >= MAX_GRAPHQL_RETRIES
344- Entitlements . logger . error "Query still failing after #{ MAX_GRAPHQL_RETRIES } tries. Giving up."
344+ Entitlements . logger . error "Query still failing after #{ MAX_GRAPHQL_RETRIES } tries (last code: #{ result [ :code ] } ) . Giving up."
345345 return result
346346 else
347347 Entitlements . logger . warn "GraphQL failed on try #{ try_number } of #{ MAX_GRAPHQL_RETRIES } . Will retry."
@@ -350,6 +350,18 @@ def graphql_http_post(query)
350350 end
351351 end
352352
# Helper: classify a result hash produced by `graphql_http_post_real` as retryable
# or not. The retry wrapper `graphql_http_post` hands the result straight back to
# its caller when this predicate is false, and otherwise retries until
# MAX_GRAPHQL_RETRIES attempts have been made.
#
# NOTE: `graphql_http_post_real` picks its own log level from the raw HTTP status
# string; it does not call this helper.
#
# result - Hash with an Integer :code and a :data entry that is either nil or a Hash.
#
# Returns true when the code is 500 or greater: a genuine HTTP 5xx, or the synthetic
# 500 that `graphql_http_post_real` reports for GraphQL `errors`, JSON parse failures
# and rescued exceptions. Returns false otherwise.
Contract({ code: Integer, data: C::Or[nil, Hash] }) => C::Bool
def graphql_result_retryable?(result)
  status = result[:code]
  status >= 500
end
364+
353365 # Helper method: Do the HTTP POST to the GitHub API for GraphQL.
354366 #
355367 # query - String with the data to be posted.
@@ -370,23 +382,35 @@ def graphql_http_post_real(query)
370382 response = http . request ( request )
371383
372384 if response . code != "200"
373- Entitlements . logger . error "Got HTTP #{ response . code } POSTing to #{ uri } "
374- Entitlements . logger . error response . body
385+ # The retry wrapper retries on 5xx, so log those at WARN to avoid misleading
386+ # the operator with an ERROR for a transient failure that we recover from.
387+ # Terminal non-2xx responses (4xx) stay at ERROR.
388+ msg = "POST to #{ uri } returned HTTP Code #{ response . code } and Body: #{ response . body } "
389+ response . code . start_with? ( "5" ) ? Entitlements . logger . warn ( msg ) : Entitlements . logger . error ( msg )
375390 return { code : response . code . to_i , data : { "body" => response . body } }
376391 end
377392
378393 begin
379394 data = JSON . parse ( response . body )
380395 if data . key? ( "errors" )
381- Entitlements . logger . error "Errors reported: #{ data [ 'errors' ] . inspect } "
396+ # Synthesized 500 below triggers a retry, so log at WARN. Note: some GraphQL
397+ # `errors` are permanent (bad query, auth, schema). The retry wrapper's final
398+ # "Giving up" ERROR will surface persistent cases.
399+ Entitlements . logger . warn "Errors reported: #{ data [ 'errors' ] . inspect } "
382400 return { code : 500 , data : }
383401 end
384402 { code : response . code . to_i , data : }
385403 rescue JSON ::ParserError => e
404+ # JSON parse errors mean the API returned something we can't interpret. The
405+ # synthesized 500 below triggers a retry, but the cause is more likely a real
406+ # protocol/server problem than a transient network blip, so log at ERROR.
386407 Entitlements . logger . error "#{ e . class } #{ e . message } : #{ response . body . inspect } "
387408 { code : 500 , data : { "body" => response . body } }
388409 end
389410 rescue => e
411+ # Catch-all for any unexpected exception (network blip OR local code bug).
412+ # We retry below via the synthesized 500, but log at ERROR because this
413+ # branch can mask programming errors that operators must see.
390414 Entitlements . logger . error "Caught #{ e . class } POSTing to #{ uri } : #{ e . message } "
391415 { code : 500 , data : nil }
392416 end
0 commit comments