Skip to content

Commit 41964f1

Browse files
authored
Merge pull request #259 from github/warn-on-500s
We should only WARN on http codes we intend to retry on
2 parents e714aa0 + 2c073b2 commit 41964f1

8 files changed

Lines changed: 64 additions & 25 deletions

File tree

Gemfile.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
PATH
22
remote: .
33
specs:
4-
entitlements-github-plugin (1.2.0)
4+
entitlements-github-plugin (1.2.1)
55
contracts (~> 0.17.0)
66
faraday (~> 2.0)
77
faraday-retry (~> 2.0)

lib/entitlements/service/github.rb

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -338,10 +338,10 @@ def pending_members_from_graphql
338338
def graphql_http_post(query)
339339
1.upto(MAX_GRAPHQL_RETRIES) do |try_number|
340340
result = graphql_http_post_real(query)
341-
if result[:code] < 500
341+
if !graphql_result_retryable?(result)
342342
return result
343343
elsif try_number >= MAX_GRAPHQL_RETRIES
344-
Entitlements.logger.error "Query still failing after #{MAX_GRAPHQL_RETRIES} tries. Giving up."
344+
Entitlements.logger.error "Query still failing after #{MAX_GRAPHQL_RETRIES} tries (last code: #{result[:code]}). Giving up."
345345
return result
346346
else
347347
Entitlements.logger.warn "GraphQL failed on try #{try_number} of #{MAX_GRAPHQL_RETRIES}. Will retry."
@@ -350,6 +350,18 @@ def graphql_http_post(query)
350350
end
351351
end
352352

353+
# Helper: determine whether a result hash from `graphql_http_post_real` represents
354+
# a transient failure that the retry wrapper will retry. Used by the wrapper itself
355+
# and to decide log severity inside `graphql_http_post_real`.
356+
#
357+
# result - Hash returned by `graphql_http_post_real`.
358+
#
359+
# Returns true if the result is retryable (HTTP 5xx or synthetic 5xx), false otherwise.
360+
Contract ({ code: Integer, data: C::Or[nil, Hash] }) => C::Bool
361+
def graphql_result_retryable?(result)
362+
result[:code] >= 500
363+
end
364+
353365
# Helper method: Do the HTTP POST to the GitHub API for GraphQL.
354366
#
355367
# query - String with the data to be posted.
@@ -370,23 +382,35 @@ def graphql_http_post_real(query)
370382
response = http.request(request)
371383

372384
if response.code != "200"
373-
Entitlements.logger.error "Got HTTP #{response.code} POSTing to #{uri}"
374-
Entitlements.logger.error response.body
385+
# The retry wrapper retries on 5xx, so log those at WARN to avoid misleading
386+
# the operator with an ERROR for a transient failure that we recover from.
387+
# Terminal non-2xx responses (4xx) stay at ERROR.
388+
msg = "POST to #{uri} returned HTTP Code #{response.code} and Body: #{response.body}"
389+
response.code.start_with?("5") ? Entitlements.logger.warn(msg) : Entitlements.logger.error(msg)
375390
return { code: response.code.to_i, data: { "body" => response.body } }
376391
end
377392

378393
begin
379394
data = JSON.parse(response.body)
380395
if data.key?("errors")
381-
Entitlements.logger.error "Errors reported: #{data['errors'].inspect}"
396+
# Synthesized 500 below triggers a retry, so log at WARN. Note: some GraphQL
397+
# `errors` are permanent (bad query, auth, schema). The retry wrapper's final
398+
# "Giving up" ERROR will surface persistent cases.
399+
Entitlements.logger.warn "Errors reported: #{data['errors'].inspect}"
382400
return { code: 500, data: }
383401
end
384402
{ code: response.code.to_i, data: }
385403
rescue JSON::ParserError => e
404+
# JSON parse errors mean the API returned something we can't interpret. The
405+
# synthesized 500 below triggers a retry, but the cause is more likely a real
406+
# protocol/server problem than a transient network blip, so log at ERROR.
386407
Entitlements.logger.error "#{e.class} #{e.message}: #{response.body.inspect}"
387408
{ code: 500, data: { "body" => response.body } }
388409
end
389410
rescue => e
411+
# Catch-all for any unexpected exception (network blip OR local code bug).
412+
# We retry below via the synthesized 500, but log at ERROR because this
413+
# branch can mask programming errors that operators must see.
390414
Entitlements.logger.error "Caught #{e.class} POSTing to #{uri}: #{e.message}"
391415
{ code: 500, data: nil }
392416
end

lib/version.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
module Entitlements
44
module Version
5-
VERSION = "1.2.0"
5+
VERSION = "1.2.1"
66
end
77
end

spec/acceptance/docker-compose.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
version: '2'
2-
31
networks:
42
ldap-network:
53
internal: true
@@ -31,7 +29,7 @@ services:
3129

3230
ldap-server:
3331
entrypoint: /acceptance/ldap-server/run-server.sh
34-
image: osixia/openldap:1.2.2
32+
image: osixia/openldap:1.5.0
3533
networks:
3634
ldap-network:
3735
aliases:

spec/acceptance/ldap-server/env/default.startup.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ LDAP_READONLY_USER_PASSWORD: readonly
2121
LDAP_RFC2307BIS_SCHEMA: false
2222

2323
# Backend
24-
LDAP_BACKEND: hdb
24+
LDAP_BACKEND: mdb
2525

2626
# Tls
2727
LDAP_TLS: true
2828
LDAP_TLS_CRT_FILENAME: ldap.crt
2929
LDAP_TLS_KEY_FILENAME: ldap.key
3030
LDAP_TLS_CA_CRT_FILENAME: ca.crt
31+
LDAP_TLS_DH_PARAM_FILENAME: dhparam.pem
3132

3233
LDAP_TLS_ENFORCE: true
3334
LDAP_TLS_CIPHER_SUITE: SECURE256:+SECURE128:-VERS-TLS-ALL:+VERS-TLS1.2:-RSA:-DHE-DSS:-CAMELLIA-128-CBC:-CAMELLIA-256-CBC

spec/acceptance/ldap-server/ldif/data/99-bind-account/emmy.ldif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ objectClass: simpleSecurityObject
88
uid: emmy
99
userpassword: kittens
1010

11-
dn: olcDatabase={1}hdb,cn=config
11+
dn: olcDatabase={1}mdb,cn=config
1212
changetype: modify
1313
delete: olcAccess
1414
-

spec/acceptance/ldap-server/run-server.sh

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,28 @@ cp /acceptance/ca/intermediate/certs/ca-chain.cert.pem /container/service/slapd/
3737
cp /acceptance/ldap-server/tls/dhparam.pem /container/service/slapd/assets/certs/dhparam.pem
3838
chown -R root:root /container/service/slapd/assets/certs
3939

40-
# Pre-install our configuration environment
41-
rm -f /container/environment/99-default/*.yaml
42-
cp /acceptance/ldap-server/env/*.yaml /container/environment/99-default
40+
# Pre-install our configuration environment.
41+
# Drop our overrides into a lexically earlier directory than the image's stock
42+
# /container/environment/99-default. osixia's run tool walks /container/environment
43+
# in sorted order and first-set-wins (see /container/tool/run), so values declared
44+
# here take precedence while everything we *don't* override (LDAP_PORT, LDAPS_PORT,
45+
# LDAP_NOFILE, DISABLE_CHOWN, etc.) is inherited from the image defaults.
46+
mkdir -p /container/environment/01-custom
47+
cp /acceptance/ldap-server/env/*.yaml /container/environment/01-custom/
4348

4449
# Pre-install our schema (after killing most of the defaults from the container)
4550
rm -f /container/service/slapd/assets/config/bootstrap/ldif/0[345]*.ldif
4651
rm -rf /container/service/slapd/assets/config/bootstrap/schema/mmc
47-
rm -f /etc/ldap/schema/*
52+
# Only remove stock *.ldif schemas; keep *.schema files because osixia/openldap:1.5.0's
53+
# bootstrap (slaptest) converts *.schema -> cn=config and needs core.schema to exist.
54+
rm -f /etc/ldap/schema/*.ldif
4855
cp /acceptance/ldap-server/schema/* /etc/ldap/schema/
4956
cp /acceptance/ldap-server/ldif/bootstrap/*.ldif /container/service/slapd/assets/config/bootstrap/ldif
5057

5158
# Launch openldap
52-
nohup /usr/bin/python -u /container/tool/run -l info &
59+
# /container/tool/run has its own `#!/usr/bin/python3 -u` shebang in osixia/openldap:1.5.0,
60+
# which no longer ships /usr/bin/python. Invoke it directly so we don't depend on Py2 paths.
61+
nohup /container/tool/run -l info &
5362
OPENLDAP_PID=$!
5463

5564
# Wait for the process to be running and connectable

spec/unit/entitlements/service/github_spec.rb

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@
352352
expect_any_instance_of(Object).to receive(:sleep).with(2).exactly(1).times
353353
expect(logger).to receive(:warn).with("GraphQL failed on try 1 of 3. Will retry.")
354354
expect(logger).to receive(:warn).with("GraphQL failed on try 2 of 3. Will retry.")
355-
expect(logger).to receive(:error).with("Query still failing after 3 tries. Giving up.")
355+
expect(logger).to receive(:error).with("Query still failing after 3 tries (last code: 502). Giving up.")
356356

357357
response = subject.send(:graphql_http_post, query)
358358
expect(response[:code]).to eq(502)
@@ -368,35 +368,42 @@
368368
expect(response).to eq(code: 200, data: answer)
369369
end
370370

371-
it "logs and returns code=500 and exception message for an unhandled exception" do
371+
it "logs at error and returns code=500 and exception message for an unhandled exception" do
372372
exc = StandardError.new("Oh no you don't")
373373
stub_request(:post, "https://github.fake/api/v3/graphql").to_raise(exc)
374374
expect(logger).to receive(:error).with("Caught StandardError POSTing to https://github.fake/api/v3/graphql: Oh no you don't")
375375
response = subject.send(:graphql_http_post_real, "nonsense")
376376
expect(response).to eq(code: 500, data: nil)
377377
end
378378

379-
it "logs and returns code and body for non-200 response" do
379+
it "logs at error and returns code and body for a non-200 4xx response" do
380380
answer = { "errors" => ["message" => "Something busted"] }
381381
stub_request(:post, "https://github.fake/api/v3/graphql").to_return(status: 429, body: JSON.generate(answer))
382-
expect(logger).to receive(:error).with("Got HTTP 429 POSTing to https://github.fake/api/v3/graphql")
383-
expect(logger).to receive(:error).with("{\"errors\":[{\"message\":\"Something busted\"}]}")
382+
expect(logger).to receive(:error).with("POST to https://github.fake/api/v3/graphql returned HTTP Code 429 and Body: #{JSON.generate(answer)}")
384383
response = subject.send(:graphql_http_post_real, "nonsense")
385384
expect(response).to eq(code: 429, data: { "body" => JSON.generate(answer) })
386385
end
387386

388-
it "logs and returns raw text for JSON parsing error" do
387+
it "logs at warn and returns code and body for a 5xx response (retryable)" do
388+
body = '{"message": "We couldn\'t respond to your request in time. Sorry about that."}'
389+
stub_request(:post, "https://github.fake/api/v3/graphql").to_return(status: 504, body:)
390+
expect(logger).to receive(:warn).with("POST to https://github.fake/api/v3/graphql returned HTTP Code 504 and Body: #{body}")
391+
response = subject.send(:graphql_http_post_real, "nonsense")
392+
expect(response).to eq(code: 504, data: { "body" => body })
393+
end
394+
395+
it "logs at error and returns raw text for JSON parsing error" do
389396
answer = "mor chicken mor rewardz!"
390397
stub_request(:post, "https://github.fake/api/v3/graphql").to_return(status: 200, body: answer)
391398
expect(logger).to receive(:error).with("JSON::ParserError unexpected character: 'mor' at line 1 column 1: \"mor chicken mor rewardz!\"")
392399
response = subject.send(:graphql_http_post_real, "nonsense")
393400
expect(response).to eq(code: 500, data: { "body" => "mor chicken mor rewardz!" })
394401
end
395402

396-
it "logs and returns when errors are reported in a 200" do
403+
it "logs at warn and returns when errors are reported in a 200" do
397404
answer = { "errors" => ["ENOTENOUGHCHICKEn"] }
398405
stub_request(:post, "https://github.fake/api/v3/graphql").to_return(status: 200, body: JSON.generate(answer))
399-
expect(logger).to receive(:error).with("Errors reported: [\"ENOTENOUGHCHICKEn\"]")
406+
expect(logger).to receive(:warn).with("Errors reported: [\"ENOTENOUGHCHICKEn\"]")
400407
response = subject.send(:graphql_http_post_real, "nonsense")
401408
expect(response).to eq(code: 500, data: answer)
402409
end

0 commit comments

Comments
 (0)