Skip to content

Commit c293074

Browse files
committed
feat(sqs): /sqs_health capability JSON (Phase 3.D PR 4-B-1)
Adds the JSON capability-advertisement shape to /sqs_health that the Phase 3.D §8.5 mixed-version gate requires before the data plane can safely accept partitioned FIFO queues.

What changes

- /sqs_health now returns JSON when Accept: application/json is set: {"status":"ok","capabilities":[...]}. The capabilities slice is built deterministically off htfifoCapabilityAdvertised, a package-level constant that stays false in this PR — the data plane and the §8 leadership-refusal hook are not yet wired, so advertising "htfifo" would let peers build partitioned queues this binary cannot safely serve.
- The legacy "ok\n" text body is preserved byte-identical for any caller that does not signal application/json (no Accept header, bare "*/*" wildcard, text/plain). Existing k8s liveness probes and curl integrations stay unchanged.

What does NOT change yet

This is the first slice of the PR 4-B work. Two follow-ups complete the design's "advertise htfifo only when routing AND leadership-refusal are both in place" rule:

- PR 4-B-2: routing resolver in kv/shard_router.go that consumes --sqsFifoPartitionMap to dispatch (queue, partition) keys to the operator-chosen Raft group.
- PR 4-B-3: §8 leadership-refusal hook that calls TransferLeadership when a non-htfifo binary holds an SQS Raft group hosting a partitioned queue + the catalog-polling helper for the CreateQueue capability gate (PR 5 lifts the dormancy and starts using it).

PR 4-B-3 also flips htfifoCapabilityAdvertised to true.

Self-review (per CLAUDE.md)

1. Data loss — health endpoint only; no FSM/Pebble/retention path. No issue.
2. Concurrency — single-shot read off a const; no shared state. No issue.
3. Performance — JSON marshal of a 2-field struct on a low-QPS endpoint. No issue.
4. Data consistency — the gate constant is the single source of truth for the capability list; sqsAdvertisedCapabilities() is the only producer. Tests pin both true/false branches. No issue.
5. Test coverage — TestServeSQSHealthz_LegacyTextPath (3 sub-tests), TestServeSQSHealthz_JSONShape (3 sub-tests), TestServeSQSHealthz_HEAD_JSONOmitsBody, TestServeSQSHealthz_RejectsNonGETHEAD (2 sub-tests), TestSQSAdvertisedCapabilities_TracksFlag (true/false guard), TestClientAcceptsSQSHealthJSON_Boundaries (9 sub-tests).
1 parent d4d7ad8 commit c293074

2 files changed

Lines changed: 270 additions & 0 deletions

File tree

adapter/sqs.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"net"
77
"net/http"
88
"strconv"
9+
"strings"
910
"time"
1011

1112
"github.com/bootjp/elastickv/kv"
@@ -43,6 +44,45 @@ const (
4344
sqsLeaderHealthPath = "/sqs_leader_health"
4445
)
4546

47+
// sqsCapabilityHTFIFO is the capability string a binary advertises on
48+
// /sqs_health (when Accept: application/json) once it has the runtime
49+
// pieces required to safely host a partitioned FIFO queue: the routing
50+
// layer is wired through --sqsFifoPartitionMap (see main.go), and the
51+
// leadership-refusal hook in kv refuses leadership for an SQS Raft
52+
// group that hosts a partitioned queue when the binary itself does
53+
// not advertise this string.
54+
//
55+
// CreateQueue's catalog-polling gate (Phase 3.D PR 5 lifts the
56+
// dormancy and starts checking) reads this list off /sqs_health on
57+
// every peer; a CreateQueue with PartitionCount > 1 is rejected
58+
// unless every peer reports "htfifo" — fail-closed against rolling
59+
// upgrades that have not yet finished.
60+
const sqsCapabilityHTFIFO = "htfifo"
61+
62+
// htfifoCapabilityAdvertised gates whether this binary lists
63+
// "htfifo" on /sqs_health. The flag is set to true only when the
64+
// binary contains BOTH the routing-layer wiring AND the
65+
// leadership-refusal safeguard from §8 — the design's "marked
66+
// htfifo-eligible" bar (§11 PR 4). Lower-numbered PRs in the rollout
67+
// keep this false so a partial deploy never advertises a capability
68+
// it cannot safely back up. Phase 3.D PR 4-B-3 flips this to true in
69+
// the same commit that wires routing + leadership-refusal together.
70+
const htfifoCapabilityAdvertised = false
71+
72+
// sqsAdvertisedCapabilities returns the capability list emitted on
73+
// /sqs_health (JSON mode). Stable iteration order is significant —
74+
// catalog peers may diff capability lists across nodes when checking
75+
// rollout uniformity, so the list is built deterministically. The
76+
// returned slice is freshly allocated per call so the caller may
77+
// mutate it without aliasing the package-level state.
78+
func sqsAdvertisedCapabilities() []string {
79+
caps := make([]string, 0, 1)
80+
if htfifoCapabilityAdvertised {
81+
caps = append(caps, sqsCapabilityHTFIFO)
82+
}
83+
return caps
84+
}
85+
4686
const (
4787
sqsHealthMaxRequestBodyBytes = 1024
4888
sqsMaxRequestBodyBytes = 1 << 20
@@ -251,9 +291,70 @@ func serveSQSHealthz(w http.ResponseWriter, r *http.Request) {
251291
if !writeSQSHealthMethod(w, r) {
252292
return
253293
}
294+
if clientAcceptsSQSHealthJSON(r) {
295+
writeSQSHealthJSONBody(w, r, http.StatusOK, sqsHealthBody{
296+
Status: "ok",
297+
Capabilities: sqsAdvertisedCapabilities(),
298+
})
299+
return
300+
}
254301
writeSQSHealthBody(w, r, http.StatusOK, "ok\n")
255302
}
256303

304+
// sqsHealthBody describes the JSON document served by /sqs_health to
// callers that send Accept: application/json. The shape is meant to
// stay stable across binary versions because catalog peers diff this
// body during the CreateQueue gate (Phase 3.D PR 5).
type sqsHealthBody struct {
	// Status is serialized under the "status" key.
	Status string `json:"status"`

	// Capabilities is serialized under the "capabilities" key. A nil
	// slice encodes as null, so producers that want [] must pass a
	// non-nil empty slice.
	Capabilities []string `json:"capabilities"`
}
312+
313+
// clientAcceptsSQSHealthJSON reports whether the caller signalled
// JSON in the Accept header. A nil request, a missing Accept header,
// an empty value, and a bare "*/*" wildcard all report false so the
// existing health-check integrations (curl, k8s liveness probes)
// keep receiving the legacy "ok\n" body byte-identical.
//
// The match is a case-insensitive substring check for
// "application/json" across every Accept header line. Media type
// names are case-insensitive (RFC 9110 §8.3.1), so a client sending
// "Application/JSON" opts in exactly like the lowercase spelling.
//
// q-factor and parameter parsing are deliberately skipped — overkill
// for a health endpoint — so "application/json;q=0" still selects
// JSON. False matches against media types like "application/jsonseq"
// are accepted: a client that explicitly types out a JSON-adjacent
// media type is opting in to the JSON shape on purpose.
func clientAcceptsSQSHealthJSON(r *http.Request) bool {
	if r == nil {
		return false
	}
	for _, raw := range r.Header.Values("Accept") {
		// "" and "*/*" cannot contain the token, so they fall through
		// to the legacy path without a dedicated check.
		if strings.Contains(strings.ToLower(raw), "application/json") {
			return true
		}
	}
	return false
}
339+
340+
func writeSQSHealthJSONBody(w http.ResponseWriter, r *http.Request, statusCode int, body sqsHealthBody) {
341+
encoded, err := json.Marshal(body)
342+
if err != nil {
343+
// json.Marshal of a fixed shape with a string + []string
344+
// cannot realistically fail; fall back to the legacy text
345+
// path so a misconfigured client still gets a 200.
346+
writeSQSHealthBody(w, r, statusCode, "ok\n")
347+
return
348+
}
349+
w.Header().Set("Content-Type", "application/json; charset=utf-8")
350+
w.WriteHeader(statusCode)
351+
if r.Method == http.MethodHead {
352+
return
353+
}
354+
_, _ = w.Write(encoded)
355+
_, _ = io.WriteString(w, "\n")
356+
}
357+
257358
func (s *SQSServer) serveSQSLeaderHealthz(w http.ResponseWriter, r *http.Request) {
258359
if !writeSQSHealthMethod(w, r) {
259360
return

adapter/sqs_health_test.go

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
package adapter
2+
3+
import (
4+
"net/http"
5+
"net/http/httptest"
6+
"strings"
7+
"testing"
8+
9+
json "github.com/goccy/go-json"
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
// TestServeSQSHealthz_LegacyTextPath pins the byte-identical legacy
14+
// behaviour: a GET / HEAD with no Accept header (or a bare "*/*"
15+
// wildcard) returns "ok\n" with text/plain content-type. The
16+
// existing k8s liveness probe and curl-style integrations rely on
17+
// this body, so the JSON capability extension MUST NOT alter it.
18+
func TestServeSQSHealthz_LegacyTextPath(t *testing.T) {
19+
t.Parallel()
20+
cases := []struct {
21+
name string
22+
accept string
23+
}{
24+
{"no Accept header", ""},
25+
{"wildcard Accept", "*/*"},
26+
{"text/plain Accept", "text/plain"},
27+
}
28+
for _, tc := range cases {
29+
t.Run(tc.name, func(t *testing.T) {
30+
t.Parallel()
31+
req := httptest.NewRequest(http.MethodGet, sqsHealthPath, nil)
32+
if tc.accept != "" {
33+
req.Header.Set("Accept", tc.accept)
34+
}
35+
rec := httptest.NewRecorder()
36+
serveSQSHealthz(rec, req)
37+
require.Equal(t, http.StatusOK, rec.Code)
38+
require.Equal(t, "text/plain; charset=utf-8", rec.Header().Get("Content-Type"))
39+
require.Equal(t, "ok\n", rec.Body.String())
40+
})
41+
}
42+
}
43+
44+
// TestServeSQSHealthz_JSONShape pins the JSON capability response
45+
// shape returned when the caller signals Accept: application/json.
46+
// The CreateQueue capability gate (Phase 3.D PR 5) decodes this
47+
// body on every peer, so a regression that drops the field or
48+
// changes the field name would silently break the rolling-upgrade
49+
// guard.
50+
func TestServeSQSHealthz_JSONShape(t *testing.T) {
51+
t.Parallel()
52+
cases := []struct {
53+
name string
54+
accept string
55+
}{
56+
{"plain JSON", "application/json"},
57+
{"JSON with q-factor", "application/json;q=1.0"},
58+
{"comma-separated list with JSON", "text/plain, application/json;q=0.9"},
59+
}
60+
for _, tc := range cases {
61+
t.Run(tc.name, func(t *testing.T) {
62+
t.Parallel()
63+
req := httptest.NewRequest(http.MethodGet, sqsHealthPath, nil)
64+
req.Header.Set("Accept", tc.accept)
65+
rec := httptest.NewRecorder()
66+
serveSQSHealthz(rec, req)
67+
require.Equal(t, http.StatusOK, rec.Code)
68+
require.Equal(t, "application/json; charset=utf-8", rec.Header().Get("Content-Type"))
69+
70+
var got sqsHealthBody
71+
require.NoError(t, json.Unmarshal([]byte(strings.TrimSpace(rec.Body.String())), &got))
72+
require.Equal(t, "ok", got.Status)
73+
require.Equal(t, sqsAdvertisedCapabilities(), got.Capabilities)
74+
})
75+
}
76+
}
77+
78+
// TestServeSQSHealthz_HEAD_JSONOmitsBody pins that a HEAD request on
79+
// the JSON path emits the JSON content-type but no body bytes —
80+
// matching the existing legacy-path HEAD behaviour. Liveness probes
81+
// often use HEAD to avoid log spam; the JSON path MUST behave the
82+
// same way.
83+
func TestServeSQSHealthz_HEAD_JSONOmitsBody(t *testing.T) {
84+
t.Parallel()
85+
req := httptest.NewRequest(http.MethodHead, sqsHealthPath, nil)
86+
req.Header.Set("Accept", "application/json")
87+
rec := httptest.NewRecorder()
88+
serveSQSHealthz(rec, req)
89+
require.Equal(t, http.StatusOK, rec.Code)
90+
require.Equal(t, "application/json; charset=utf-8", rec.Header().Get("Content-Type"))
91+
require.Empty(t, rec.Body.String())
92+
}
93+
94+
// TestServeSQSHealthz_RejectsNonGETHEAD pins that POST / PUT / DELETE
95+
// are still rejected with 405 in both the legacy and JSON modes —
96+
// the JSON extension MUST NOT widen the method surface of the
97+
// endpoint.
98+
func TestServeSQSHealthz_RejectsNonGETHEAD(t *testing.T) {
99+
t.Parallel()
100+
for _, accept := range []string{"", "application/json"} {
101+
t.Run("Accept="+accept, func(t *testing.T) {
102+
t.Parallel()
103+
req := httptest.NewRequest(http.MethodPost, sqsHealthPath, nil)
104+
if accept != "" {
105+
req.Header.Set("Accept", accept)
106+
}
107+
rec := httptest.NewRecorder()
108+
serveSQSHealthz(rec, req)
109+
require.Equal(t, http.StatusMethodNotAllowed, rec.Code)
110+
require.Equal(t, "GET, HEAD", rec.Header().Get("Allow"))
111+
})
112+
}
113+
}
114+
115+
// TestSQSAdvertisedCapabilities_TracksFlag pins the relationship
116+
// between htfifoCapabilityAdvertised and the emitted list. If a
117+
// future change flips the constant, the JSON response must reflect
118+
// it without further wiring — the constant is the single source of
119+
// truth.
120+
func TestSQSAdvertisedCapabilities_TracksFlag(t *testing.T) {
121+
t.Parallel()
122+
caps := sqsAdvertisedCapabilities()
123+
if htfifoCapabilityAdvertised {
124+
require.Contains(t, caps, sqsCapabilityHTFIFO,
125+
"htfifo must be in the list when the flag is true")
126+
} else {
127+
require.NotContains(t, caps, sqsCapabilityHTFIFO,
128+
"htfifo must NOT be in the list when the flag is false; "+
129+
"a partial deploy that advertised the capability without "+
130+
"the routing + leadership-refusal pair would create new "+
131+
"partitioned queues that this binary cannot safely host")
132+
}
133+
}
134+
135+
// TestClientAcceptsSQSHealthJSON_Boundaries pins the substring
136+
// matcher's edge cases — these are the inputs the catalog-polling
137+
// caller in Phase 3.D PR 5 will produce, and a regression that
138+
// silently flips one of these to a wrong answer would either return
139+
// the legacy body to a JSON peer (PR 5 decode error) or the JSON
140+
// body to a curl client (UI noise).
141+
func TestClientAcceptsSQSHealthJSON_Boundaries(t *testing.T) {
142+
t.Parallel()
143+
cases := []struct {
144+
name string
145+
accept []string
146+
want bool
147+
}{
148+
{"nil request", nil, false},
149+
{"empty header value", []string{""}, false},
150+
{"bare wildcard", []string{"*/*"}, false},
151+
{"text/plain only", []string{"text/plain"}, false},
152+
{"plain JSON", []string{"application/json"}, true},
153+
{"JSON with parameters", []string{"application/json; charset=utf-8"}, true},
154+
{"multi-value with JSON last", []string{"text/plain", "application/json"}, true},
155+
{"multi-value with JSON first", []string{"application/json", "text/plain"}, true},
156+
{"comma-list with JSON", []string{"text/plain, application/json;q=0.9"}, true},
157+
}
158+
for _, tc := range cases {
159+
t.Run(tc.name, func(t *testing.T) {
160+
t.Parallel()
161+
var req *http.Request
162+
if tc.accept != nil {
163+
req = httptest.NewRequest(http.MethodGet, sqsHealthPath, nil)
164+
req.Header["Accept"] = tc.accept
165+
}
166+
require.Equal(t, tc.want, clientAcceptsSQSHealthJSON(req))
167+
})
168+
}
169+
}

0 commit comments

Comments
 (0)