@@ -17,8 +17,10 @@ limitations under the License.
1717package handlers
1818
1919import (
20+ "bytes"
2021 "context"
2122 "encoding/json"
23+ "errors"
2224 "fmt"
2325 "strconv"
2426 "time"
@@ -45,9 +47,9 @@ func (s *Server) HandleResponseHeaders(ctx context.Context, reqCtx *RequestConte
4547
4648 if ! headers .GetEndOfStream () {
4749 log .FromContext (ctx ).V (logutil .VERBOSE ).Info ("captured response headers, deferring response until body arrives..." )
48- return nil
4950 }
50- // EndOfStream means no body is expected, return HeadersResponse immediately
51+ // Always respond to response headers so Envoy proceeds with body chunks.
52+ // In STREAMED/FULL_DUPLEX_STREAMED mode, Envoy blocks until we respond.
5153 return []* eppb.ProcessingResponse {
5254 {
5355 Response : & eppb.ProcessingResponse_ResponseHeaders {
@@ -76,8 +78,15 @@ func (s *Server) HandleResponseBody(ctx context.Context, reqCtx *RequestContext,
7678 }
7779
7880 if err := json .Unmarshal (responseBodyBytes , & reqCtx .Response .Body ); err != nil {
79- logger .Error (err , "Failed to parse response body as JSON, skipping response plugins" )
80- return s .generateEmptyResponseBodyResponse (responseBodyBytes ), nil
81+ // Try parsing as SSE (Server-Sent Events) — streaming responses from providers
82+ // like Anthropic use SSE format which isn't valid JSON.
83+ if sseBody , sseErr := parseSSEResponseBody (responseBodyBytes ); sseErr == nil && sseBody != nil {
84+ reqCtx .Response .Body = sseBody
85+ logger .V (logutil .VERBOSE ).Info ("parsed SSE response body for response plugins" )
86+ } else {
87+ logger .Error (err , "Failed to parse response body as JSON or SSE, skipping response plugins" )
88+ return s .generateEmptyResponseBodyResponse (responseBodyBytes ), nil
89+ }
8190 }
8291
8392 if err := s .runResponsePlugins (ctx , reqCtx .CycleState , reqCtx .Response , reqCtx .Profile .ResponsePlugins ); err != nil {
@@ -117,18 +126,96 @@ func (s *Server) HandleResponseBody(ctx context.Context, reqCtx *RequestContext,
117126 return ret , nil
118127}
119128
120- // generateEmptyResponseBodyResponse builds a streaming response with an empty
121- // ResponseHeaders followed by chunked body responses via AddStreamedResponseBody.
122- func (s * Server ) generateEmptyResponseBodyResponse (responseBodyBytes []byte ) []* eppb.ProcessingResponse {
123- responses := []* eppb.ProcessingResponse {
129+ // generateEmptyResponseBodyResponse returns an empty BodyResponse ack for the
130+ // final (EndOfStream) response body chunk. In STREAMED mode, Envoy has already
131+ // forwarded all chunks downstream via per-chunk acks, so re-emitting the body
132+ // would cause a content-length / transfer-encoding mismatch on the client.
133+ func (s * Server ) generateEmptyResponseBodyResponse (_ []byte ) []* eppb.ProcessingResponse {
134+ return []* eppb.ProcessingResponse {
124135 {
125- Response : & eppb.ProcessingResponse_ResponseHeaders {
126- ResponseHeaders : & eppb.HeadersResponse {},
136+ Response : & eppb.ProcessingResponse_ResponseBody {
137+ ResponseBody : & eppb.BodyResponse {},
127138 },
128139 },
129140 }
130- responses = envoy .AddStreamedResponseBody (responses , responseBodyBytes )
131- return responses
141+ }
142+
const (
	// sseDataPrefix is the field name (with its colon) that introduces a data line.
	sseDataPrefix = "data:"
	// sseDoneMarker is the non-JSON sentinel payload that is skipped rather than decoded.
	sseDoneMarker = "[DONE]"

	bodyFieldModel    = "model"
	bodyFieldUsage    = "usage"
	bodyFieldResponse = "response"
	bodyFieldMessage  = "message"
)

// sseEnvelopeFields lists the event keys whose object value may carry a nested
// "model" / "usage" when the event has no top-level usage. "response" is the
// envelope the parser has always inspected; "message" is the envelope used by
// Anthropic's message_start event.
var sseEnvelopeFields = []string{bodyFieldResponse, bodyFieldMessage}

// parseSSEResponseBody extracts a composite response body from an SSE
// (Server-Sent Events) stream.
//
// The stream is processed per event rather than per line, because one logical
// event may contain several consecutive `data:` lines that must be joined with
// a newline before JSON decoding. Lines may be terminated by LF, CRLF or a
// bare CR. Lines that do not start with `data:` (comments, `event:`, `id:`,
// ...) are ignored.
//
// The returned map contains at most two keys:
//   - "model": the last non-empty model name seen, and
//   - "usage": a shallow merge of every usage object seen, where later events
//     overwrite earlier values for the same key.
//
// Events whose payload is empty, is the `[DONE]` marker, or is not a JSON
// object are skipped. If no event contributes a model or usage, the function
// returns a nil map and a non-nil error.
func parseSSEResponseBody(body []byte) (map[string]any, error) {
	// Normalize CRLF and bare-CR terminators to LF so a single split handles
	// all three forms. Only pay for the copy when a CR is actually present.
	if bytes.IndexByte(body, '\r') >= 0 {
		body = bytes.ReplaceAll(body, []byte("\r\n"), []byte("\n"))
		body = bytes.ReplaceAll(body, []byte("\r"), []byte("\n"))
	}

	result := map[string]any{}
	var eventDataLines [][]byte

	// flushEvent decodes the data lines buffered for the current event, folds
	// the outcome into result, and resets the buffer for the next event.
	flushEvent := func() {
		if len(eventDataLines) == 0 {
			return
		}

		data := bytes.TrimSpace(bytes.Join(eventDataLines, []byte("\n")))
		eventDataLines = eventDataLines[:0]
		if len(data) == 0 || string(data) == sseDoneMarker {
			return
		}

		var event map[string]any
		if err := json.Unmarshal(data, &event); err != nil {
			// Best effort: one undecodable event must not discard the
			// information gathered from the rest of the stream.
			return
		}
		mergeSSEEvent(result, event)
	}

	for _, line := range bytes.Split(body, []byte("\n")) {
		if len(line) == 0 {
			// A blank line terminates the current event.
			flushEvent()
			continue
		}
		if bytes.HasPrefix(line, []byte(sseDataPrefix)) {
			eventDataLines = append(eventDataLines, bytes.TrimSpace(line[len(sseDataPrefix):]))
		}
	}
	// The final event may not be followed by a blank line.
	flushEvent()

	if len(result) == 0 {
		return nil, errors.New("no parseable SSE data events found")
	}
	return result, nil
}

// mergeSSEEvent folds the model name and usage counters of one decoded SSE
// event into result. Top-level usage wins; the envelopes listed in
// sseEnvelopeFields are consulted only when the event has no top-level usage.
func mergeSSEEvent(result, event map[string]any) {
	if model, ok := event[bodyFieldModel].(string); ok && model != "" {
		result[bodyFieldModel] = model
	}

	usage, _ := event[bodyFieldUsage].(map[string]any)
	if usage == nil {
		for _, field := range sseEnvelopeFields {
			envelope, ok := event[field].(map[string]any)
			if !ok {
				continue
			}
			if model, ok := envelope[bodyFieldModel].(string); ok && model != "" {
				result[bodyFieldModel] = model
			}
			if usage, _ = envelope[bodyFieldUsage].(map[string]any); usage != nil {
				break
			}
		}
	}
	if usage == nil {
		return
	}

	merged, _ := result[bodyFieldUsage].(map[string]any)
	if merged == nil {
		merged = make(map[string]any, len(usage))
	}
	for k, v := range usage {
		merged[k] = v
	}
	result[bodyFieldUsage] = merged
}
133220
134221// HandleResponseTrailers handles response trailers.
0 commit comments