Skip to content

Commit 09b9600

Browse files
authored
Fix Content-Length and Transfer-Encoding problem (#2518)
1 parent a59056e commit 09b9600

4 files changed

Lines changed: 293 additions & 24 deletions

File tree

src/brpc/details/http_message.cpp

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
namespace brpc {
3535

36+
DEFINE_bool(allow_chunked_length, false,
37+
"Allow both Transfer-Encoding and Content-Length headers are present.");
3638
DEFINE_bool(http_verbose, false,
3739
"[DEBUG] Print EVERY http request/response");
3840
DEFINE_int32(http_verbose_max_body_length, 512,
@@ -172,6 +174,28 @@ int HttpMessage::on_headers_complete(http_parser *parser) {
172174
}
173175
}
174176

177+
// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3
178+
// If a message is received with both a Transfer-Encoding and a
179+
// Content-Length header field, the Transfer-Encoding overrides the
180+
// Content-Length. Such a message might indicate an attempt to
181+
// perform request smuggling (Section 9.5) or response splitting
182+
// (Section 9.4) and ought to be handled as an error. A sender MUST
183+
// remove the received Content-Length field prior to forwarding such
184+
// a message.
185+
186+
// Reject message if both Transfer-Encoding and Content-Length headers
187+
// are present or if allowed by gflag and 'Transfer-Encoding'
188+
// is chunked - remove Content-Length and serve request.
189+
if (parser->uses_transfer_encoding && parser->flags & F_CONTENTLENGTH) {
190+
if (parser->flags & F_CHUNKED && FLAGS_allow_chunked_length) {
191+
http_message->header().RemoveHeader("Content-Length");
192+
} else {
193+
LOG(ERROR) << "HTTP/1.1 protocol error: both Content-Length "
194+
<< "and Transfer-Encoding are set.";
195+
return -1;
196+
}
197+
}
198+
175199
// If server receives a response to a HEAD request, returns 1 and then
176200
// the parser will interpret that as saying that this message has no body.
177201
if (parser->type == HTTP_RESPONSE &&
@@ -401,6 +425,7 @@ HttpMessage::HttpMessage(bool read_body_progressively,
401425
, _cur_value(NULL)
402426
, _vbodylen(0) {
403427
http_parser_init(&_parser, HTTP_BOTH);
428+
_parser.allow_chunked_length = 1;
404429
_parser.data = this;
405430
}
406431

@@ -489,6 +514,9 @@ static void DescribeHttpParserFlags(std::ostream& os, unsigned int flags) {
489514
if (flags & F_SKIPBODY) {
490515
os << "F_SKIPBODY|";
491516
}
517+
if (flags & F_CONTENTLENGTH) {
518+
os << "F_CONTENTLENGTH|";
519+
}
492520
}
493521

494522
std::ostream& operator<<(std::ostream& os, const http_parser& parser) {
@@ -548,7 +576,13 @@ void MakeRawHttpRequest(butil::IOBuf* request,
548576
<< h->minor_version() << BRPC_CRLF;
549577
// Never use "Content-Length" set by user.
550578
h->RemoveHeader("Content-Length");
551-
if (h->method() != HTTP_METHOD_GET) {
579+
const std::string* transfer_encoding = h->GetHeader("Transfer-Encoding");
580+
if (h->method() == HTTP_METHOD_GET) {
581+
h->RemoveHeader("Transfer-Encoding");
582+
} else if (!transfer_encoding) {
583+
// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2
584+
// A sender MUST NOT send a Content-Length header field in any message
585+
// that contains a Transfer-Encoding header field.
552586
os << "Content-Length: " << (content ? content->length() : 0)
553587
<< BRPC_CRLF;
554588
}
@@ -638,25 +672,36 @@ void MakeRawHttpResponse(butil::IOBuf* response,
638672
// A server MUST NOT send a Content-Length header field in any response
639673
// with a status code of 1xx (Informational) or 204 (No Content).
640674
h->RemoveHeader("Content-Length");
641-
} else if (content) {
642-
const std::string* content_length = h->GetHeader("Content-Length");
643-
if (is_head_req) {
644-
// Prioritize "Content-Length" set by user.
645-
// If "Content-Length" is not set, set it to the length of content.
646-
if (!content_length) {
647-
os << "Content-Length: " << content->length() << BRPC_CRLF;
648-
}
649-
} else {
650-
if (content_length) {
651-
h->RemoveHeader("Content-Length");
675+
} else {
676+
const std::string* transfer_encoding = h->GetHeader("Transfer-Encoding");
677+
// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2
678+
// A sender MUST NOT send a Content-Length header field in any message
679+
// that contains a Transfer-Encoding header field.
680+
if (transfer_encoding) {
681+
h->RemoveHeader("Content-Length");
682+
}
683+
if (content) {
684+
const std::string* content_length = h->GetHeader("Content-Length");
685+
if (is_head_req) {
686+
if (!content_length && !transfer_encoding) {
687+
// Prioritize "Content-Length" set by user.
688+
// If "Content-Length" is not set, set it to the length of content.
689+
os << "Content-Length: " << content->length() << BRPC_CRLF;
690+
}
691+
} else {
692+
if (!transfer_encoding) {
693+
if (content_length) {
694+
h->RemoveHeader("Content-Length");
695+
}
696+
// Never use "Content-Length" set by user.
697+
// Always set Content-Length since lighttpd requires the header to be
698+
// set to 0 for empty content.
699+
os << "Content-Length: " << content->length() << BRPC_CRLF;
700+
}
652701
}
653-
// Never use "Content-Length" set by user.
654-
// Always set Content-Length since lighttpd requires the header to be
655-
// set to 0 for empty content.
656-
os << "Content-Length: " << content->length() << BRPC_CRLF;
657702
}
658703
}
659-
if (!h->content_type().empty()) {
704+
if (!is_invalid_content && !h->content_type().empty()) {
660705
os << "Content-Type: " << h->content_type()
661706
<< BRPC_CRLF;
662707
}

src/brpc/details/http_parser.cpp

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,10 @@ enum header_states
410410
, h_transfer_encoding
411411
, h_upgrade
412412

413+
, h_matching_transfer_encoding_token_start
413414
, h_matching_transfer_encoding_chunked
415+
, h_matching_transfer_encoding_token
416+
414417
, h_matching_connection_keep_alive
415418
, h_matching_connection_close
416419

@@ -499,6 +502,12 @@ bool is_url_char(char c) { return IS_URL_CHAR(c); }
499502

500503
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
501504

505+
/**
506+
* Verify that a char is a valid visible (printable) US-ASCII
507+
* character or %x80-FF
508+
**/
509+
#define IS_HEADER_CHAR(ch) \
510+
(ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
502511

503512
#if BRPC_HTTP_PARSER_STRICT
504513
# define STRICT_CHECK(cond) \
@@ -691,6 +700,8 @@ size_t http_parser_execute (http_parser *parser,
691700
const char *url_mark = 0;
692701
const char *body_mark = 0;
693702
const char *status_mark = 0;
703+
const unsigned int lenient = parser->lenient_http_headers;
704+
const unsigned int allow_chunked_length = parser->allow_chunked_length;
694705

695706
/* We're in an error state. Don't bother doing anything. */
696707
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
@@ -782,6 +793,7 @@ size_t http_parser_execute (http_parser *parser,
782793
if (ch == CR || ch == LF)
783794
break;
784795
parser->flags = 0;
796+
parser->uses_transfer_encoding = 0;
785797
parser->content_length = ULLONG_MAX;
786798

787799
if (ch == 'H') {
@@ -817,6 +829,7 @@ size_t http_parser_execute (http_parser *parser,
817829
case s_start_res:
818830
{
819831
parser->flags = 0;
832+
parser->uses_transfer_encoding = 0;
820833
parser->content_length = ULLONG_MAX;
821834

822835
switch (ch) {
@@ -1015,6 +1028,7 @@ size_t http_parser_execute (http_parser *parser,
10151028
if (ch == CR || ch == LF)
10161029
break;
10171030
parser->flags = 0;
1031+
parser->uses_transfer_encoding = 0;
10181032
parser->content_length = ULLONG_MAX;
10191033

10201034
if (!IS_ALPHA(ch)) {
@@ -1470,6 +1484,7 @@ size_t http_parser_execute (http_parser *parser,
14701484
parser->header_state = h_general;
14711485
} else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
14721486
parser->header_state = h_transfer_encoding;
1487+
parser->uses_transfer_encoding = 1;
14731488
}
14741489
break;
14751490

@@ -1544,16 +1559,26 @@ size_t http_parser_execute (http_parser *parser,
15441559
if ('c' == c) {
15451560
parser->header_state = h_matching_transfer_encoding_chunked;
15461561
} else {
1547-
parser->header_state = h_general;
1562+
parser->header_state = h_matching_transfer_encoding_token;
15481563
}
15491564
break;
15501565

1566+
/* Multi-value `Transfer-Encoding` header */
1567+
case h_matching_transfer_encoding_token_start:
1568+
break;
1569+
15511570
case h_content_length:
15521571
if (!IS_NUM(ch)) {
15531572
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
15541573
goto error;
15551574
}
15561575

1576+
if (parser->flags & F_CONTENTLENGTH) {
1577+
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1578+
goto error;
1579+
}
1580+
1581+
parser->flags |= F_CONTENTLENGTH;
15571582
parser->content_length = ch - '0';
15581583
break;
15591584

@@ -1591,6 +1616,11 @@ size_t http_parser_execute (http_parser *parser,
15911616
goto reexecute_byte;
15921617
}
15931618

1619+
if (!lenient && !IS_HEADER_CHAR(ch)) {
1620+
SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1621+
goto error;
1622+
}
1623+
15941624
c = LOWER(ch);
15951625

15961626
switch (parser->header_state) {
@@ -1627,17 +1657,47 @@ size_t http_parser_execute (http_parser *parser,
16271657
break;
16281658
}
16291659

1660+
/* Transfer-Encoding: chunked */
1661+
case h_matching_transfer_encoding_token_start:
1662+
/* looking for 'Transfer-Encoding: chunked' */
1663+
if ('c' == c) {
1664+
parser->header_state = h_matching_transfer_encoding_chunked;
1665+
} else if (TOKEN(c)) {
1666+
/* NOTE(gejun): Not use strict mode for these macros since the additional
1667+
* characeters seem to be OK.
1668+
*/
1669+
1670+
/* TODO(indutny): similar code below does this, but why?
1671+
* At the very least it seems to be inconsistent given that
1672+
* h_matching_transfer_encoding_token does not check for
1673+
* `STRICT_TOKEN`
1674+
*/
1675+
parser->header_state = h_matching_transfer_encoding_token;
1676+
} else if (c == ' ' || c == '\t') {
1677+
/* Skip lws */
1678+
} else {
1679+
parser->header_state = h_general;
1680+
}
1681+
break;
1682+
16301683
/* Transfer-Encoding: chunked */
16311684
case h_matching_transfer_encoding_chunked:
16321685
parser->index++;
16331686
if (parser->index > sizeof(CHUNKED)-1
16341687
|| c != CHUNKED[parser->index]) {
1635-
parser->header_state = h_general;
1688+
parser->header_state = h_matching_transfer_encoding_token;
16361689
} else if (parser->index == sizeof(CHUNKED)-2) {
16371690
parser->header_state = h_transfer_encoding_chunked;
16381691
}
16391692
break;
16401693

1694+
case h_matching_transfer_encoding_token:
1695+
if (ch == ',') {
1696+
parser->header_state = h_matching_transfer_encoding_token_start;
1697+
parser->index = 0;
1698+
}
1699+
break;
1700+
16411701
/* looking for 'Connection: keep-alive' */
16421702
case h_matching_connection_keep_alive:
16431703
parser->index++;
@@ -1660,6 +1720,9 @@ size_t http_parser_execute (http_parser *parser,
16601720
break;
16611721

16621722
case h_transfer_encoding_chunked:
1723+
if (ch != ' ') parser->header_state = h_matching_transfer_encoding_token;
1724+
break;
1725+
16631726
case h_connection_keep_alive:
16641727
case h_connection_close:
16651728
if (ch != ' ') parser->header_state = h_general;
@@ -1739,6 +1802,24 @@ size_t http_parser_execute (http_parser *parser,
17391802
break;
17401803
}
17411804

1805+
/* Cannot use transfer-encoding and a content-length header together
1806+
per the HTTP specification. (RFC 7230 Section 3.3.3) */
1807+
if ((parser->uses_transfer_encoding == 1) &&
1808+
(parser->flags & F_CONTENTLENGTH)) {
1809+
/* Allow it for lenient parsing as long as `Transfer-Encoding` is
1810+
* not `chunked` or allow_length_with_encoding is set
1811+
*/
1812+
if (parser->flags & F_CHUNKED) {
1813+
if (!allow_chunked_length) {
1814+
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1815+
goto error;
1816+
}
1817+
} else if (!lenient) {
1818+
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1819+
goto error;
1820+
}
1821+
}
1822+
17421823
parser->state = s_headers_done;
17431824

17441825
/* Here we call the headers_complete callback. This is somewhat
@@ -1791,6 +1872,26 @@ size_t http_parser_execute (http_parser *parser,
17911872
} else if (parser->flags & F_CHUNKED) {
17921873
/* chunked encoding - ignore Content-Length header */
17931874
parser->state = s_chunk_size_start;
1875+
} else if (parser->uses_transfer_encoding == 1) {
1876+
if (parser->type == HTTP_REQUEST && !lenient) {
1877+
/* RFC 7230 3.3.3 */
1878+
/* If a Transfer-Encoding header field
1879+
* is present in a request and the chunked transfer coding is not
1880+
* the final encoding, the message body length cannot be determined
1881+
* reliably; the server MUST respond with the 400 (Bad Request)
1882+
* status code and then close the connection.
1883+
*/
1884+
SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1885+
return (p - data); /* Error */
1886+
} else {
1887+
/* RFC 7230 3.3.3 */
1888+
/* If a Transfer-Encoding header field is present in a response and
1889+
* the chunked transfer coding is not the final encoding, the
1890+
* message body length is determined by reading the connection until
1891+
* it is closed by the server.
1892+
*/
1893+
parser->state = s_body_identity_eof;
1894+
}
17941895
} else {
17951896
if (parser->content_length == 0) {
17961897
/* Content-Length header given but zero: Content-Length: 0\r\n */
@@ -2037,6 +2138,12 @@ http_message_needs_eof (const http_parser *parser)
20372138
return 0;
20382139
}
20392140

2141+
/* RFC 7230 3.3.3, see `s_headers_almost_done` */
2142+
if ((parser->uses_transfer_encoding == 1) &&
2143+
(parser->flags & F_CHUNKED) == 0) {
2144+
return 1;
2145+
}
2146+
20402147
if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
20412148
return 0;
20422149
}

src/brpc/details/http_parser.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ enum http_parser_flags
138138
, F_TRAILING = 1 << 3
139139
, F_UPGRADE = 1 << 4
140140
, F_SKIPBODY = 1 << 5
141+
, F_CONTENTLENGTH = 1 << 6
141142
};
142143

143144

@@ -178,13 +179,17 @@ enum http_parser_flags
178179
XX(INVALID_HEADER_TOKEN, "invalid character in header") \
179180
XX(INVALID_CONTENT_LENGTH, \
180181
"invalid character in content-length header") \
182+
XX(UNEXPECTED_CONTENT_LENGTH, \
183+
"unexpected content-length header") \
181184
XX(INVALID_CHUNK_SIZE, \
182185
"invalid character in chunk size header") \
183186
XX(INVALID_CONSTANT, "invalid constant string") \
184187
XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
185188
XX(STRICT, "strict mode assertion failed") \
186189
XX(PAUSED, "parser is paused") \
187-
XX(UNKNOWN, "an unknown error occurred")
190+
XX(UNKNOWN, "an unknown error occurred") \
191+
XX(INVALID_TRANSFER_ENCODING, \
192+
"request has invalid transfer-encoding")
188193

189194

190195
/* Define HPE_* values for each errno value above */
@@ -202,10 +207,15 @@ enum http_errno {
202207
struct http_parser {
203208
/** PRIVATE **/
204209
unsigned int type : 2; /* enum http_parser_type */
205-
unsigned int flags : 6; /* F_* values from 'flags' enum; semi-public */
206-
unsigned int state : 8; /* enum state from http_parser.c */
207-
unsigned int header_state : 8; /* enum header_state from http_parser.c */
208-
unsigned int index : 8; /* index into current matcher */
210+
unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
211+
unsigned int state : 7; /* enum state from http_parser.c */
212+
unsigned int header_state : 7; /* enum header_state from http_parser.c */
213+
unsigned int index : 5; /* index into current matcher */
214+
unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */
215+
unsigned int allow_chunked_length : 1; /* Allow headers with both
216+
* `Content-Length` and
217+
* `Transfer-Encoding: chunked` set */
218+
unsigned int lenient_http_headers : 1;
209219

210220
uint32_t nread; /* # bytes read in various scenarios */
211221
uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one)

0 commit comments

Comments
 (0)