Skip to content

Commit 21c2bbe

Browse files
committed
Implement ResumableParser#parsed_bytes
This is intended to make it easier to securely parse untrusted inputs.
1 parent 97b2eee commit 21c2bbe

2 files changed

Lines changed: 86 additions & 8 deletions

File tree

ext/json/ext/parser/parser.c

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,6 +2162,9 @@ typedef struct JSON_ResumableParserStruct {
21622162
rvalue_stack value_stack;
21632163
json_frame_stack frames;
21642164
VALUE buffer;
2165+
size_t parsed_bytes;
2166+
size_t incomplete_bytes;
2167+
bool complete;
21652168
bool in_use;
21662169
} JSON_ResumableParser;
21672170

@@ -2282,6 +2285,18 @@ static inline JSON_ResumableParser *cResumableParser_get(VALUE self)
22822285
*
22832286
* An incomplete document is buffered in full and there is no size limit, so when reading
22842287
* from an untrusted source the caller is responsible for bounding how much data is fed.
2288+
* For example:
2289+
*
2290+
* loop do
2291+
* if parser.parsed_bytes > DOCUMENT_MAX_SIZE
2292+
* raise "document too large"
2293+
* end
2294+
*
2295+
* parser << read_chunk
2296+
* while parser.parse
2297+
* process(parser.value)
2298+
* end
2299+
* end
22852300
*/
22862301
static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self)
22872302
{
@@ -2398,6 +2413,13 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock)
23982413
static VALUE cResumableParser_parse(VALUE self)
23992414
{
24002415
JSON_ResumableParser *parser = ResumableParser_acquire(self, true);
2416+
2417+
if (parser->complete) {
2418+
parser->parsed_bytes = 0;
2419+
parser->incomplete_bytes = 0;
2420+
parser->complete = false;
2421+
}
2422+
24012423
if (!parser->buffer) {
24022424
parser->in_use = false;
24032425
return Qfalse;
@@ -2427,20 +2449,28 @@ static VALUE cResumableParser_parse(VALUE self)
24272449
.config = &parser->config,
24282450
};
24292451
int status;
2430-
bool complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status);
2431-
parser->in_use = false;
2452+
const char *initial_cursor = parser->state.cursor;
2453+
parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status);
24322454
if (status) {
2433-
complete = false;
24342455
VALUE error_source = rb_ivar_get(rb_errinfo(), i_at_eos);
24352456
if (error_source == self) {
2436-
complete = false; // is an EOS error raised by ourself
2457+
parser->complete = false; // is an EOS error raised by ourself
24372458
rb_set_errinfo(Qnil);
2459+
status = 0;
24382460
} else {
2439-
rb_jump_tag(status); // reraise
2461+
parser->complete = true; // a parse error is considered complete
24402462
}
24412463
}
2464+
2465+
parser->parsed_bytes += parser->state.cursor - initial_cursor;
2466+
parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor;
2467+
2468+
parser->in_use = false;
2469+
if (status) {
2470+
rb_jump_tag(status); // reraise
2471+
}
24422472
RB_GC_GUARD(Vsource);
2443-
return complete ? Qtrue : Qfalse;
2473+
return parser->complete ? Qtrue : Qfalse;
24442474
}
24452475

24462476
/*
@@ -2498,6 +2528,9 @@ static VALUE cResumableParser_clear(VALUE self)
24982528
{
24992529
JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
25002530
parser->buffer = 0;
2531+
parser->complete = true;
2532+
parser->parsed_bytes = 0;
2533+
parser->incomplete_bytes = 0;
25012534
parser->frames.head = 0;
25022535
parser->value_stack.head = 0;
25032536
parser->state.name_cache.length = 0;
@@ -2633,6 +2666,29 @@ static VALUE cResumableParser_eos_p(VALUE self)
26332666
return eos(&parser->state) ? Qtrue : Qfalse;
26342667
}
26352668

2669+
/*
2670+
* call-seq: parsed_bytes -> integer
2671+
*
2672+
* Returns the number of bytes consumed since the start of the current value, plus any bytes still buffered for an incomplete one.
2673+
* This is intended to be used for securing against untrusted input:
2674+
*
2675+
* loop do
2676+
* if parser.parsed_bytes > DOCUMENT_MAX_SIZE
2677+
* raise "document too large"
2678+
* end
2679+
*
2680+
* parser << read_chunk
2681+
* while parser.parse
2682+
* process(parser.value)
2683+
* end
2684+
* end
2685+
*/
2686+
static VALUE cResumableParser_parsed_bytes(VALUE self)
2687+
{
2688+
JSON_ResumableParser *parser = cResumableParser_get(self);
2689+
return ULL2NUM(parser->parsed_bytes + parser->incomplete_bytes);
2690+
}
2691+
26362692
void Init_parser(void)
26372693
{
26382694
#ifdef HAVE_RB_EXT_RACTOR_SAFE
@@ -2669,6 +2725,7 @@ void Init_parser(void)
26692725
rb_define_method(cResumableParser, "clear", cResumableParser_clear, 0);
26702726
rb_define_method(cResumableParser, "rest", cResumableParser_rest, 0);
26712727
rb_define_method(cResumableParser, "eos?", cResumableParser_eos_p, 0);
2728+
rb_define_method(cResumableParser, "parsed_bytes", cResumableParser_parsed_bytes, 0);
26722729

26732730
rb_global_variable(&CNaN);
26742731
CNaN = rb_const_get(mJSON, rb_intern("NaN"));

test/json/resumable_parser_test.rb

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def test_parse_byte_by_byte_string
156156
end
157157

158158
def test_parse_byte_by_byte_numbers
159-
assert_resumed_parsing('123 ')
159+
assert_resumed_parsing('123 ', trailing_bytes: 1)
160160
end
161161

162162
def test_nul_byte_is_a_syntax_error
@@ -364,6 +364,26 @@ def test_buffer_shrink
364364
parser.value
365365
end
366366

367+
def test_parsed_bytes
368+
chunk = '[1, 2, 3, 4, tru'
369+
@parser << chunk
370+
refute @parser.parse
371+
assert_equal chunk.bytesize, @parser.parsed_bytes
372+
373+
@parser << 'e][]'
374+
assert @parser.parse
375+
assert_equal chunk.bytesize + 2, @parser.parsed_bytes
376+
377+
assert @parser.parse
378+
assert_equal 2, @parser.parsed_bytes
379+
380+
@parser << chunk
381+
refute @parser.parse
382+
assert_equal chunk.bytesize, @parser.parsed_bytes
383+
@parser.clear
384+
assert_equal 0, @parser.parsed_bytes
385+
end
386+
367387
private
368388

369389
def assert_parse_error(json)
@@ -389,7 +409,7 @@ def assert_partial_value(expected, json)
389409
end
390410
end
391411

392-
def assert_resumed_parsing(json, parser = @parser)
412+
def assert_resumed_parsing(json, parser = @parser, trailing_bytes: 0)
393413
expected = JSON.parse(json)
394414

395415
last_parsed_byte_index = 0
@@ -402,6 +422,7 @@ def assert_resumed_parsing(json, parser = @parser)
402422
assert_equal expected, actual
403423
remaining_bytes = (json.bytesize - last_parsed_byte_index)
404424
assert_equal 0, remaining_bytes, "unconsumed bytes: #{actual.inspect}, remaining: #{json.byteslice(-1, remaining_bytes).inspect}"
425+
assert_equal json.bytesize - trailing_bytes, parser.parsed_bytes
405426
end
406427

407428
def assert_parse_stream(expected, json, parser = @parser)

0 commit comments

Comments
 (0)