Skip to content

Commit b30a8f8

Browse files
committed
ResumableParser: eagerly drop the buffer when reaching EOS
There is no point in waiting for the next call to `<<` to remove the reference.
1 parent f08c663 commit b30a8f8

2 files changed

Lines changed: 37 additions & 13 deletions

File tree

ext/json/ext/parser/parser.c

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include "../simd/simd.h"
44

55
static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8;
6-
static VALUE CNaN, CInfinity, CMinusInfinity;
6+
static VALUE CNaN, CInfinity, CMinusInfinity, JSON_empty_string;
77

88
static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column;
99

@@ -843,7 +843,7 @@ json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config)
843843
}
844844

845845
ALWAYS_INLINE(static) void
846-
json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config)
846+
json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config, bool include_comments)
847847
{
848848
while (true) {
849849
switch (peek(state)) {
@@ -874,6 +874,10 @@ json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config)
874874
state->cursor++;
875875
break;
876876
case '/':
877+
if (!include_comments) {
878+
return;
879+
}
880+
877881
json_eat_comments(state, config);
878882
break;
879883

@@ -1587,7 +1591,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
15871591
JSON_UNREACHABLE_RETURN(false);
15881592

15891593
JSON_PHASE_VALUE: {
1590-
json_eat_whitespace(state, config);
1594+
json_eat_whitespace(state, config, true);
15911595

15921596
VALUE value;
15931597
const char *value_start = state->cursor;
@@ -1684,7 +1688,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
16841688

16851689
case '[': {
16861690
state->cursor++;
1687-
json_eat_whitespace(state, config);
1691+
json_eat_whitespace(state, config, true);
16881692

16891693
const char next = peek(state);
16901694
if (next == ']') {
@@ -1713,7 +1717,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
17131717

17141718
case '{': {
17151719
state->cursor++;
1716-
json_eat_whitespace(state, config);
1720+
json_eat_whitespace(state, config, true);
17171721

17181722
if (peek(state) == '}') {
17191723
state->cursor++;
@@ -1769,7 +1773,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
17691773
JSON_PHASE_OBJECT_KEY: {
17701774
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
17711775

1772-
json_eat_whitespace(state, config);
1776+
json_eat_whitespace(state, config, true);
17731777

17741778
const char *start = state->cursor;
17751779

@@ -1804,7 +1808,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
18041808
JSON_PHASE_OBJECT_COLON: {
18051809
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
18061810

1807-
json_eat_whitespace(state, config);
1811+
json_eat_whitespace(state, config, true);
18081812

18091813
if (RB_LIKELY(peek(state) == ':')) {
18101814
state->cursor++;
@@ -1827,14 +1831,14 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
18271831
JSON_PHASE_ARRAY_COMMA: {
18281832
JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);
18291833

1830-
json_eat_whitespace(state, config);
1834+
json_eat_whitespace(state, config, true);
18311835

18321836
const char next_char = peek(state);
18331837

18341838
if (RB_LIKELY(next_char == ',')) {
18351839
state->cursor++;
18361840
if (config->allow_trailing_comma) {
1837-
json_eat_whitespace(state, config);
1841+
json_eat_whitespace(state, config, true);
18381842
if (peek(state) == ']') {
18391843
// Trailing comma: stay in COMMA to close on the next iteration.
18401844
goto JSON_PHASE_ARRAY_COMMA;
@@ -1873,12 +1877,12 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
18731877
JSON_PHASE_OBJECT_COMMA: {
18741878
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
18751879

1876-
json_eat_whitespace(state, config);
1880+
json_eat_whitespace(state, config, true);
18771881
const char next_char = peek(state);
18781882

18791883
if (RB_LIKELY(next_char == ',')) {
18801884
state->cursor++;
1881-
json_eat_whitespace(state, config);
1885+
json_eat_whitespace(state, config, true);
18821886

18831887
if (config->allow_trailing_comma) {
18841888
if (peek(state) == '}') {
@@ -1926,7 +1930,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
19261930

19271931
static void json_ensure_eof(JSON_ParserState *state, JSON_ParserConfig *config)
19281932
{
1929-
json_eat_whitespace(state, config);
1933+
json_eat_whitespace(state, config, true);
19301934
if (!eos(state)) {
19311935
raise_syntax_error("unexpected token at end of stream %s", state);
19321936
}
@@ -2216,6 +2220,14 @@ static VALUE cJSON_parser_s_allocate(VALUE klass)
22162220
return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
22172221
}
22182222

2223+
static void json_str_clear(VALUE str)
2224+
{
2225+
if (RB_OBJ_FROZEN_RAW(str)) {
2226+
return;
2227+
}
2228+
rb_str_replace(str, JSON_empty_string);
2229+
}
2230+
22192231
typedef struct JSON_ResumableParserStruct {
22202232
JSON_ParserConfig config;
22212233
JSON_ParserState state;
@@ -2394,6 +2406,9 @@ static VALUE cResumableParser_feed(VALUE self, VALUE str)
23942406
const size_t remaining = parser->state.end - parser->state.cursor;
23952407

23962408
if (!remaining) {
2409+
if (parser->buffer) {
2410+
json_str_clear(parser->buffer);
2411+
}
23972412
parser->buffer = RB_OBJ_FROZEN_RAW(str) ? str : rb_obj_hide(rb_str_new_shared(str));
23982413
offset = 0;
23992414
} else {
@@ -2529,6 +2544,12 @@ static VALUE cResumableParser_parse(VALUE self)
25292544

25302545
parser->parsed_bytes += parser->state.cursor - initial_cursor;
25312546
parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor;
2547+
2548+
json_eat_whitespace(&parser->state, &parser->config, false);
2549+
if (eos(&parser->state)) {
2550+
json_str_clear(parser->buffer);
2551+
parser->buffer = Qfalse;
2552+
}
25322553
parser->in_use = false;
25332554

25342555
if (status) {
@@ -2804,6 +2825,9 @@ void Init_parser(void)
28042825
rb_global_variable(&Encoding_UTF_8);
28052826
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
28062827

2828+
rb_global_variable(&JSON_empty_string);
2829+
JSON_empty_string = rb_obj_hide(rb_utf8_str_new("", 0));
2830+
28072831
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
28082832
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
28092833
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));

test/json/resumable_parser_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def test_eos
223223
@parser << ' '
224224
assert @parser.parse
225225
assert_equal 123, @parser.value
226-
refute_predicate @parser, :eos?
226+
assert_predicate @parser, :eos?
227227

228228
refute @parser.parse
229229
assert_predicate @parser, :eos?

0 commit comments

Comments
 (0)