@@ -2162,6 +2162,9 @@ typedef struct JSON_ResumableParserStruct {
21622162 rvalue_stack value_stack ;
21632163 json_frame_stack frames ;
21642164 VALUE buffer ;
2165+ size_t parsed_bytes ;
2166+ size_t incomplete_bytes ;
2167+ bool complete ;
21652168 bool in_use ;
21662169} JSON_ResumableParser ;
21672170
@@ -2282,6 +2285,18 @@ static inline JSON_ResumableParser *cResumableParser_get(VALUE self)
22822285 *
22832286 * An incomplete document is buffered in full and there is no size limit, so when reading
22842287 * from an untrusted source the caller is responsible for bounding how much data is fed.
2288+ * For example:
2289+ *
2290+ * loop do
2291+ * if parser.parsed_bytes > DOCUMENT_MAX_SIZE
2292+ * raise "document too large"
2293+ * end
2294+ *
2295+ * parser << read_chunk
2296+ * while parser.parse
2297+ * process(parser.value)
2298+ * end
2299+ * end
22852300 */
22862301static VALUE cResumableParser_initialize (int argc , VALUE * argv , VALUE self )
22872302{
@@ -2398,6 +2413,13 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock)
23982413static VALUE cResumableParser_parse (VALUE self )
23992414{
24002415 JSON_ResumableParser * parser = ResumableParser_acquire (self , true);
2416+
2417+ if (parser -> complete ) {
2418+ parser -> parsed_bytes = 0 ;
2419+ parser -> incomplete_bytes = 0 ;
2420+ parser -> complete = false;
2421+ }
2422+
24012423 if (!parser -> buffer ) {
24022424 parser -> in_use = false;
24032425 return Qfalse ;
@@ -2427,20 +2449,28 @@ static VALUE cResumableParser_parse(VALUE self)
24272449 .config = & parser -> config ,
24282450 };
24292451 int status ;
2430- bool complete = rb_protect ( json_parse_any_resumable_safe , ( VALUE ) & args , & status ) ;
2431- parser -> in_use = false ;
2452+ const char * initial_cursor = parser -> state . cursor ;
2453+ parser -> complete = rb_protect ( json_parse_any_resumable_safe , ( VALUE ) & args , & status ) ;
24322454 if (status ) {
2433- complete = false;
24342455 VALUE error_source = rb_ivar_get (rb_errinfo (), i_at_eos );
24352456 if (error_source == self ) {
2436- complete = false; // is an EOS error raised by ourself
2457+ parser -> complete = false; // is an EOS error raised by ourself
24372458 rb_set_errinfo (Qnil );
2459+ status = 0 ;
24382460 } else {
2439- rb_jump_tag ( status ) ; // reraise
2461+ parser -> complete = true ; // a parse error is considered complete
24402462 }
24412463 }
2464+
2465+ parser -> parsed_bytes += parser -> state .cursor - initial_cursor ;
2466+ parser -> incomplete_bytes = parser -> complete ? 0 : parser -> state .end - parser -> state .cursor ;
2467+
2468+ parser -> in_use = false;
2469+ if (status ) {
2470+ rb_jump_tag (status ); // reraise
2471+ }
24422472 RB_GC_GUARD (Vsource );
2443- return complete ? Qtrue : Qfalse ;
2473+ return parser -> complete ? Qtrue : Qfalse ;
24442474}
24452475
24462476/*
@@ -2498,6 +2528,9 @@ static VALUE cResumableParser_clear(VALUE self)
24982528{
24992529 JSON_ResumableParser * parser = ResumableParser_acquire (self , false);
25002530 parser -> buffer = 0 ;
2531+ parser -> complete = true;
2532+ parser -> parsed_bytes = 0 ;
2533+ parser -> incomplete_bytes = 0 ;
25012534 parser -> frames .head = 0 ;
25022535 parser -> value_stack .head = 0 ;
25032536 parser -> state .name_cache .length = 0 ;
@@ -2633,6 +2666,29 @@ static VALUE cResumableParser_eos_p(VALUE self)
26332666 return eos (& parser -> state ) ? Qtrue : Qfalse ;
26342667}
26352668
2669+ /*
2670+ * call-seq: parsed_bytes -> integer
2671+ *
2672+ * Returns the bytes consumed so far for the current value plus any bytes left unconsumed in the buffer after an incomplete parse.
2673+ * It is intended for bounding how much untrusted input is accepted for a single document:
2674+ *
2675+ * loop do
2676+ * if parser.parsed_bytes > DOCUMENT_MAX_SIZE
2677+ * raise "document too large"
2678+ * end
2679+ *
2680+ * parser << read_chunk
2681+ * while parser.parse
2682+ * process(parser.value)
2683+ * end
2684+ * end
2685+ */
2686+ static VALUE cResumableParser_parsed_bytes (VALUE self )
2687+ {
2688+ JSON_ResumableParser * parser = cResumableParser_get (self ); // plain accessor: unlike parse/clear, this does not go through ResumableParser_acquire
2689+ return ULL2NUM (parser -> parsed_bytes + parser -> incomplete_bytes ); // consumed + still-buffered bytes; NOTE(review): both fields are size_t, SIZET2NUM may be the closer-fitting conversion - confirm
2690+ }
2691+
26362692void Init_parser (void )
26372693{
26382694#ifdef HAVE_RB_EXT_RACTOR_SAFE
@@ -2669,6 +2725,7 @@ void Init_parser(void)
26692725 rb_define_method (cResumableParser , "clear" , cResumableParser_clear , 0 );
26702726 rb_define_method (cResumableParser , "rest" , cResumableParser_rest , 0 );
26712727 rb_define_method (cResumableParser , "eos?" , cResumableParser_eos_p , 0 );
2728+ rb_define_method (cResumableParser , "parsed_bytes" , cResumableParser_parsed_bytes , 0 );
26722729
26732730 rb_global_variable (& CNaN );
26742731 CNaN = rb_const_get (mJSON , rb_intern ("NaN" ));
0 commit comments