From 81707e27b3c269b9deab735cfeb9a17a8cec6807 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Thu, 23 Oct 2025 13:41:46 +0200 Subject: [PATCH 01/18] C: Use Arena for Lexing and Parsing --- ext/herb/extension.c | 77 ++++- javascript/packages/node/extension/herb.cpp | 104 ++++++- src/analyze.c | 304 ++++++++++---------- src/extract.c | 44 ++- src/herb.c | 68 ++++- src/include/analyze.h | 1 + src/include/herb.h | 13 +- src/include/lexer.h | 3 +- src/include/lexer_struct.h | 3 + src/include/parser.h | 2 + src/include/parser_helpers.h | 1 + src/include/token.h | 8 +- src/include/token_struct.h | 3 + src/include/util/hb_arena.h | 2 +- src/lexer.c | 4 +- src/lexer_peek_helpers.c | 3 - src/main.c | 53 +++- src/parser.c | 103 ++++--- src/parser_helpers.c | 32 ++- src/prism_helpers.c | 3 +- src/token.c | 54 +++- src/util/hb_arena.c | 2 +- templates/src/ast_nodes.c.erb | 106 +++++-- templates/src/errors.c.erb | 69 ++++- templates/src/include/ast_nodes.h.erb | 6 +- templates/src/include/errors.h.erb | 8 +- wasm/herb-wasm.cpp | 44 ++- 27 files changed, 830 insertions(+), 290 deletions(-) diff --git a/ext/herb/extension.c b/ext/herb/extension.c index d04974370..baa62ce06 100644 --- a/ext/herb/extension.c +++ b/ext/herb/extension.c @@ -5,6 +5,10 @@ #include "extension_helpers.h" #include "nodes.h" +#include "../../src/include/macros.h" +#include "../../src/include/util/hb_arena.h" + + VALUE mHerb; VALUE cPosition; VALUE cLocation; @@ -17,23 +21,52 @@ VALUE cParseResult; static VALUE Herb_lex(VALUE self, VALUE source) { char* string = (char*) check_string(source); - hb_array_T* tokens = herb_lex(string); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + + herb_lex_result_T* lex_result = herb_lex(string, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + return Qnil; + } - VALUE result = create_lex_result(tokens, source); + VALUE result = 
create_lex_result(lex_result->tokens, source); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); return result; } static VALUE Herb_lex_file(VALUE self, VALUE path) { char* file_path = (char*) check_string(path); - hb_array_T* tokens = herb_lex_file(file_path); + + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + + herb_lex_result_T* lex_result = herb_lex_file(file_path, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + return Qnil; + } VALUE source_value = read_file_to_ruby_string(file_path); - VALUE result = create_lex_result(tokens, source_value); + VALUE result = create_lex_result(lex_result->tokens, source_value); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); return result; } @@ -60,7 +93,21 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); } } - AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + return Qnil; + } VALUE result = create_parse_result(root, source); @@ -94,7 +141,21 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) { if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); } } - AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + return Qnil; + } VALUE result = create_parse_result(root, 
source_value); diff --git a/javascript/packages/node/extension/herb.cpp b/javascript/packages/node/extension/herb.cpp index 9ecd66ee2..49ecdf8b6 100644 --- a/javascript/packages/node/extension/herb.cpp +++ b/javascript/packages/node/extension/herb.cpp @@ -3,8 +3,10 @@ extern "C" { #include "../extension/libherb/include/extract.h" #include "../extension/libherb/include/herb.h" #include "../extension/libherb/include/location.h" +#include "../extension/libherb/include/macros.h" #include "../extension/libherb/include/range.h" #include "../extension/libherb/include/token.h" +#include "../extension/libherb/include/util/hb_arena.h" #include "../extension/libherb/include/util/hb_array.h" #include "../extension/libherb/include/util/hb_buffer.h" } @@ -31,10 +33,31 @@ napi_value Herb_lex(napi_env env, napi_callback_info info) { char* string = CheckString(env, args[0]); if (!string) { return nullptr; } - hb_array_T* tokens = herb_lex(string); - napi_value result = CreateLexResult(env, tokens, args[0]); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - herb_free_tokens(&tokens); + if (!arena) { + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(string); + return nullptr; + } + + herb_lex_result_T* lex_result = herb_lex(string, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + free(string); + return nullptr; + } + + napi_value result = CreateLexResult(env, lex_result->tokens, args[0]); + + herb_free_lex_result(&lex_result); free(string); return result; @@ -53,11 +76,32 @@ napi_value Herb_lex_file(napi_env env, napi_callback_info info) { char* file_path = CheckString(env, args[0]); if (!file_path) { return nullptr; } - hb_array_T* tokens = herb_lex_file(file_path); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(file_path); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(file_path); + return nullptr; + } + + 
herb_lex_result_T* lex_result = herb_lex_file(file_path, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + free(file_path); + return nullptr; + } + napi_value source_value = ReadFileToString(env, file_path); - napi_value result = CreateLexResult(env, tokens, source_value); + napi_value result = CreateLexResult(env, lex_result->tokens, source_value); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); free(file_path); return result; @@ -124,7 +168,27 @@ napi_value Herb_parse(napi_env env, napi_callback_info info) { } } - AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(string); + return nullptr; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + free(string); + return nullptr; + } napi_value result = CreateParseResult(env, root, args[0]); ast_node_free((AST_NODE_T *) root); @@ -154,7 +218,31 @@ napi_value Herb_parse_file(napi_env env, napi_callback_info info) { return nullptr; } - AST_DOCUMENT_NODE_T* root = herb_parse(string, nullptr); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(file_path); + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(file_path); + free(string); + return nullptr; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, nullptr, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + free(file_path); + free(string); + return nullptr; + } + napi_value result = CreateParseResult(env, root, source_value); ast_node_free((AST_NODE_T *) root); diff --git a/src/analyze.c b/src/analyze.c index ed40bb0b0..29e98f1de 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -14,6 +14,7 @@ #include "include/prism_helpers.h" #include 
"include/token_struct.h" #include "include/util.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" #include "include/util/hb_buffer.h" #include "include/util/hb_string.h" @@ -344,11 +345,17 @@ static AST_NODE_T* create_control_node( hb_array_T* children, AST_NODE_T* subsequent, AST_ERB_END_NODE_T* end_node, - control_type_t control_type + control_type_t control_type, + hb_arena_T* arena ) { hb_array_T* errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + position_T start_position = erb_node->tag_opening->location.start; position_T end_position = erb_content_end_position(erb_node); @@ -406,14 +413,22 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_ELSE: { - return ( - AST_NODE_T* - ) ast_erb_else_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors); + return (AST_NODE_T*) ast_erb_else_node_init( + tag_opening, + content, + tag_closing, + children, + start_position, + end_position, + errors, + arena + ); } case CONTROL_TYPE_CASE: @@ -452,7 +467,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } else { hb_array_free(&in_conditions); @@ -467,7 +483,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } } @@ -481,7 +498,8 @@ static AST_NODE_T* create_control_node( children, start_position, end_position, - errors + errors, + arena ); } @@ -494,7 +512,8 @@ static AST_NODE_T* create_control_node( children, start_position, end_position, - errors + errors, + arena ); } @@ -524,7 +543,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -541,14 +561,22 @@ static AST_NODE_T* create_control_node( 
rescue_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_ENSURE: { - return ( - AST_NODE_T* - ) ast_erb_ensure_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors); + return (AST_NODE_T*) ast_erb_ensure_node_init( + tag_opening, + content, + tag_closing, + children, + start_position, + end_position, + errors, + arena + ); } case CONTROL_TYPE_UNLESS: { @@ -566,7 +594,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -579,7 +608,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -592,7 +622,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -605,7 +636,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -618,14 +650,15 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_YIELD: { return ( AST_NODE_T* - ) ast_erb_yield_node_init(tag_opening, content, tag_closing, start_position, end_position, errors); + ) ast_erb_yield_node_init(tag_opening, content, tag_closing, start_position, end_position, errors, arena); } default: return NULL; @@ -715,11 +748,10 @@ static size_t process_control_structure( when_statements, erb_content->tag_opening->location.start, when_end_position, - when_errors + when_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_content); - hb_array_append(when_conditions, (AST_NODE_T*) when_node); continue; @@ -758,11 +790,10 @@ static size_t process_control_structure( in_statements, erb_content->tag_opening->location.start, in_end_position, - in_errors + in_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_content); - hb_array_append(in_conditions, (AST_NODE_T*) in_node); continue; @@ -800,10 +831,9 @@ 
static size_t process_control_structure( else_children, next_erb->tag_opening->location.start, erb_content_end_position(next_erb), - else_errors + else_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -826,11 +856,10 @@ static size_t process_control_structure( end_erb->tag_closing, end_erb->tag_opening->location.start, erb_content_end_position(end_erb), - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } @@ -855,6 +884,11 @@ static size_t process_control_structure( hb_array_T* case_match_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_CASE_MATCH_NODE_T* case_match_node = ast_erb_case_match_node_init( erb_node->tag_opening, erb_node->content, @@ -865,11 +899,10 @@ static size_t process_control_structure( end_node, start_position, end_position, - case_match_errors + case_match_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) case_match_node); hb_array_free(&when_conditions); hb_array_free(&children); @@ -880,6 +913,11 @@ static size_t process_control_structure( hb_array_T* case_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_CASE_NODE_T* case_node = ast_erb_case_node_init( erb_node->tag_opening, erb_node->content, @@ -890,11 +928,10 @@ static size_t process_control_structure( end_node, start_position, end_position, - case_errors + case_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) case_node); hb_array_free(&in_conditions); hb_array_free(&children); @@ -948,10 +985,9 @@ static size_t process_control_structure( else_children, 
next_erb->tag_opening->location.start, erb_content_end_position(next_erb), - else_errors + else_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -994,10 +1030,9 @@ static size_t process_control_structure( ensure_children, next_erb->tag_opening->location.start, erb_content_end_position(next_erb), - ensure_errors + ensure_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -1020,11 +1055,10 @@ static size_t process_control_structure( end_erb->tag_closing, end_erb->tag_opening->location.start, erb_content_end_position(end_erb), - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } @@ -1046,6 +1080,11 @@ static size_t process_control_structure( hb_array_T* begin_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_BEGIN_NODE_T* begin_node = ast_erb_begin_node_init( erb_node->tag_opening, erb_node->content, @@ -1057,12 +1096,12 @@ static size_t process_control_structure( end_node, start_position, end_position, - begin_errors + begin_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) begin_node); + return index; } @@ -1090,11 +1129,10 @@ static size_t process_control_structure( close_erb->tag_closing, close_erb->tag_opening->location.start, close_end_pos, - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) close_erb); - index++; } } @@ -1113,6 +1151,11 @@ static size_t process_control_structure( hb_array_T* block_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_BLOCK_NODE_T* block_node = ast_erb_block_node_init( erb_node->tag_opening, erb_node->content, @@ -1121,12 +1164,12 @@ static size_t 
process_control_structure( end_node, start_position, end_position, - block_errors + block_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) block_node); + return index; } @@ -1166,20 +1209,19 @@ static size_t process_control_structure( end_erb->tag_closing, end_erb->tag_opening->location.start, end_erb_final_pos, - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } } - AST_NODE_T* control_node = create_control_node(erb_node, children, subsequent, end_node, initial_type); + AST_NODE_T* control_node = + create_control_node(erb_node, children, subsequent, end_node, initial_type, context->arena); if (control_node) { - ast_node_free((AST_NODE_T*) erb_node); hb_array_append(output_array, control_node); } else { hb_array_free(&children); @@ -1204,10 +1246,10 @@ static size_t process_subsequent_block( index = process_block_children(node, array, index, children, context, parent_type); - AST_NODE_T* subsequent_node = create_control_node(erb_node, children, NULL, NULL, type); + AST_NODE_T* subsequent_node = create_control_node(erb_node, children, NULL, NULL, type, context->arena); if (subsequent_node) { - ast_node_free((AST_NODE_T*) erb_node); + // no-op } else { hb_array_free(&children); } @@ -1339,10 +1381,9 @@ hb_array_T* rewrite_node_array(AST_NODE_T* node, hb_array_T* array, analyze_ruby continue; case CONTROL_TYPE_YIELD: { - AST_NODE_T* yield_node = create_control_node(erb_node, NULL, NULL, NULL, type); + AST_NODE_T* yield_node = create_control_node(erb_node, NULL, NULL, NULL, type, context->arena); if (yield_node) { - ast_node_free((AST_NODE_T*) erb_node); hb_array_append(new_array, yield_node); } else { hb_array_append(new_array, item); @@ -1362,6 +1403,21 @@ hb_array_T* rewrite_node_array(AST_NODE_T* node, hb_array_T* array, analyze_ruby return new_array; } +static void free_analyzed_ruby_from_array(hb_array_T* array) { + if (!array) { return; } + + for 
(size_t i = 0; i < hb_array_size(array); i++) { + AST_NODE_T* node = hb_array_get(array, i); + if (node && node->type == AST_ERB_CONTENT_NODE) { + AST_ERB_CONTENT_NODE_T* erb_content = (AST_ERB_CONTENT_NODE_T*) node; + if (erb_content->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content->analyzed_ruby); + erb_content->analyzed_ruby = NULL; + } + } + } +} + static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { invalid_erb_context_T* context = (invalid_erb_context_T*) data; @@ -1438,102 +1494,51 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { append_erb_control_flow_scope_error(keyword, node->location.start, node->location.end, node->errors); } } - } - - if (node->type == AST_ERB_IF_NODE) { - const AST_ERB_IF_NODE_T* if_node = (const AST_ERB_IF_NODE_T*) node; - - if (if_node->end_node == NULL) { check_erb_node_for_missing_end(node); } - - if (if_node->statements != NULL) { - for (size_t i = 0; i < hb_array_size(if_node->statements); i++) { - AST_NODE_T* statement = (AST_NODE_T*) hb_array_get(if_node->statements, i); - - if (statement != NULL) { herb_visit_node(statement, detect_invalid_erb_structures, context); } - } - } - - AST_NODE_T* subsequent = if_node->subsequent; - - while (subsequent != NULL) { - if (subsequent->type == AST_ERB_CONTENT_NODE) { - const AST_ERB_CONTENT_NODE_T* content_node = (const AST_ERB_CONTENT_NODE_T*) subsequent; - - if (content_node->parsed && !content_node->valid && content_node->analyzed_ruby != NULL) { - analyzed_ruby_T* analyzed = content_node->analyzed_ruby; - const char* keyword = erb_keyword_from_analyzed_ruby(analyzed); - - if (!token_value_empty(content_node->tag_closing)) { - append_erb_control_flow_scope_error( - keyword, - subsequent->location.start, - subsequent->location.end, - subsequent->errors - ); - } - } - } - - if (subsequent->type == AST_ERB_IF_NODE) { - const AST_ERB_IF_NODE_T* elsif_node = (const AST_ERB_IF_NODE_T*) subsequent; - - if 
(elsif_node->statements != NULL) { - for (size_t i = 0; i < hb_array_size(elsif_node->statements); i++) { - AST_NODE_T* statement = (AST_NODE_T*) hb_array_get(elsif_node->statements, i); - - if (statement != NULL) { herb_visit_node(statement, detect_invalid_erb_structures, context); } - } - } - subsequent = elsif_node->subsequent; - } else if (subsequent->type == AST_ERB_ELSE_NODE) { - const AST_ERB_ELSE_NODE_T* else_node = (const AST_ERB_ELSE_NODE_T*) subsequent; + if (is_loop_node) { context->loop_depth--; } + if (is_begin_node) { context->rescue_depth--; } + } - if (else_node->statements != NULL) { - for (size_t i = 0; i < hb_array_size(else_node->statements); i++) { - AST_NODE_T* statement = (AST_NODE_T*) hb_array_get(else_node->statements, i); + return true; +} - if (statement != NULL) { herb_visit_node(statement, detect_invalid_erb_structures, context); } - } - } +static bool transform_erb_nodes(const AST_NODE_T* node, void* data) { + analyze_ruby_context_T* context = (analyze_ruby_context_T*) data; + context->parent = (AST_NODE_T*) node; - break; - } else { - break; - } - } + if (node->type == AST_DOCUMENT_NODE) { + AST_DOCUMENT_NODE_T* document_node = (AST_DOCUMENT_NODE_T*) node; + hb_array_T* old_array = document_node->children; + document_node->children = rewrite_node_array((AST_NODE_T*) node, document_node->children, context); + free_analyzed_ruby_from_array(old_array); + hb_array_free(&old_array); } - if (node->type == AST_ERB_UNLESS_NODE || node->type == AST_ERB_WHILE_NODE || node->type == AST_ERB_UNTIL_NODE - || node->type == AST_ERB_FOR_NODE || node->type == AST_ERB_CASE_NODE || node->type == AST_ERB_CASE_MATCH_NODE - || node->type == AST_ERB_BEGIN_NODE || node->type == AST_ERB_BLOCK_NODE || node->type == AST_ERB_ELSE_NODE) { - herb_visit_child_nodes(node, detect_invalid_erb_structures, context); + if (node->type == AST_HTML_ELEMENT_NODE) { + AST_HTML_ELEMENT_NODE_T* element_node = (AST_HTML_ELEMENT_NODE_T*) node; + hb_array_T* old_array = 
element_node->body; + element_node->body = rewrite_node_array((AST_NODE_T*) node, element_node->body, context); + free_analyzed_ruby_from_array(old_array); + hb_array_free(&old_array); } - if (node->type == AST_ERB_UNLESS_NODE || node->type == AST_ERB_WHILE_NODE || node->type == AST_ERB_UNTIL_NODE - || node->type == AST_ERB_FOR_NODE || node->type == AST_ERB_CASE_NODE || node->type == AST_ERB_CASE_MATCH_NODE - || node->type == AST_ERB_BEGIN_NODE || node->type == AST_ERB_BLOCK_NODE || node->type == AST_ERB_ELSE_NODE) { - check_erb_node_for_missing_end(node); - - if (is_loop_node) { context->loop_depth--; } - if (is_begin_node) { context->rescue_depth--; } - - return false; + if (node->type == AST_HTML_OPEN_TAG_NODE) { + AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node; + hb_array_T* old_array = open_tag->children; + open_tag->children = rewrite_node_array((AST_NODE_T*) node, open_tag->children, context); + free_analyzed_ruby_from_array(old_array); + hb_array_free(&old_array); } - if (node->type == AST_ERB_IF_NODE) { - if (is_loop_node) { context->loop_depth--; } - if (is_begin_node) { context->rescue_depth--; } - - return false; + if (node->type == AST_HTML_ATTRIBUTE_VALUE_NODE) { + AST_HTML_ATTRIBUTE_VALUE_NODE_T* value_node = (AST_HTML_ATTRIBUTE_VALUE_NODE_T*) node; + hb_array_T* old_array = value_node->children; + value_node->children = rewrite_node_array((AST_NODE_T*) node, value_node->children, context); + free_analyzed_ruby_from_array(old_array); + hb_array_free(&old_array); } - bool result = true; - - if (is_loop_node) { context->loop_depth--; } - if (is_begin_node) { context->rescue_depth--; } - - return result; + return true; } void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source, bool strict) { @@ -1543,6 +1548,7 @@ void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source, context->document = document; context->parent = NULL; context->ruby_context_stack = hb_array_init(8); + context->arena 
= document->arena; herb_visit_node((AST_NODE_T*) document, transform_erb_nodes, context); herb_transform_conditional_elements(document); diff --git a/src/extract.c b/src/extract.c index 3032e0a56..b85668774 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1,6 +1,8 @@ #include "include/herb.h" #include "include/io.h" #include "include/lexer.h" +#include "include/macros.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" #include "include/util/hb_buffer.h" #include "include/util/string.h" @@ -20,7 +22,24 @@ void herb_extract_ruby_to_buffer_with_options( ) { herb_extract_ruby_options_T extract_options = options ? *options : HERB_EXTRACT_RUBY_DEFAULT_OPTIONS; - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } + + hb_array_T* tokens = result->tokens; bool skip_erb_content = false; bool is_comment_tag = false; bool is_erb_comment_tag = false; @@ -137,7 +156,7 @@ void herb_extract_ruby_to_buffer_with_options( } } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); } void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) { @@ -145,7 +164,24 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) { } void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) { - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } + + hb_array_T* tokens = result->tokens; for (size_t i = 0; i < hb_array_size(tokens); i++) { const token_T* token = hb_array_get(tokens, i); @@ -158,7 
+194,7 @@ void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) { } } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); } char* herb_extract_ruby_with_semicolons(const char* source) { diff --git a/src/herb.c b/src/herb.c index cd07317fc..812d8d16b 100644 --- a/src/herb.c +++ b/src/herb.c @@ -2,8 +2,10 @@ #include "include/analyze.h" #include "include/io.h" #include "include/lexer.h" +#include "include/macros.h" #include "include/parser.h" #include "include/token.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" #include "include/util/hb_buffer.h" #include "include/version.h" @@ -11,9 +13,11 @@ #include #include -HERB_EXPORTED_FUNCTION hb_array_T* herb_lex(const char* source) { +HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex(const char* source, hb_arena_T* arena) { + if (!arena) { return NULL; } + lexer_T lexer = { 0 }; - lexer_init(&lexer, source); + lexer_init(&lexer, source, arena); token_T* token = NULL; hb_array_T* tokens = hb_array_init(128); @@ -24,14 +28,24 @@ HERB_EXPORTED_FUNCTION hb_array_T* herb_lex(const char* source) { hb_array_append(tokens, token); - return tokens; + herb_lex_result_T* result = malloc(sizeof(herb_lex_result_T)); + if (!result) { + hb_array_free(&tokens); + return NULL; + } + + result->tokens = tokens; + result->arena = arena; + + return result; } -HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const parser_options_T* options) { +HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const parser_options_T* options, hb_arena_T* arena) { if (!source) { source = ""; } + if (!arena) { return NULL; } lexer_T lexer = { 0 }; - lexer_init(&lexer, source); + lexer_init(&lexer, source, arena); parser_T parser = { 0 }; parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; @@ -49,20 +63,34 @@ HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const return document; } -HERB_EXPORTED_FUNCTION hb_array_T* 
herb_lex_file(const char* path) { +HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex_file(const char* path, hb_arena_T* arena) { char* source = herb_read_file(path); - hb_array_T* tokens = herb_lex(source); + herb_lex_result_T* result = herb_lex(source, arena); free(source); - return tokens; + return result; } HERB_EXPORTED_FUNCTION void herb_lex_to_buffer(const char* source, hb_buffer_T* output) { - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } - for (size_t i = 0; i < hb_array_size(tokens); i++) { - token_T* token = hb_array_get(tokens, i); + for (size_t i = 0; i < hb_array_size(result->tokens); i++) { + token_T* token = hb_array_get(result->tokens, i); hb_string_T type = token_to_string(token); hb_buffer_append_string(output, type); @@ -71,7 +99,23 @@ HERB_EXPORTED_FUNCTION void herb_lex_to_buffer(const char* source, hb_buffer_T* hb_buffer_append(output, "\n"); } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); +} + +void herb_free_lex_result(herb_lex_result_T** result) { + if (!result || !*result) { return; } + + herb_lex_result_T* r = *result; + + if (r->tokens) { hb_array_free(&r->tokens); } + + if (r->arena) { + hb_arena_free(r->arena); + free(r->arena); + } + + free(r); + *result = NULL; } HERB_EXPORTED_FUNCTION void herb_free_tokens(hb_array_T** tokens) { diff --git a/src/include/analyze.h b/src/include/analyze.h index ac8d7e73b..3fc7bdf24 100644 --- a/src/include/analyze.h +++ b/src/include/analyze.h @@ -9,6 +9,7 @@ typedef struct ANALYZE_RUBY_CONTEXT_STRUCT { AST_DOCUMENT_NODE_T* document; AST_NODE_T* parent; hb_array_T* ruby_context_stack; + hb_arena_T* arena; } analyze_ruby_context_T; typedef enum { diff --git a/src/include/herb.h b/src/include/herb.h index 
3e6876dd7..033be9a0d 100644 --- a/src/include/herb.h +++ b/src/include/herb.h @@ -5,6 +5,7 @@ #include "extract.h" #include "macros.h" #include "parser.h" +#include "util/hb_arena.h" #include "util/hb_array.h" #include "util/hb_buffer.h" @@ -14,16 +15,22 @@ extern "C" { #endif +typedef struct { + hb_array_T* tokens; + hb_arena_T* arena; +} herb_lex_result_T; + HERB_EXPORTED_FUNCTION void herb_lex_to_buffer(const char* source, hb_buffer_T* output); -HERB_EXPORTED_FUNCTION hb_array_T* herb_lex(const char* source); -HERB_EXPORTED_FUNCTION hb_array_T* herb_lex_file(const char* path); +HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex(const char* source, hb_arena_T* arena); +HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex_file(const char* path, hb_arena_T* arena); -HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const parser_options_T* options); +HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const parser_options_T* options, hb_arena_T* arena); HERB_EXPORTED_FUNCTION const char* herb_version(void); HERB_EXPORTED_FUNCTION const char* herb_prism_version(void); +void herb_free_lex_result(herb_lex_result_T** result); HERB_EXPORTED_FUNCTION void herb_free_tokens(hb_array_T** tokens); #ifdef __cplusplus diff --git a/src/include/lexer.h b/src/include/lexer.h index 142f3fb1c..adb13935e 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -3,8 +3,9 @@ #include "lexer_struct.h" #include "token_struct.h" +#include "util/hb_arena.h" -void lexer_init(lexer_T* lexer, const char* source); +void lexer_init(lexer_T* lexer, const char* source, hb_arena_T* arena); token_T* lexer_next_token(lexer_T* lexer); token_T* lexer_error(lexer_T* lexer, const char* message); diff --git a/src/include/lexer_struct.h b/src/include/lexer_struct.h index 94b132559..46e6a492a 100644 --- a/src/include/lexer_struct.h +++ b/src/include/lexer_struct.h @@ -1,6 +1,7 @@ #ifndef HERB_LEXER_STRUCT_H #define HERB_LEXER_STRUCT_H +#include 
"util/hb_arena.h" #include "util/hb_string.h" #include @@ -29,6 +30,8 @@ typedef struct LEXER_STRUCT { uint32_t stall_counter; uint32_t last_position; bool stalled; + + hb_arena_T* arena; } lexer_T; #endif diff --git a/src/include/parser.h b/src/include/parser.h index 81b34d8d2..0b835fe9d 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -3,6 +3,7 @@ #include "ast_node.h" #include "lexer.h" +#include "util/hb_arena.h" #include "util/hb_array.h" typedef enum { @@ -37,6 +38,7 @@ typedef struct PARSER_STRUCT { parser_options_T options; size_t consecutive_error_count; bool in_recovery_mode; + hb_arena_T* arena; } parser_T; size_t parser_sizeof(void); diff --git a/src/include/parser_helpers.h b/src/include/parser_helpers.h index 537f7a5bc..325c7b3d0 100644 --- a/src/include/parser_helpers.h +++ b/src/include/parser_helpers.h @@ -44,6 +44,7 @@ token_T* parser_consume_if_present(parser_T* parser, token_type_T type); token_T* parser_consume_expected(parser_T* parser, token_type_T type, hb_array_T* array); AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( + const parser_T* parser, AST_HTML_OPEN_TAG_NODE_T* open_tag, hb_array_T* body, hb_array_T* errors diff --git a/src/include/token.h b/src/include/token.h index 9680bbf6f..d55f6d968 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -4,13 +4,19 @@ #include "lexer_struct.h" #include "position.h" #include "token_struct.h" +#include "util/hb_arena.h" #include "util/hb_string.h" token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer); hb_string_T token_to_string(const token_T* token); const char* token_type_to_string(token_type_T type); -token_T* token_copy(token_T* token); +char* token_value(const token_T* token); +int token_type(const token_T* token); + +size_t token_sizeof(void); + +token_T* token_copy(token_T* token, hb_arena_T* arena); void token_free(token_T* token); diff --git a/src/include/token_struct.h b/src/include/token_struct.h index 8ff44f04c..b6b9cd5a7 100644 --- 
a/src/include/token_struct.h +++ b/src/include/token_struct.h @@ -1,6 +1,8 @@ #ifndef HERB_TOKEN_STRUCT_H #define HERB_TOKEN_STRUCT_H +#include + #include "location.h" #include "range.h" @@ -54,6 +56,7 @@ typedef struct TOKEN_STRUCT { range_T range; location_T location; token_type_T type; + bool arena_allocated; } token_T; #endif diff --git a/src/include/util/hb_arena.h b/src/include/util/hb_arena.h index 907aede22..8c89be6d5 100644 --- a/src/include/util/hb_arena.h +++ b/src/include/util/hb_arena.h @@ -25,7 +25,7 @@ void* hb_arena_alloc(hb_arena_T* allocator, size_t size); size_t hb_arena_position(hb_arena_T* allocator); size_t hb_arena_capacity(hb_arena_T* allocator); void hb_arena_reset(hb_arena_T* allocator); -void hb_arena_reset_to(hb_arena_T* allocator, size_t new_position); +void hb_arena_reset_to(hb_arena_T* allocator, size_t target_position); void hb_arena_free(hb_arena_T* allocator); #endif diff --git a/src/lexer.c b/src/lexer.c index 98d6c3c04..399dfd187 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -31,7 +31,7 @@ static bool lexer_stalled(lexer_T* lexer) { return lexer->stalled; } -void lexer_init(lexer_T* lexer, const char* source) { +void lexer_init(lexer_T* lexer, const char* source, hb_arena_T* arena) { if (source != NULL) { lexer->source = hb_string(source); } else { @@ -52,6 +52,8 @@ void lexer_init(lexer_T* lexer, const char* source) { lexer->stall_counter = 0; lexer->last_position = 0; lexer->stalled = false; + + lexer->arena = arena; } token_T* lexer_error(lexer_T* lexer, const char* message) { diff --git a/src/lexer_peek_helpers.c b/src/lexer_peek_helpers.c index 3b4862a3b..9585d9aa7 100644 --- a/src/lexer_peek_helpers.c +++ b/src/lexer_peek_helpers.c @@ -92,14 +92,11 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok token_T* token = lexer_next_token(lexer); while (token && (token->type == TOKEN_WHITESPACE || token->type == TOKEN_NEWLINE)) { - token_free(token); token = lexer_next_token(lexer); } bool result = 
(token && token->type == token_type); - if (token) { token_free(token); } - lexer->current_position = saved_position; lexer->current_line = saved_line; lexer->current_column = saved_column; diff --git a/src/main.c b/src/main.c index b45caa5d7..349eb73f3 100644 --- a/src/main.c +++ b/src/main.c @@ -6,7 +6,10 @@ #include "include/extract.h" #include "include/herb.h" #include "include/io.h" +#include "include/macros.h" #include "include/ruby_parser.h" +#include "include/util/hb_arena.h" +#include "include/util/hb_arena_debug.h" #include "include/util/hb_buffer.h" #include "include/util/string.h" @@ -33,6 +36,23 @@ void print_time_diff(const struct timespec start, const struct timespec end, con printf(" %8.6f s\n\n", s); } +static hb_arena_T* allocate_arena(void) { + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { + fprintf(stderr, "Failed to allocate arena\n"); + return NULL; + } + + if (!hb_arena_init(arena, KB(16))) { + fprintf(stderr, "Failed to initialize arena\n"); + free(arena); + return NULL; + } + + return arena; +} + int main(const int argc, char* argv[]) { if (argc < 2) { puts("./herb [command] [options]\n"); @@ -62,6 +82,28 @@ int main(const int argc, char* argv[]) { struct timespec start, end; clock_gettime(CLOCK_MONOTONIC, &start); + if (string_equals(argv[1], "visit")) { + hb_arena_T* arena = allocate_arena(); + if (!arena) { + free(source); + return 1; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL, arena); + clock_gettime(CLOCK_MONOTONIC, &end); + + ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output); + printf("%s\n", output.value); + + print_time_diff(start, end, "visiting"); + + ast_node_free((AST_NODE_T*) root); + free(output.value); + free(source); + + return 0; + } + if (string_equals(argv[1], "lex")) { herb_lex_to_buffer(source, &output); clock_gettime(CLOCK_MONOTONIC, &end); @@ -76,7 +118,13 @@ int main(const int argc, char* argv[]) { } if (string_equals(argv[1], "parse")) { - AST_DOCUMENT_NODE_T* root = 
herb_parse(source, NULL); + hb_arena_T* arena = allocate_arena(); + if (!arena) { + free(source); + return 1; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL, arena); clock_gettime(CLOCK_MONOTONIC, &end); @@ -88,6 +136,9 @@ int main(const int argc, char* argv[]) { puts(output.value); print_time_diff(start, end, "parsing"); + + printf("\n"); + hb_arena_print_stats(arena); } ast_node_free((AST_NODE_T*) root); diff --git a/src/parser.c b/src/parser.c index b415bb5a3..02ea55168 100644 --- a/src/parser.c +++ b/src/parser.c @@ -48,6 +48,7 @@ void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options parser->options = options; parser->consecutive_error_count = 0; parser->in_recovery_mode = false; + parser->arena = lexer->arena; } static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) { @@ -82,7 +83,8 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) { tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); free(content.value); @@ -140,7 +142,8 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) { comment_end, comment_start->location.start, comment_end->location.end, - errors + errors, + parser->arena ); free(comment.value); @@ -185,7 +188,8 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) { tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -232,7 +236,8 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -267,7 +272,8 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra token->value, token->location.start, token->location.end, - document_errors + document_errors, + parser->arena ); token_free(token); @@ -285,10 +291,15 @@ static 
AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra AST_HTML_TEXT_NODE_T* text_node = NULL; if (hb_buffer_length(&content) > 0) { - text_node = - ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors); + text_node = ast_html_text_node_init( + hb_buffer_value(&content), + start, + parser->current_token->location.start, + errors, + parser->arena + ); } else { - text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors); + text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors, parser->arena); } free(content.value); @@ -355,7 +366,7 @@ static AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T } AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = - ast_html_attribute_name_node_init(children, node_start, node_end, errors); + ast_html_attribute_name_node_init(children, node_start, node_end, errors, parser->arena); free(buffer.value); @@ -527,7 +538,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value opening_quote->value, potential_closing->location.start, potential_closing->location.end, - errors + errors, + parser->arena ); lexer_restore_state(parser->lexer, saved_state); @@ -579,7 +591,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value true, opening_quote->location.start, closing_quote->location.end, - errors + errors, + parser->arena ); token_free(opening_quote); @@ -604,7 +617,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser false, erb_node->base.location.start, erb_node->base.location.end, - errors + errors, + parser->arena ); } @@ -623,7 +637,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser false, literal->base.location.start, literal->base.location.end, - errors + errors, + parser->arena ); } @@ -641,11 +656,12 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* 
parser_parse_html_attribute_value(parser "backtick (`)", start, end, - errors + errors, + parser->arena ); AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = - ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors); + ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors, parser->arena); token_free(token); @@ -658,7 +674,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser token_type_to_string(parser->current_token->type), parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init( @@ -668,7 +685,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser false, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); return value; @@ -724,11 +742,18 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) token_free(whitespace); } - token_T* equals_with_whitespace = calloc(1, sizeof(token_T)); + token_T* equals_with_whitespace = hb_arena_alloc(parser->arena, sizeof(token_T)); equals_with_whitespace->type = TOKEN_EQUALS; - equals_with_whitespace->value = herb_strdup(equals_buffer.value); + + size_t value_length = strlen(equals_buffer.value); + char* arena_value = hb_arena_alloc(parser->arena, value_length + 1); + memcpy(arena_value, equals_buffer.value, value_length); + arena_value[value_length] = '\0'; + + equals_with_whitespace->value = arena_value; equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end }; equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end }; + equals_with_whitespace->arena_allocated = true; free(equals_buffer.value); @@ -740,7 +765,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) attribute_value, 
attribute_name->base.location.start, attribute_value->base.location.end, - NULL + NULL, + parser->arena ); } else { return ast_html_attribute_node_init( @@ -749,7 +775,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) NULL, attribute_name->base.location.start, attribute_name->base.location.end, - NULL + NULL, + parser->arena ); } } else { @@ -813,7 +840,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) attribute_value, attribute_name->base.location.start, attribute_value->base.location.end, - NULL + NULL, + parser->arena ); token_free(equals); @@ -827,7 +855,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) NULL, attribute_name->base.location.start, attribute_name->base.location.end, - NULL + NULL, + parser->arena ); } @@ -1068,7 +1097,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { is_self_closing, tag_start->location.start, tag_end->location.end, - errors + errors, + parser->arena ); token_free(tag_start); @@ -1102,7 +1132,8 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) got.data, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); free(expected.data); @@ -1116,7 +1147,8 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -1140,7 +1172,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element( ELEMENT_SOURCE_HTML, open_tag->base.location.start, open_tag->base.location.end, - NULL + NULL, + parser->arena ); } @@ -1161,7 +1194,9 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( parser_parse_in_data_state(parser, body, errors); } - if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return parser_handle_missing_close_tag(open_tag, body, errors); } + 
if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { + return parser_handle_missing_close_tag(parser, open_tag, body, errors); + } AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser); @@ -1203,7 +1238,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( ELEMENT_SOURCE_HTML, open_tag->base.location.start, close_tag->base.location.end, - errors + errors, + parser->arena ); } @@ -1270,7 +1306,8 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) { false, opening_tag->location.start, end_position, - errors + errors, + parser->arena ); token_free(opening_tag); @@ -1620,7 +1657,8 @@ static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) { token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors); - AST_DOCUMENT_NODE_T* document_node = ast_document_node_init(children, start, eof->location.end, errors); + AST_DOCUMENT_NODE_T* document_node = + ast_document_node_init(children, start, eof->location.end, errors, parser->arena); token_free(eof); @@ -1638,7 +1676,8 @@ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token whitespace_token, whitespace_token->location.start, whitespace_token->location.end, - errors + errors, + parser->arena ); hb_array_append(children, whitespace_node); } diff --git a/src/parser_helpers.c b/src/parser_helpers.c index f56590ced..4085fbb15 100644 --- a/src/parser_helpers.c +++ b/src/parser_helpers.c @@ -14,7 +14,7 @@ #include void parser_push_open_tag(const parser_T* parser, token_T* tag_name) { - token_T* copy = token_copy(tag_name); + token_T* copy = token_copy(tag_name, parser->arena); hb_array_push(parser->open_tags_stack, copy); } @@ -107,7 +107,8 @@ void parser_append_unexpected_error( token_type_to_string(token->type), token->location.start, token->location.end, - errors + errors, + parser->arena ); token_free(token); @@ -119,7 +120,8 @@ void parser_append_unexpected_token_error(parser_T* parser, token_type_T expecte 
parser->current_token, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); } @@ -132,7 +134,7 @@ void parser_append_literal_node_from_buffer( if (hb_buffer_length(buffer) == 0) { return; } AST_LITERAL_NODE_T* literal = - ast_literal_node_init(hb_buffer_value(buffer), start, parser->current_token->location.start, NULL); + ast_literal_node_init(hb_buffer_value(buffer), start, parser->current_token->location.start, NULL, parser->arena); if (children != NULL) { hb_array_append(children, literal); } hb_buffer_clear(buffer); @@ -155,13 +157,21 @@ token_T* parser_consume_expected(parser_T* parser, const token_type_T expected_t if (token == NULL) { token = parser_advance(parser); - append_unexpected_token_error(expected_type, token, token->location.start, token->location.end, array); + append_unexpected_token_error( + expected_type, + token, + token->location.start, + token->location.end, + array, + parser->arena + ); } return token; } AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( + const parser_T* parser, AST_HTML_OPEN_TAG_NODE_T* open_tag, hb_array_T* body, hb_array_T* errors @@ -170,7 +180,8 @@ AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( open_tag->tag_name, open_tag->tag_name->location.start, open_tag->tag_name->location.end, - errors + errors, + parser->arena ); return ast_html_element_node_init( @@ -182,7 +193,8 @@ AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( ELEMENT_SOURCE_HTML, open_tag->base.location.start, open_tag->base.location.end, - errors + errors, + parser->arena ); } @@ -200,14 +212,16 @@ void parser_handle_mismatched_tags( actual_tag, actual_tag->location.start, actual_tag->location.end, - errors + errors, + parser->arena ); } else { append_missing_opening_tag_error( close_tag->tag_name, close_tag->tag_name->location.start, close_tag->tag_name->location.end, - errors + errors, + parser->arena ); } } diff --git a/src/prism_helpers.c b/src/prism_helpers.c 
index 248af8dbc..937786472 100644 --- a/src/prism_helpers.c +++ b/src/prism_helpers.c @@ -37,7 +37,8 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error( pm_diagnostic_id_human(error->diag_id), pm_error_level_to_string(error->level), start, - end + end, + NULL ); } diff --git a/src/token.c b/src/token.c index 1f7cf9afe..104338cce 100644 --- a/src/token.c +++ b/src/token.c @@ -10,17 +10,33 @@ #include #include +size_t token_sizeof(void) { + return sizeof(struct TOKEN_STRUCT); +} + token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) { - token_T* token = calloc(1, sizeof(token_T)); + token_T* token = hb_arena_alloc(lexer->arena, token_sizeof()); if (type == TOKEN_NEWLINE) { lexer->current_line++; lexer->current_column = 0; } - token->value = hb_string_to_c_string_using_malloc(value); + if (value.data) { + char* arena_value = hb_arena_alloc(lexer->arena, value.length + 1); + if (arena_value) { + memcpy(arena_value, value.data, value.length); + arena_value[value.length] = '\0'; + token->value = arena_value; + } else { + token->value = NULL; + } + } else { + token->value = NULL; + } token->type = type; + token->arena_allocated = true; token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position }; location_from( @@ -113,19 +129,32 @@ hb_string_T token_to_string(const token_T* token) { return hb_string(string); } -token_T* token_copy(token_T* token) { +token_T* token_copy(token_T* token, hb_arena_T* arena) { if (!token) { return NULL; } - token_T* new_token = calloc(1, sizeof(token_T)); + token_T* new_token = arena ? 
hb_arena_alloc(arena, token_sizeof()) : calloc(1, token_sizeof()); if (!new_token) { return NULL; } if (token->value) { - new_token->value = herb_strdup(token->value); - - if (!new_token->value) { - free(new_token); - return NULL; + if (arena) { + size_t value_length = strlen(token->value); + char* arena_value = hb_arena_alloc(arena, value_length + 1); + + if (arena_value) { + memcpy(arena_value, token->value, value_length); + arena_value[value_length] = '\0'; + new_token->value = arena_value; + } else { + new_token->value = NULL; + } + } else { + new_token->value = herb_strdup(token->value); + + if (!new_token->value) { + free(new_token); + return NULL; + } } } else { new_token->value = NULL; @@ -134,6 +163,7 @@ token_T* token_copy(token_T* token) { new_token->type = token->type; new_token->range = token->range; new_token->location = token->location; + new_token->arena_allocated = arena != NULL; return new_token; } @@ -145,7 +175,9 @@ bool token_value_empty(const token_T* token) { void token_free(token_T* token) { if (!token) { return; } - if (token->value != NULL) { free(token->value); } + if (!token->arena_allocated) { + if (token->value != NULL) { free(token->value); } - free(token); + free(token); + } } diff --git a/src/util/hb_arena.c b/src/util/hb_arena.c index 7c503df0a..88c0d31d6 100644 --- a/src/util/hb_arena.c +++ b/src/util/hb_arena.c @@ -7,7 +7,7 @@ #include #include -#define hb_arena_for_each_page(allocator, page) \ +#define hb_arena_for_each_page(allocator, _page) \ for (hb_arena_page_T* page = (allocator)->head; page != NULL; page = page->next) static inline size_t hb_arena_align_size(size_t size, size_t alignment) { diff --git a/templates/src/ast_nodes.c.erb b/templates/src/ast_nodes.c.erb index 4ec348db6..48cec5c24 100644 --- a/templates/src/ast_nodes.c.erb +++ b/templates/src/ast_nodes.c.erb @@ -1,5 +1,6 @@ #include #include +#include #include @@ -9,21 +10,22 @@ #include "include/errors.h" #include "include/token.h" #include "include/util.h" 
+#include "include/util/hb_arena.h" #include "include/util/hb_array.h" <%- nodes.each do |node| -%> <%- node_arguments = node.fields.any? ? node.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors"] -%> +<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors", "hb_arena_T* arena"] -%> <%= node.struct_type %>* ast_<%= node.human %>_init(<%= arguments.join(", ") %>) { - <%= node.struct_type %>* <%= node.human %> = malloc(sizeof(<%= node.struct_type %>)); + <%= node.struct_type %>* <%= node.human %> = arena ? hb_arena_alloc(arena, sizeof(<%= node.struct_type %>)) : malloc(sizeof(<%= node.struct_type %>)); ast_node_init(&<%= node.human %>->base, <%= node.type %>, start_position, end_position, errors); <%- node.fields.each do |field| -%> <%- case field -%> <%- when Herb::Template::TokenField -%> - <%= node.human %>-><%= field.name %> = token_copy(<%= field.name %>); + <%= node.human %>-><%= field.name %> = token_copy(<%= field.name %>, arena); <%- when Herb::Template::NodeField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::ArrayField -%> @@ -35,7 +37,13 @@ <%- when Herb::Template::PrismNodeField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - <%= node.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + if (arena) { + char* temp = (char*) hb_arena_alloc(arena, strlen(<%= field.name %>) + 1); + strcpy(temp, <%= field.name %>); + <%= node.human %>-><%= field.name %> = temp; + } else { + <%= node.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + } <%- when Herb::Template::AnalyzedRubyField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::VoidPointerField -%> @@ -47,6 +55,10 @@ <%- end -%> <%- end -%> + <%- if node.human == 
"document_node" -%> + <%= node.human %>->arena = arena; + <%- end -%> + return <%= node.human %>; } <%- end -%> @@ -72,20 +84,60 @@ hb_string_T ast_node_human_type(AST_NODE_T* node) { } void ast_free_base_node(AST_NODE_T* node) { - if (node == NULL) { return; } + // Base node cleanup is intentionally empty in the arena-based implementation. + // The node itself is arena-allocated and freed when the arena is destroyed. + // The errors array is freed by ast_free_arrays_recursive() before arena cleanup. + // This function is kept as an extension point for any future non-arena base node cleanup. +} + +<%- nodes.each do |node| -%> +static void ast_free_arrays_<%= node.human %>(<%= node.struct_type %>* <%= node.human %>); +<%- end -%> + +static void ast_free_arrays_recursive(AST_NODE_T* node) { + if (!node) { return; } + + switch (node->type) { + <%- nodes.each do |node| -%> + case <%= node.type %>: ast_free_arrays_<%= node.human %>((<%= node.struct_type %>*) node); break; + <%- end -%> + } +} + +<%- nodes.each do |node| -%> + +static void ast_free_arrays_<%= node.human %>(<%= node.struct_type %>* <%= node.human %>) { + if (!<%= node.human %>) { return; } - if (node->errors) { - for (size_t i = 0; i < hb_array_size(node->errors); i++) { - ERROR_T* child = hb_array_get(node->errors, i); - if (child != NULL) { error_free(child); } + <%- node.fields.each do |field| -%> + <%- case field -%> + <%- when Herb::Template::NodeField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + ast_free_arrays_recursive((AST_NODE_T*) <%= node.human %>-><%= field.name %>); + } + <%- when Herb::Template::ArrayField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + for (size_t i = 0; i < hb_array_size(<%= node.human %>-><%= field.name %>); i++) { + AST_NODE_T* child = (AST_NODE_T*) hb_array_get(<%= node.human %>-><%= field.name %>, i); + ast_free_arrays_recursive(child); } - hb_array_free(&node->errors); + hb_array_free(&<%= node.human %>-><%= field.name %>); } + <%- when 
Herb::Template::AnalyzedRubyField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + free_analyzed_ruby(<%= node.human %>-><%= field.name %>); + } + <%- end -%> + <%- end -%> - free(node); + if (<%= node.human %>->base.errors != NULL) { + hb_array_free(&<%= node.human %>->base.errors); + } } +<%- end -%> + <%- nodes.each do |node| -%> <%- arguments = node.fields.any? ? node.fields.map { |field| [field.c_type, " ", field.name].join }.join(", ") : "void" -%> @@ -96,22 +148,15 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % <%- node.fields.each do |field| -%> <%- case field -%> <%- when Herb::Template::TokenField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { token_free(<%= node.human %>-><%= field.name %>); } + // Token is arena-allocated, will be freed with arena <%- when Herb::Template::BorrowedNodeField -%> /* <%= field.name %> is a borrowed reference, not freed here (owned by another field) */ <%- when Herb::Template::NodeField -%> - ast_node_free((AST_NODE_T*) <%= node.human %>-><%= field.name %>); + // Node is arena-allocated, will be freed with arena <%- when Herb::Template::ArrayField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { - for (size_t i = 0; i < hb_array_size(<%= node.human %>-><%= field.name %>); i++) { - AST_NODE_T* child = hb_array_get(<%= node.human %>-><%= field.name %>, i); - if (child) { ast_node_free(child); } - } - - hb_array_free(&<%= node.human %>-><%= field.name %>); - } + // Array freed by ast_free_arrays_recursive() before arena cleanup <%- when Herb::Template::StringField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { free((char*) <%= node.human %>-><%= field.name %>); } + // String is arena-allocated, will be freed with arena <%- when Herb::Template::PrismNodeField -%> if (<%= node.human %>-><%= field.name %> != NULL) { // The first argument to `pm_node_destroy` is a `pm_parser_t`, but it's currently unused: @@ -120,9 +165,7 @@ static void ast_free_<%= 
node.human %>(<%= node.struct_type %>* <%= node.human % pm_node_destroy(NULL, <%= node.human %>-><%= field.name %>); } <%- when Herb::Template::AnalyzedRubyField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { - free_analyzed_ruby(<%= node.human %>-><%= field.name %>); - } + // AnalyzedRuby freed by ast_free_arrays_recursive() before arena cleanup <%- when Herb::Template::VoidPointerField -%> free(<%= node.human %>-><%= field.name %>); <%- when Herb::Template::BooleanField -%> @@ -134,7 +177,20 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % <%- end -%> <%- end -%> + <%- if node.human == "document_node" -%> + ast_free_arrays_recursive((AST_NODE_T*)<%= node.human %>); + + hb_arena_T* arena = <%= node.human %>->arena; + + ast_free_base_node(&<%= node.human %>->base); + + if (arena != NULL) { + hb_arena_free(arena); + free(arena); + } + <%- else -%> ast_free_base_node(&<%= node.human %>->base); + <%- end -%> } <%- end -%> diff --git a/templates/src/errors.c.erb b/templates/src/errors.c.erb index a75aad5a0..cd9c12a2f 100644 --- a/templates/src/errors.c.erb +++ b/templates/src/errors.c.erb @@ -5,6 +5,7 @@ #include "include/token.h" #include "include/util.h" #include "include/util/hb_array.h" +#include "include/util/hb_arena.h" #include #include @@ -26,10 +27,10 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit } <%- errors.each do |error| -%> <%- error_arguments = error.fields.any? ? error.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = error_arguments + ["position_T start", "position_T end"] -%> +<%- arguments = error_arguments + ["position_T start", "position_T end", "hb_arena_T* arena"] -%> <%= error.struct_type %>* <%= error.human %>_init(<%= arguments.join(", ") %>) { - <%= error.struct_type %>* <%= error.human %> = malloc(sizeof(<%= error.struct_type %>)); + <%= error.struct_type %>* <%= error.human %> = arena ? 
hb_arena_alloc(arena, sizeof(<%= error.struct_type %>)) : malloc(sizeof(<%= error.struct_type %>)); error_init(&<%= error.human %>->base, <%= error.type %>, start, end); @@ -37,7 +38,7 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit const char* message_template = "<%= error.message_template %>"; size_t message_size = <%= Herb::Template::PrintfMessageTemplate.estimate_buffer_size(error.message_template) %>; - char* message = (char*) malloc(message_size); + char* message = arena ? (char*) hb_arena_alloc(arena, message_size) : (char*) malloc(message_size); if (message) { <%- error.message_arguments.each_with_index do |argument, i| -%> @@ -61,13 +62,48 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit <%- end -%> ); - <%= error.human %>->base.message = herb_strdup(message); - free(message); + if (arena) { + size_t length = strlen(message); + char* string = hb_arena_alloc(arena, length + 1); + + if (string) { + memcpy(string, message, length); + string[length] = '\0'; + <%= error.human %>->base.message = string; + } + } else { + <%= error.human %>->base.message = herb_strdup(message); + free(message); + } } else { - <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + if (arena) { + const char* template_string = "<%= error.message_template %>"; + size_t length = strlen(template_string); + char* string = hb_arena_alloc(arena, length + 1); + + if (string) { + memcpy(string, template_string, length); + string[length] = '\0'; + <%= error.human %>->base.message = string; + } + } else { + <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + } } <%- else -%> - <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + if (arena) { + const char* template_string = "<%= error.message_template %>"; + size_t length = strlen(template_string); + char* string = hb_arena_alloc(arena, length + 1); + + if (string) { + memcpy(string, 
template_string, length); + string[length] = '\0'; + <%= error.human %>->base.message = string; + } + } else { + <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + } <%- end -%> <%- error.fields.each do |field| -%> @@ -75,13 +111,24 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit <%- when Herb::Template::PositionField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::TokenField -%> - <%= error.human %>-><%= field.name %> = token_copy(<%= field.name %>); + <%= error.human %>-><%= field.name %> = token_copy(<%= field.name %>, arena); <%- when Herb::Template::TokenTypeField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::SizeTField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - <%= error.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + if (arena) { + size_t length = strlen(<%= field.name %>); + char* string = hb_arena_alloc(arena, length + 1); + + if (string) { + memcpy(string, <%= field.name %>, length); + string[length] = '\0'; + <%= error.human %>-><%= field.name %> = string; + } + } else { + <%= error.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + } <%- else -%> <%= field.inspect %> <%- end -%> @@ -89,8 +136,8 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit return <%= error.human %>; } -void append_<%= error.human %>(<%= (arguments + ["hb_array_T* errors"]).join(", ") %>) { - hb_array_append(errors, <%= error.human %>_init(<%= arguments.map { |argument| argument.split(" ").last.strip }.join(", ") %>)); +void append_<%= error.human %>(<%= (error_arguments + ["position_T start", "position_T end", "hb_array_T* errors", "hb_arena_T* arena"]).join(", ") %>) { + hb_array_append(errors, <%= error.human %>_init(<%= (error_arguments.map { |arg| arg.split(" ").last.strip } + ["start", "end", 
"arena"]).join(", ") %>)); } <%- end -%> diff --git a/templates/src/include/ast_nodes.h.erb b/templates/src/include/ast_nodes.h.erb index 3958eb1b5..d0506e5c2 100644 --- a/templates/src/include/ast_nodes.h.erb +++ b/templates/src/include/ast_nodes.h.erb @@ -9,6 +9,7 @@ #include "location.h" #include "position.h" #include "token_struct.h" +#include "util/hb_arena.h" #include "util/hb_array.h" #include "util/hb_buffer.h" #include "util/hb_string.h" @@ -32,12 +33,15 @@ typedef struct AST_NODE_STRUCT { typedef struct <%= node.struct_name %> { AST_NODE_T base; <%= arguments %> + <%- if node.human == "document_node" -%> + hb_arena_T* arena; + <%- end -%> } <%= node.struct_type %>; <%- end -%> <%- nodes.each do |node| -%> <%- node_arguments = node.fields.any? ? node.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors"] -%> +<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors", "hb_arena_T* arena"] -%> <%= node.struct_type %>* ast_<%= node.human %>_init(<%= arguments.join(", ") %>); <%- end -%> diff --git a/templates/src/include/errors.h.erb b/templates/src/include/errors.h.erb index 4676c72d3..9f7549f64 100644 --- a/templates/src/include/errors.h.erb +++ b/templates/src/include/errors.h.erb @@ -6,6 +6,7 @@ #include "position.h" #include "token.h" #include "util/hb_array.h" +#include "util/hb_arena.h" #include "util/hb_buffer.h" typedef enum { @@ -31,9 +32,10 @@ typedef struct { <%- errors.each do |error| -%> <%- error_arguments = error.fields.any? ? 
error.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = error_arguments + ["position_T start", "position_T end"] -%> -<%= error.struct_type %>* <%= error.human %>_init(<%= arguments.join(", ") %>); -void append_<%= error.human %>(<%= (arguments << "hb_array_T* errors").join(", ") %>); +<%- init_arguments = error_arguments + ["position_T start", "position_T end", "hb_arena_T* arena"] -%> +<%- append_arguments = error_arguments + ["position_T start", "position_T end", "hb_array_T* errors", "hb_arena_T* arena"] -%> +<%= error.struct_type %>* <%= error.human %>_init(<%= init_arguments.join(", ") %>); +void append_<%= error.human %>(<%= append_arguments.join(", ") %>); <%- end -%> void error_init(ERROR_T* error, error_type_T type, position_T start, position_T end); diff --git a/wasm/herb-wasm.cpp b/wasm/herb-wasm.cpp index 3baf167e8..4002ac0b5 100644 --- a/wasm/herb-wasm.cpp +++ b/wasm/herb-wasm.cpp @@ -7,6 +7,8 @@ #include "extension_helpers.h" extern "C" { +#include "../src/include/macros.h" +#include "../src/include/util/hb_arena.h" #include "../src/include/util/hb_array.h" #include "../src/include/ast_node.h" #include "../src/include/ast_nodes.h" @@ -24,11 +26,28 @@ extern "C" { using namespace emscripten; val Herb_lex(const std::string& source) { - hb_array_T* tokens = herb_lex(source.c_str()); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - val result = CreateLexResult(tokens, source); + if (!arena) { + return val::null(); + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return val::null(); + } + + herb_lex_result_T* lex_result = herb_lex(source.c_str(), arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + return val::null(); + } + + val result = CreateLexResult(lex_result->tokens, source); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); return result; } @@ -56,7 +75,24 @@ val Herb_parse(const std::string& source, val options) { } } - AST_DOCUMENT_NODE_T* root 
= herb_parse(source.c_str(), &parser_options); + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + return val::null(); + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return val::null(); + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source.c_str(), &parser_options, arena); + + if (!root) { + hb_arena_free(arena); + free(arena); + return val::null(); + } val result = CreateParseResult(root, source); From 329d01329e12823b4f9b177b15486372f25377f3 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Mon, 16 Feb 2026 17:53:21 +0100 Subject: [PATCH 02/18] Fix arena parameter propagation after rebase --- src/analyze.c | 72 ++++---------------- src/analyze_conditional_elements.c | 61 ++++++++++------- src/analyze_conditional_open_tags.c | 87 ++++++++++++++----------- src/include/analyze.h | 1 + src/include/analyze_helpers.h | 3 +- src/include/parser.h | 3 +- src/parser.c | 75 ++++++++++++--------- src/prism_helpers.c | 3 +- templates/src/analyze_missing_end.c.erb | 5 +- templates/src/analyze_transform.c.erb | 19 ++++++ templates/src/parser_match_tags.c.erb | 2 +- 11 files changed, 178 insertions(+), 153 deletions(-) diff --git a/src/analyze.c b/src/analyze.c index 29e98f1de..8e2a73477 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -69,7 +69,13 @@ static analyzed_ruby_T* herb_analyze_ruby(hb_string_T source) { return analyzed; } +typedef struct { + hb_arena_T* arena; +} analyze_erb_content_context_T; + static bool analyze_erb_content(const AST_NODE_T* node, void* data) { + analyze_erb_content_context_T* context = (analyze_erb_content_context_T*) data; + if (node->type == AST_ERB_CONTENT_NODE) { AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node; @@ -87,7 +93,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) { append_erb_multiple_blocks_in_tag_error( erb_content_node->base.location.start, erb_content_node->base.location.end, - erb_content_node->base.errors + 
erb_content_node->base.errors, + context->arena ); } @@ -97,7 +104,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) { append_erb_case_with_conditions_error( erb_content_node->base.location.start, erb_content_node->base.location.end, - erb_content_node->base.errors + erb_content_node->base.errors, + context->arena ); } } else { @@ -1403,21 +1411,6 @@ hb_array_T* rewrite_node_array(AST_NODE_T* node, hb_array_T* array, analyze_ruby return new_array; } -static void free_analyzed_ruby_from_array(hb_array_T* array) { - if (!array) { return; } - - for (size_t i = 0; i < hb_array_size(array); i++) { - AST_NODE_T* node = hb_array_get(array, i); - if (node && node->type == AST_ERB_CONTENT_NODE) { - AST_ERB_CONTENT_NODE_T* erb_content = (AST_ERB_CONTENT_NODE_T*) node; - if (erb_content->analyzed_ruby != NULL) { - free_analyzed_ruby(erb_content->analyzed_ruby); - erb_content->analyzed_ruby = NULL; - } - } - } -} - static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { invalid_erb_context_T* context = (invalid_erb_context_T*) data; @@ -1491,7 +1484,7 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { if (keyword == NULL) { keyword = erb_keyword_from_analyzed_ruby(analyzed); } if (keyword != NULL && !token_value_empty(content_node->tag_closing)) { - append_erb_control_flow_scope_error(keyword, node->location.start, node->location.end, node->errors); + append_erb_control_flow_scope_error(keyword, node->location.start, node->location.end, node->errors, context->arena); } } @@ -1502,47 +1495,9 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { return true; } -static bool transform_erb_nodes(const AST_NODE_T* node, void* data) { - analyze_ruby_context_T* context = (analyze_ruby_context_T*) data; - context->parent = (AST_NODE_T*) node; - - if (node->type == AST_DOCUMENT_NODE) { - AST_DOCUMENT_NODE_T* document_node = (AST_DOCUMENT_NODE_T*) node; - hb_array_T* old_array = 
document_node->children; - document_node->children = rewrite_node_array((AST_NODE_T*) node, document_node->children, context); - free_analyzed_ruby_from_array(old_array); - hb_array_free(&old_array); - } - - if (node->type == AST_HTML_ELEMENT_NODE) { - AST_HTML_ELEMENT_NODE_T* element_node = (AST_HTML_ELEMENT_NODE_T*) node; - hb_array_T* old_array = element_node->body; - element_node->body = rewrite_node_array((AST_NODE_T*) node, element_node->body, context); - free_analyzed_ruby_from_array(old_array); - hb_array_free(&old_array); - } - - if (node->type == AST_HTML_OPEN_TAG_NODE) { - AST_HTML_OPEN_TAG_NODE_T* open_tag = (AST_HTML_OPEN_TAG_NODE_T*) node; - hb_array_T* old_array = open_tag->children; - open_tag->children = rewrite_node_array((AST_NODE_T*) node, open_tag->children, context); - free_analyzed_ruby_from_array(old_array); - hb_array_free(&old_array); - } - - if (node->type == AST_HTML_ATTRIBUTE_VALUE_NODE) { - AST_HTML_ATTRIBUTE_VALUE_NODE_T* value_node = (AST_HTML_ATTRIBUTE_VALUE_NODE_T*) node; - hb_array_T* old_array = value_node->children; - value_node->children = rewrite_node_array((AST_NODE_T*) node, value_node->children, context); - free_analyzed_ruby_from_array(old_array); - hb_array_free(&old_array); - } - - return true; -} - void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source, bool strict) { - herb_visit_node((AST_NODE_T*) document, analyze_erb_content, NULL); + analyze_erb_content_context_T erb_content_context = { .arena = document->arena }; + herb_visit_node((AST_NODE_T*) document, analyze_erb_content, &erb_content_context); analyze_ruby_context_T* context = malloc(sizeof(analyze_ruby_context_T)); context->document = document; @@ -1557,6 +1512,7 @@ void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source, invalid_erb_context_T* invalid_context = malloc(sizeof(invalid_erb_context_T)); invalid_context->loop_depth = 0; invalid_context->rescue_depth = 0; + invalid_context->arena = document->arena; 
herb_visit_node((AST_NODE_T*) document, detect_invalid_erb_structures, invalid_context); diff --git a/src/analyze_conditional_elements.c b/src/analyze_conditional_elements.c index 07f27c3b4..abe0163f2 100644 --- a/src/analyze_conditional_elements.c +++ b/src/analyze_conditional_elements.c @@ -214,7 +214,14 @@ typedef struct { bool is_if; } conditional_open_tag_T; -static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document_errors) { +typedef struct { + hb_array_T* errors; + hb_arena_T* arena; +} conditional_transform_context_T; + +static void rewrite_conditional_elements(hb_array_T* nodes, conditional_transform_context_T* context) { + hb_array_T* document_errors = context->errors; + hb_arena_T* arena = context->arena; if (!nodes || hb_array_size(nodes) == 0) { return; } if (!document_errors) { return; } @@ -257,7 +264,8 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document open_node->location.start.line, open_node->location.start.column, open_node->location.start, - open_node->location.end + open_node->location.end, + arena ); hb_array_append(document_errors, multiple_tags_error); @@ -374,7 +382,8 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document node->location.start.line, node->location.start.column, mismatched_open->open_conditional->location.start, - node->location.end + node->location.end, + arena ); hb_array_append(document_errors, mismatch_error); @@ -410,7 +419,8 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document ELEMENT_SOURCE_HTML, start_position, end_position, - errors + errors, + arena ); free(condition_copy); @@ -489,39 +499,39 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* data); -static void transform_conditional_elements_in_array(hb_array_T* array, hb_array_T* document_errors) { +static void 
transform_conditional_elements_in_array(hb_array_T* array, conditional_transform_context_T* context) { if (!array) { return; } for (size_t i = 0; i < hb_array_size(array); i++) { AST_NODE_T* child = (AST_NODE_T*) hb_array_get(array, i); - if (child) { herb_visit_node(child, transform_conditional_elements_visitor, document_errors); } + if (child) { herb_visit_node(child, transform_conditional_elements_visitor, context); } } - rewrite_conditional_elements(array, document_errors); + rewrite_conditional_elements(array, context); } static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* data) { if (!node) { return false; } - hb_array_T* document_errors = (hb_array_T*) data; + conditional_transform_context_T* context = (conditional_transform_context_T*) data; switch (node->type) { case AST_DOCUMENT_NODE: { AST_DOCUMENT_NODE_T* doc = (AST_DOCUMENT_NODE_T*) node; - transform_conditional_elements_in_array(doc->children, document_errors); + transform_conditional_elements_in_array(doc->children, context); return false; } case AST_HTML_ELEMENT_NODE: { AST_HTML_ELEMENT_NODE_T* element = (AST_HTML_ELEMENT_NODE_T*) node; - transform_conditional_elements_in_array(element->body, document_errors); + transform_conditional_elements_in_array(element->body, context); return false; } case AST_ERB_IF_NODE: { AST_ERB_IF_NODE_T* if_node = (AST_ERB_IF_NODE_T*) node; - transform_conditional_elements_in_array(if_node->statements, document_errors); + transform_conditional_elements_in_array(if_node->statements, context); if (if_node->subsequent) { herb_visit_node(if_node->subsequent, transform_conditional_elements_visitor, data); } @@ -530,13 +540,13 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_ELSE_NODE: { AST_ERB_ELSE_NODE_T* else_node = (AST_ERB_ELSE_NODE_T*) node; - transform_conditional_elements_in_array(else_node->statements, document_errors); + transform_conditional_elements_in_array(else_node->statements, 
context); return false; } case AST_ERB_UNLESS_NODE: { AST_ERB_UNLESS_NODE_T* unless_node = (AST_ERB_UNLESS_NODE_T*) node; - transform_conditional_elements_in_array(unless_node->statements, document_errors); + transform_conditional_elements_in_array(unless_node->statements, context); if (unless_node->else_clause) { herb_visit_node((AST_NODE_T*) unless_node->else_clause, transform_conditional_elements_visitor, data); @@ -547,31 +557,31 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_BLOCK_NODE: { AST_ERB_BLOCK_NODE_T* block_node = (AST_ERB_BLOCK_NODE_T*) node; - transform_conditional_elements_in_array(block_node->body, document_errors); + transform_conditional_elements_in_array(block_node->body, context); return false; } case AST_ERB_WHILE_NODE: { AST_ERB_WHILE_NODE_T* while_node = (AST_ERB_WHILE_NODE_T*) node; - transform_conditional_elements_in_array(while_node->statements, document_errors); + transform_conditional_elements_in_array(while_node->statements, context); return false; } case AST_ERB_UNTIL_NODE: { AST_ERB_UNTIL_NODE_T* until_node = (AST_ERB_UNTIL_NODE_T*) node; - transform_conditional_elements_in_array(until_node->statements, document_errors); + transform_conditional_elements_in_array(until_node->statements, context); return false; } case AST_ERB_FOR_NODE: { AST_ERB_FOR_NODE_T* for_node = (AST_ERB_FOR_NODE_T*) node; - transform_conditional_elements_in_array(for_node->statements, document_errors); + transform_conditional_elements_in_array(for_node->statements, context); return false; } case AST_ERB_CASE_NODE: { AST_ERB_CASE_NODE_T* case_node = (AST_ERB_CASE_NODE_T*) node; - transform_conditional_elements_in_array(case_node->children, document_errors); + transform_conditional_elements_in_array(case_node->children, context); for (size_t i = 0; i < hb_array_size(case_node->conditions); i++) { AST_NODE_T* when = (AST_NODE_T*) hb_array_get(case_node->conditions, i); @@ -587,13 +597,13 @@ static bool 
transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_WHEN_NODE: { AST_ERB_WHEN_NODE_T* when_node = (AST_ERB_WHEN_NODE_T*) node; - transform_conditional_elements_in_array(when_node->statements, document_errors); + transform_conditional_elements_in_array(when_node->statements, context); return false; } case AST_ERB_BEGIN_NODE: { AST_ERB_BEGIN_NODE_T* begin_node = (AST_ERB_BEGIN_NODE_T*) node; - transform_conditional_elements_in_array(begin_node->statements, document_errors); + transform_conditional_elements_in_array(begin_node->statements, context); if (begin_node->rescue_clause) { herb_visit_node((AST_NODE_T*) begin_node->rescue_clause, transform_conditional_elements_visitor, data); @@ -612,7 +622,7 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_RESCUE_NODE: { AST_ERB_RESCUE_NODE_T* rescue_node = (AST_ERB_RESCUE_NODE_T*) node; - transform_conditional_elements_in_array(rescue_node->statements, document_errors); + transform_conditional_elements_in_array(rescue_node->statements, context); if (rescue_node->subsequent) { herb_visit_node((AST_NODE_T*) rescue_node->subsequent, transform_conditional_elements_visitor, data); @@ -623,7 +633,7 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_ENSURE_NODE: { AST_ERB_ENSURE_NODE_T* ensure_node = (AST_ERB_ENSURE_NODE_T*) node; - transform_conditional_elements_in_array(ensure_node->statements, document_errors); + transform_conditional_elements_in_array(ensure_node->statements, context); return false; } @@ -632,5 +642,10 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* } void herb_transform_conditional_elements(AST_DOCUMENT_NODE_T* document) { - herb_visit_node((AST_NODE_T*) document, transform_conditional_elements_visitor, document->base.errors); + conditional_transform_context_T context = { + .errors = document->base.errors, + .arena = document->arena + }; + + 
herb_visit_node((AST_NODE_T*) document, transform_conditional_elements_visitor, &context); } diff --git a/src/analyze_conditional_open_tags.c b/src/analyze_conditional_open_tags.c index b6b002d1d..cdca735d7 100644 --- a/src/analyze_conditional_open_tags.c +++ b/src/analyze_conditional_open_tags.c @@ -12,8 +12,13 @@ #include #include +typedef struct { + hb_array_T* errors; + hb_arena_T* arena; +} conditional_open_tags_context_T; + static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void* data); -static void transform_conditional_open_tags_in_array(hb_array_T* array, hb_array_T* document_errors); +static void transform_conditional_open_tags_in_array(hb_array_T* array, conditional_open_tags_context_T* context); static bool is_non_void_open_tag(AST_NODE_T* node) { if (!node || node->type != AST_HTML_OPEN_TAG_NODE) { return false; } @@ -194,14 +199,15 @@ static token_T* get_first_branch_tag_name_token_unless(AST_ERB_UNLESS_NODE_T* un return result.tag ? result.tag->tag_name : NULL; } -static void add_multiple_tags_error_to_erb_node(AST_NODE_T* erb_node, AST_HTML_OPEN_TAG_NODE_T* second_tag) { +static void add_multiple_tags_error_to_erb_node(AST_NODE_T* erb_node, AST_HTML_OPEN_TAG_NODE_T* second_tag, hb_arena_T* arena) { if (!erb_node || !second_tag) { return; } CONDITIONAL_ELEMENT_MULTIPLE_TAGS_ERROR_T* error = conditional_element_multiple_tags_error_init( second_tag->base.location.start.line, second_tag->base.location.start.column, erb_node->location.start, - erb_node->location.end + erb_node->location.end, + arena ); if (!erb_node->errors) { erb_node->errors = hb_array_init(1); } @@ -209,13 +215,13 @@ static void add_multiple_tags_error_to_erb_node(AST_NODE_T* erb_node, AST_HTML_O hb_array_append(erb_node->errors, error); } -static void check_and_report_multiple_tags_in_if(AST_ERB_IF_NODE_T* if_node) { +static void check_and_report_multiple_tags_in_if(AST_ERB_IF_NODE_T* if_node, hb_arena_T* arena) { if (!if_node || !if_node->subsequent) { 
return; } single_open_tag_result_T if_result = get_single_open_tag_from_statements(if_node->statements); if (if_result.has_multiple_tags) { - add_multiple_tags_error_to_erb_node((AST_NODE_T*) if_node, if_result.second_tag); + add_multiple_tags_error_to_erb_node((AST_NODE_T*) if_node, if_result.second_tag, arena); return; } @@ -243,7 +249,7 @@ static void check_and_report_multiple_tags_in_if(AST_ERB_IF_NODE_T* if_node) { single_open_tag_result_T branch_result = get_single_open_tag_from_statements(branch_statements); if (branch_result.has_multiple_tags) { - add_multiple_tags_error_to_erb_node(current, branch_result.second_tag); + add_multiple_tags_error_to_erb_node(current, branch_result.second_tag, arena); return; } if (!branch_result.tag) { return; } @@ -254,13 +260,13 @@ static void check_and_report_multiple_tags_in_if(AST_ERB_IF_NODE_T* if_node) { (void) ends_with_else; } -static void check_and_report_multiple_tags_in_unless(AST_ERB_UNLESS_NODE_T* unless_node) { +static void check_and_report_multiple_tags_in_unless(AST_ERB_UNLESS_NODE_T* unless_node, hb_arena_T* arena) { if (!unless_node || !unless_node->else_clause) { return; } single_open_tag_result_T unless_result = get_single_open_tag_from_statements(unless_node->statements); if (unless_result.has_multiple_tags) { - add_multiple_tags_error_to_erb_node((AST_NODE_T*) unless_node, unless_result.second_tag); + add_multiple_tags_error_to_erb_node((AST_NODE_T*) unless_node, unless_result.second_tag, arena); return; } @@ -269,13 +275,13 @@ static void check_and_report_multiple_tags_in_unless(AST_ERB_UNLESS_NODE_T* unle single_open_tag_result_T else_result = get_single_open_tag_from_statements(unless_node->else_clause->statements); if (else_result.has_multiple_tags) { - add_multiple_tags_error_to_erb_node((AST_NODE_T*) unless_node->else_clause, else_result.second_tag); + add_multiple_tags_error_to_erb_node((AST_NODE_T*) unless_node->else_clause, else_result.second_tag, arena); return; } } -static void 
rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* document_errors) { - (void) document_errors; +static void rewrite_conditional_open_tags(hb_array_T* nodes, conditional_open_tags_context_T* context) { + hb_arena_T* arena = context->arena; if (!nodes || hb_array_size(nodes) == 0) { return; } @@ -297,7 +303,7 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen conditional_node = node; tag_name_token = get_first_branch_tag_name_token(if_node); } else { - check_and_report_multiple_tags_in_if(if_node); + check_and_report_multiple_tags_in_if(if_node, arena); } } else if (node->type == AST_ERB_UNLESS_NODE) { AST_ERB_UNLESS_NODE_T* unless_node = (AST_ERB_UNLESS_NODE_T*) node; @@ -307,7 +313,7 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen conditional_node = node; tag_name_token = get_first_branch_tag_name_token_unless(unless_node); } else { - check_and_report_multiple_tags_in_unless(unless_node); + check_and_report_multiple_tags_in_unless(unless_node, arena); } } @@ -336,7 +342,8 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen false, conditional_node->location.start, conditional_node->location.end, - conditional_open_tag_errors + conditional_open_tag_errors, + arena ); hb_array_T* element_errors = hb_array_init(1); @@ -350,7 +357,8 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen ELEMENT_SOURCE_HTML, start_position, end_position, - element_errors + element_errors, + arena ); hb_array_set(nodes, i, element); @@ -400,44 +408,44 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen hb_array_free(&consumed_indices); } -static void transform_conditional_open_tags_in_array(hb_array_T* array, hb_array_T* document_errors) { +static void transform_conditional_open_tags_in_array(hb_array_T* array, conditional_open_tags_context_T* context) { if (!array) { return; } for (size_t i = 0; i < 
hb_array_size(array); i++) { AST_NODE_T* child = (AST_NODE_T*) hb_array_get(array, i); - if (child) { herb_visit_node(child, transform_conditional_open_tags_visitor, document_errors); } + if (child) { herb_visit_node(child, transform_conditional_open_tags_visitor, context); } } - rewrite_conditional_open_tags(array, document_errors); + rewrite_conditional_open_tags(array, context); } static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void* data) { if (!node) { return false; } - hb_array_T* document_errors = (hb_array_T*) data; + conditional_open_tags_context_T* context = (conditional_open_tags_context_T*) data; switch (node->type) { case AST_DOCUMENT_NODE: { AST_DOCUMENT_NODE_T* doc = (AST_DOCUMENT_NODE_T*) node; - transform_conditional_open_tags_in_array(doc->children, document_errors); + transform_conditional_open_tags_in_array(doc->children, context); return false; } case AST_HTML_ELEMENT_NODE: { AST_HTML_ELEMENT_NODE_T* element = (AST_HTML_ELEMENT_NODE_T*) node; - transform_conditional_open_tags_in_array(element->body, document_errors); + transform_conditional_open_tags_in_array(element->body, context); return false; } case AST_HTML_CONDITIONAL_ELEMENT_NODE: { AST_HTML_CONDITIONAL_ELEMENT_NODE_T* conditional = (AST_HTML_CONDITIONAL_ELEMENT_NODE_T*) node; - transform_conditional_open_tags_in_array(conditional->body, document_errors); + transform_conditional_open_tags_in_array(conditional->body, context); return false; } case AST_ERB_IF_NODE: { AST_ERB_IF_NODE_T* if_node = (AST_ERB_IF_NODE_T*) node; - transform_conditional_open_tags_in_array(if_node->statements, document_errors); + transform_conditional_open_tags_in_array(if_node->statements, context); if (if_node->subsequent) { herb_visit_node(if_node->subsequent, transform_conditional_open_tags_visitor, data); } @@ -446,13 +454,13 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_ELSE_NODE: { AST_ERB_ELSE_NODE_T* else_node = 
(AST_ERB_ELSE_NODE_T*) node; - transform_conditional_open_tags_in_array(else_node->statements, document_errors); + transform_conditional_open_tags_in_array(else_node->statements, context); return false; } case AST_ERB_UNLESS_NODE: { AST_ERB_UNLESS_NODE_T* unless_node = (AST_ERB_UNLESS_NODE_T*) node; - transform_conditional_open_tags_in_array(unless_node->statements, document_errors); + transform_conditional_open_tags_in_array(unless_node->statements, context); if (unless_node->else_clause) { herb_visit_node((AST_NODE_T*) unless_node->else_clause, transform_conditional_open_tags_visitor, data); @@ -462,31 +470,31 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_BLOCK_NODE: { AST_ERB_BLOCK_NODE_T* block_node = (AST_ERB_BLOCK_NODE_T*) node; - transform_conditional_open_tags_in_array(block_node->body, document_errors); + transform_conditional_open_tags_in_array(block_node->body, context); return false; } case AST_ERB_WHILE_NODE: { AST_ERB_WHILE_NODE_T* while_node = (AST_ERB_WHILE_NODE_T*) node; - transform_conditional_open_tags_in_array(while_node->statements, document_errors); + transform_conditional_open_tags_in_array(while_node->statements, context); return false; } case AST_ERB_UNTIL_NODE: { AST_ERB_UNTIL_NODE_T* until_node = (AST_ERB_UNTIL_NODE_T*) node; - transform_conditional_open_tags_in_array(until_node->statements, document_errors); + transform_conditional_open_tags_in_array(until_node->statements, context); return false; } case AST_ERB_FOR_NODE: { AST_ERB_FOR_NODE_T* for_node = (AST_ERB_FOR_NODE_T*) node; - transform_conditional_open_tags_in_array(for_node->statements, document_errors); + transform_conditional_open_tags_in_array(for_node->statements, context); return false; } case AST_ERB_CASE_NODE: { AST_ERB_CASE_NODE_T* case_node = (AST_ERB_CASE_NODE_T*) node; - transform_conditional_open_tags_in_array(case_node->children, document_errors); + transform_conditional_open_tags_in_array(case_node->children, 
context); for (size_t i = 0; i < hb_array_size(case_node->conditions); i++) { AST_NODE_T* when_node = (AST_NODE_T*) hb_array_get(case_node->conditions, i); @@ -502,7 +510,7 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_CASE_MATCH_NODE: { AST_ERB_CASE_MATCH_NODE_T* case_match_node = (AST_ERB_CASE_MATCH_NODE_T*) node; - transform_conditional_open_tags_in_array(case_match_node->children, document_errors); + transform_conditional_open_tags_in_array(case_match_node->children, context); for (size_t i = 0; i < hb_array_size(case_match_node->conditions); i++) { AST_NODE_T* in_node = (AST_NODE_T*) hb_array_get(case_match_node->conditions, i); @@ -518,19 +526,19 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_WHEN_NODE: { AST_ERB_WHEN_NODE_T* when_node = (AST_ERB_WHEN_NODE_T*) node; - transform_conditional_open_tags_in_array(when_node->statements, document_errors); + transform_conditional_open_tags_in_array(when_node->statements, context); return false; } case AST_ERB_IN_NODE: { AST_ERB_IN_NODE_T* in_node = (AST_ERB_IN_NODE_T*) node; - transform_conditional_open_tags_in_array(in_node->statements, document_errors); + transform_conditional_open_tags_in_array(in_node->statements, context); return false; } case AST_ERB_BEGIN_NODE: { AST_ERB_BEGIN_NODE_T* begin_node = (AST_ERB_BEGIN_NODE_T*) node; - transform_conditional_open_tags_in_array(begin_node->statements, document_errors); + transform_conditional_open_tags_in_array(begin_node->statements, context); if (begin_node->rescue_clause) { herb_visit_node((AST_NODE_T*) begin_node->rescue_clause, transform_conditional_open_tags_visitor, data); @@ -549,7 +557,7 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_RESCUE_NODE: { AST_ERB_RESCUE_NODE_T* rescue_node = (AST_ERB_RESCUE_NODE_T*) node; - transform_conditional_open_tags_in_array(rescue_node->statements, document_errors); + 
transform_conditional_open_tags_in_array(rescue_node->statements, context); if (rescue_node->subsequent) { herb_visit_node((AST_NODE_T*) rescue_node->subsequent, transform_conditional_open_tags_visitor, data); @@ -560,7 +568,7 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_ENSURE_NODE: { AST_ERB_ENSURE_NODE_T* ensure_node = (AST_ERB_ENSURE_NODE_T*) node; - transform_conditional_open_tags_in_array(ensure_node->statements, document_errors); + transform_conditional_open_tags_in_array(ensure_node->statements, context); return false; } @@ -569,5 +577,10 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void } void herb_transform_conditional_open_tags(AST_DOCUMENT_NODE_T* document) { - herb_visit_node((AST_NODE_T*) document, transform_conditional_open_tags_visitor, document->base.errors); + conditional_open_tags_context_T context = { + .errors = document->base.errors, + .arena = document->arena + }; + + herb_visit_node((AST_NODE_T*) document, transform_conditional_open_tags_visitor, &context); } diff --git a/src/include/analyze.h b/src/include/analyze.h index 3fc7bdf24..b9b945695 100644 --- a/src/include/analyze.h +++ b/src/include/analyze.h @@ -37,6 +37,7 @@ typedef enum { typedef struct { int loop_depth; int rescue_depth; + hb_arena_T* arena; } invalid_erb_context_T; void herb_analyze_parse_errors(AST_DOCUMENT_NODE_T* document, const char* source); diff --git a/src/include/analyze_helpers.h b/src/include/analyze_helpers.h index 8c4f94c61..285ab2756 100644 --- a/src/include/analyze_helpers.h +++ b/src/include/analyze_helpers.h @@ -6,6 +6,7 @@ #include "analyzed_ruby.h" #include "ast_node.h" +#include "util/hb_arena.h" bool has_if_node(analyzed_ruby_T* analyzed); bool has_elsif_node(analyzed_ruby_T* analyzed); @@ -58,6 +59,6 @@ bool search_unexpected_in_nodes(analyzed_ruby_T* analyzed); bool search_unexpected_rescue_nodes(analyzed_ruby_T* analyzed); bool 
search_unexpected_when_nodes(analyzed_ruby_T* analyzed); -void check_erb_node_for_missing_end(const AST_NODE_T* node); +void check_erb_node_for_missing_end(const AST_NODE_T* node, hb_arena_T* arena); #endif diff --git a/src/include/parser.h b/src/include/parser.h index 0b835fe9d..f1b276d68 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -25,6 +25,7 @@ typedef struct PARSER_OPTIONS_STRUCT { typedef struct MATCH_TAGS_CONTEXT_STRUCT { hb_array_T* errors; bool strict; + hb_arena_T* arena; } match_tags_context_T; extern const parser_options_T HERB_DEFAULT_PARSER_OPTIONS; @@ -50,7 +51,7 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser); void herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document, bool strict); void herb_parser_deinit(parser_T* parser); -void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict); +void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict, hb_arena_T* arena); bool match_tags_visitor(const AST_NODE_T* node, void* data); #endif diff --git a/src/parser.c b/src/parser.c index 02ea55168..71ee13ce3 100644 --- a/src/parser.c +++ b/src/parser.c @@ -130,7 +130,8 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) { comment_end, comment_end->location.start, comment_end->location.end, - errors + errors, + parser->arena ); } else { comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors); @@ -421,7 +422,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value opening_quote, opening_quote->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); parser_append_literal_node_from_buffer(parser, &buffer, children, start); @@ -434,7 +436,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value true, opening_quote->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); token_free(opening_quote); 
@@ -464,7 +467,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value opening_quote, opening_quote->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); parser_append_literal_node_from_buffer(parser, &buffer, children, start); @@ -477,7 +481,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value true, opening_quote->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); token_free(opening_quote); @@ -803,7 +808,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) attribute_name_string ? attribute_name_string : "unknown", equals->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); if (attribute_name_string) { free(attribute_name_string); } @@ -815,7 +821,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) false, equals->location.end, parser->current_token->location.start, - errors + errors, + parser->arena ); AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init( @@ -824,7 +831,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) empty_value, attribute_name->base.location.start, parser->current_token->location.start, - NULL + NULL, + parser->arena ); token_free(equals); @@ -988,7 +996,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) { if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) { - append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors); + append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors, parser->arena); AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init( tag_start, @@ -998,7 
+1006,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { false, tag_start->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); token_free(tag_start); @@ -1050,7 +1059,7 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { } if (token_is(parser, TOKEN_EOF)) { - append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors); + append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors, parser->arena); AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init( tag_start, @@ -1060,7 +1069,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { false, tag_start->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); token_free(tag_start); @@ -1218,7 +1228,7 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( token_T* unclosed = parser_pop_open_tag(parser); if (unclosed != NULL) { - append_missing_closing_tag_error(unclosed, unclosed->location.start, unclosed->location.end, errors); + append_missing_closing_tag_error(unclosed, unclosed->location.start, unclosed->location.end, errors, parser->arena); token_free(unclosed); } } @@ -1284,7 +1294,8 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) { parser->current_token->location.start.column, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); end_position = parser->current_token->location.start; } else { @@ -1292,7 +1303,8 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) { opening_tag, opening_tag->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); end_position = parser->current_token->location.start; } @@ -1518,9 +1530,9 @@ static size_t 
find_implicit_close_index(hb_array_T* nodes, size_t start_idx, hb_ return hb_array_size(nodes); } -static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors, bool strict); +static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors, bool strict, hb_arena_T* arena); -static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors, bool strict) { +static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors, bool strict, hb_arena_T* arena) { hb_array_T* result = hb_array_init(hb_array_size(nodes)); for (size_t index = 0; index < hb_array_size(nodes); index++) { @@ -1543,7 +1555,7 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T hb_array_append(body, hb_array_get(nodes, j)); } - hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, strict); + hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, strict, arena); hb_array_free(&body); position_T end_position = open_tag->base.location.end; @@ -1561,12 +1573,13 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T end_position, open_tag->base.location.start, open_tag->base.location.end, - element_errors + element_errors, + arena ); } AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = - ast_html_omitted_close_tag_node_init(open_tag->tag_name, end_position, end_position, hb_array_init(8)); + ast_html_omitted_close_tag_node_init(open_tag->tag_name, end_position, end_position, hb_array_init(8), arena); AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init( (AST_NODE_T*) open_tag, @@ -1577,7 +1590,8 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T ELEMENT_SOURCE_HTML, open_tag->base.location.start, end_position, - element_errors + element_errors, + arena ); hb_array_append(result, element); @@ -1589,7 +1603,8 @@ static hb_array_T* 
parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T open_tag->tag_name, open_tag->base.location.start, open_tag->base.location.end, - open_tag->base.errors + open_tag->base.errors, + arena ); } @@ -1604,7 +1619,7 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T hb_array_append(body, hb_array_get(nodes, j)); } - hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, strict); + hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, strict, arena); hb_array_free(&body); hb_array_T* element_errors = hb_array_init(8); @@ -1618,7 +1633,8 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T ELEMENT_SOURCE_HTML, open_tag->base.location.start, close_tag->base.location.end, - element_errors + element_errors, + arena ); hb_array_append(result, element); @@ -1634,7 +1650,8 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T close_tag->tag_name, close_tag->base.location.start, close_tag->base.location.end, - close_tag->base.errors + close_tag->base.errors, + arena ); } } @@ -1704,10 +1721,10 @@ void herb_parser_deinit(parser_T* parser) { if (parser->open_tags_stack != NULL) { hb_array_free(&parser->open_tags_stack); } } -void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict) { +void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict, hb_arena_T* arena) { if (nodes == NULL || hb_array_size(nodes) == 0) { return; } - hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, strict); + hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, strict, arena); nodes->size = 0; @@ -1717,7 +1734,7 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict hb_array_free(&processed); - match_tags_context_T context = { .errors = errors, .strict = strict }; + match_tags_context_T context = { .errors = errors, .strict = strict, .arena = arena 
}; for (size_t i = 0; i < hb_array_size(nodes); i++) { AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i); @@ -1730,5 +1747,5 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict void herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document, bool strict) { if (document == NULL) { return; } - match_tags_in_node_array(document->children, document->base.errors, strict); + match_tags_in_node_array(document->children, document->base.errors, strict, document->arena); } diff --git a/src/prism_helpers.c b/src/prism_helpers.c index 937786472..02fd67ccf 100644 --- a/src/prism_helpers.c +++ b/src/prism_helpers.c @@ -52,7 +52,8 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error_with_positions( pm_diagnostic_id_human(error->diag_id), pm_error_level_to_string(error->level), start, - end + end, + NULL ); } diff --git a/templates/src/analyze_missing_end.c.erb b/templates/src/analyze_missing_end.c.erb index 291b89e4d..1ed9925e0 100644 --- a/templates/src/analyze_missing_end.c.erb +++ b/templates/src/analyze_missing_end.c.erb @@ -7,7 +7,7 @@ end -%> -void check_erb_node_for_missing_end(const AST_NODE_T* node) { +void check_erb_node_for_missing_end(const AST_NODE_T* node, hb_arena_T* arena) { switch (node->type) { <%- nodes_with_end_node.each do |node| -%> <%- keyword = node.name.gsub(/^ERB/, '').gsub(/Match|Node$/, '').downcase -%> @@ -23,7 +23,8 @@ void check_erb_node_for_missing_end(const AST_NODE_T* node) { <%- end -%> <%= node.human %>->tag_opening->location.start, <%= node.human %>->tag_closing->location.end, - node->errors + node->errors, + arena ); } diff --git a/templates/src/analyze_transform.c.erb b/templates/src/analyze_transform.c.erb index f77eb2b96..91c97458a 100644 --- a/templates/src/analyze_transform.c.erb +++ b/templates/src/analyze_transform.c.erb @@ -1,6 +1,24 @@ #include "include/analyze.h" +#include "include/analyzed_ruby.h" #include "include/visitor.h" +static void 
free_analyzed_ruby_from_array(hb_array_T* array) { + if (array == NULL) { return; } + + for (size_t i = 0; i < hb_array_size(array); i++) { + AST_NODE_T* node = (AST_NODE_T*) hb_array_get(array, i); + + if (node != NULL && node->type == AST_ERB_CONTENT_NODE) { + AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node; + + if (erb_content_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content_node->analyzed_ruby); + erb_content_node->analyzed_ruby = NULL; + } + } + } +} + bool transform_erb_nodes(const AST_NODE_T* node, void* data) { analyze_ruby_context_T* context = (analyze_ruby_context_T*) data; context->parent = (AST_NODE_T*) node; @@ -12,6 +30,7 @@ bool transform_erb_nodes(const AST_NODE_T* node, void* data) { <%= node.struct_type %>* <%= node.human %> = (<%= node.struct_type %>*) node; hb_array_T* old_array = <%= node.human %>-><%= field.name %>; <%= node.human %>-><%= field.name %> = rewrite_node_array((AST_NODE_T*) node, <%= node.human %>-><%= field.name %>, context); + free_analyzed_ruby_from_array(old_array); hb_array_free(&old_array); } diff --git a/templates/src/parser_match_tags.c.erb b/templates/src/parser_match_tags.c.erb index 237d98949..4308457a5 100644 --- a/templates/src/parser_match_tags.c.erb +++ b/templates/src/parser_match_tags.c.erb @@ -25,7 +25,7 @@ bool match_tags_visitor(const AST_NODE_T* node, void* data) { <%- array_fields.each do |field| -%> if (<%= node.human %>-><%= field.name %> != NULL) { - match_tags_in_node_array(<%= node.human %>-><%= field.name %>, context->errors, context->strict); + match_tags_in_node_array(<%= node.human %>-><%= field.name %>, context->errors, context->strict, context->arena); } <%- end -%> <%- single_node_fields.each do |field| -%> From 0c6c4248333f96520b329634d122d6eb6462c0a5 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Tue, 17 Feb 2026 01:05:31 +0100 Subject: [PATCH 03/18] Add `--arena-stats` CLI flag and `arena_stats` kwarg --- ext/herb/extension.c | 47 
++++++++++++++++++++++++++++++++++++++++---- lib/herb/cli.rb | 10 +++++++--- src/main.c | 31 ++++++++++++++++++++++++++--- 3 files changed, 78 insertions(+), 10 deletions(-) diff --git a/ext/herb/extension.c b/ext/herb/extension.c index baa62ce06..0241d5b71 100644 --- a/ext/herb/extension.c +++ b/ext/herb/extension.c @@ -7,6 +7,7 @@ #include "../../src/include/macros.h" #include "../../src/include/util/hb_arena.h" +#include "../../src/include/util/hb_arena_debug.h" VALUE mHerb; @@ -18,8 +19,18 @@ VALUE cResult; VALUE cLexResult; VALUE cParseResult; -static VALUE Herb_lex(VALUE self, VALUE source) { +static VALUE Herb_lex(int argc, VALUE* argv, VALUE self) { + VALUE source, options; + rb_scan_args(argc, argv, "1:", &source, &options); + char* string = (char*) check_string(source); + bool print_arena_stats = false; + + if (!NIL_P(options)) { + VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); + if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } + if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } + } hb_arena_T* arena = malloc(sizeof(hb_arena_T)); if (!arena) { return Qnil; } @@ -39,13 +50,25 @@ static VALUE Herb_lex(VALUE self, VALUE source) { VALUE result = create_lex_result(lex_result->tokens, source); + if (print_arena_stats) { hb_arena_print_stats(arena); } + herb_free_lex_result(&lex_result); return result; } -static VALUE Herb_lex_file(VALUE self, VALUE path) { +static VALUE Herb_lex_file(int argc, VALUE* argv, VALUE self) { + VALUE path, options; + rb_scan_args(argc, argv, "1:", &path, &options); + char* file_path = (char*) check_string(path); + bool print_arena_stats = false; + + if (!NIL_P(options)) { + VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); + if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } + if (!NIL_P(arena_stats) && RTEST(arena_stats)) { 
print_arena_stats = true; } + } hb_arena_T* arena = malloc(sizeof(hb_arena_T)); if (!arena) { return Qnil; } @@ -66,6 +89,8 @@ static VALUE Herb_lex_file(VALUE self, VALUE path) { VALUE source_value = read_file_to_ruby_string(file_path); VALUE result = create_lex_result(lex_result->tokens, source_value); + if (print_arena_stats) { hb_arena_print_stats(arena); } + herb_free_lex_result(&lex_result); return result; @@ -78,6 +103,7 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { char* string = (char*) check_string(source); parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; + bool print_arena_stats = false; if (!NIL_P(options)) { VALUE track_whitespace = rb_hash_lookup(options, rb_utf8_str_new_cstr("track_whitespace")); @@ -91,6 +117,10 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { VALUE strict = rb_hash_lookup(options, rb_utf8_str_new_cstr("strict")); if (NIL_P(strict)) { strict = rb_hash_lookup(options, ID2SYM(rb_intern("strict"))); } if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); } + + VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); + if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } + if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } } hb_arena_T* arena = malloc(sizeof(hb_arena_T)); @@ -111,6 +141,8 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { VALUE result = create_parse_result(root, source); + if (print_arena_stats) { hb_arena_print_stats(arena); } + ast_node_free((AST_NODE_T*) root); return result; @@ -126,6 +158,7 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) { char* string = (char*) check_string(source_value); parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; + bool print_arena_stats = false; if (!NIL_P(options)) { VALUE track_whitespace = rb_hash_lookup(options, rb_utf8_str_new_cstr("track_whitespace")); @@ -139,6 +172,10 @@ static VALUE 
Herb_parse_file(int argc, VALUE* argv, VALUE self) { VALUE strict = rb_hash_lookup(options, rb_utf8_str_new_cstr("strict")); if (NIL_P(strict)) { strict = rb_hash_lookup(options, ID2SYM(rb_intern("strict"))); } if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); } + + VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); + if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } + if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } } hb_arena_T* arena = malloc(sizeof(hb_arena_T)); @@ -159,6 +196,8 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) { VALUE result = create_parse_result(root, source_value); + if (print_arena_stats) { hb_arena_print_stats(arena); } + ast_node_free((AST_NODE_T*) root); return result; @@ -233,9 +272,9 @@ __attribute__((__visibility__("default"))) void Init_herb(void) { cParseResult = rb_define_class_under(mHerb, "ParseResult", cResult); rb_define_singleton_method(mHerb, "parse", Herb_parse, -1); - rb_define_singleton_method(mHerb, "lex", Herb_lex, 1); + rb_define_singleton_method(mHerb, "lex", Herb_lex, -1); rb_define_singleton_method(mHerb, "parse_file", Herb_parse_file, -1); - rb_define_singleton_method(mHerb, "lex_file", Herb_lex_file, 1); + rb_define_singleton_method(mHerb, "lex_file", Herb_lex_file, -1); rb_define_singleton_method(mHerb, "extract_ruby", Herb_extract_ruby, -1); rb_define_singleton_method(mHerb, "extract_html", Herb_extract_html, 1); rb_define_singleton_method(mHerb, "version", Herb_version, 0); diff --git a/lib/herb/cli.rb b/lib/herb/cli.rb index 4a793843b..6320ab001 100644 --- a/lib/herb/cli.rb +++ b/lib/herb/cli.rb @@ -8,7 +8,7 @@ class Herb::CLI include Herb::Colors - attr_accessor :json, :silent, :no_interactive, :no_log_file, :no_timing, :local, :escape, :no_escape, :freeze, :debug, :tool, :strict + attr_accessor :json, :silent, :no_interactive, :no_log_file, :no_timing, :local, :escape, 
:no_escape, :freeze, :debug, :tool, :strict, :arena_stats def initialize(args) @args = args @@ -139,13 +139,13 @@ def result show_config exit(0) when "parse" - Herb.parse(file_content, strict: strict.nil? || strict) + Herb.parse(file_content, strict: strict.nil? || strict, arena_stats: arena_stats) when "compile" compile_template when "render" render_template when "lex" - Herb.lex(file_content) + Herb.lex(file_content, arena_stats: arena_stats) when "ruby" puts Herb.extract_ruby(file_content) exit(0) @@ -249,6 +249,10 @@ def option_parser parser.on("--tool TOOL", "Show config for specific tool: linter, formatter (for config command)") do |t| self.tool = t.to_sym end + + parser.on("--arena-stats", "Print arena memory statistics (for lex/parse commands)") do + self.arena_stats = true + end end end diff --git a/src/main.c b/src/main.c index 349eb73f3..8bb48d595 100644 --- a/src/main.c +++ b/src/main.c @@ -8,6 +8,7 @@ #include "include/io.h" #include "include/macros.h" #include "include/ruby_parser.h" +#include "include/token.h" #include "include/util/hb_arena.h" #include "include/util/hb_arena_debug.h" #include "include/util/hb_buffer.h" @@ -105,12 +106,36 @@ int main(const int argc, char* argv[]) { } if (string_equals(argv[1], "lex")) { - herb_lex_to_buffer(source, &output); + hb_arena_T* arena = allocate_arena(); + if (!arena) { + free(source); + return 1; + } + + herb_lex_result_T* result = herb_lex(source, arena); clock_gettime(CLOCK_MONOTONIC, &end); - puts(output.value); - print_time_diff(start, end, "lexing"); + int silent = 0; + if (argc > 3 && string_equals(argv[3], "--silent")) { silent = 1; } + if (!silent) { + for (size_t i = 0; i < hb_array_size(result->tokens); i++) { + token_T* token = hb_array_get(result->tokens, i); + hb_string_T type = token_to_string(token); + hb_buffer_append_string(&output, type); + free(type.data); + hb_buffer_append(&output, "\n"); + } + + puts(output.value); + print_time_diff(start, end, "lexing"); + + printf("\n"); + 
hb_arena_print_stats(arena); + } + + hb_arena_free(arena); + free(arena); free(output.value); free(source); From 1d2075fd0f1ab8ba59884a0d15bd1c3293e2db1a Mon Sep 17 00:00:00 2001 From: Michael Kohl Date: Tue, 17 Feb 2026 12:49:05 +0700 Subject: [PATCH 04/18] Remove unused includes --- src/include/parser_helpers.h | 2 -- src/include/token.h | 1 - src/lexer_peek_helpers.c | 1 - 3 files changed, 4 deletions(-) diff --git a/src/include/parser_helpers.h b/src/include/parser_helpers.h index 325c7b3d0..b0b90c6c8 100644 --- a/src/include/parser_helpers.h +++ b/src/include/parser_helpers.h @@ -2,9 +2,7 @@ #define HERB_PARSER_HELPERS_H #include "ast_nodes.h" -#include "errors.h" #include "parser.h" -#include "token.h" #include "util/hb_array.h" #include "util/hb_buffer.h" #include "util/hb_string.h" diff --git a/src/include/token.h b/src/include/token.h index d55f6d968..34b8850d5 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -2,7 +2,6 @@ #define HERB_TOKEN_H #include "lexer_struct.h" -#include "position.h" #include "token_struct.h" #include "util/hb_arena.h" #include "util/hb_string.h" diff --git a/src/lexer_peek_helpers.c b/src/lexer_peek_helpers.c index 9585d9aa7..5a07f1897 100644 --- a/src/lexer_peek_helpers.c +++ b/src/lexer_peek_helpers.c @@ -2,7 +2,6 @@ #include "include/lexer.h" #include "include/lexer_struct.h" #include "include/macros.h" -#include "include/token.h" #include "include/util/hb_string.h" #include From 1ac005f56c12dfe3a7a481801831a6fc6b12b825 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 02:25:49 +0100 Subject: [PATCH 05/18] Format --- ext/herb/extension.c | 1 - src/analyze.c | 8 ++++- src/analyze_conditional_elements.c | 5 +-- src/analyze_conditional_open_tags.c | 11 ++++--- src/herb.c | 6 +++- src/include/herb.h | 6 +++- src/parser.c | 49 ++++++++++++++++++++++++----- 7 files changed, 65 insertions(+), 21 deletions(-) diff --git a/ext/herb/extension.c b/ext/herb/extension.c index 0241d5b71..59a885da9 100644 --- 
a/ext/herb/extension.c +++ b/ext/herb/extension.c @@ -9,7 +9,6 @@ #include "../../src/include/util/hb_arena.h" #include "../../src/include/util/hb_arena_debug.h" - VALUE mHerb; VALUE cPosition; VALUE cLocation; diff --git a/src/analyze.c b/src/analyze.c index f204222ff..d489f815e 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -1481,7 +1481,13 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { if (keyword == NULL) { keyword = erb_keyword_from_analyzed_ruby(analyzed); } if (keyword != NULL && !token_value_empty(content_node->tag_closing)) { - append_erb_control_flow_scope_error(keyword, node->location.start, node->location.end, node->errors, context->arena); + append_erb_control_flow_scope_error( + keyword, + node->location.start, + node->location.end, + node->errors, + context->arena + ); } } diff --git a/src/analyze_conditional_elements.c b/src/analyze_conditional_elements.c index 6ec5f80fe..2d8e8552d 100644 --- a/src/analyze_conditional_elements.c +++ b/src/analyze_conditional_elements.c @@ -641,10 +641,7 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* } void herb_transform_conditional_elements(AST_DOCUMENT_NODE_T* document) { - conditional_transform_context_T context = { - .errors = document->base.errors, - .arena = document->arena - }; + conditional_transform_context_T context = { .errors = document->base.errors, .arena = document->arena }; herb_visit_node((AST_NODE_T*) document, transform_conditional_elements_visitor, &context); } diff --git a/src/analyze_conditional_open_tags.c b/src/analyze_conditional_open_tags.c index cdca735d7..c151c6de8 100644 --- a/src/analyze_conditional_open_tags.c +++ b/src/analyze_conditional_open_tags.c @@ -199,7 +199,11 @@ static token_T* get_first_branch_tag_name_token_unless(AST_ERB_UNLESS_NODE_T* un return result.tag ? 
result.tag->tag_name : NULL; } -static void add_multiple_tags_error_to_erb_node(AST_NODE_T* erb_node, AST_HTML_OPEN_TAG_NODE_T* second_tag, hb_arena_T* arena) { +static void add_multiple_tags_error_to_erb_node( + AST_NODE_T* erb_node, + AST_HTML_OPEN_TAG_NODE_T* second_tag, + hb_arena_T* arena +) { if (!erb_node || !second_tag) { return; } CONDITIONAL_ELEMENT_MULTIPLE_TAGS_ERROR_T* error = conditional_element_multiple_tags_error_init( @@ -577,10 +581,7 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void } void herb_transform_conditional_open_tags(AST_DOCUMENT_NODE_T* document) { - conditional_open_tags_context_T context = { - .errors = document->base.errors, - .arena = document->arena - }; + conditional_open_tags_context_T context = { .errors = document->base.errors, .arena = document->arena }; herb_visit_node((AST_NODE_T*) document, transform_conditional_open_tags_visitor, &context); } diff --git a/src/herb.c b/src/herb.c index 812d8d16b..321c3f040 100644 --- a/src/herb.c +++ b/src/herb.c @@ -40,7 +40,11 @@ HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex(const char* source, hb_arena_ return result; } -HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const parser_options_T* options, hb_arena_T* arena) { +HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse( + const char* source, + const parser_options_T* options, + hb_arena_T* arena +) { if (!source) { source = ""; } if (!arena) { return NULL; } diff --git a/src/include/herb.h b/src/include/herb.h index 033be9a0d..bd120cfd5 100644 --- a/src/include/herb.h +++ b/src/include/herb.h @@ -25,7 +25,11 @@ HERB_EXPORTED_FUNCTION void herb_lex_to_buffer(const char* source, hb_buffer_T* HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex(const char* source, hb_arena_T* arena); HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex_file(const char* path, hb_arena_T* arena); -HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const 
parser_options_T* options, hb_arena_T* arena); +HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse( + const char* source, + const parser_options_T* options, + hb_arena_T* arena +); HERB_EXPORTED_FUNCTION const char* herb_version(void); HERB_EXPORTED_FUNCTION const char* herb_prism_version(void); diff --git a/src/parser.c b/src/parser.c index 71ee13ce3..0908e2187 100644 --- a/src/parser.c +++ b/src/parser.c @@ -996,7 +996,13 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) { if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) { - append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors, parser->arena); + append_unclosed_open_tag_error( + tag_name, + tag_name->location.start, + parser->current_token->location.start, + errors, + parser->arena + ); AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init( tag_start, @@ -1059,7 +1065,13 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { } if (token_is(parser, TOKEN_EOF)) { - append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors, parser->arena); + append_unclosed_open_tag_error( + tag_name, + tag_name->location.start, + parser->current_token->location.start, + errors, + parser->arena + ); AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init( tag_start, @@ -1228,7 +1240,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( token_T* unclosed = parser_pop_open_tag(parser); if (unclosed != NULL) { - append_missing_closing_tag_error(unclosed, unclosed->location.start, unclosed->location.end, errors, parser->arena); + append_missing_closing_tag_error( + unclosed, + unclosed->location.start, + unclosed->location.end, + errors, + parser->arena + ); token_free(unclosed); } } @@ 
-1530,9 +1548,19 @@ static size_t find_implicit_close_index(hb_array_T* nodes, size_t start_idx, hb_ return hb_array_size(nodes); } -static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors, bool strict, hb_arena_T* arena); - -static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors, bool strict, hb_arena_T* arena) { +static hb_array_T* parser_build_elements_from_tags( + hb_array_T* nodes, + hb_array_T* errors, + bool strict, + hb_arena_T* arena +); + +static hb_array_T* parser_build_elements_from_tags( + hb_array_T* nodes, + hb_array_T* errors, + bool strict, + hb_arena_T* arena +) { hb_array_T* result = hb_array_init(hb_array_size(nodes)); for (size_t index = 0; index < hb_array_size(nodes); index++) { @@ -1578,8 +1606,13 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T ); } - AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = - ast_html_omitted_close_tag_node_init(open_tag->tag_name, end_position, end_position, hb_array_init(8), arena); + AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = ast_html_omitted_close_tag_node_init( + open_tag->tag_name, + end_position, + end_position, + hb_array_init(8), + arena + ); AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init( (AST_NODE_T*) open_tag, From 74140a1432cb94e5ecc4be3af5ba458877d3c852 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 02:30:36 +0100 Subject: [PATCH 06/18] Update Java and Rust bindings --- java/herb_jni.c | 47 +++++++++++++++++++++++++++++++++++++++++++---- rust/src/ffi.rs | 7 ++++--- rust/src/herb.rs | 47 +++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/java/herb_jni.c b/java/herb_jni.c index f6acb6dfe..a2e2f618b 100644 --- a/java/herb_jni.c +++ b/java/herb_jni.c @@ -3,6 +3,8 @@ #include "../../src/include/extract.h" #include "../../src/include/herb.h" +#include "../../src/include/macros.h" +#include 
"../../src/include/util/hb_arena.h" #include "../../src/include/util/hb_buffer.h" #include @@ -61,11 +63,27 @@ Java_org_herb_Herb_parse(JNIEnv* env, jclass clazz, jstring source, jobject opti } } - AST_DOCUMENT_NODE_T* ast = herb_parse(src, &parser_options); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { + (*env)->ReleaseStringUTFChars(env, source, src); + return NULL; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + (*env)->ReleaseStringUTFChars(env, source, src); + + return NULL; + } + + AST_DOCUMENT_NODE_T* ast = herb_parse(src, &parser_options, arena); jobject result = CreateParseResult(env, ast, source); ast_node_free((AST_NODE_T*) ast); + hb_arena_free(arena); + free(arena); (*env)->ReleaseStringUTFChars(env, source, src); return result; @@ -75,11 +93,32 @@ JNIEXPORT jobject JNICALL Java_org_herb_Herb_lex(JNIEnv* env, jclass clazz, jstring source) { const char* src = (*env)->GetStringUTFChars(env, source, 0); - hb_array_T* tokens = herb_lex(src); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { + (*env)->ReleaseStringUTFChars(env, source, src); + return NULL; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + (*env)->ReleaseStringUTFChars(env, source, src); + return NULL; + } + + herb_lex_result_T* lex_result = herb_lex(src, arena); + + if (!lex_result) { + hb_arena_free(arena); + free(arena); + (*env)->ReleaseStringUTFChars(env, source, src); + return NULL; + } - jobject result = CreateLexResult(env, tokens, source); + jobject result = CreateLexResult(env, lex_result->tokens, source); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); + hb_arena_free(arena); + free(arena); (*env)->ReleaseStringUTFChars(env, source, src); return result; diff --git a/rust/src/ffi.rs b/rust/src/ffi.rs index 191f7a9ba..d530cfc4b 100644 --- a/rust/src/ffi.rs +++ b/rust/src/ffi.rs @@ -1,5 +1,6 @@ pub use crate::bindings::{ - ast_node_free, element_source_to_string, hb_array_get, hb_array_size, 
hb_buffer_init, - hb_buffer_value, hb_string_T, herb_extract, herb_extract_ruby_to_buffer_with_options, - herb_free_tokens, herb_lex, herb_parse, herb_prism_version, herb_version, token_type_to_string, + ast_node_free, element_source_to_string, hb_arena_free, hb_arena_init, hb_array_get, + hb_array_size, hb_buffer_init, hb_buffer_value, hb_string_T, herb_extract, + herb_extract_ruby_to_buffer_with_options, herb_free_lex_result, herb_lex, herb_parse, + herb_prism_version, herb_version, token_type_to_string, }; diff --git a/rust/src/herb.rs b/rust/src/herb.rs index ab9c70f11..eb25de22d 100644 --- a/rust/src/herb.rs +++ b/rust/src/herb.rs @@ -1,4 +1,4 @@ -use crate::bindings::{hb_array_T, hb_buffer_T, token_T}; +use crate::bindings::{hb_arena_T, hb_buffer_T, herb_lex_result_T, token_T}; use crate::convert::token_from_c; use crate::{LexResult, ParseResult}; use std::ffi::CString; @@ -40,12 +40,29 @@ impl Default for ExtractRubyOptions { pub fn lex(source: &str) -> Result { unsafe { let c_source = CString::new(source).map_err(|e| e.to_string())?; - let c_tokens = crate::ffi::herb_lex(c_source.as_ptr()); - if c_tokens.is_null() { + let arena = libc::malloc(std::mem::size_of::()) as *mut hb_arena_T; + + if arena.is_null() { + return Err("Failed to allocate arena".to_string()); + } + + if !crate::ffi::hb_arena_init(arena, 512 * 1024) { + libc::free(arena as *mut std::ffi::c_void); + + return Err("Failed to initialize arena".to_string()); + } + + let lex_result = crate::ffi::herb_lex(c_source.as_ptr(), arena); + + if lex_result.is_null() { + crate::ffi::hb_arena_free(arena); + libc::free(arena as *mut std::ffi::c_void); + return Err("Failed to lex source".to_string()); } + let c_tokens = (*lex_result).tokens; let array_size = crate::ffi::hb_array_size(c_tokens); let mut tokens = Vec::with_capacity(array_size); @@ -57,8 +74,10 @@ pub fn lex(source: &str) -> Result { } } - let mut c_tokens_ptr = c_tokens; - crate::ffi::herb_free_tokens(&mut c_tokens_ptr as *mut *mut 
hb_array_T); + let mut lex_result_ptr = lex_result; + crate::ffi::herb_free_lex_result(&mut lex_result_ptr as *mut *mut herb_lex_result_T); + crate::ffi::hb_arena_free(arena); + libc::free(arena as *mut std::ffi::c_void); Ok(LexResult::new(tokens)) } @@ -71,6 +90,17 @@ pub fn parse(source: &str) -> Result { pub fn parse_with_options(source: &str, options: &ParserOptions) -> Result { unsafe { let c_source = CString::new(source).map_err(|e| e.to_string())?; + let arena = libc::malloc(std::mem::size_of::()) as *mut hb_arena_T; + + if arena.is_null() { + return Err("Failed to allocate arena".to_string()); + } + + if !crate::ffi::hb_arena_init(arena, 512 * 1024) { + libc::free(arena as *mut std::ffi::c_void); + + return Err("Failed to initialize arena".to_string()); + } let c_parser_options = crate::bindings::parser_options_T { track_whitespace: options.track_whitespace, @@ -78,9 +108,12 @@ pub fn parse_with_options(source: &str, options: &ParserOptions) -> Result Result Date: Wed, 18 Feb 2026 02:40:01 +0100 Subject: [PATCH 07/18] Add arena structs to rust build --- rust/build.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/build.rs b/rust/build.rs index 0c4cd0cb0..d97988d61 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -89,6 +89,7 @@ fn main() { .clang_arg(format!("-I{}", include_dir.display())) .clang_arg(format!("-I{}", prism_include.display())) .allowlist_function("herb_.*") + .allowlist_function("hb_arena_.*") .allowlist_function("hb_array_.*") .allowlist_function("hb_buffer_.*") .allowlist_function("token_type_to_string") @@ -100,6 +101,7 @@ fn main() { .allowlist_type("element_source_t") .allowlist_type("ast_node_type_T") .allowlist_type("error_type_T") + .allowlist_type("hb_arena_T") .allowlist_type("hb_array_T") .allowlist_type("hb_buffer_T") .allowlist_type("hb_string_T") From 67f362a073204a2f08c35005a723f54859a74aea Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 02:56:34 +0100 Subject: [PATCH 08/18] fix failing tests --- 
src/analyze.c | 103 ++++++++++++++++++++++++-- templates/src/analyze_transform.c.erb | 19 ----- 2 files changed, 97 insertions(+), 25 deletions(-) diff --git a/src/analyze.c b/src/analyze.c index d489f815e..ffdcd3017 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -356,11 +356,6 @@ static AST_NODE_T* create_control_node( hb_array_T* errors = erb_node->base.errors; erb_node->base.errors = NULL; - if (erb_node->analyzed_ruby != NULL) { - free_analyzed_ruby(erb_node->analyzed_ruby); - erb_node->analyzed_ruby = NULL; - } - position_T start_position = erb_node->tag_opening->location.start; position_T end_position = erb_content_end_position(erb_node); @@ -405,6 +400,11 @@ static AST_NODE_T* create_control_node( } } + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + switch (control_type) { case CONTROL_TYPE_IF: case CONTROL_TYPE_ELSIF: { @@ -1490,12 +1490,103 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { ); } } + } + + if (node->type == AST_ERB_IF_NODE) { + const AST_ERB_IF_NODE_T* if_node = (const AST_ERB_IF_NODE_T*) node; + + if (if_node->end_node == NULL) { check_erb_node_for_missing_end(node, context->arena); } + + if (if_node->statements != NULL) { + for (size_t i = 0; i < hb_array_size(if_node->statements); i++) { + AST_NODE_T* statement = (AST_NODE_T*) hb_array_get(if_node->statements, i); + + if (statement != NULL) { herb_visit_node(statement, detect_invalid_erb_structures, context); } + } + } + + AST_NODE_T* subsequent = if_node->subsequent; + + while (subsequent != NULL) { + if (subsequent->type == AST_ERB_CONTENT_NODE) { + const AST_ERB_CONTENT_NODE_T* content_node = (const AST_ERB_CONTENT_NODE_T*) subsequent; + + if (content_node->parsed && !content_node->valid && content_node->analyzed_ruby != NULL) { + analyzed_ruby_T* analyzed = content_node->analyzed_ruby; + const char* keyword = erb_keyword_from_analyzed_ruby(analyzed); + + if 
(!token_value_empty(content_node->tag_closing)) { + append_erb_control_flow_scope_error( + keyword, + subsequent->location.start, + subsequent->location.end, + subsequent->errors, + context->arena + ); + } + } + } + + if (subsequent->type == AST_ERB_IF_NODE) { + const AST_ERB_IF_NODE_T* elsif_node = (const AST_ERB_IF_NODE_T*) subsequent; + + if (elsif_node->statements != NULL) { + for (size_t i = 0; i < hb_array_size(elsif_node->statements); i++) { + AST_NODE_T* statement = (AST_NODE_T*) hb_array_get(elsif_node->statements, i); + + if (statement != NULL) { herb_visit_node(statement, detect_invalid_erb_structures, context); } + } + } + + subsequent = elsif_node->subsequent; + } else if (subsequent->type == AST_ERB_ELSE_NODE) { + const AST_ERB_ELSE_NODE_T* else_node = (const AST_ERB_ELSE_NODE_T*) subsequent; + + if (else_node->statements != NULL) { + for (size_t i = 0; i < hb_array_size(else_node->statements); i++) { + AST_NODE_T* statement = (AST_NODE_T*) hb_array_get(else_node->statements, i); + + if (statement != NULL) { herb_visit_node(statement, detect_invalid_erb_structures, context); } + } + } + + break; + } else { + break; + } + } + } + + if (node->type == AST_ERB_UNLESS_NODE || node->type == AST_ERB_WHILE_NODE || node->type == AST_ERB_UNTIL_NODE + || node->type == AST_ERB_FOR_NODE || node->type == AST_ERB_CASE_NODE || node->type == AST_ERB_CASE_MATCH_NODE + || node->type == AST_ERB_BEGIN_NODE || node->type == AST_ERB_BLOCK_NODE || node->type == AST_ERB_ELSE_NODE) { + herb_visit_child_nodes(node, detect_invalid_erb_structures, context); + } + + if (node->type == AST_ERB_UNLESS_NODE || node->type == AST_ERB_WHILE_NODE || node->type == AST_ERB_UNTIL_NODE + || node->type == AST_ERB_FOR_NODE || node->type == AST_ERB_CASE_NODE || node->type == AST_ERB_CASE_MATCH_NODE + || node->type == AST_ERB_BEGIN_NODE || node->type == AST_ERB_BLOCK_NODE || node->type == AST_ERB_ELSE_NODE) { + check_erb_node_for_missing_end(node, context->arena); if (is_loop_node) { 
context->loop_depth--; } if (is_begin_node) { context->rescue_depth--; } + + return false; } - return true; + if (node->type == AST_ERB_IF_NODE) { + if (is_loop_node) { context->loop_depth--; } + if (is_begin_node) { context->rescue_depth--; } + + return false; + } + + bool result = true; + + if (is_loop_node) { context->loop_depth--; } + if (is_begin_node) { context->rescue_depth--; } + + return result; } void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source, bool strict) { diff --git a/templates/src/analyze_transform.c.erb b/templates/src/analyze_transform.c.erb index 91c97458a..f77eb2b96 100644 --- a/templates/src/analyze_transform.c.erb +++ b/templates/src/analyze_transform.c.erb @@ -1,24 +1,6 @@ #include "include/analyze.h" -#include "include/analyzed_ruby.h" #include "include/visitor.h" -static void free_analyzed_ruby_from_array(hb_array_T* array) { - if (array == NULL) { return; } - - for (size_t i = 0; i < hb_array_size(array); i++) { - AST_NODE_T* node = (AST_NODE_T*) hb_array_get(array, i); - - if (node != NULL && node->type == AST_ERB_CONTENT_NODE) { - AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node; - - if (erb_content_node->analyzed_ruby != NULL) { - free_analyzed_ruby(erb_content_node->analyzed_ruby); - erb_content_node->analyzed_ruby = NULL; - } - } - } -} - bool transform_erb_nodes(const AST_NODE_T* node, void* data) { analyze_ruby_context_T* context = (analyze_ruby_context_T*) data; context->parent = (AST_NODE_T*) node; @@ -30,7 +12,6 @@ bool transform_erb_nodes(const AST_NODE_T* node, void* data) { <%= node.struct_type %>* <%= node.human %> = (<%= node.struct_type %>*) node; hb_array_T* old_array = <%= node.human %>-><%= field.name %>; <%= node.human %>-><%= field.name %> = rewrite_node_array((AST_NODE_T*) node, <%= node.human %>-><%= field.name %>, context); - free_analyzed_ruby_from_array(old_array); hb_array_free(&old_array); } From 968b6b2d87a18d324147c0977c6fafa11d3cefa7 Mon Sep 17 00:00:00 
2001 From: Marco Roth Date: Wed, 18 Feb 2026 03:03:14 +0100 Subject: [PATCH 09/18] Add hb_arena_strdup and hb_arena_strndup --- src/include/util/hb_arena.h | 2 ++ src/parser.c | 13 ++++-------- src/token.c | 24 ++------------------- src/util/hb_arena.c | 31 +++++++++++++++++++++++++++ templates/src/errors.c.erb | 42 ++++--------------------------------- 5 files changed, 43 insertions(+), 69 deletions(-) diff --git a/src/include/util/hb_arena.h b/src/include/util/hb_arena.h index 8c89be6d5..8333781ca 100644 --- a/src/include/util/hb_arena.h +++ b/src/include/util/hb_arena.h @@ -22,6 +22,8 @@ typedef struct HB_ARENA_STRUCT { bool hb_arena_init(hb_arena_T* allocator, size_t initial_size); void* hb_arena_alloc(hb_arena_T* allocator, size_t size); +char* hb_arena_strdup(hb_arena_T* allocator, const char* string); +char* hb_arena_strndup(hb_arena_T* allocator, const char* string, size_t length); size_t hb_arena_position(hb_arena_T* allocator); size_t hb_arena_capacity(hb_arena_T* allocator); void hb_arena_reset(hb_arena_T* allocator); diff --git a/src/parser.c b/src/parser.c index 0908e2187..2f21df537 100644 --- a/src/parser.c +++ b/src/parser.c @@ -750,12 +750,7 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) token_T* equals_with_whitespace = hb_arena_alloc(parser->arena, sizeof(token_T)); equals_with_whitespace->type = TOKEN_EQUALS; - size_t value_length = strlen(equals_buffer.value); - char* arena_value = hb_arena_alloc(parser->arena, value_length + 1); - memcpy(arena_value, equals_buffer.value, value_length); - arena_value[value_length] = '\0'; - - equals_with_whitespace->value = arena_value; + equals_with_whitespace->value = hb_arena_strdup(parser->arena, equals_buffer.value); equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end }; equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end }; equals_with_whitespace->arena_allocated = true; @@ -801,7 +796,9 @@ static 
AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) if (hb_array_size(attribute_name->children) > 0) { AST_LITERAL_NODE_T* first_child = (AST_LITERAL_NODE_T*) hb_array_get(attribute_name->children, 0); - if (first_child && first_child->content) { attribute_name_string = herb_strdup(first_child->content); } + if (first_child && first_child->content) { + attribute_name_string = hb_arena_strdup(parser->arena, first_child->content); + } } append_missing_attribute_value_error( @@ -812,8 +809,6 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) parser->arena ); - if (attribute_name_string) { free(attribute_name_string); } - AST_HTML_ATTRIBUTE_VALUE_NODE_T* empty_value = ast_html_attribute_value_node_init( NULL, hb_array_init(8), diff --git a/src/token.c b/src/token.c index 57e0536b1..45af9a120 100644 --- a/src/token.c +++ b/src/token.c @@ -21,18 +21,7 @@ token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) lexer->current_column = 0; } - if (value.data) { - char* arena_value = hb_arena_alloc(lexer->arena, value.length + 1); - if (arena_value) { - memcpy(arena_value, value.data, value.length); - arena_value[value.length] = '\0'; - token->value = arena_value; - } else { - token->value = NULL; - } - } else { - token->value = NULL; - } + token->value = value.data ? 
hb_arena_strndup(lexer->arena, value.data, value.length) : NULL; token->type = type; token->arena_allocated = true; @@ -137,16 +126,7 @@ token_T* token_copy(token_T* token, hb_arena_T* arena) { if (token->value) { if (arena) { - size_t value_length = strlen(token->value); - char* arena_value = hb_arena_alloc(arena, value_length + 1); - - if (arena_value) { - memcpy(arena_value, token->value, value_length); - arena_value[value_length] = '\0'; - new_token->value = arena_value; - } else { - new_token->value = NULL; - } + new_token->value = hb_arena_strdup(arena, token->value); } else { new_token->value = herb_strdup(token->value); diff --git a/src/util/hb_arena.c b/src/util/hb_arena.c index 88c0d31d6..bacc09e8f 100644 --- a/src/util/hb_arena.c +++ b/src/util/hb_arena.c @@ -109,6 +109,37 @@ void* hb_arena_alloc(hb_arena_T* allocator, size_t size) { return hb_arena_page_alloc_from(allocator->tail, required_size); } +char* hb_arena_strdup(hb_arena_T* allocator, const char* string) { + assert(allocator != NULL); + + if (string == NULL) { return NULL; } + + size_t length = strlen(string); + char* copy = hb_arena_alloc(allocator, length + 1); + + if (copy != NULL) { + memcpy(copy, string, length); + copy[length] = '\0'; + } + + return copy; +} + +char* hb_arena_strndup(hb_arena_T* allocator, const char* string, size_t length) { + assert(allocator != NULL); + + if (string == NULL) { return NULL; } + + char* copy = hb_arena_alloc(allocator, length + 1); + + if (copy != NULL) { + memcpy(copy, string, length); + copy[length] = '\0'; + } + + return copy; +} + size_t hb_arena_position(hb_arena_T* allocator) { size_t total = 0; diff --git a/templates/src/errors.c.erb b/templates/src/errors.c.erb index cd9c12a2f..7c09d9e9d 100644 --- a/templates/src/errors.c.erb +++ b/templates/src/errors.c.erb @@ -63,44 +63,21 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit ); if (arena) { - size_t length = strlen(message); - char* string = hb_arena_alloc(arena, 
length + 1); - - if (string) { - memcpy(string, message, length); - string[length] = '\0'; - <%= error.human %>->base.message = string; - } + <%= error.human %>->base.message = hb_arena_strdup(arena, message); } else { <%= error.human %>->base.message = herb_strdup(message); free(message); } } else { if (arena) { - const char* template_string = "<%= error.message_template %>"; - size_t length = strlen(template_string); - char* string = hb_arena_alloc(arena, length + 1); - - if (string) { - memcpy(string, template_string, length); - string[length] = '\0'; - <%= error.human %>->base.message = string; - } + <%= error.human %>->base.message = hb_arena_strdup(arena, "<%= error.message_template %>"); } else { <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); } } <%- else -%> if (arena) { - const char* template_string = "<%= error.message_template %>"; - size_t length = strlen(template_string); - char* string = hb_arena_alloc(arena, length + 1); - - if (string) { - memcpy(string, template_string, length); - string[length] = '\0'; - <%= error.human %>->base.message = string; - } + <%= error.human %>->base.message = hb_arena_strdup(arena, "<%= error.message_template %>"); } else { <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); } @@ -117,18 +94,7 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit <%- when Herb::Template::SizeTField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - if (arena) { - size_t length = strlen(<%= field.name %>); - char* string = hb_arena_alloc(arena, length + 1); - - if (string) { - memcpy(string, <%= field.name %>, length); - string[length] = '\0'; - <%= error.human %>-><%= field.name %> = string; - } - } else { - <%= error.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); - } + <%= error.human %>-><%= field.name %> = arena ? 
hb_arena_strdup(arena, <%= field.name %>) : herb_strdup(<%= field.name %>); <%- else -%> <%= field.inspect %> <%- end -%> From 418e2dd513cbe694fdb455ea483ad14012a3c580 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 03:13:50 +0100 Subject: [PATCH 10/18] Update ast_literal_node_init_from_token to accept arena --- src/ast_node.c | 6 +++--- src/include/ast_node.h | 3 ++- src/parser.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/ast_node.c b/src/ast_node.c index be8dcd6ca..688802ba6 100644 --- a/src/ast_node.c +++ b/src/ast_node.c @@ -27,12 +27,12 @@ void ast_node_init(AST_NODE_T* node, const ast_node_type_T type, position_T star } } -AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) { - AST_LITERAL_NODE_T* literal = malloc(sizeof(AST_LITERAL_NODE_T)); +AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token, hb_arena_T* arena) { + AST_LITERAL_NODE_T* literal = hb_arena_alloc(arena, sizeof(AST_LITERAL_NODE_T)); ast_node_init(&literal->base, AST_LITERAL_NODE, token->location.start, token->location.end, NULL); - literal->content = herb_strdup(token->value); + literal->content = hb_arena_strdup(arena, token->value); return literal; } diff --git a/src/include/ast_node.h b/src/include/ast_node.h index 98b3801b0..2bfb59b81 100644 --- a/src/include/ast_node.h +++ b/src/include/ast_node.h @@ -5,11 +5,12 @@ #include "errors.h" #include "position.h" #include "token_struct.h" +#include "util/hb_arena.h" void ast_node_init(AST_NODE_T* node, ast_node_type_T type, position_T start, position_T end, hb_array_T* errors); void ast_node_free(AST_NODE_T* node); -AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token); +AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token, hb_arena_T* arena); size_t ast_node_sizeof(void); size_t ast_node_child_count(AST_NODE_T* node); diff --git a/src/parser.c b/src/parser.c index 2f21df537..9683d5066 100644 --- a/src/parser.c +++ 
b/src/parser.c @@ -630,7 +630,7 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser //
if (token_is(parser, TOKEN_IDENTIFIER)) { token_T* identifier = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors); - AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier); + AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier, parser->arena); token_free(identifier); hb_array_append(children, literal); From 69c35d7496aff1fef7a38963d071f922b5682c76 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 04:39:00 +0100 Subject: [PATCH 11/18] fix memory leaks --- src/analyze.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/analyze.c b/src/analyze.c index ffdcd3017..8425d03c2 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -727,6 +727,11 @@ static size_t process_control_structure( hb_array_T* when_errors = erb_content->base.errors; erb_content->base.errors = NULL; + if (erb_content->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content->analyzed_ruby); + erb_content->analyzed_ruby = NULL; + } + location_T* then_keyword = NULL; const char* source = erb_content->content ? erb_content->content->value : NULL; @@ -769,6 +774,11 @@ static size_t process_control_structure( hb_array_T* in_errors = erb_content->base.errors; erb_content->base.errors = NULL; + if (erb_content->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content->analyzed_ruby); + erb_content->analyzed_ruby = NULL; + } + location_T* in_then_keyword = NULL; const char* in_source = erb_content->content ? 
erb_content->content->value : NULL; @@ -829,6 +839,11 @@ static size_t process_control_structure( hb_array_T* else_errors = next_erb->base.errors; next_erb->base.errors = NULL; + if (next_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(next_erb->analyzed_ruby); + next_erb->analyzed_ruby = NULL; + } + else_clause = ast_erb_else_node_init( next_erb->tag_opening, next_erb->content, @@ -855,6 +870,11 @@ static size_t process_control_structure( hb_array_T* end_errors = end_erb->base.errors; end_erb->base.errors = NULL; + if (end_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(end_erb->analyzed_ruby); + end_erb->analyzed_ruby = NULL; + } + end_node = ast_erb_end_node_init( end_erb->tag_opening, end_erb->content, @@ -983,6 +1003,11 @@ static size_t process_control_structure( hb_array_T* else_errors = next_erb->base.errors; next_erb->base.errors = NULL; + if (next_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(next_erb->analyzed_ruby); + next_erb->analyzed_ruby = NULL; + } + else_clause = ast_erb_else_node_init( next_erb->tag_opening, next_erb->content, @@ -1028,6 +1053,11 @@ static size_t process_control_structure( hb_array_T* ensure_errors = next_erb->base.errors; next_erb->base.errors = NULL; + if (next_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(next_erb->analyzed_ruby); + next_erb->analyzed_ruby = NULL; + } + ensure_clause = ast_erb_ensure_node_init( next_erb->tag_opening, next_erb->content, @@ -1054,6 +1084,11 @@ static size_t process_control_structure( hb_array_T* end_errors = end_erb->base.errors; end_erb->base.errors = NULL; + if (end_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(end_erb->analyzed_ruby); + end_erb->analyzed_ruby = NULL; + } + end_node = ast_erb_end_node_init( end_erb->tag_opening, end_erb->content, @@ -1128,6 +1163,11 @@ static size_t process_control_structure( position_T close_end_pos = erb_content_end_position(close_erb); + if (close_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(close_erb->analyzed_ruby); + 
close_erb->analyzed_ruby = NULL; + } + end_node = ast_erb_end_node_init( close_erb->tag_opening, close_erb->content, @@ -1208,6 +1248,11 @@ static size_t process_control_structure( position_T end_erb_final_pos = erb_content_end_position(end_erb); + if (end_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(end_erb->analyzed_ruby); + end_erb->analyzed_ruby = NULL; + } + end_node = ast_erb_end_node_init( end_erb->tag_opening, end_erb->content, @@ -1251,6 +1296,11 @@ static size_t process_subsequent_block( index = process_block_children(node, array, index, children, context, parent_type); + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_NODE_T* subsequent_node = create_control_node(erb_node, children, NULL, NULL, type, context->arena); if (subsequent_node) { From 35b961081d180e7d1f5ea61d4ba290d491107093 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 04:39:16 +0100 Subject: [PATCH 12/18] Fix java tests --- java/herb_jni.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/java/herb_jni.c b/java/herb_jni.c index a2e2f618b..ba7d68d0e 100644 --- a/java/herb_jni.c +++ b/java/herb_jni.c @@ -82,8 +82,6 @@ Java_org_herb_Herb_parse(JNIEnv* env, jclass clazz, jstring source, jobject opti jobject result = CreateParseResult(env, ast, source); ast_node_free((AST_NODE_T*) ast); - hb_arena_free(arena); - free(arena); (*env)->ReleaseStringUTFChars(env, source, src); return result; @@ -117,8 +115,6 @@ Java_org_herb_Herb_lex(JNIEnv* env, jclass clazz, jstring source) { jobject result = CreateLexResult(env, lex_result->tokens, source); herb_free_lex_result(&lex_result); - hb_arena_free(arena); - free(arena); (*env)->ReleaseStringUTFChars(env, source, src); return result; From c85180be94ac054f2e70e70a7266285d01cf9e94 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 04:42:52 +0100 Subject: [PATCH 13/18] Fix rust tests --- rust/src/herb.rs | 4 ---- 1 file changed, 4 
deletions(-) diff --git a/rust/src/herb.rs b/rust/src/herb.rs index eb25de22d..7400abacc 100644 --- a/rust/src/herb.rs +++ b/rust/src/herb.rs @@ -76,8 +76,6 @@ pub fn lex(source: &str) -> Result { let mut lex_result_ptr = lex_result; crate::ffi::herb_free_lex_result(&mut lex_result_ptr as *mut *mut herb_lex_result_T); - crate::ffi::hb_arena_free(arena); - libc::free(arena as *mut std::ffi::c_void); Ok(LexResult::new(tokens)) } @@ -123,8 +121,6 @@ pub fn parse_with_options(source: &str, options: &ParserOptions) -> Result Date: Wed, 18 Feb 2026 03:30:14 +0100 Subject: [PATCH 14/18] C: Allow to reuse arena between parse/lex calls --- ext/herb/arena.c | 147 ++++++++++++++ ext/herb/arena.h | 21 ++ ext/herb/extconf.rb | 2 + ext/herb/extension.c | 34 +++- .../packages/browser/test/browser.test.ts | 135 +++++++++++++ .../packages/node-wasm/test/node-wasm.test.ts | 135 +++++++++++++ javascript/packages/node/binding.gyp | 1 + javascript/packages/node/extension/arena.cpp | 191 ++++++++++++++++++ javascript/packages/node/extension/arena.h | 22 ++ javascript/packages/node/extension/herb.cpp | 49 ++++- javascript/packages/node/test/node.test.ts | 113 +++++++++++ templates/src/ast_nodes.c.erb | 4 +- templates/src/include/ast_nodes.h.erb | 1 + test/arena_test.rb | 123 +++++++++++ wasm/arena.cpp | 70 +++++++ wasm/arena.h | 16 ++ wasm/herb-wasm.cpp | 44 +++- 17 files changed, 1080 insertions(+), 28 deletions(-) create mode 100644 ext/herb/arena.c create mode 100644 ext/herb/arena.h create mode 100644 javascript/packages/node/extension/arena.cpp create mode 100644 javascript/packages/node/extension/arena.h create mode 100644 test/arena_test.rb create mode 100644 wasm/arena.cpp create mode 100644 wasm/arena.h diff --git a/ext/herb/arena.c b/ext/herb/arena.c new file mode 100644 index 000000000..3abdda3c5 --- /dev/null +++ b/ext/herb/arena.c @@ -0,0 +1,147 @@ +#include + +#include "../../src/include/macros.h" +#include "../../src/include/util/hb_arena.h" +#include
"../../src/include/util/hb_arena_debug.h" + +#include "arena.h" + +VALUE cArena; + +typedef struct { + hb_arena_T* arena; + bool initialized; +} herb_arena_wrapper_T; + +static void herb_arena_free(void* data) { + herb_arena_wrapper_T* wrapper = (herb_arena_wrapper_T*) data; + if (wrapper->arena && wrapper->initialized) { + hb_arena_free(wrapper->arena); + free(wrapper->arena); + } + free(wrapper); +} + +static size_t herb_arena_memsize(const void* data) { + const herb_arena_wrapper_T* wrapper = (const herb_arena_wrapper_T*) data; + if (wrapper->arena && wrapper->initialized) { + return sizeof(herb_arena_wrapper_T) + hb_arena_capacity(wrapper->arena); + } + return sizeof(herb_arena_wrapper_T); +} + +const rb_data_type_t herb_arena_type = { + .wrap_struct_name = "Herb::Arena", + .function = { + .dmark = NULL, + .dfree = herb_arena_free, + .dsize = herb_arena_memsize, + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +VALUE Arena_allocate(VALUE klass) { + herb_arena_wrapper_T* wrapper = malloc(sizeof(herb_arena_wrapper_T)); + wrapper->arena = NULL; + wrapper->initialized = false; + return TypedData_Wrap_Struct(klass, &herb_arena_type, wrapper); +} + +VALUE Arena_initialize(int argc, VALUE* argv, VALUE self) { + VALUE options; + rb_scan_args(argc, argv, "0:", &options); + + size_t initial_size = KB(512); + + if (!NIL_P(options)) { + VALUE size_val = rb_hash_lookup(options, rb_utf8_str_new_cstr("size")); + if (NIL_P(size_val)) { size_val = rb_hash_lookup(options, ID2SYM(rb_intern("size"))); } + if (!NIL_P(size_val)) { initial_size = NUM2SIZET(size_val); } + } + + herb_arena_wrapper_T* wrapper; + TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); + + wrapper->arena = malloc(sizeof(hb_arena_T)); + if (!wrapper->arena) { + rb_raise(rb_eNoMemError, "Failed to allocate arena"); + } + + if (!hb_arena_init(wrapper->arena, initial_size)) { + free(wrapper->arena); + wrapper->arena = NULL; + rb_raise(rb_eRuntimeError, "Failed to initialize arena"); + } 
+ + wrapper->initialized = true; + return self; +} + +VALUE Arena_reset(VALUE self) { + herb_arena_wrapper_T* wrapper; + TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); + + if (!wrapper->arena || !wrapper->initialized) { + rb_raise(rb_eRuntimeError, "Arena not initialized"); + } + + hb_arena_reset(wrapper->arena); + return self; +} + +VALUE Arena_position(VALUE self) { + herb_arena_wrapper_T* wrapper; + TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); + + if (!wrapper->arena || !wrapper->initialized) { + rb_raise(rb_eRuntimeError, "Arena not initialized"); + } + + return SIZET2NUM(hb_arena_position(wrapper->arena)); +} + +VALUE Arena_capacity(VALUE self) { + herb_arena_wrapper_T* wrapper; + TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); + + if (!wrapper->arena || !wrapper->initialized) { + rb_raise(rb_eRuntimeError, "Arena not initialized"); + } + + return SIZET2NUM(hb_arena_capacity(wrapper->arena)); +} + +VALUE Arena_stats(VALUE self) { + herb_arena_wrapper_T* wrapper; + TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); + + if (!wrapper->arena || !wrapper->initialized) { + rb_raise(rb_eRuntimeError, "Arena not initialized"); + } + + hb_arena_print_stats(wrapper->arena); + return Qnil; +} + +hb_arena_T* get_arena_from_value(VALUE arena_obj) { + if (NIL_P(arena_obj)) return NULL; + + herb_arena_wrapper_T* wrapper; + TypedData_Get_Struct(arena_obj, herb_arena_wrapper_T, &herb_arena_type, wrapper); + + if (!wrapper->arena || !wrapper->initialized) { + rb_raise(rb_eRuntimeError, "Arena not initialized"); + } + + return wrapper->arena; +} + +void Init_herb_arena(VALUE mHerb) { + cArena = rb_define_class_under(mHerb, "Arena", rb_cObject); + rb_define_alloc_func(cArena, Arena_allocate); + rb_define_method(cArena, "initialize", Arena_initialize, -1); + rb_define_method(cArena, "reset", Arena_reset, 0); + rb_define_method(cArena, "position", Arena_position, 0); 
+ rb_define_method(cArena, "capacity", Arena_capacity, 0); + rb_define_method(cArena, "stats", Arena_stats, 0); +} diff --git a/ext/herb/arena.h b/ext/herb/arena.h new file mode 100644 index 000000000..6c0c7c631 --- /dev/null +++ b/ext/herb/arena.h @@ -0,0 +1,21 @@ +#ifndef HERB_EXT_ARENA_H +#define HERB_EXT_ARENA_H + +#include +#include "../../src/include/util/hb_arena.h" + +extern VALUE cArena; +extern const rb_data_type_t herb_arena_type; + +VALUE Arena_allocate(VALUE klass); +VALUE Arena_initialize(int argc, VALUE* argv, VALUE self); +VALUE Arena_reset(VALUE self); +VALUE Arena_position(VALUE self); +VALUE Arena_capacity(VALUE self); +VALUE Arena_stats(VALUE self); + +hb_arena_T* get_arena_from_value(VALUE arena_obj); + +void Init_herb_arena(VALUE mHerb); + +#endif diff --git a/ext/herb/extconf.rb b/ext/herb/extconf.rb index fce654bf6..fa4d68575 100644 --- a/ext/herb/extconf.rb +++ b/ext/herb/extconf.rb @@ -52,6 +52,7 @@ ] core_src_files = [ + "arena.c", "extension.c", "nodes.c", "error_helpers.c", @@ -66,6 +67,7 @@ abort("could not find herb.h") unless find_header("herb.h") abort("could not find nodes.h (run `ruby templates/template.rb` to generate the file)") unless find_header("nodes.h") +abort("could not find arena.h") unless find_header("arena.h") abort("could not find extension.h") unless find_header("extension.h") abort("could not find extension_helpers.h") unless find_header("extension_helpers.h") diff --git a/ext/herb/extension.c b/ext/herb/extension.c index 59a885da9..7941ebc44 100644 --- a/ext/herb/extension.c +++ b/ext/herb/extension.c @@ -1,5 +1,6 @@ #include +#include "arena.h" #include "error_helpers.h" #include "extension.h" #include "extension_helpers.h" @@ -103,6 +104,7 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; bool print_arena_stats = false; + VALUE external_arena = Qnil; if (!NIL_P(options)) { VALUE track_whitespace = rb_hash_lookup(options, 
rb_utf8_str_new_cstr("track_whitespace")); @@ -120,24 +122,40 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } + + external_arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); + if (NIL_P(external_arena)) { external_arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } } - hb_arena_T* arena = malloc(sizeof(hb_arena_T)); - if (!arena) { return Qnil; } + hb_arena_T* arena; + bool owns_arena; - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return Qnil; + if (!NIL_P(external_arena)) { + arena = get_arena_from_value(external_arena); + owns_arena = false; + } else { + arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + owns_arena = true; } AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); if (!root) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } return Qnil; } + root->owns_arena = owns_arena; + VALUE result = create_parse_result(root, source); if (print_arena_stats) { hb_arena_print_stats(arena); } @@ -270,6 +288,8 @@ __attribute__((__visibility__("default"))) void Init_herb(void) { cLexResult = rb_define_class_under(mHerb, "LexResult", cResult); cParseResult = rb_define_class_under(mHerb, "ParseResult", cResult); + Init_herb_arena(mHerb); + rb_define_singleton_method(mHerb, "parse", Herb_parse, -1); rb_define_singleton_method(mHerb, "lex", Herb_lex, -1); rb_define_singleton_method(mHerb, "parse_file", Herb_parse_file, -1); diff --git a/javascript/packages/browser/test/browser.test.ts b/javascript/packages/browser/test/browser.test.ts index c4edd044d..aab53728a 100644 --- 
a/javascript/packages/browser/test/browser.test.ts +++ b/javascript/packages/browser/test/browser.test.ts @@ -10,6 +10,141 @@ describe("@herb-tools/browser", () => { expect(Herb).toBeDefined() }) + describe("Arena", () => { + test("createArena function exists on backend", () => { + expect(Herb.backend.createArena).toBeDefined() + expect(typeof Herb.backend.createArena).toBe("function") + }) + + test("arena functions exist on backend", () => { + expect(Herb.backend.resetArena).toBeDefined() + expect(Herb.backend.freeArena).toBeDefined() + expect(Herb.backend.arenaPosition).toBeDefined() + expect(Herb.backend.arenaCapacity).toBeDefined() + }) + + test("creating an arena returns a valid id", () => { + const arenaId = Herb.backend.createArena(0) + expect(arenaId).toBeGreaterThan(0) + Herb.backend.freeArena(arenaId) + }) + + test("creating an arena with custom size", () => { + const arenaId = Herb.backend.createArena(1024 * 1024) + expect(arenaId).toBeGreaterThan(0) + expect(Herb.backend.arenaCapacity(arenaId)).toBeGreaterThanOrEqual(1024 * 1024) + Herb.backend.freeArena(arenaId) + }) + + test("arena position starts at zero", () => { + const arenaId = Herb.backend.createArena(0) + expect(Herb.backend.arenaPosition(arenaId)).toBe(0) + Herb.backend.freeArena(arenaId) + }) + + test("arena position increases after parsing", () => { + const arenaId = Herb.backend.createArena(0) + const initialPosition = Herb.backend.arenaPosition(arenaId) + + Herb.backend.parse("
hello
", { arenaId }) + + expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(initialPosition) + Herb.backend.freeArena(arenaId) + }) + + test("arena can be reused for multiple parse calls", () => { + const arenaId = Herb.backend.createArena(0) + + const result1 = Herb.backend.parse("
first
", { arenaId }) + const positionAfterFirst = Herb.backend.arenaPosition(arenaId) + + const result2 = Herb.backend.parse("second", { arenaId }) + const positionAfterSecond = Herb.backend.arenaPosition(arenaId) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst) + Herb.backend.freeArena(arenaId) + }) + + test("arena reset returns position to zero", () => { + const arenaId = Herb.backend.createArena(0) + + Herb.backend.parse("
hello
", { arenaId }) + expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(0) + + Herb.backend.resetArena(arenaId) + expect(Herb.backend.arenaPosition(arenaId)).toBe(0) + Herb.backend.freeArena(arenaId) + }) + + test("arena can be reused after reset", () => { + const arenaId = Herb.backend.createArena(0) + + const result1 = Herb.backend.parse("
first
", { arenaId }) + Herb.backend.resetArena(arenaId) + + const result2 = Herb.backend.parse("second", { arenaId }) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + Herb.backend.freeArena(arenaId) + }) + + test("multiple arenas can be used independently", () => { + const arenaId1 = Herb.backend.createArena(0) + const arenaId2 = Herb.backend.createArena(0) + + Herb.backend.parse("
first
", { arenaId: arenaId1 }) + const position1 = Herb.backend.arenaPosition(arenaId1) + + Herb.backend.parse("second", { arenaId: arenaId2 }) + const position2 = Herb.backend.arenaPosition(arenaId2) + + expect(position1).toBeGreaterThan(0) + expect(position2).toBeGreaterThan(0) + expect(Herb.backend.arenaPosition(arenaId1)).toBe(position1) + + Herb.backend.freeArena(arenaId1) + Herb.backend.freeArena(arenaId2) + }) + + test("parsing many templates with shared arena", () => { + const arenaId = Herb.backend.createArena(0) + + for (let i = 0; i < 100; i++) { + const result = Herb.backend.parse(`
template ${i}
`, { arenaId }) + expect(result).toBeDefined() + } + + expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(0) + Herb.backend.freeArena(arenaId) + }) + + test("arena reset allows reuse for batch processing", () => { + const arenaId = Herb.backend.createArena(0) + + for (let batch = 0; batch < 3; batch++) { + for (let i = 0; i < 10; i++) { + const result = Herb.backend.parse(`
batch ${batch} item ${i}
`, { arenaId }) + expect(result).toBeDefined() + } + Herb.backend.resetArena(arenaId) + expect(Herb.backend.arenaPosition(arenaId)).toBe(0) + } + + Herb.backend.freeArena(arenaId) + }) + + test("invalid arena id returns -1 for position", () => { + expect(Herb.backend.arenaPosition(99999)).toBe(-1) + }) + + test("invalid arena id returns -1 for capacity", () => { + expect(Herb.backend.arenaCapacity(99999)).toBe(-1) + }) + }) + test("Herb export is of instance HerbBackend", () => { expect(Herb instanceof HerbBackend).toBeTruthy() }) diff --git a/javascript/packages/node-wasm/test/node-wasm.test.ts b/javascript/packages/node-wasm/test/node-wasm.test.ts index 5732bf6b2..cbe466079 100644 --- a/javascript/packages/node-wasm/test/node-wasm.test.ts +++ b/javascript/packages/node-wasm/test/node-wasm.test.ts @@ -13,6 +13,141 @@ describe("@herb-tools/node-wasm", () => { expect(Herb).toBeDefined() }) + describe("Arena", () => { + test("createArena function exists on backend", () => { + expect(Herb.backend.createArena).toBeDefined() + expect(typeof Herb.backend.createArena).toBe("function") + }) + + test("arena functions exist on backend", () => { + expect(Herb.backend.resetArena).toBeDefined() + expect(Herb.backend.freeArena).toBeDefined() + expect(Herb.backend.arenaPosition).toBeDefined() + expect(Herb.backend.arenaCapacity).toBeDefined() + }) + + test("creating an arena returns a valid id", () => { + const arenaId = Herb.backend.createArena(0) + expect(arenaId).toBeGreaterThan(0) + Herb.backend.freeArena(arenaId) + }) + + test("creating an arena with custom size", () => { + const arenaId = Herb.backend.createArena(1024 * 1024) + expect(arenaId).toBeGreaterThan(0) + expect(Herb.backend.arenaCapacity(arenaId)).toBeGreaterThanOrEqual(1024 * 1024) + Herb.backend.freeArena(arenaId) + }) + + test("arena position starts at zero", () => { + const arenaId = Herb.backend.createArena(0) + expect(Herb.backend.arenaPosition(arenaId)).toBe(0) + Herb.backend.freeArena(arenaId) + }) + + 
test("arena position increases after parsing", () => { + const arenaId = Herb.backend.createArena(0) + const initialPosition = Herb.backend.arenaPosition(arenaId) + + Herb.backend.parse("
hello
", { arenaId }) + + expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(initialPosition) + Herb.backend.freeArena(arenaId) + }) + + test("arena can be reused for multiple parse calls", () => { + const arenaId = Herb.backend.createArena(0) + + const result1 = Herb.backend.parse("
first
", { arenaId }) + const positionAfterFirst = Herb.backend.arenaPosition(arenaId) + + const result2 = Herb.backend.parse("second", { arenaId }) + const positionAfterSecond = Herb.backend.arenaPosition(arenaId) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst) + Herb.backend.freeArena(arenaId) + }) + + test("arena reset returns position to zero", () => { + const arenaId = Herb.backend.createArena(0) + + Herb.backend.parse("
hello
", { arenaId }) + expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(0) + + Herb.backend.resetArena(arenaId) + expect(Herb.backend.arenaPosition(arenaId)).toBe(0) + Herb.backend.freeArena(arenaId) + }) + + test("arena can be reused after reset", () => { + const arenaId = Herb.backend.createArena(0) + + const result1 = Herb.backend.parse("
first
", { arenaId }) + Herb.backend.resetArena(arenaId) + + const result2 = Herb.backend.parse("second", { arenaId }) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + Herb.backend.freeArena(arenaId) + }) + + test("multiple arenas can be used independently", () => { + const arenaId1 = Herb.backend.createArena(0) + const arenaId2 = Herb.backend.createArena(0) + + Herb.backend.parse("
first
", { arenaId: arenaId1 }) + const position1 = Herb.backend.arenaPosition(arenaId1) + + Herb.backend.parse("second", { arenaId: arenaId2 }) + const position2 = Herb.backend.arenaPosition(arenaId2) + + expect(position1).toBeGreaterThan(0) + expect(position2).toBeGreaterThan(0) + expect(Herb.backend.arenaPosition(arenaId1)).toBe(position1) + + Herb.backend.freeArena(arenaId1) + Herb.backend.freeArena(arenaId2) + }) + + test("parsing many templates with shared arena", () => { + const arenaId = Herb.backend.createArena(0) + + for (let i = 0; i < 100; i++) { + const result = Herb.backend.parse(`
template ${i}
`, { arenaId }) + expect(result).toBeDefined() + } + + expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(0) + Herb.backend.freeArena(arenaId) + }) + + test("arena reset allows reuse for batch processing", () => { + const arenaId = Herb.backend.createArena(0) + + for (let batch = 0; batch < 3; batch++) { + for (let i = 0; i < 10; i++) { + const result = Herb.backend.parse(`
batch ${batch} item ${i}
`, { arenaId }) + expect(result).toBeDefined() + } + Herb.backend.resetArena(arenaId) + expect(Herb.backend.arenaPosition(arenaId)).toBe(0) + } + + Herb.backend.freeArena(arenaId) + }) + + test("invalid arena id returns -1 for position", () => { + expect(Herb.backend.arenaPosition(99999)).toBe(-1) + }) + + test("invalid arena id returns -1 for capacity", () => { + expect(Herb.backend.arenaCapacity(99999)).toBe(-1) + }) + }) + test("Herb export is of instance HerbBackend", () => { expect(Herb instanceof HerbBackend).toBeTruthy() }) diff --git a/javascript/packages/node/binding.gyp b/javascript/packages/node/binding.gyp index 68ea9aa43..300a7fca2 100644 --- a/javascript/packages/node/binding.gyp +++ b/javascript/packages/node/binding.gyp @@ -4,6 +4,7 @@ "target_name": "<(module_name)", "product_dir": "<(module_path)", "sources": [ + "./extension/arena.cpp", "./extension/error_helpers.cpp", "./extension/extension_helpers.cpp", "./extension/herb.cpp", diff --git a/javascript/packages/node/extension/arena.cpp b/javascript/packages/node/extension/arena.cpp new file mode 100644 index 000000000..2ef07448d --- /dev/null +++ b/javascript/packages/node/extension/arena.cpp @@ -0,0 +1,224 @@
+#include "arena.h"
+
+#include <node_api.h>
+
+#include <cstdlib>
+
+extern "C" {
+#include "../extension/libherb/include/macros.h"
+#include "../extension/libherb/include/util/hb_arena.h"
+}
+
+// Persistent reference to the JS Arena class; created in Init_herb_arena.
+napi_ref arena_constructor_ref = nullptr;
+
+// Native state attached to each JS Arena object through napi_wrap.
+typedef struct {
+  hb_arena_T* arena; // set to nullptr once Arena_free has released it
+  bool initialized;  // true while arena points at an initialized arena
+} herb_arena_wrapper_T;
+
+// Reads `this` for the current call and returns the wrapper attached to it.
+// Returns nullptr when `this` cannot be read, when napi_unwrap fails (no
+// wrapper on the receiver), or when the arena has already been freed. The
+// pointer is nullptr-initialized and every status is checked, so callers
+// never test an indeterminate value.
+static herb_arena_wrapper_T* unwrap_live_arena(napi_env env, napi_callback_info info, napi_value* this_val) {
+  *this_val = nullptr;
+  if (napi_get_cb_info(env, info, nullptr, nullptr, this_val, nullptr) != napi_ok) { return nullptr; }
+
+  herb_arena_wrapper_T* wrapper = nullptr;
+  if (napi_unwrap(env, *this_val, (void**) &wrapper) != napi_ok) { return nullptr; }
+  if (!wrapper || !wrapper->arena || !wrapper->initialized) { return nullptr; }
+
+  return wrapper;
+}
+
+// Finalizer passed to napi_wrap: frees the arena unless Arena_free already
+// did, then frees the wrapper itself.
+static void Arena_destructor(napi_env env, void* finalize_data, void* finalize_hint) {
+  herb_arena_wrapper_T* wrapper = (herb_arena_wrapper_T*) finalize_data;
+  if (!wrapper) { return; }
+
+  if (wrapper->arena && wrapper->initialized) {
+    hb_arena_free(wrapper->arena);
+    free(wrapper->arena);
+  }
+
+  free(wrapper);
+}
+
+// JS: new Arena() or new Arena({ size }). Allocates and initializes an arena
+// and wraps it in `this`. The size defaults to KB(512); a `size` property
+// that napi_get_value_uint32 rejects, or that is zero, also falls back to the
+// default. Throws a JS error and returns nullptr on any failure.
+napi_value Arena_constructor(napi_env env, napi_callback_info info) {
+  size_t argc = 1;
+  napi_value args[1];
+  napi_value this_val = nullptr;
+
+  if (napi_get_cb_info(env, info, &argc, args, &this_val, nullptr) != napi_ok) {
+    napi_throw_error(env, nullptr, "Failed to read Arena constructor arguments");
+    return nullptr;
+  }
+
+  size_t initial_size = KB(512);
+
+  if (argc >= 1) {
+    napi_valuetype valuetype = napi_undefined;
+    napi_typeof(env, args[0], &valuetype);
+
+    if (valuetype == napi_object) {
+      bool has_size_prop = false;
+      napi_has_named_property(env, args[0], "size", &has_size_prop);
+
+      if (has_size_prop) {
+        napi_value size_prop = nullptr;
+        uint32_t size_value = 0;
+        bool have_size = napi_get_named_property(env, args[0], "size", &size_prop) == napi_ok
+                         && napi_get_value_uint32(env, size_prop, &size_value) == napi_ok;
+
+        // Only a successfully converted, non-zero size overrides the default.
+        if (have_size && size_value > 0) { initial_size = (size_t) size_value; }
+      }
+    }
+  }
+
+  herb_arena_wrapper_T* wrapper = (herb_arena_wrapper_T*) malloc(sizeof(herb_arena_wrapper_T));
+  if (!wrapper) {
+    napi_throw_error(env, nullptr, "Failed to allocate arena wrapper");
+    return nullptr;
+  }
+
+  wrapper->arena = (hb_arena_T*) malloc(sizeof(hb_arena_T));
+  if (!wrapper->arena) {
+    free(wrapper);
+    napi_throw_error(env, nullptr, "Failed to allocate arena");
+    return nullptr;
+  }
+
+  if (!hb_arena_init(wrapper->arena, initial_size)) {
+    free(wrapper->arena);
+    free(wrapper);
+    napi_throw_error(env, nullptr, "Failed to initialize arena");
+    return nullptr;
+  }
+
+  wrapper->initialized = true;
+
+  // If wrapping fails the finalizer is never registered, so release here.
+  if (napi_wrap(env, this_val, wrapper, Arena_destructor, nullptr, nullptr) != napi_ok) {
+    hb_arena_free(wrapper->arena);
+    free(wrapper->arena);
+    free(wrapper);
+    napi_throw_error(env, nullptr, "Failed to wrap arena");
+    return nullptr;
+  }
+
+  return this_val;
+}
+
+// JS: arena.reset(). Calls hb_arena_reset and returns `this`; throws
+// "Arena not initialized" when there is no live arena.
+napi_value Arena_reset(napi_env env, napi_callback_info info) {
+  napi_value this_val;
+  herb_arena_wrapper_T* wrapper = unwrap_live_arena(env, info, &this_val);
+
+  if (!wrapper) {
+    napi_throw_error(env, nullptr, "Arena not initialized");
+    return nullptr;
+  }
+
+  hb_arena_reset(wrapper->arena);
+
+  return this_val;
+}
+
+// JS getter arena.position: hb_arena_position() converted to a uint32.
+napi_value Arena_get_position(napi_env env, napi_callback_info info) {
+  napi_value this_val;
+  herb_arena_wrapper_T* wrapper = unwrap_live_arena(env, info, &this_val);
+
+  if (!wrapper) {
+    napi_throw_error(env, nullptr, "Arena not initialized");
+    return nullptr;
+  }
+
+  napi_value result;
+  napi_create_uint32(env, (uint32_t) hb_arena_position(wrapper->arena), &result);
+  return result;
+}
+
+// JS getter arena.capacity: hb_arena_capacity() converted to a uint32.
+napi_value Arena_get_capacity(napi_env env, napi_callback_info info) {
+  napi_value this_val;
+  herb_arena_wrapper_T* wrapper = unwrap_live_arena(env, info, &this_val);
+
+  if (!wrapper) {
+    napi_throw_error(env, nullptr, "Arena not initialized");
+    return nullptr;
+  }
+
+  napi_value result;
+  napi_create_uint32(env, (uint32_t) hb_arena_capacity(wrapper->arena), &result);
+  return result;
+}
+
+// JS: arena.free(). Releases the arena now instead of waiting for the
+// finalizer; a no-op when there is no live arena. Returns undefined.
+napi_value Arena_free(napi_env env, napi_callback_info info) {
+  napi_value this_val;
+  herb_arena_wrapper_T* wrapper = unwrap_live_arena(env, info, &this_val);
+
+  if (wrapper) {
+    hb_arena_free(wrapper->arena);
+    free(wrapper->arena);
+    wrapper->arena = nullptr;
+    wrapper->initialized = false;
+  }
+
+  napi_value undefined;
+  napi_get_undefined(env, &undefined);
+  return undefined;
+}
+
+// Returns the arena wrapped by arena_val, or nullptr when arena_val is not
+// an object, carries no wrapper, or its arena has been freed.
+hb_arena_T* get_arena_from_value(napi_env env, napi_value arena_val) {
+  if (!arena_val) { return nullptr; }
+
+  napi_valuetype valuetype = napi_undefined;
+  if (napi_typeof(env, arena_val, &valuetype) != napi_ok || valuetype != napi_object) { return nullptr; }
+
+  herb_arena_wrapper_T* wrapper = nullptr;
+  if (napi_unwrap(env, arena_val, (void**) &wrapper) != napi_ok) { return nullptr; }
+  if (!wrapper || !wrapper->arena || !wrapper->initialized) { return nullptr; }
+
+  return wrapper->arena;
+}
+
+// Defines the Arena class (reset/free methods, position/capacity getters)
+// and exports it as `Arena`.
+void Init_herb_arena(napi_env env, napi_value exports) {
+  napi_property_descriptor arena_properties[] = {
+    { "reset", nullptr, Arena_reset, nullptr, nullptr, nullptr, napi_default, nullptr },
+    { "free", nullptr, Arena_free, nullptr, nullptr, nullptr, napi_default, nullptr },
+    { "position", nullptr, nullptr, Arena_get_position, nullptr, nullptr, napi_default, nullptr },
+    { "capacity", nullptr, nullptr, Arena_get_capacity, nullptr, nullptr, napi_default, nullptr },
+  };
+
+  napi_value arena_class;
+  napi_define_class(
+    env,
+    "Arena",
+    NAPI_AUTO_LENGTH,
+    Arena_constructor,
+    nullptr,
+    sizeof(arena_properties) / sizeof(arena_properties[0]),
+    arena_properties,
+    &arena_class
+  );
+
+  napi_create_reference(env, arena_class, 1, &arena_constructor_ref);
+  napi_set_named_property(env, exports, "Arena", arena_class);
+}
diff --git a/javascript/packages/node/extension/arena.h b/javascript/packages/node/extension/arena.h new file mode 100644 index 000000000..471d3dea6 --- /dev/null +++ b/javascript/packages/node/extension/arena.h @@ -0,0 +1,22 @@ +#ifndef HERB_NODE_ARENA_H +#define HERB_NODE_ARENA_H + +#include + +extern "C" { +#include "../extension/libherb/include/util/hb_arena.h" +} + +extern napi_ref arena_constructor_ref; + +napi_value Arena_constructor(napi_env env, napi_callback_info info); +napi_value Arena_reset(napi_env env, napi_callback_info info); +napi_value Arena_get_position(napi_env env, napi_callback_info info); +napi_value Arena_get_capacity(napi_env env, napi_callback_info info); +napi_value Arena_free(napi_env env, napi_callback_info info); + +hb_arena_T* get_arena_from_value(napi_env env, napi_value arena_val); + +void Init_herb_arena(napi_env env, napi_value exports); + +#endif diff --git a/javascript/packages/node/extension/herb.cpp b/javascript/packages/node/extension/herb.cpp index 49ecdf8b6..e8cb0ca55 100644 --- a/javascript/packages/node/extension/herb.cpp +++ b/javascript/packages/node/extension/herb.cpp @@ -11,6 +11,7 @@ extern "C" { #include "../extension/libherb/include/util/hb_buffer.h" } +#include "arena.h" #include "error_helpers.h" #include "extension_helpers.h" #include "nodes.h" @@ -121,6 +122,7 @@ napi_value Herb_parse(napi_env env,
napi_callback_info info) { if (!string) { return nullptr; } parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; + hb_arena_T* external_arena = nullptr; if (argc >= 2) { napi_valuetype valuetype; @@ -165,30 +167,53 @@ napi_value Herb_parse(napi_env env, napi_callback_info info) { napi_get_value_bool(env, strict_prop, &strict_value); parser_options.strict = strict_value; } + + bool has_arena_prop; + napi_has_named_property(env, args[1], "arena", &has_arena_prop); + + if (has_arena_prop) { + napi_value arena_prop; + napi_get_named_property(env, args[1], "arena", &arena_prop); + external_arena = get_arena_from_value(env, arena_prop); + } } } - hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + hb_arena_T* arena; + bool owns_arena; - if (!arena) { - free(string); - return nullptr; - } + if (external_arena) { + arena = external_arena; + owns_arena = false; + } else { + arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - if (!hb_arena_init(arena, KB(512))) { - free(arena); - free(string); - return nullptr; + if (!arena) { + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(string); + return nullptr; + } + owns_arena = true; } AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); if (!root) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } free(string); return nullptr; } + + root->owns_arena = owns_arena; + napi_value result = CreateParseResult(env, root, args[0]); ast_node_free((AST_NODE_T *) root); @@ -362,6 +387,8 @@ napi_value Herb_version(napi_env env, napi_callback_info info) { } napi_value Init(napi_env env, napi_value exports) { + Init_herb_arena(env, exports); + napi_property_descriptor descriptors[] = { { "parse", nullptr, Herb_parse, nullptr, nullptr, nullptr, napi_default, nullptr }, { "lex", nullptr, Herb_lex, nullptr, nullptr, nullptr, napi_default, nullptr }, diff --git a/javascript/packages/node/test/node.test.ts 
b/javascript/packages/node/test/node.test.ts index fe3fdd6dc..746576bb4 100644 --- a/javascript/packages/node/test/node.test.ts +++ b/javascript/packages/node/test/node.test.ts @@ -11,6 +11,119 @@ describe("@herb-tools/node", () => { expect(Herb).toBeDefined() }) + describe("Arena", () => { + test("Arena class exists on backend", () => { + expect(Herb.backend.Arena).toBeDefined() + }) + + test("creating an arena with default size", () => { + const arena = new Herb.backend.Arena() + expect(arena).toBeDefined() + expect(arena.capacity).toBeGreaterThan(0) + }) + + test("creating an arena with custom size", () => { + const arena = new Herb.backend.Arena({ size: 1024 * 1024 }) + expect(arena).toBeDefined() + expect(arena.capacity).toBeGreaterThanOrEqual(1024 * 1024) + }) + + test("arena position starts at zero", () => { + const arena = new Herb.backend.Arena() + expect(arena.position).toBe(0) + }) + + test("arena position increases after parsing", () => { + const arena = new Herb.backend.Arena() + const initialPosition = arena.position + + Herb.backend.parse("
hello
", { arena }) + + expect(arena.position).toBeGreaterThan(initialPosition) + }) + + test("arena can be reused for multiple parse calls", () => { + const arena = new Herb.backend.Arena() + + const result1 = Herb.backend.parse("
first
", { arena }) + const positionAfterFirst = arena.position + + const result2 = Herb.backend.parse("second", { arena }) + const positionAfterSecond = arena.position + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst) + }) + + test("arena reset returns position to zero", () => { + const arena = new Herb.backend.Arena() + + Herb.backend.parse("
hello
", { arena }) + expect(arena.position).toBeGreaterThan(0) + + arena.reset() + expect(arena.position).toBe(0) + }) + + test("arena can be reused after reset", () => { + const arena = new Herb.backend.Arena() + + const result1 = Herb.backend.parse("
first
", { arena }) + arena.reset() + + const result2 = Herb.backend.parse("second", { arena }) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + }) + + test("multiple arenas can be used independently", () => { + const arena1 = new Herb.backend.Arena() + const arena2 = new Herb.backend.Arena() + + Herb.backend.parse("
first
", { arena: arena1 }) + const position1 = arena1.position + + Herb.backend.parse("second", { arena: arena2 }) + const position2 = arena2.position + + expect(position1).toBeGreaterThan(0) + expect(position2).toBeGreaterThan(0) + expect(arena1.position).toBe(position1) + }) + + test("parsing many templates with shared arena", () => { + const arena = new Herb.backend.Arena() + + for (let i = 0; i < 100; i++) { + const result = Herb.backend.parse(`
template ${i}
`, { arena }) + expect(result).toBeDefined() + } + + expect(arena.position).toBeGreaterThan(0) + }) + + test("arena reset allows reuse for batch processing", () => { + const arena = new Herb.backend.Arena() + + for (let batch = 0; batch < 3; batch++) { + for (let i = 0; i < 10; i++) { + const result = Herb.backend.parse(`
batch ${batch} item ${i}
`, { arena }) + expect(result).toBeDefined() + } + arena.reset() + expect(arena.position).toBe(0) + } + }) + + test("arena free releases resources", () => { + const arena = new Herb.backend.Arena() + Herb.backend.parse("
hello
", { arena }) + arena.free() + }) + }) + test("Herb export is of instance HerbBackend", () => { expect(Herb instanceof HerbBackend).toBeTruthy() }) diff --git a/templates/src/ast_nodes.c.erb b/templates/src/ast_nodes.c.erb index 48cec5c24..07fe68f7c 100644 --- a/templates/src/ast_nodes.c.erb +++ b/templates/src/ast_nodes.c.erb @@ -57,6 +57,7 @@ <%- if node.human == "document_node" -%> <%= node.human %>->arena = arena; + <%= node.human %>->owns_arena = true; <%- end -%> return <%= node.human %>; @@ -181,10 +182,11 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % ast_free_arrays_recursive((AST_NODE_T*)<%= node.human %>); hb_arena_T* arena = <%= node.human %>->arena; + bool owns_arena = <%= node.human %>->owns_arena; ast_free_base_node(&<%= node.human %>->base); - if (arena != NULL) { + if (arena != NULL && owns_arena) { hb_arena_free(arena); free(arena); } diff --git a/templates/src/include/ast_nodes.h.erb b/templates/src/include/ast_nodes.h.erb index d0506e5c2..cd73455c9 100644 --- a/templates/src/include/ast_nodes.h.erb +++ b/templates/src/include/ast_nodes.h.erb @@ -35,6 +35,7 @@ typedef struct <%= node.struct_name %> { <%= arguments %> <%- if node.human == "document_node" -%> hb_arena_T* arena; + bool owns_arena; <%- end -%> } <%= node.struct_type %>; <%- end -%> diff --git a/test/arena_test.rb b/test/arena_test.rb new file mode 100644 index 000000000..e78ddddd5 --- /dev/null +++ b/test/arena_test.rb @@ -0,0 +1,123 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +class ArenaTest < Minitest::Spec + test "Arena class exists" do + assert defined?(Herb::Arena) + end + + test "creating an arena with default size" do + arena = Herb::Arena.new + assert_instance_of Herb::Arena, arena + assert arena.capacity > 0 + end + + test "creating an arena with custom size" do + arena = Herb::Arena.new(size: 1024 * 1024) + assert_instance_of Herb::Arena, arena + assert arena.capacity >= 1024 * 1024 + end + + test "arena 
position starts at zero" do + arena = Herb::Arena.new + assert_equal 0, arena.position + end + + test "arena position increases after parsing" do + arena = Herb::Arena.new + initial_position = arena.position + + Herb.parse("
hello
", arena: arena) + + assert arena.position > initial_position + end + + test "arena can be reused for multiple parse calls" do + arena = Herb::Arena.new + + result1 = Herb.parse("
first
", arena: arena) + position_after_first = arena.position + + result2 = Herb.parse("second", arena: arena) + position_after_second = arena.position + + assert result1 + assert result2 + assert position_after_second > position_after_first + end + + test "arena reset returns position to zero" do + arena = Herb::Arena.new + + Herb.parse("
hello
", arena: arena) + assert arena.position > 0 + + arena.reset + assert_equal 0, arena.position + end + + test "arena can be reused after reset" do + arena = Herb::Arena.new + + result1 = Herb.parse("
first
", arena: arena) + arena.reset + + result2 = Herb.parse("second", arena: arena) + + assert result1 + assert result2 + end + + test "arena stats prints stats and returns nil" do + arena = Herb::Arena.new + result = arena.stats + assert_nil result + end + + test "multiple arenas can be used independently" do + arena1 = Herb::Arena.new + arena2 = Herb::Arena.new + + Herb.parse("
first
", arena: arena1) + position1 = arena1.position + + Herb.parse("second", arena: arena2) + position2 = arena2.position + + assert position1 > 0 + assert position2 > 0 + assert_equal position1, arena1.position + end + + test "parsing without arena still works" do + result = Herb.parse("
hello
") + assert result + assert result.value + end + + test "parsing many templates with shared arena" do + arena = Herb::Arena.new + + 100.times do |i| + result = Herb.parse("
template #{i}
", arena: arena) + assert result + assert result.value + end + + assert arena.position > 0 + end + + test "arena reset allows reuse for batch processing" do + arena = Herb::Arena.new + + 3.times do |batch| + 10.times do |i| + result = Herb.parse("
batch #{batch} item #{i}
", arena: arena) + assert result + end + arena.reset + assert_equal 0, arena.position + end + end +end diff --git a/wasm/arena.cpp b/wasm/arena.cpp new file mode 100644 index 000000000..9295866ed --- /dev/null +++ b/wasm/arena.cpp @@ -0,0 +1,70 @@ +#include "arena.h" + +#include +#include + +extern "C" { +#include "../src/include/macros.h" +} + +static std::map arena_registry; +static int next_arena_id = 1; + +int Herb_createArena(int initial_size) { + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + if (!arena) return -1; + + size_t size = initial_size > 0 ? (size_t) initial_size : KB(512); + if (!hb_arena_init(arena, size)) { + free(arena); + return -1; + } + + int id = next_arena_id++; + arena_registry[id] = arena; + return id; +} + +void Herb_resetArena(int arena_id) { + auto it = arena_registry.find(arena_id); + + if (it != arena_registry.end() && it->second) { + hb_arena_reset(it->second); + } +} + +void Herb_freeArena(int arena_id) { + auto it = arena_registry.find(arena_id); + + if (it != arena_registry.end() && it->second) { + hb_arena_free(it->second); + free(it->second); + arena_registry.erase(it); + } +} + +int Herb_arenaPosition(int arena_id) { + auto it = arena_registry.find(arena_id); + + if (it != arena_registry.end() && it->second) { + return (int) hb_arena_position(it->second); + } + + return -1; +} + +int Herb_arenaCapacity(int arena_id) { + auto it = arena_registry.find(arena_id); + if (it != arena_registry.end() && it->second) { + return (int) hb_arena_capacity(it->second); + } + return -1; +} + +hb_arena_T* get_arena_by_id(int arena_id) { + auto it = arena_registry.find(arena_id); + if (it != arena_registry.end() && it->second) { + return it->second; + } + return nullptr; +} diff --git a/wasm/arena.h b/wasm/arena.h new file mode 100644 index 000000000..b28f92101 --- /dev/null +++ b/wasm/arena.h @@ -0,0 +1,16 @@ +#ifndef HERB_WASM_ARENA_H +#define HERB_WASM_ARENA_H + +extern "C" { +#include "../src/include/util/hb_arena.h" +} + 
+int Herb_createArena(int initial_size); +void Herb_resetArena(int arena_id); +void Herb_freeArena(int arena_id); +int Herb_arenaPosition(int arena_id); +int Herb_arenaCapacity(int arena_id); + +hb_arena_T* get_arena_by_id(int arena_id); + +#endif diff --git a/wasm/herb-wasm.cpp b/wasm/herb-wasm.cpp index 4002ac0b5..4aa896372 100644 --- a/wasm/herb-wasm.cpp +++ b/wasm/herb-wasm.cpp @@ -4,6 +4,7 @@ #include #include +#include "arena.h" #include "extension_helpers.h" extern "C" { @@ -54,6 +55,7 @@ val Herb_lex(const std::string& source) { val Herb_parse(const std::string& source, val options) { parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; + hb_arena_T* external_arena = nullptr; if (!options.isUndefined() && !options.isNull() && options.typeOf().as() == "object") { if (options.hasOwnProperty("track_whitespace")) { @@ -73,27 +75,45 @@ val Herb_parse(const std::string& source, val options) { if (options.hasOwnProperty("strict")) { parser_options.strict = options["strict"].as(); } + + if (options.hasOwnProperty("arenaId")) { + int arena_id = options["arenaId"].as(); + external_arena = get_arena_by_id(arena_id); + } } - hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + hb_arena_T* arena; + bool owns_arena; - if (!arena) { - return val::null(); - } + if (external_arena) { + arena = external_arena; + owns_arena = false; + } else { + arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return val::null(); + if (!arena) { + return val::null(); + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return val::null(); + } + owns_arena = true; } AST_DOCUMENT_NODE_T* root = herb_parse(source.c_str(), &parser_options, arena); if (!root) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } return val::null(); } + root->owns_arena = owns_arena; + val result = CreateParseResult(root, source); ast_node_free((AST_NODE_T *) root); @@ -151,4 
+171,10 @@ EMSCRIPTEN_BINDINGS(herb_module) { function("extractRuby", &Herb_extract_ruby); function("extractHTML", &Herb_extract_html); function("version", &Herb_version); + + function("createArena", &Herb_createArena); + function("resetArena", &Herb_resetArena); + function("freeArena", &Herb_freeArena); + function("arenaPosition", &Herb_arenaPosition); + function("arenaCapacity", &Herb_arenaCapacity); } From fac81712b944de94804eea59ae207b7df4d29911 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 04:05:39 +0100 Subject: [PATCH 15/18] Improve JavaScript API and also support arena in lex --- ext/herb/extension.c | 89 ++++++-- .../packages/browser/src/wasm-backend.ts | 39 +++- .../packages/browser/test/browser.test.ts | 157 +++++++------ javascript/packages/core/src/arena.ts | 36 +++ javascript/packages/core/src/backend.ts | 9 +- javascript/packages/core/src/herb-backend.ts | 57 ++++- javascript/packages/core/src/index.ts | 2 + javascript/packages/core/src/lex-options.ts | 5 + .../packages/core/src/parser-options.ts | 5 +- .../packages/node-wasm/src/wasm-backend.ts | 39 +++- .../packages/node-wasm/test/node-wasm.test.ts | 157 +++++++------ javascript/packages/node/extension/herb.cpp | 207 ++++++++++++++---- javascript/packages/node/src/node-backend.ts | 37 +++- javascript/packages/node/test/node.test.ts | 101 ++++++--- wasm/herb-wasm.cpp | 38 +++- 15 files changed, 731 insertions(+), 247 deletions(-) create mode 100644 javascript/packages/core/src/arena.ts create mode 100644 javascript/packages/core/src/lex-options.ts diff --git a/ext/herb/extension.c b/ext/herb/extension.c index 7941ebc44..bd93b3f52 100644 --- a/ext/herb/extension.c +++ b/ext/herb/extension.c @@ -25,26 +25,41 @@ static VALUE Herb_lex(int argc, VALUE* argv, VALUE self) { char* string = (char*) check_string(source); bool print_arena_stats = false; + VALUE external_arena = Qnil; if (!NIL_P(options)) { VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); if 
(NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } + + external_arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); + if (NIL_P(external_arena)) { external_arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } } - hb_arena_T* arena = malloc(sizeof(hb_arena_T)); - if (!arena) { return Qnil; } + hb_arena_T* arena; + bool owns_arena; - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return Qnil; + if (!NIL_P(external_arena)) { + arena = get_arena_from_value(external_arena); + owns_arena = false; + } else { + arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + owns_arena = true; } herb_lex_result_T* lex_result = herb_lex(string, arena); if (!lex_result) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } return Qnil; } @@ -63,26 +78,41 @@ static VALUE Herb_lex_file(int argc, VALUE* argv, VALUE self) { char* file_path = (char*) check_string(path); bool print_arena_stats = false; + VALUE external_arena = Qnil; if (!NIL_P(options)) { VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } + + external_arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); + if (NIL_P(external_arena)) { external_arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } } - hb_arena_T* arena = malloc(sizeof(hb_arena_T)); - if (!arena) { return Qnil; } + hb_arena_T* arena; + bool owns_arena; - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return Qnil; + if (!NIL_P(external_arena)) { + arena = get_arena_from_value(external_arena); + owns_arena = false; + } else { + arena = 
malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + owns_arena = true; } herb_lex_result_T* lex_result = herb_lex_file(file_path, arena); if (!lex_result) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } return Qnil; } @@ -176,6 +206,7 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) { parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; bool print_arena_stats = false; + VALUE external_arena = Qnil; if (!NIL_P(options)) { VALUE track_whitespace = rb_hash_lookup(options, rb_utf8_str_new_cstr("track_whitespace")); @@ -193,24 +224,40 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) { VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } + + external_arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); + if (NIL_P(external_arena)) { external_arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } } - hb_arena_T* arena = malloc(sizeof(hb_arena_T)); - if (!arena) { return Qnil; } + hb_arena_T* arena; + bool owns_arena; - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return Qnil; + if (!NIL_P(external_arena)) { + arena = get_arena_from_value(external_arena); + owns_arena = false; + } else { + arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return Qnil; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return Qnil; + } + owns_arena = true; } AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); if (!root) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } return Qnil; } + root->owns_arena = owns_arena; + VALUE result = create_parse_result(root, source_value); if 
(print_arena_stats) { hb_arena_print_stats(arena); } diff --git a/javascript/packages/browser/src/wasm-backend.ts b/javascript/packages/browser/src/wasm-backend.ts index 3c0b53af0..61d3c9173 100644 --- a/javascript/packages/browser/src/wasm-backend.ts +++ b/javascript/packages/browser/src/wasm-backend.ts @@ -1,6 +1,37 @@ import { name, version } from "../package.json" -import { HerbBackend } from "@herb-tools/core" +import { HerbBackend, Arena } from "@herb-tools/core" +import type { ArenaBackend, CreateArenaOptions } from "@herb-tools/core" + +class WASMArenaBackend implements ArenaBackend { + private backend: any + private arenaId: number + + constructor(backend: any, arenaId: number) { + this.backend = backend + this.arenaId = arenaId + } + + get position(): number { + return this.backend.arenaPosition(this.arenaId) + } + + get capacity(): number { + return this.backend.arenaCapacity(this.arenaId) + } + + reset(): void { + this.backend.resetArena(this.arenaId) + } + + free(): void { + this.backend.freeArena(this.arenaId) + } + + toBackendOption(): { arenaId: number } { + return { arenaId: this.arenaId } + } +} export class HerbBackendWASM extends HerbBackend { lexFile(): never { @@ -14,4 +45,10 @@ export class HerbBackendWASM extends HerbBackend { backendVersion(): string { return `${name}@${version}` } + + createArena(options?: CreateArenaOptions): Arena { + this.ensureBackend() + const arenaId = (this.backend as any).createArena(options?.size ?? 
0) + return new Arena(new WASMArenaBackend(this.backend, arenaId)) + } } diff --git a/javascript/packages/browser/test/browser.test.ts b/javascript/packages/browser/test/browser.test.ts index aab53728a..8bfe2f25e 100644 --- a/javascript/packages/browser/test/browser.test.ts +++ b/javascript/packages/browser/test/browser.test.ts @@ -11,137 +11,160 @@ describe("@herb-tools/browser", () => { }) describe("Arena", () => { - test("createArena function exists on backend", () => { - expect(Herb.backend.createArena).toBeDefined() - expect(typeof Herb.backend.createArena).toBe("function") - }) - - test("arena functions exist on backend", () => { - expect(Herb.backend.resetArena).toBeDefined() - expect(Herb.backend.freeArena).toBeDefined() - expect(Herb.backend.arenaPosition).toBeDefined() - expect(Herb.backend.arenaCapacity).toBeDefined() - }) - - test("creating an arena returns a valid id", () => { - const arenaId = Herb.backend.createArena(0) - expect(arenaId).toBeGreaterThan(0) - Herb.backend.freeArena(arenaId) + test("createArena returns an Arena", () => { + const arena = Herb.createArena() + expect(arena).toBeDefined() + expect(arena.capacity).toBeGreaterThan(0) + arena.free() }) test("creating an arena with custom size", () => { - const arenaId = Herb.backend.createArena(1024 * 1024) - expect(arenaId).toBeGreaterThan(0) - expect(Herb.backend.arenaCapacity(arenaId)).toBeGreaterThanOrEqual(1024 * 1024) - Herb.backend.freeArena(arenaId) + const arena = Herb.createArena({ size: 1024 * 1024 }) + expect(arena).toBeDefined() + expect(arena.capacity).toBeGreaterThanOrEqual(1024 * 1024) + arena.free() }) test("arena position starts at zero", () => { - const arenaId = Herb.backend.createArena(0) - expect(Herb.backend.arenaPosition(arenaId)).toBe(0) - Herb.backend.freeArena(arenaId) + const arena = Herb.createArena() + expect(arena.position).toBe(0) + arena.free() }) test("arena position increases after parsing", () => { - const arenaId = Herb.backend.createArena(0) - const 
initialPosition = Herb.backend.arenaPosition(arenaId) + const arena = Herb.createArena() + const initialPosition = arena.position - Herb.backend.parse("
hello
", { arenaId }) + Herb.parse("
hello
", { arena }) - expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(initialPosition) - Herb.backend.freeArena(arenaId) + expect(arena.position).toBeGreaterThan(initialPosition) + arena.free() }) test("arena can be reused for multiple parse calls", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() - const result1 = Herb.backend.parse("
first
", { arenaId }) - const positionAfterFirst = Herb.backend.arenaPosition(arenaId) + const result1 = Herb.parse("
first
", { arena }) + const positionAfterFirst = arena.position - const result2 = Herb.backend.parse("second", { arenaId }) - const positionAfterSecond = Herb.backend.arenaPosition(arenaId) + const result2 = Herb.parse("second", { arena }) + const positionAfterSecond = arena.position expect(result1).toBeDefined() expect(result2).toBeDefined() expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst) - Herb.backend.freeArena(arenaId) + arena.free() }) test("arena reset returns position to zero", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() - Herb.backend.parse("
hello
", { arenaId }) - expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(0) + Herb.parse("
hello
", { arena }) + expect(arena.position).toBeGreaterThan(0) - Herb.backend.resetArena(arenaId) - expect(Herb.backend.arenaPosition(arenaId)).toBe(0) - Herb.backend.freeArena(arenaId) + arena.reset() + expect(arena.position).toBe(0) + arena.free() }) test("arena can be reused after reset", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() - const result1 = Herb.backend.parse("
first
", { arenaId }) - Herb.backend.resetArena(arenaId) + const result1 = Herb.parse("
first
", { arena }) + arena.reset() - const result2 = Herb.backend.parse("second", { arenaId }) + const result2 = Herb.parse("second", { arena }) expect(result1).toBeDefined() expect(result2).toBeDefined() - Herb.backend.freeArena(arenaId) + arena.free() }) test("multiple arenas can be used independently", () => { - const arenaId1 = Herb.backend.createArena(0) - const arenaId2 = Herb.backend.createArena(0) + const arena1 = Herb.createArena() + const arena2 = Herb.createArena() - Herb.backend.parse("
first
", { arenaId: arenaId1 }) - const position1 = Herb.backend.arenaPosition(arenaId1) + Herb.parse("
first
", { arena: arena1 }) + const position1 = arena1.position - Herb.backend.parse("second", { arenaId: arenaId2 }) - const position2 = Herb.backend.arenaPosition(arenaId2) + Herb.parse("second", { arena: arena2 }) + const position2 = arena2.position expect(position1).toBeGreaterThan(0) expect(position2).toBeGreaterThan(0) - expect(Herb.backend.arenaPosition(arenaId1)).toBe(position1) + expect(arena1.position).toBe(position1) - Herb.backend.freeArena(arenaId1) - Herb.backend.freeArena(arenaId2) + arena1.free() + arena2.free() }) test("parsing many templates with shared arena", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() for (let i = 0; i < 100; i++) { - const result = Herb.backend.parse(`
template ${i}
`, { arenaId }) + const result = Herb.parse(`
template ${i}
`, { arena }) expect(result).toBeDefined() } - expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(0) - Herb.backend.freeArena(arenaId) + expect(arena.position).toBeGreaterThan(0) + arena.free() }) test("arena reset allows reuse for batch processing", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() for (let batch = 0; batch < 3; batch++) { for (let i = 0; i < 10; i++) { - const result = Herb.backend.parse(`
batch ${batch} item ${i}
`, { arenaId }) + const result = Herb.parse(`
batch ${batch} item ${i}
`, { arena }) expect(result).toBeDefined() } - Herb.backend.resetArena(arenaId) - expect(Herb.backend.arenaPosition(arenaId)).toBe(0) + arena.reset() + expect(arena.position).toBe(0) } - Herb.backend.freeArena(arenaId) + arena.free() + }) + + test("arena free releases resources", () => { + const arena = Herb.createArena() + Herb.parse("
hello
", { arena }) + arena.free() + }) + + test("arena works with lex", () => { + const arena = Herb.createArena() + + const result = Herb.lex("
hello
", { arena }) + + expect(result).toBeDefined() + expect(result.value.tokens.length).toBeGreaterThan(0) + arena.free() }) - test("invalid arena id returns -1 for position", () => { - expect(Herb.backend.arenaPosition(99999)).toBe(-1) + test("arena can be reused for multiple lex calls", () => { + const arena = Herb.createArena() + + const result1 = Herb.lex("
first
", { arena }) + const result2 = Herb.lex("second", { arena }) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + expect(result1.value.tokens.length).toBeGreaterThan(0) + expect(result2.value.tokens.length).toBeGreaterThan(0) + arena.free() }) - test("invalid arena id returns -1 for capacity", () => { - expect(Herb.backend.arenaCapacity(99999)).toBe(-1) + test("arena can be used for both parse and lex", () => { + const arena = Herb.createArena() + + const parseResult = Herb.parse("
parsed
", { arena }) + const lexResult = Herb.lex("lexed", { arena }) + + expect(parseResult).toBeDefined() + expect(lexResult).toBeDefined() + expect(parseResult.value).toBeDefined() + expect(lexResult.value.tokens.length).toBeGreaterThan(0) + arena.free() }) }) diff --git a/javascript/packages/core/src/arena.ts b/javascript/packages/core/src/arena.ts new file mode 100644 index 000000000..6562912bd --- /dev/null +++ b/javascript/packages/core/src/arena.ts @@ -0,0 +1,36 @@ +export interface ArenaBackend { + position: number + capacity: number + reset(): void + free(): void + toBackendOption(): unknown +} + +export class Arena { + private backend: ArenaBackend + + constructor(backend: ArenaBackend) { + this.backend = backend + } + + get position(): number { + return this.backend.position + } + + get capacity(): number { + return this.backend.capacity + } + + reset(): this { + this.backend.reset() + return this + } + + free(): void { + this.backend.free() + } + + toBackendOption(): unknown { + return this.backend.toBackendOption() + } +} diff --git a/javascript/packages/core/src/backend.ts b/javascript/packages/core/src/backend.ts index a00ca103d..a5b4701f4 100644 --- a/javascript/packages/core/src/backend.ts +++ b/javascript/packages/core/src/backend.ts @@ -1,14 +1,13 @@ import type { SerializedParseResult } from "./parse-result.js" import type { SerializedLexResult } from "./lex-result.js" -import type { ParserOptions } from "./parser-options.js" import type { ExtractRubyOptions } from "./extract-ruby-options.js" interface LibHerbBackendFunctions { - lex: (source: string) => SerializedLexResult - lexFile: (path: string) => SerializedLexResult + lex: (source: string, options?: Record) => SerializedLexResult + lexFile: (path: string, options?: Record) => SerializedLexResult - parse: (source: string, options?: ParserOptions) => SerializedParseResult - parseFile: (path: string) => SerializedParseResult + parse: (source: string, options?: Record) => SerializedParseResult + 
parseFile: (path: string, options?: Record) => SerializedParseResult extractRuby: (source: string, options?: ExtractRubyOptions) => string extractHTML: (source: string) => string diff --git a/javascript/packages/core/src/herb-backend.ts b/javascript/packages/core/src/herb-backend.ts index a6a77c13d..2afb65bd9 100644 --- a/javascript/packages/core/src/herb-backend.ts +++ b/javascript/packages/core/src/herb-backend.ts @@ -6,10 +6,16 @@ import { ParseResult } from "./parse-result.js" import { DEFAULT_PARSER_OPTIONS } from "./parser-options.js" import { DEFAULT_EXTRACT_RUBY_OPTIONS } from "./extract-ruby-options.js" +import type { Arena } from "./arena.js" import type { LibHerbBackend, BackendPromise } from "./backend.js" import type { ParserOptions } from "./parser-options.js" +import type { LexOptions } from "./lex-options.js" import type { ExtractRubyOptions } from "./extract-ruby-options.js" +export interface CreateArenaOptions { + size?: number +} + /** * The main Herb parser interface, providing methods to lex and parse input. */ @@ -44,25 +50,41 @@ export abstract class HerbBackend { /** * Lexes the given source string into a `LexResult`. * @param source - The source code to lex. + * @param options - Optional lexing options. * @returns A `LexResult` instance. * @throws Error if the backend is not loaded. */ - lex(source: string): LexResult { + lex(source: string, options?: LexOptions): LexResult { this.ensureBackend() - return LexResult.from(this.backend.lex(ensureString(source))) + const { arena, ...restOptions } = options || {} + const mergedOptions: Record = { ...restOptions } + + if (arena) { + Object.assign(mergedOptions, arena.toBackendOption()) + } + + return LexResult.from(this.backend.lex(ensureString(source), mergedOptions)) } /** * Lexes a file. * @param path - The file path to lex. + * @param options - Optional lexing options. * @returns A `LexResult` instance. * @throws Error if the backend is not loaded. 
*/ - lexFile(path: string): LexResult { + lexFile(path: string, options?: LexOptions): LexResult { this.ensureBackend() - return LexResult.from(this.backend.lexFile(ensureString(path))) + const { arena, ...restOptions } = options || {} + const mergedOptions: Record = { ...restOptions } + + if (arena) { + Object.assign(mergedOptions, arena.toBackendOption()) + } + + return LexResult.from(this.backend.lexFile(ensureString(path), mergedOptions)) } /** @@ -75,7 +97,12 @@ export abstract class HerbBackend { parse(source: string, options?: ParserOptions): ParseResult { this.ensureBackend() - const mergedOptions = { ...DEFAULT_PARSER_OPTIONS, ...options } + const { arena, ...restOptions } = options || {} + const mergedOptions: Record = { ...DEFAULT_PARSER_OPTIONS, ...restOptions } + + if (arena) { + Object.assign(mergedOptions, arena.toBackendOption()) + } return ParseResult.from(this.backend.parse(ensureString(source), mergedOptions)) } @@ -83,13 +110,21 @@ export abstract class HerbBackend { /** * Parses a file. * @param path - The file path to parse. + * @param options - Optional parsing options. * @returns A `ParseResult` instance. * @throws Error if the backend is not loaded. */ - parseFile(path: string): ParseResult { + parseFile(path: string, options?: ParserOptions): ParseResult { this.ensureBackend() - return ParseResult.from(this.backend.parseFile(ensureString(path))) + const { arena, ...restOptions } = options || {} + const mergedOptions: Record = { ...DEFAULT_PARSER_OPTIONS, ...restOptions } + + if (arena) { + Object.assign(mergedOptions, arena.toBackendOption()) + } + + return ParseResult.from(this.backend.parseFile(ensureString(path), mergedOptions)) } /** @@ -159,4 +194,12 @@ export abstract class HerbBackend { * @returns A string representing the backend version. */ abstract backendVersion(): string + + /** + * Creates a new Arena for memory allocation during parsing. + * @param options - Optional arena creation options. + * @returns An Arena instance. 
+ * @throws Error if the backend is not loaded. + */ + abstract createArena(options?: CreateArenaOptions): Arena } diff --git a/javascript/packages/core/src/index.ts b/javascript/packages/core/src/index.ts index b8c2d488f..188c678d9 100644 --- a/javascript/packages/core/src/index.ts +++ b/javascript/packages/core/src/index.ts @@ -1,3 +1,4 @@ +export * from "./arena.js" export * from "./ast-utils.js" export * from "./backend.js" export * from "./diagnostic.js" @@ -6,6 +7,7 @@ export * from "./errors.js" export * from "./extract-ruby-options.js" export * from "./herb-backend.js" export * from "./levenshtein.js" +export * from "./lex-options.js" export * from "./lex-result.js" export * from "./location.js" export * from "./node-type-guards.js" diff --git a/javascript/packages/core/src/lex-options.ts b/javascript/packages/core/src/lex-options.ts new file mode 100644 index 000000000..cf2ba211b --- /dev/null +++ b/javascript/packages/core/src/lex-options.ts @@ -0,0 +1,5 @@ +import type { Arena } from "./arena.js" + +export interface LexOptions { + arena?: Arena +} diff --git a/javascript/packages/core/src/parser-options.ts b/javascript/packages/core/src/parser-options.ts index 4da90ab1c..01e69b665 100644 --- a/javascript/packages/core/src/parser-options.ts +++ b/javascript/packages/core/src/parser-options.ts @@ -1,10 +1,13 @@ +import type { Arena } from "./arena.js" + export interface ParserOptions { track_whitespace?: boolean analyze?: boolean strict?: boolean + arena?: Arena } -export const DEFAULT_PARSER_OPTIONS: ParserOptions = { +export const DEFAULT_PARSER_OPTIONS: Omit = { track_whitespace: false, analyze: true, strict: true, diff --git a/javascript/packages/node-wasm/src/wasm-backend.ts b/javascript/packages/node-wasm/src/wasm-backend.ts index 36e414816..2104e85c6 100644 --- a/javascript/packages/node-wasm/src/wasm-backend.ts +++ b/javascript/packages/node-wasm/src/wasm-backend.ts @@ -1,9 +1,46 @@ import { name, version } from "../package.json" -import { 
HerbBackend } from "@herb-tools/core" +import { HerbBackend, Arena } from "@herb-tools/core" +import type { ArenaBackend, CreateArenaOptions } from "@herb-tools/core" + +class WASMArenaBackend implements ArenaBackend { + private backend: any + private arenaId: number + + constructor(backend: any, arenaId: number) { + this.backend = backend + this.arenaId = arenaId + } + + get position(): number { + return this.backend.arenaPosition(this.arenaId) + } + + get capacity(): number { + return this.backend.arenaCapacity(this.arenaId) + } + + reset(): void { + this.backend.resetArena(this.arenaId) + } + + free(): void { + this.backend.freeArena(this.arenaId) + } + + toBackendOption(): { arenaId: number } { + return { arenaId: this.arenaId } + } +} export class HerbBackendNodeWASM extends HerbBackend { backendVersion(): string { return `${name}@${version}` } + + createArena(options?: CreateArenaOptions): Arena { + this.ensureBackend() + const arenaId = (this.backend as any).createArena(options?.size ?? 
0) + return new Arena(new WASMArenaBackend(this.backend, arenaId)) + } } diff --git a/javascript/packages/node-wasm/test/node-wasm.test.ts b/javascript/packages/node-wasm/test/node-wasm.test.ts index cbe466079..c4c827d98 100644 --- a/javascript/packages/node-wasm/test/node-wasm.test.ts +++ b/javascript/packages/node-wasm/test/node-wasm.test.ts @@ -14,137 +14,160 @@ describe("@herb-tools/node-wasm", () => { }) describe("Arena", () => { - test("createArena function exists on backend", () => { - expect(Herb.backend.createArena).toBeDefined() - expect(typeof Herb.backend.createArena).toBe("function") - }) - - test("arena functions exist on backend", () => { - expect(Herb.backend.resetArena).toBeDefined() - expect(Herb.backend.freeArena).toBeDefined() - expect(Herb.backend.arenaPosition).toBeDefined() - expect(Herb.backend.arenaCapacity).toBeDefined() - }) - - test("creating an arena returns a valid id", () => { - const arenaId = Herb.backend.createArena(0) - expect(arenaId).toBeGreaterThan(0) - Herb.backend.freeArena(arenaId) + test("createArena returns an Arena", () => { + const arena = Herb.createArena() + expect(arena).toBeDefined() + expect(arena.capacity).toBeGreaterThan(0) + arena.free() }) test("creating an arena with custom size", () => { - const arenaId = Herb.backend.createArena(1024 * 1024) - expect(arenaId).toBeGreaterThan(0) - expect(Herb.backend.arenaCapacity(arenaId)).toBeGreaterThanOrEqual(1024 * 1024) - Herb.backend.freeArena(arenaId) + const arena = Herb.createArena({ size: 1024 * 1024 }) + expect(arena).toBeDefined() + expect(arena.capacity).toBeGreaterThanOrEqual(1024 * 1024) + arena.free() }) test("arena position starts at zero", () => { - const arenaId = Herb.backend.createArena(0) - expect(Herb.backend.arenaPosition(arenaId)).toBe(0) - Herb.backend.freeArena(arenaId) + const arena = Herb.createArena() + expect(arena.position).toBe(0) + arena.free() }) test("arena position increases after parsing", () => { - const arenaId = 
Herb.backend.createArena(0) - const initialPosition = Herb.backend.arenaPosition(arenaId) + const arena = Herb.createArena() + const initialPosition = arena.position - Herb.backend.parse("
hello
", { arenaId }) + Herb.parse("
hello
", { arena }) - expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(initialPosition) - Herb.backend.freeArena(arenaId) + expect(arena.position).toBeGreaterThan(initialPosition) + arena.free() }) test("arena can be reused for multiple parse calls", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() - const result1 = Herb.backend.parse("
first
", { arenaId }) - const positionAfterFirst = Herb.backend.arenaPosition(arenaId) + const result1 = Herb.parse("
first
", { arena }) + const positionAfterFirst = arena.position - const result2 = Herb.backend.parse("second", { arenaId }) - const positionAfterSecond = Herb.backend.arenaPosition(arenaId) + const result2 = Herb.parse("second", { arena }) + const positionAfterSecond = arena.position expect(result1).toBeDefined() expect(result2).toBeDefined() expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst) - Herb.backend.freeArena(arenaId) + arena.free() }) test("arena reset returns position to zero", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() - Herb.backend.parse("
hello
", { arenaId }) - expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(0) + Herb.parse("
hello
", { arena }) + expect(arena.position).toBeGreaterThan(0) - Herb.backend.resetArena(arenaId) - expect(Herb.backend.arenaPosition(arenaId)).toBe(0) - Herb.backend.freeArena(arenaId) + arena.reset() + expect(arena.position).toBe(0) + arena.free() }) test("arena can be reused after reset", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() - const result1 = Herb.backend.parse("
first
", { arenaId }) - Herb.backend.resetArena(arenaId) + const result1 = Herb.parse("
first
", { arena }) + arena.reset() - const result2 = Herb.backend.parse("second", { arenaId }) + const result2 = Herb.parse("second", { arena }) expect(result1).toBeDefined() expect(result2).toBeDefined() - Herb.backend.freeArena(arenaId) + arena.free() }) test("multiple arenas can be used independently", () => { - const arenaId1 = Herb.backend.createArena(0) - const arenaId2 = Herb.backend.createArena(0) + const arena1 = Herb.createArena() + const arena2 = Herb.createArena() - Herb.backend.parse("
first
", { arenaId: arenaId1 }) - const position1 = Herb.backend.arenaPosition(arenaId1) + Herb.parse("
first
", { arena: arena1 }) + const position1 = arena1.position - Herb.backend.parse("second", { arenaId: arenaId2 }) - const position2 = Herb.backend.arenaPosition(arenaId2) + Herb.parse("second", { arena: arena2 }) + const position2 = arena2.position expect(position1).toBeGreaterThan(0) expect(position2).toBeGreaterThan(0) - expect(Herb.backend.arenaPosition(arenaId1)).toBe(position1) + expect(arena1.position).toBe(position1) - Herb.backend.freeArena(arenaId1) - Herb.backend.freeArena(arenaId2) + arena1.free() + arena2.free() }) test("parsing many templates with shared arena", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() for (let i = 0; i < 100; i++) { - const result = Herb.backend.parse(`
template ${i}
`, { arenaId }) + const result = Herb.parse(`
template ${i}
`, { arena }) expect(result).toBeDefined() } - expect(Herb.backend.arenaPosition(arenaId)).toBeGreaterThan(0) - Herb.backend.freeArena(arenaId) + expect(arena.position).toBeGreaterThan(0) + arena.free() }) test("arena reset allows reuse for batch processing", () => { - const arenaId = Herb.backend.createArena(0) + const arena = Herb.createArena() for (let batch = 0; batch < 3; batch++) { for (let i = 0; i < 10; i++) { - const result = Herb.backend.parse(`
batch ${batch} item ${i}
`, { arenaId }) + const result = Herb.parse(`
batch ${batch} item ${i}
`, { arena }) expect(result).toBeDefined() } - Herb.backend.resetArena(arenaId) - expect(Herb.backend.arenaPosition(arenaId)).toBe(0) + arena.reset() + expect(arena.position).toBe(0) } - Herb.backend.freeArena(arenaId) + arena.free() + }) + + test("arena free releases resources", () => { + const arena = Herb.createArena() + Herb.parse("
hello
", { arena }) + arena.free() + }) + + test("arena works with lex", () => { + const arena = Herb.createArena() + + const result = Herb.lex("
hello
", { arena }) + + expect(result).toBeDefined() + expect(result.value.tokens.length).toBeGreaterThan(0) + arena.free() }) - test("invalid arena id returns -1 for position", () => { - expect(Herb.backend.arenaPosition(99999)).toBe(-1) + test("arena can be reused for multiple lex calls", () => { + const arena = Herb.createArena() + + const result1 = Herb.lex("
first
", { arena }) + const result2 = Herb.lex("second", { arena }) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + expect(result1.value.tokens.length).toBeGreaterThan(0) + expect(result2.value.tokens.length).toBeGreaterThan(0) + arena.free() }) - test("invalid arena id returns -1 for capacity", () => { - expect(Herb.backend.arenaCapacity(99999)).toBe(-1) + test("arena can be used for both parse and lex", () => { + const arena = Herb.createArena() + + const parseResult = Herb.parse("
parsed
", { arena }) + const lexResult = Herb.lex("lexed", { arena }) + + expect(parseResult).toBeDefined() + expect(lexResult).toBeDefined() + expect(parseResult.value).toBeDefined() + expect(lexResult.value.tokens.length).toBeGreaterThan(0) + arena.free() }) }) diff --git a/javascript/packages/node/extension/herb.cpp b/javascript/packages/node/extension/herb.cpp index e8cb0ca55..8510a62f5 100644 --- a/javascript/packages/node/extension/herb.cpp +++ b/javascript/packages/node/extension/herb.cpp @@ -22,8 +22,8 @@ extern "C" { #include napi_value Herb_lex(napi_env env, napi_callback_info info) { - size_t argc = 1; - napi_value args[1]; + size_t argc = 2; + napi_value args[2]; napi_get_cb_info(env, info, &argc, args, nullptr, nullptr); if (argc < 1) { @@ -34,24 +34,53 @@ napi_value Herb_lex(napi_env env, napi_callback_info info) { char* string = CheckString(env, args[0]); if (!string) { return nullptr; } - hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + hb_arena_T* external_arena = nullptr; - if (!arena) { - free(string); - return nullptr; + if (argc >= 2) { + napi_valuetype valuetype; + napi_typeof(env, args[1], &valuetype); + + if (valuetype == napi_object) { + bool has_arena_prop; + napi_has_named_property(env, args[1], "arena", &has_arena_prop); + + if (has_arena_prop) { + napi_value arena_prop; + napi_get_named_property(env, args[1], "arena", &arena_prop); + external_arena = get_arena_from_value(env, arena_prop); + } + } } - if (!hb_arena_init(arena, KB(512))) { - free(arena); - free(string); - return nullptr; + hb_arena_T* arena; + bool owns_arena; + + if (external_arena) { + arena = external_arena; + owns_arena = false; + } else { + arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(string); + return nullptr; + } + owns_arena = true; } herb_lex_result_T* lex_result = herb_lex(string, arena); if (!lex_result) { - hb_arena_free(arena); 
- free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } free(string); return nullptr; } @@ -65,8 +94,8 @@ napi_value Herb_lex(napi_env env, napi_callback_info info) { } napi_value Herb_lex_file(napi_env env, napi_callback_info info) { - size_t argc = 1; - napi_value args[1]; + size_t argc = 2; + napi_value args[2]; napi_get_cb_info(env, info, &argc, args, nullptr, nullptr); if (argc < 1) { @@ -77,24 +106,53 @@ napi_value Herb_lex_file(napi_env env, napi_callback_info info) { char* file_path = CheckString(env, args[0]); if (!file_path) { return nullptr; } - hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + hb_arena_T* external_arena = nullptr; + + if (argc >= 2) { + napi_valuetype valuetype; + napi_typeof(env, args[1], &valuetype); - if (!arena) { - free(file_path); - return nullptr; + if (valuetype == napi_object) { + bool has_arena_prop; + napi_has_named_property(env, args[1], "arena", &has_arena_prop); + + if (has_arena_prop) { + napi_value arena_prop; + napi_get_named_property(env, args[1], "arena", &arena_prop); + external_arena = get_arena_from_value(env, arena_prop); + } + } } - if (!hb_arena_init(arena, KB(512))) { - free(arena); - free(file_path); - return nullptr; + hb_arena_T* arena; + bool owns_arena; + + if (external_arena) { + arena = external_arena; + owns_arena = false; + } else { + arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(file_path); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(file_path); + return nullptr; + } + owns_arena = true; } herb_lex_result_T* lex_result = herb_lex_file(file_path, arena); if (!lex_result) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } free(file_path); return nullptr; } @@ -223,8 +281,8 @@ napi_value Herb_parse(napi_env env, napi_callback_info info) { } napi_value Herb_parse_file(napi_env env, napi_callback_info info) { - size_t argc = 1; - napi_value args[1]; 
+ size_t argc = 2; + napi_value args[2]; napi_get_cb_info(env, info, &argc, args, nullptr, nullptr); if (argc < 1) { @@ -243,31 +301,102 @@ napi_value Herb_parse_file(napi_env env, napi_callback_info info) { return nullptr; } - hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; + hb_arena_T* external_arena = nullptr; - if (!arena) { - free(file_path); - free(string); - return nullptr; + if (argc >= 2) { + napi_valuetype valuetype; + napi_typeof(env, args[1], &valuetype); + + if (valuetype == napi_object) { + napi_value track_whitespace_prop; + bool has_track_whitespace_prop; + napi_has_named_property(env, args[1], "track_whitespace", &has_track_whitespace_prop); + + if (has_track_whitespace_prop) { + napi_get_named_property(env, args[1], "track_whitespace", &track_whitespace_prop); + bool track_whitespace_value; + napi_get_value_bool(env, track_whitespace_prop, &track_whitespace_value); + + if (track_whitespace_value) { + parser_options.track_whitespace = true; + } + } + + napi_value analyze_prop; + bool has_analyze_prop; + napi_has_named_property(env, args[1], "analyze", &has_analyze_prop); + + if (has_analyze_prop) { + napi_get_named_property(env, args[1], "analyze", &analyze_prop); + bool analyze_value; + napi_get_value_bool(env, analyze_prop, &analyze_value); + + if (!analyze_value) { + parser_options.analyze = false; + } + } + + napi_value strict_prop; + bool has_strict_prop; + napi_has_named_property(env, args[1], "strict", &has_strict_prop); + + if (has_strict_prop) { + napi_get_named_property(env, args[1], "strict", &strict_prop); + bool strict_value; + napi_get_value_bool(env, strict_prop, &strict_value); + parser_options.strict = strict_value; + } + + bool has_arena_prop; + napi_has_named_property(env, args[1], "arena", &has_arena_prop); + + if (has_arena_prop) { + napi_value arena_prop; + napi_get_named_property(env, args[1], "arena", &arena_prop); + external_arena = 
get_arena_from_value(env, arena_prop); + } + } } - if (!hb_arena_init(arena, KB(512))) { - free(arena); - free(file_path); - free(string); - return nullptr; + hb_arena_T* arena; + bool owns_arena; + + if (external_arena) { + arena = external_arena; + owns_arena = false; + } else { + arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + free(file_path); + free(string); + return nullptr; + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + free(file_path); + free(string); + return nullptr; + } + owns_arena = true; } - AST_DOCUMENT_NODE_T* root = herb_parse(string, nullptr, arena); + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); if (!root) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } free(file_path); free(string); return nullptr; } + root->owns_arena = owns_arena; + napi_value result = CreateParseResult(env, root, source_value); ast_node_free((AST_NODE_T *) root); diff --git a/javascript/packages/node/src/node-backend.ts b/javascript/packages/node/src/node-backend.ts index 57206b342..aabb67b3f 100644 --- a/javascript/packages/node/src/node-backend.ts +++ b/javascript/packages/node/src/node-backend.ts @@ -1,9 +1,44 @@ import packageJSON from "../package.json" with { type: "json" } -import { HerbBackend } from "@herb-tools/core" +import { HerbBackend, Arena } from "@herb-tools/core" +import type { ArenaBackend, CreateArenaOptions } from "@herb-tools/core" + +class NodeArenaBackend implements ArenaBackend { + private nativeArena: any + + constructor(nativeArena: any) { + this.nativeArena = nativeArena + } + + get position(): number { + return this.nativeArena.position + } + + get capacity(): number { + return this.nativeArena.capacity + } + + reset(): void { + this.nativeArena.reset() + } + + free(): void { + this.nativeArena.free() + } + + toBackendOption(): { arena: any } { + return { arena: this.nativeArena } + } +} export class HerbBackendNode extends 
HerbBackend { backendVersion(): string { return `${packageJSON.name}@${packageJSON.version}` } + + createArena(options?: CreateArenaOptions): Arena { + this.ensureBackend() + const nativeArena = new (this.backend as any).Arena(options) + return new Arena(new NodeArenaBackend(nativeArena)) + } } diff --git a/javascript/packages/node/test/node.test.ts b/javascript/packages/node/test/node.test.ts index 746576bb4..06ffecc4a 100644 --- a/javascript/packages/node/test/node.test.ts +++ b/javascript/packages/node/test/node.test.ts @@ -12,114 +12,159 @@ describe("@herb-tools/node", () => { }) describe("Arena", () => { - test("Arena class exists on backend", () => { - expect(Herb.backend.Arena).toBeDefined() - }) - - test("creating an arena with default size", () => { - const arena = new Herb.backend.Arena() + test("createArena returns an Arena", () => { + const arena = Herb.createArena() expect(arena).toBeDefined() expect(arena.capacity).toBeGreaterThan(0) + arena.free() }) test("creating an arena with custom size", () => { - const arena = new Herb.backend.Arena({ size: 1024 * 1024 }) + const arena = Herb.createArena({ size: 1024 * 1024 }) expect(arena).toBeDefined() expect(arena.capacity).toBeGreaterThanOrEqual(1024 * 1024) + arena.free() }) test("arena position starts at zero", () => { - const arena = new Herb.backend.Arena() + const arena = Herb.createArena() expect(arena.position).toBe(0) + arena.free() }) test("arena position increases after parsing", () => { - const arena = new Herb.backend.Arena() + const arena = Herb.createArena() const initialPosition = arena.position - Herb.backend.parse("
hello
", { arena }) + Herb.parse("
hello
", { arena }) expect(arena.position).toBeGreaterThan(initialPosition) + arena.free() }) test("arena can be reused for multiple parse calls", () => { - const arena = new Herb.backend.Arena() + const arena = Herb.createArena() - const result1 = Herb.backend.parse("
first
", { arena }) + const result1 = Herb.parse("
first
", { arena }) const positionAfterFirst = arena.position - const result2 = Herb.backend.parse("second", { arena }) + const result2 = Herb.parse("second", { arena }) const positionAfterSecond = arena.position expect(result1).toBeDefined() expect(result2).toBeDefined() expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst) + arena.free() }) test("arena reset returns position to zero", () => { - const arena = new Herb.backend.Arena() + const arena = Herb.createArena() - Herb.backend.parse("
hello
", { arena }) + Herb.parse("
hello
", { arena }) expect(arena.position).toBeGreaterThan(0) arena.reset() expect(arena.position).toBe(0) + arena.free() }) test("arena can be reused after reset", () => { - const arena = new Herb.backend.Arena() + const arena = Herb.createArena() - const result1 = Herb.backend.parse("
first
", { arena }) + const result1 = Herb.parse("
first
", { arena }) arena.reset() - const result2 = Herb.backend.parse("second", { arena }) + const result2 = Herb.parse("second", { arena }) expect(result1).toBeDefined() expect(result2).toBeDefined() + arena.free() }) test("multiple arenas can be used independently", () => { - const arena1 = new Herb.backend.Arena() - const arena2 = new Herb.backend.Arena() + const arena1 = Herb.createArena() + const arena2 = Herb.createArena() - Herb.backend.parse("
first
", { arena: arena1 }) + Herb.parse("
first
", { arena: arena1 }) const position1 = arena1.position - Herb.backend.parse("second", { arena: arena2 }) + Herb.parse("second", { arena: arena2 }) const position2 = arena2.position expect(position1).toBeGreaterThan(0) expect(position2).toBeGreaterThan(0) expect(arena1.position).toBe(position1) + + arena1.free() + arena2.free() }) test("parsing many templates with shared arena", () => { - const arena = new Herb.backend.Arena() + const arena = Herb.createArena() for (let i = 0; i < 100; i++) { - const result = Herb.backend.parse(`
template ${i}
`, { arena }) + const result = Herb.parse(`
template ${i}
`, { arena }) expect(result).toBeDefined() } expect(arena.position).toBeGreaterThan(0) + arena.free() }) test("arena reset allows reuse for batch processing", () => { - const arena = new Herb.backend.Arena() + const arena = Herb.createArena() for (let batch = 0; batch < 3; batch++) { for (let i = 0; i < 10; i++) { - const result = Herb.backend.parse(`
batch ${batch} item ${i}
`, { arena }) + const result = Herb.parse(`
batch ${batch} item ${i}
`, { arena }) expect(result).toBeDefined() } arena.reset() expect(arena.position).toBe(0) } + + arena.free() }) test("arena free releases resources", () => { - const arena = new Herb.backend.Arena() - Herb.backend.parse("
hello
", { arena }) + const arena = Herb.createArena() + Herb.parse("
hello
", { arena }) + arena.free() + }) + + test("arena works with lex", () => { + const arena = Herb.createArena() + + const result = Herb.lex("
hello
", { arena }) + + expect(result).toBeDefined() + expect(result.value.tokens.length).toBeGreaterThan(0) + arena.free() + }) + + test("arena can be reused for multiple lex calls", () => { + const arena = Herb.createArena() + + const result1 = Herb.lex("
first
", { arena }) + const result2 = Herb.lex("second", { arena }) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + expect(result1.value.tokens.length).toBeGreaterThan(0) + expect(result2.value.tokens.length).toBeGreaterThan(0) + arena.free() + }) + + test("arena can be used for both parse and lex", () => { + const arena = Herb.createArena() + + const parseResult = Herb.parse("
parsed
", { arena }) + const lexResult = Herb.lex("lexed", { arena }) + + expect(parseResult).toBeDefined() + expect(lexResult).toBeDefined() + expect(parseResult.value).toBeDefined() + expect(lexResult.value.tokens.length).toBeGreaterThan(0) arena.free() }) }) diff --git a/wasm/herb-wasm.cpp b/wasm/herb-wasm.cpp index 4aa896372..50e0a1ecd 100644 --- a/wasm/herb-wasm.cpp +++ b/wasm/herb-wasm.cpp @@ -26,23 +26,43 @@ extern "C" { using namespace emscripten; -val Herb_lex(const std::string& source) { - hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); +val Herb_lex(const std::string& source, val options) { + hb_arena_T* external_arena = nullptr; - if (!arena) { - return val::null(); + if (!options.isUndefined() && !options.isNull() && options.typeOf().as() == "object") { + if (options.hasOwnProperty("arenaId")) { + int arena_id = options["arenaId"].as(); + external_arena = get_arena_by_id(arena_id); + } } - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return val::null(); + hb_arena_T* arena; + bool owns_arena; + + if (external_arena) { + arena = external_arena; + owns_arena = false; + } else { + arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + + if (!arena) { + return val::null(); + } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return val::null(); + } + owns_arena = true; } herb_lex_result_T* lex_result = herb_lex(source.c_str(), arena); if (!lex_result) { - hb_arena_free(arena); - free(arena); + if (owns_arena) { + hb_arena_free(arena); + free(arena); + } return val::null(); } From cf78e31f5ccc0f1b064957f9e5a9f02539c02e3d Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 04:18:20 +0100 Subject: [PATCH 16/18] Extract arena helpers --- ext/herb/arena.c | 37 ++++ ext/herb/arena.h | 10 + ext/herb/extension.c | 128 +++---------- javascript/packages/node/extension/arena.cpp | 44 +++++ javascript/packages/node/extension/arena.h | 9 + javascript/packages/node/extension/herb.cpp | 185 ++++--------------- wasm/arena.cpp | 37 ++++ 
wasm/arena.h | 11 ++ wasm/herb-wasm.cpp | 73 ++------ 9 files changed, 214 insertions(+), 320 deletions(-) diff --git a/ext/herb/arena.c b/ext/herb/arena.c index 3abdda3c5..c340a01f0 100644 --- a/ext/herb/arena.c +++ b/ext/herb/arena.c @@ -136,6 +136,43 @@ hb_arena_T* get_arena_from_value(VALUE arena_obj) { return wrapper->arena; } +VALUE get_arena_option_from_hash(VALUE options) { + if (NIL_P(options)) return Qnil; + + VALUE arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); + if (NIL_P(arena)) { arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } + + return arena; +} + +bool setup_arena_context(VALUE external_arena, arena_context_T* context) { + if (!NIL_P(external_arena)) { + context->arena = get_arena_from_value(external_arena); + context->owns_arena = false; + return true; + } + + context->arena = malloc(sizeof(hb_arena_T)); + if (!context->arena) { return false; } + + if (!hb_arena_init(context->arena, KB(512))) { + free(context->arena); + context->arena = NULL; + return false; + } + + context->owns_arena = true; + return true; +} + +void cleanup_arena_context(arena_context_T* context) { + if (context->owns_arena && context->arena) { + hb_arena_free(context->arena); + free(context->arena); + context->arena = NULL; + } +} + void Init_herb_arena(VALUE mHerb) { cArena = rb_define_class_under(mHerb, "Arena", rb_cObject); rb_define_alloc_func(cArena, Arena_allocate); diff --git a/ext/herb/arena.h b/ext/herb/arena.h index 6c0c7c631..92d4c8dab 100644 --- a/ext/herb/arena.h +++ b/ext/herb/arena.h @@ -2,6 +2,7 @@ #define HERB_EXT_ARENA_H #include +#include #include "../../src/include/util/hb_arena.h" extern VALUE cArena; @@ -16,6 +17,15 @@ VALUE Arena_stats(VALUE self); hb_arena_T* get_arena_from_value(VALUE arena_obj); +typedef struct { + hb_arena_T* arena; + bool owns_arena; +} arena_context_T; + +VALUE get_arena_option_from_hash(VALUE options); +bool setup_arena_context(VALUE external_arena, arena_context_T* context); +void 
cleanup_arena_context(arena_context_T* context); + void Init_herb_arena(VALUE mHerb); #endif diff --git a/ext/herb/extension.c b/ext/herb/extension.c index bd93b3f52..fb7790c28 100644 --- a/ext/herb/extension.c +++ b/ext/herb/extension.c @@ -25,47 +25,26 @@ static VALUE Herb_lex(int argc, VALUE* argv, VALUE self) { char* string = (char*) check_string(source); bool print_arena_stats = false; - VALUE external_arena = Qnil; if (!NIL_P(options)) { VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } - - external_arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); - if (NIL_P(external_arena)) { external_arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } } - hb_arena_T* arena; - bool owns_arena; - - if (!NIL_P(external_arena)) { - arena = get_arena_from_value(external_arena); - owns_arena = false; - } else { - arena = malloc(sizeof(hb_arena_T)); - if (!arena) { return Qnil; } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return Qnil; - } - owns_arena = true; - } + arena_context_T context; + if (!setup_arena_context(get_arena_option_from_hash(options), &context)) { return Qnil; } - herb_lex_result_T* lex_result = herb_lex(string, arena); + herb_lex_result_T* lex_result = herb_lex(string, context.arena); if (!lex_result) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); return Qnil; } VALUE result = create_lex_result(lex_result->tokens, source); - if (print_arena_stats) { hb_arena_print_stats(arena); } + if (print_arena_stats) { hb_arena_print_stats(context.arena); } herb_free_lex_result(&lex_result); @@ -78,48 +57,27 @@ static VALUE Herb_lex_file(int argc, VALUE* argv, VALUE self) { char* file_path = (char*) check_string(path); bool print_arena_stats = false; - VALUE external_arena = 
Qnil; if (!NIL_P(options)) { VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } - - external_arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); - if (NIL_P(external_arena)) { external_arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } } - hb_arena_T* arena; - bool owns_arena; - - if (!NIL_P(external_arena)) { - arena = get_arena_from_value(external_arena); - owns_arena = false; - } else { - arena = malloc(sizeof(hb_arena_T)); - if (!arena) { return Qnil; } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return Qnil; - } - owns_arena = true; - } + arena_context_T context; + if (!setup_arena_context(get_arena_option_from_hash(options), &context)) { return Qnil; } - herb_lex_result_T* lex_result = herb_lex_file(file_path, arena); + herb_lex_result_T* lex_result = herb_lex_file(file_path, context.arena); if (!lex_result) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); return Qnil; } VALUE source_value = read_file_to_ruby_string(file_path); VALUE result = create_lex_result(lex_result->tokens, source_value); - if (print_arena_stats) { hb_arena_print_stats(arena); } + if (print_arena_stats) { hb_arena_print_stats(context.arena); } herb_free_lex_result(&lex_result); @@ -134,7 +92,6 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; bool print_arena_stats = false; - VALUE external_arena = Qnil; if (!NIL_P(options)) { VALUE track_whitespace = rb_hash_lookup(options, rb_utf8_str_new_cstr("track_whitespace")); @@ -152,43 +109,23 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) { VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); if (NIL_P(arena_stats)) { arena_stats = 
rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; } - - external_arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); - if (NIL_P(external_arena)) { external_arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } } - hb_arena_T* arena; - bool owns_arena; - - if (!NIL_P(external_arena)) { - arena = get_arena_from_value(external_arena); - owns_arena = false; - } else { - arena = malloc(sizeof(hb_arena_T)); - if (!arena) { return Qnil; } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return Qnil; - } - owns_arena = true; - } + arena_context_T context; + if (!setup_arena_context(get_arena_option_from_hash(options), &context)) { return Qnil; } - AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, context.arena); if (!root) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); return Qnil; } - root->owns_arena = owns_arena; + root->owns_arena = context.owns_arena; VALUE result = create_parse_result(root, source); - if (print_arena_stats) { hb_arena_print_stats(arena); } + if (print_arena_stats) { hb_arena_print_stats(context.arena); } ast_node_free((AST_NODE_T*) root); @@ -206,7 +143,6 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) { parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; bool print_arena_stats = false; - VALUE external_arena = Qnil; if (!NIL_P(options)) { VALUE track_whitespace = rb_hash_lookup(options, rb_utf8_str_new_cstr("track_whitespace")); @@ -224,43 +160,23 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) { VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats")); if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); } if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = 
true; } - - external_arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); - if (NIL_P(external_arena)) { external_arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } } - hb_arena_T* arena; - bool owns_arena; - - if (!NIL_P(external_arena)) { - arena = get_arena_from_value(external_arena); - owns_arena = false; - } else { - arena = malloc(sizeof(hb_arena_T)); - if (!arena) { return Qnil; } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return Qnil; - } - owns_arena = true; - } + arena_context_T context; + if (!setup_arena_context(get_arena_option_from_hash(options), &context)) { return Qnil; } - AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, context.arena); if (!root) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); return Qnil; } - root->owns_arena = owns_arena; + root->owns_arena = context.owns_arena; VALUE result = create_parse_result(root, source_value); - if (print_arena_stats) { hb_arena_print_stats(arena); } + if (print_arena_stats) { hb_arena_print_stats(context.arena); } ast_node_free((AST_NODE_T*) root); diff --git a/javascript/packages/node/extension/arena.cpp b/javascript/packages/node/extension/arena.cpp index 2ef07448d..0ece9ae0f 100644 --- a/javascript/packages/node/extension/arena.cpp +++ b/javascript/packages/node/extension/arena.cpp @@ -166,6 +166,50 @@ hb_arena_T* get_arena_from_value(napi_env env, napi_value arena_val) { return wrapper->arena; } +hb_arena_T* get_arena_option_from_object(napi_env env, napi_value options) { + if (!options) return nullptr; + + napi_valuetype valuetype; + napi_typeof(env, options, &valuetype); + if (valuetype != napi_object) return nullptr; + + bool has_arena_prop; + napi_has_named_property(env, options, "arena", &has_arena_prop); + if (!has_arena_prop) return nullptr; + + napi_value arena_prop; + napi_get_named_property(env, options, "arena", 
&arena_prop); + return get_arena_from_value(env, arena_prop); +} + +bool setup_arena_context(napi_env env, hb_arena_T* external_arena, arena_context_T* context) { + if (external_arena) { + context->arena = external_arena; + context->owns_arena = false; + return true; + } + + context->arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + if (!context->arena) { return false; } + + if (!hb_arena_init(context->arena, KB(512))) { + free(context->arena); + context->arena = nullptr; + return false; + } + + context->owns_arena = true; + return true; +} + +void cleanup_arena_context(arena_context_T* context) { + if (context->owns_arena && context->arena) { + hb_arena_free(context->arena); + free(context->arena); + context->arena = nullptr; + } +} + void Init_herb_arena(napi_env env, napi_value exports) { napi_property_descriptor arena_properties[] = { { "reset", nullptr, Arena_reset, nullptr, nullptr, nullptr, napi_default, nullptr }, diff --git a/javascript/packages/node/extension/arena.h b/javascript/packages/node/extension/arena.h index 471d3dea6..1b26550f7 100644 --- a/javascript/packages/node/extension/arena.h +++ b/javascript/packages/node/extension/arena.h @@ -17,6 +17,15 @@ napi_value Arena_free(napi_env env, napi_callback_info info); hb_arena_T* get_arena_from_value(napi_env env, napi_value arena_val); +typedef struct { + hb_arena_T* arena; + bool owns_arena; +} arena_context_T; + +hb_arena_T* get_arena_option_from_object(napi_env env, napi_value options); +bool setup_arena_context(napi_env env, hb_arena_T* external_arena, arena_context_T* context); +void cleanup_arena_context(arena_context_T* context); + void Init_herb_arena(napi_env env, napi_value exports); #endif diff --git a/javascript/packages/node/extension/herb.cpp b/javascript/packages/node/extension/herb.cpp index 8510a62f5..3900db161 100644 --- a/javascript/packages/node/extension/herb.cpp +++ b/javascript/packages/node/extension/herb.cpp @@ -34,53 +34,18 @@ napi_value Herb_lex(napi_env env, 
napi_callback_info info) { char* string = CheckString(env, args[0]); if (!string) { return nullptr; } - hb_arena_T* external_arena = nullptr; - - if (argc >= 2) { - napi_valuetype valuetype; - napi_typeof(env, args[1], &valuetype); - - if (valuetype == napi_object) { - bool has_arena_prop; - napi_has_named_property(env, args[1], "arena", &has_arena_prop); - - if (has_arena_prop) { - napi_value arena_prop; - napi_get_named_property(env, args[1], "arena", &arena_prop); - external_arena = get_arena_from_value(env, arena_prop); - } - } - } - - hb_arena_T* arena; - bool owns_arena; - - if (external_arena) { - arena = external_arena; - owns_arena = false; - } else { - arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - - if (!arena) { - free(string); - return nullptr; - } + hb_arena_T* external_arena = (argc >= 2) ? get_arena_option_from_object(env, args[1]) : nullptr; - if (!hb_arena_init(arena, KB(512))) { - free(arena); - free(string); - return nullptr; - } - owns_arena = true; + arena_context_T context; + if (!setup_arena_context(env, external_arena, &context)) { + free(string); + return nullptr; } - herb_lex_result_T* lex_result = herb_lex(string, arena); + herb_lex_result_T* lex_result = herb_lex(string, context.arena); if (!lex_result) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); free(string); return nullptr; } @@ -106,53 +71,18 @@ napi_value Herb_lex_file(napi_env env, napi_callback_info info) { char* file_path = CheckString(env, args[0]); if (!file_path) { return nullptr; } - hb_arena_T* external_arena = nullptr; - - if (argc >= 2) { - napi_valuetype valuetype; - napi_typeof(env, args[1], &valuetype); - - if (valuetype == napi_object) { - bool has_arena_prop; - napi_has_named_property(env, args[1], "arena", &has_arena_prop); - - if (has_arena_prop) { - napi_value arena_prop; - napi_get_named_property(env, args[1], "arena", &arena_prop); - external_arena = get_arena_from_value(env, arena_prop); - } - } - } + 
hb_arena_T* external_arena = (argc >= 2) ? get_arena_option_from_object(env, args[1]) : nullptr; - hb_arena_T* arena; - bool owns_arena; - - if (external_arena) { - arena = external_arena; - owns_arena = false; - } else { - arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - - if (!arena) { - free(file_path); - return nullptr; - } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - free(file_path); - return nullptr; - } - owns_arena = true; + arena_context_T context; + if (!setup_arena_context(env, external_arena, &context)) { + free(file_path); + return nullptr; } - herb_lex_result_T* lex_result = herb_lex_file(file_path, arena); + herb_lex_result_T* lex_result = herb_lex_file(file_path, context.arena); if (!lex_result) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); free(file_path); return nullptr; } @@ -226,51 +156,25 @@ napi_value Herb_parse(napi_env env, napi_callback_info info) { parser_options.strict = strict_value; } - bool has_arena_prop; - napi_has_named_property(env, args[1], "arena", &has_arena_prop); - - if (has_arena_prop) { - napi_value arena_prop; - napi_get_named_property(env, args[1], "arena", &arena_prop); - external_arena = get_arena_from_value(env, arena_prop); - } + external_arena = get_arena_option_from_object(env, args[1]); } } - hb_arena_T* arena; - bool owns_arena; - - if (external_arena) { - arena = external_arena; - owns_arena = false; - } else { - arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - - if (!arena) { - free(string); - return nullptr; - } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - free(string); - return nullptr; - } - owns_arena = true; + arena_context_T context; + if (!setup_arena_context(env, external_arena, &context)) { + free(string); + return nullptr; } - AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, context.arena); if (!root) { - if (owns_arena) { - 
hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); free(string); return nullptr; } - root->owns_arena = owns_arena; + root->owns_arena = context.owns_arena; napi_value result = CreateParseResult(env, root, args[0]); @@ -348,54 +252,27 @@ napi_value Herb_parse_file(napi_env env, napi_callback_info info) { parser_options.strict = strict_value; } - bool has_arena_prop; - napi_has_named_property(env, args[1], "arena", &has_arena_prop); - - if (has_arena_prop) { - napi_value arena_prop; - napi_get_named_property(env, args[1], "arena", &arena_prop); - external_arena = get_arena_from_value(env, arena_prop); - } + external_arena = get_arena_option_from_object(env, args[1]); } } - hb_arena_T* arena; - bool owns_arena; - - if (external_arena) { - arena = external_arena; - owns_arena = false; - } else { - arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - - if (!arena) { - free(file_path); - free(string); - return nullptr; - } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - free(file_path); - free(string); - return nullptr; - } - owns_arena = true; + arena_context_T context; + if (!setup_arena_context(env, external_arena, &context)) { + free(file_path); + free(string); + return nullptr; } - AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, arena); + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, context.arena); if (!root) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); free(file_path); free(string); return nullptr; } - root->owns_arena = owns_arena; + root->owns_arena = context.owns_arena; napi_value result = CreateParseResult(env, root, source_value); diff --git a/wasm/arena.cpp b/wasm/arena.cpp index 9295866ed..e27f85283 100644 --- a/wasm/arena.cpp +++ b/wasm/arena.cpp @@ -68,3 +68,40 @@ hb_arena_T* get_arena_by_id(int arena_id) { } return nullptr; } + +hb_arena_T* get_arena_option_from_object(emscripten::val options) { + if (options.isUndefined() || 
options.isNull()) return nullptr; + if (options.typeOf().as() != "object") return nullptr; + if (!options.hasOwnProperty("arenaId")) return nullptr; + + int arena_id = options["arenaId"].as(); + return get_arena_by_id(arena_id); +} + +bool setup_arena_context(hb_arena_T* external_arena, arena_context_T* context) { + if (external_arena) { + context->arena = external_arena; + context->owns_arena = false; + return true; + } + + context->arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + if (!context->arena) { return false; } + + if (!hb_arena_init(context->arena, KB(512))) { + free(context->arena); + context->arena = nullptr; + return false; + } + + context->owns_arena = true; + return true; +} + +void cleanup_arena_context(arena_context_T* context) { + if (context->owns_arena && context->arena) { + hb_arena_free(context->arena); + free(context->arena); + context->arena = nullptr; + } +} diff --git a/wasm/arena.h b/wasm/arena.h index b28f92101..e1711327a 100644 --- a/wasm/arena.h +++ b/wasm/arena.h @@ -1,6 +1,8 @@ #ifndef HERB_WASM_ARENA_H #define HERB_WASM_ARENA_H +#include + extern "C" { #include "../src/include/util/hb_arena.h" } @@ -13,4 +15,13 @@ int Herb_arenaCapacity(int arena_id); hb_arena_T* get_arena_by_id(int arena_id); +typedef struct { + hb_arena_T* arena; + bool owns_arena; +} arena_context_T; + +hb_arena_T* get_arena_option_from_object(emscripten::val options); +bool setup_arena_context(hb_arena_T* external_arena, arena_context_T* context); +void cleanup_arena_context(arena_context_T* context); + #endif diff --git a/wasm/herb-wasm.cpp b/wasm/herb-wasm.cpp index 50e0a1ecd..70b13543f 100644 --- a/wasm/herb-wasm.cpp +++ b/wasm/herb-wasm.cpp @@ -27,42 +27,17 @@ extern "C" { using namespace emscripten; val Herb_lex(const std::string& source, val options) { - hb_arena_T* external_arena = nullptr; + hb_arena_T* external_arena = get_arena_option_from_object(options); - if (!options.isUndefined() && !options.isNull() && options.typeOf().as() == "object") { - if 
(options.hasOwnProperty("arenaId")) { - int arena_id = options["arenaId"].as(); - external_arena = get_arena_by_id(arena_id); - } - } - - hb_arena_T* arena; - bool owns_arena; - - if (external_arena) { - arena = external_arena; - owns_arena = false; - } else { - arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - - if (!arena) { - return val::null(); - } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return val::null(); - } - owns_arena = true; + arena_context_T context; + if (!setup_arena_context(external_arena, &context)) { + return val::null(); } - herb_lex_result_T* lex_result = herb_lex(source.c_str(), arena); + herb_lex_result_T* lex_result = herb_lex(source.c_str(), context.arena); if (!lex_result) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); return val::null(); } @@ -75,7 +50,6 @@ val Herb_lex(const std::string& source, val options) { val Herb_parse(const std::string& source, val options) { parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; - hb_arena_T* external_arena = nullptr; if (!options.isUndefined() && !options.isNull() && options.typeOf().as() == "object") { if (options.hasOwnProperty("track_whitespace")) { @@ -95,44 +69,23 @@ val Herb_parse(const std::string& source, val options) { if (options.hasOwnProperty("strict")) { parser_options.strict = options["strict"].as(); } - - if (options.hasOwnProperty("arenaId")) { - int arena_id = options["arenaId"].as(); - external_arena = get_arena_by_id(arena_id); - } } - hb_arena_T* arena; - bool owns_arena; + hb_arena_T* external_arena = get_arena_option_from_object(options); - if (external_arena) { - arena = external_arena; - owns_arena = false; - } else { - arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); - - if (!arena) { - return val::null(); - } - - if (!hb_arena_init(arena, KB(512))) { - free(arena); - return val::null(); - } - owns_arena = true; + arena_context_T context; + if (!setup_arena_context(external_arena, &context)) 
{ + return val::null(); } - AST_DOCUMENT_NODE_T* root = herb_parse(source.c_str(), &parser_options, arena); + AST_DOCUMENT_NODE_T* root = herb_parse(source.c_str(), &parser_options, context.arena); if (!root) { - if (owns_arena) { - hb_arena_free(arena); - free(arena); - } + cleanup_arena_context(&context); return val::null(); } - root->owns_arena = owns_arena; + root->owns_arena = context.owns_arena; val result = CreateParseResult(root, source); From 309bd4a3d687c7b1521e7a2209e2cce074fc56d9 Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 04:45:29 +0100 Subject: [PATCH 17/18] RuboCop --- test/arena_test.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/arena_test.rb b/test/arena_test.rb index e78ddddd5..f0d0789fa 100644 --- a/test/arena_test.rb +++ b/test/arena_test.rb @@ -10,7 +10,7 @@ class ArenaTest < Minitest::Spec test "creating an arena with default size" do arena = Herb::Arena.new assert_instance_of Herb::Arena, arena - assert arena.capacity > 0 + assert arena.capacity.positive? end test "creating an arena with custom size" do @@ -51,7 +51,7 @@ class ArenaTest < Minitest::Spec arena = Herb::Arena.new Herb.parse("
hello
", arena: arena) - assert arena.position > 0 + assert arena.position.positive? arena.reset assert_equal 0, arena.position @@ -85,8 +85,8 @@ class ArenaTest < Minitest::Spec Herb.parse("second", arena: arena2) position2 = arena2.position - assert position1 > 0 - assert position2 > 0 + assert position1.positive? + assert position2.positive? assert_equal position1, arena1.position end @@ -105,7 +105,7 @@ class ArenaTest < Minitest::Spec assert result.value end - assert arena.position > 0 + assert arena.position.positive? end test "arena reset allows reuse for batch processing" do From 647ab6ed892b15d30cb55d2f328b60109914eebb Mon Sep 17 00:00:00 2001 From: Marco Roth Date: Wed, 18 Feb 2026 04:47:27 +0100 Subject: [PATCH 18/18] Lint --- ext/herb/arena.c | 28 ++++++++-------------------- ext/herb/arena.h | 2 +- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/ext/herb/arena.c b/ext/herb/arena.c index c340a01f0..e7ace0fc7 100644 --- a/ext/herb/arena.c +++ b/ext/herb/arena.c @@ -63,9 +63,7 @@ VALUE Arena_initialize(int argc, VALUE* argv, VALUE self) { TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); wrapper->arena = malloc(sizeof(hb_arena_T)); - if (!wrapper->arena) { - rb_raise(rb_eNoMemError, "Failed to allocate arena"); - } + if (!wrapper->arena) { rb_raise(rb_eNoMemError, "Failed to allocate arena"); } if (!hb_arena_init(wrapper->arena, initial_size)) { free(wrapper->arena); @@ -81,9 +79,7 @@ VALUE Arena_reset(VALUE self) { herb_arena_wrapper_T* wrapper; TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); - if (!wrapper->arena || !wrapper->initialized) { - rb_raise(rb_eRuntimeError, "Arena not initialized"); - } + if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); } hb_arena_reset(wrapper->arena); return self; @@ -93,9 +89,7 @@ VALUE Arena_position(VALUE self) { herb_arena_wrapper_T* wrapper; TypedData_Get_Struct(self, herb_arena_wrapper_T, 
&herb_arena_type, wrapper); - if (!wrapper->arena || !wrapper->initialized) { - rb_raise(rb_eRuntimeError, "Arena not initialized"); - } + if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); } return SIZET2NUM(hb_arena_position(wrapper->arena)); } @@ -104,9 +98,7 @@ VALUE Arena_capacity(VALUE self) { herb_arena_wrapper_T* wrapper; TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); - if (!wrapper->arena || !wrapper->initialized) { - rb_raise(rb_eRuntimeError, "Arena not initialized"); - } + if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); } return SIZET2NUM(hb_arena_capacity(wrapper->arena)); } @@ -115,29 +107,25 @@ VALUE Arena_stats(VALUE self) { herb_arena_wrapper_T* wrapper; TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper); - if (!wrapper->arena || !wrapper->initialized) { - rb_raise(rb_eRuntimeError, "Arena not initialized"); - } + if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); } hb_arena_print_stats(wrapper->arena); return Qnil; } hb_arena_T* get_arena_from_value(VALUE arena_obj) { - if (NIL_P(arena_obj)) return NULL; + if (NIL_P(arena_obj)) { return NULL; } herb_arena_wrapper_T* wrapper; TypedData_Get_Struct(arena_obj, herb_arena_wrapper_T, &herb_arena_type, wrapper); - if (!wrapper->arena || !wrapper->initialized) { - rb_raise(rb_eRuntimeError, "Arena not initialized"); - } + if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); } return wrapper->arena; } VALUE get_arena_option_from_hash(VALUE options) { - if (NIL_P(options)) return Qnil; + if (NIL_P(options)) { return Qnil; } VALUE arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena")); if (NIL_P(arena)) { arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); } diff --git a/ext/herb/arena.h b/ext/herb/arena.h index 92d4c8dab..3964318cd 100644 
--- a/ext/herb/arena.h +++ b/ext/herb/arena.h @@ -1,9 +1,9 @@ #ifndef HERB_EXT_ARENA_H #define HERB_EXT_ARENA_H +#include "../../src/include/util/hb_arena.h" #include #include -#include "../../src/include/util/hb_arena.h" extern VALUE cArena; extern const rb_data_type_t herb_arena_type;