diff --git a/ext/herb/arena.c b/ext/herb/arena.c
new file mode 100644
index 000000000..e7ace0fc7
--- /dev/null
+++ b/ext/herb/arena.c
@@ -0,0 +1,198 @@
+#include <ruby.h>
+
+#include "../../src/include/macros.h"
+#include "../../src/include/util/hb_arena.h"
+#include "../../src/include/util/hb_arena_debug.h"
+
+#include "arena.h"
+
+VALUE cArena;
+
+// Native payload of a Herb::Arena object. initialized is only set after
+// hb_arena_init succeeds, so a half-constructed arena is never used or freed.
+typedef struct {
+  hb_arena_T* arena;
+  bool initialized;
+} herb_arena_wrapper_T;
+
+// dfree callback: releases the arena (when initialized) and the wrapper itself.
+static void herb_arena_free(void* data) {
+  herb_arena_wrapper_T* wrapper = (herb_arena_wrapper_T*) data;
+  if (wrapper->arena && wrapper->initialized) {
+    hb_arena_free(wrapper->arena);
+    free(wrapper->arena);
+  }
+  free(wrapper);
+}
+
+// dsize callback: wrapper size plus the arena capacity when initialized.
+static size_t herb_arena_memsize(const void* data) {
+  const herb_arena_wrapper_T* wrapper = (const herb_arena_wrapper_T*) data;
+  if (wrapper->arena && wrapper->initialized) {
+    return sizeof(herb_arena_wrapper_T) + hb_arena_capacity(wrapper->arena);
+  }
+  return sizeof(herb_arena_wrapper_T);
+}
+
+const rb_data_type_t herb_arena_type = {
+  .wrap_struct_name = "Herb::Arena",
+  .function = {
+    .dmark = NULL,
+    .dfree = herb_arena_free,
+    .dsize = herb_arena_memsize,
+  },
+  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
+};
+
+// Allocation function for Herb::Arena: wraps an empty, uninitialized wrapper.
+VALUE Arena_allocate(VALUE klass) {
+  herb_arena_wrapper_T* wrapper = malloc(sizeof(herb_arena_wrapper_T));
+  if (!wrapper) { rb_raise(rb_eNoMemError, "Failed to allocate arena wrapper"); }
+
+  wrapper->arena = NULL;
+  wrapper->initialized = false;
+  return TypedData_Wrap_Struct(klass, &herb_arena_type, wrapper);
+}
+
+// Herb::Arena#initialize(size: nil): allocates the arena, 512 KB by default.
+// Raises when called again on an initialized object, because replacing the
+// arena would leak the previous allocation.
+VALUE Arena_initialize(int argc, VALUE* argv, VALUE self) {
+  VALUE options;
+  rb_scan_args(argc, argv, "0:", &options);
+
+  size_t initial_size = KB(512);
+
+  if (!NIL_P(options)) {
+    VALUE size_val = rb_hash_lookup(options, rb_utf8_str_new_cstr("size"));
+    if (NIL_P(size_val)) { size_val = rb_hash_lookup(options, ID2SYM(rb_intern("size"))); }
+    if (!NIL_P(size_val)) { initial_size = NUM2SIZET(size_val); }
+  }
+
+  herb_arena_wrapper_T* wrapper;
+  TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper);
+
+  if (wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena already initialized"); }
+
+  wrapper->arena = malloc(sizeof(hb_arena_T));
+  if (!wrapper->arena) { rb_raise(rb_eNoMemError, "Failed to allocate arena"); }
+
+  if (!hb_arena_init(wrapper->arena, initial_size)) {
+    free(wrapper->arena);
+    wrapper->arena = NULL;
+    rb_raise(rb_eRuntimeError, "Failed to initialize arena");
+  }
+
+  wrapper->initialized = true;
+  return self;
+}
+
+// Herb::Arena#reset: calls hb_arena_reset and returns self.
+VALUE Arena_reset(VALUE self) {
+  herb_arena_wrapper_T* wrapper;
+  TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper);
+
+  if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); }
+
+  hb_arena_reset(wrapper->arena);
+  return self;
+}
+
+// Herb::Arena#position: hb_arena_position converted to an Integer.
+VALUE Arena_position(VALUE self) {
+  herb_arena_wrapper_T* wrapper;
+  TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper);
+
+  if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); }
+
+  return SIZET2NUM(hb_arena_position(wrapper->arena));
+}
+
+// Herb::Arena#capacity: hb_arena_capacity converted to an Integer.
+VALUE Arena_capacity(VALUE self) {
+  herb_arena_wrapper_T* wrapper;
+  TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper);
+
+  if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); }
+
+  return SIZET2NUM(hb_arena_capacity(wrapper->arena));
+}
+
+// Herb::Arena#stats: calls hb_arena_print_stats and returns nil.
+VALUE Arena_stats(VALUE self) {
+  herb_arena_wrapper_T* wrapper;
+  TypedData_Get_Struct(self, herb_arena_wrapper_T, &herb_arena_type, wrapper);
+
+  if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); }
+
+  hb_arena_print_stats(wrapper->arena);
+  return Qnil;
+}
+
+// Unwraps a Herb::Arena into its hb_arena_T. Returns NULL for nil and raises
+// for an uninitialized arena.
+hb_arena_T* get_arena_from_value(VALUE arena_obj) {
+  if (NIL_P(arena_obj)) { return NULL; }
+
+  herb_arena_wrapper_T* wrapper;
+  TypedData_Get_Struct(arena_obj, herb_arena_wrapper_T, &herb_arena_type, wrapper);
+
+  if (!wrapper->arena || !wrapper->initialized) { rb_raise(rb_eRuntimeError, "Arena not initialized"); }
+
+  return wrapper->arena;
+}
+
+// Looks up the arena option under the "arena" string key, then the :arena symbol.
+VALUE get_arena_option_from_hash(VALUE options) {
+  if (NIL_P(options)) { return Qnil; }
+
+  VALUE arena = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena"));
+  if (NIL_P(arena)) { arena = rb_hash_lookup(options, ID2SYM(rb_intern("arena"))); }
+
+  return arena;
+}
+
+// Fills context with the caller-supplied arena (not owned) or a fresh 512 KB
+// arena (owned). Returns false when the fresh arena cannot be created.
+bool setup_arena_context(VALUE external_arena, arena_context_T* context) {
+  context->arena = NULL;
+  context->owns_arena = false;
+
+  if (!NIL_P(external_arena)) {
+    context->arena = get_arena_from_value(external_arena);
+    context->owns_arena = false;
+    return true;
+  }
+
+  context->arena = malloc(sizeof(hb_arena_T));
+  if (!context->arena) { return false; }
+
+  if (!hb_arena_init(context->arena, KB(512))) {
+    free(context->arena);
+    context->arena = NULL;
+    return false;
+  }
+
+  context->owns_arena = true;
+  return true;
+}
+
+// Releases the arena only when the context owns it.
+void cleanup_arena_context(arena_context_T* context) {
+  if (context->owns_arena && context->arena) {
+    hb_arena_free(context->arena);
+    free(context->arena);
+    context->arena = NULL;
+  }
+}
+
+// Defines Herb::Arena and its instance methods under mHerb.
+void Init_herb_arena(VALUE mHerb) {
+  cArena = rb_define_class_under(mHerb, "Arena", rb_cObject);
+  rb_define_alloc_func(cArena, Arena_allocate);
+  rb_define_method(cArena, "initialize", Arena_initialize, -1);
+  rb_define_method(cArena, "reset", Arena_reset, 0);
+  rb_define_method(cArena, "position", Arena_position, 0);
+  rb_define_method(cArena, "capacity", Arena_capacity, 0);
+  rb_define_method(cArena, "stats", Arena_stats, 0);
+}
diff --git a/ext/herb/arena.h b/ext/herb/arena.h
new file mode 100644
index 000000000..3964318cd
--- /dev/null
+++ b/ext/herb/arena.h
@@ -0,0 +1,31 @@
+#ifndef HERB_EXT_ARENA_H
+#define HERB_EXT_ARENA_H
+
+#include "../../src/include/util/hb_arena.h"
+#include <ruby.h>
+#include <stdbool.h>
+
+extern VALUE cArena;
+extern const rb_data_type_t herb_arena_type;
+
+VALUE Arena_allocate(VALUE klass);
+VALUE Arena_initialize(int argc, VALUE* argv, VALUE self);
+VALUE Arena_reset(VALUE self);
+VALUE Arena_position(VALUE self);
+VALUE Arena_capacity(VALUE self);
+VALUE Arena_stats(VALUE self);
+
+hb_arena_T* get_arena_from_value(VALUE arena_obj);
+
+typedef struct {
+  hb_arena_T* arena;
+  bool owns_arena;
+} arena_context_T;
+
+VALUE get_arena_option_from_hash(VALUE options);
+bool setup_arena_context(VALUE external_arena, arena_context_T* context);
+void cleanup_arena_context(arena_context_T* context);
+
+void Init_herb_arena(VALUE mHerb);
+
+#endif
diff --git a/ext/herb/extconf.rb b/ext/herb/extconf.rb
index fce654bf6..fa4d68575 100644
--- a/ext/herb/extconf.rb
+++ b/ext/herb/extconf.rb
@@ -52,6 +52,7 @@
 ]

 core_src_files = [
+  "arena.c",
   "extension.c",
   "nodes.c",
   "error_helpers.c",
@@ -66,6 +67,7 @@

 abort("could not find herb.h") unless find_header("herb.h")
 abort("could not find nodes.h (run `ruby templates/template.rb` to generate the file)") unless find_header("nodes.h")
+abort("could not find arena.h") unless find_header("arena.h")
 abort("could not find extension.h") unless find_header("extension.h")
 abort("could not find extension_helpers.h") unless find_header("extension_helpers.h")

diff --git a/ext/herb/extension.c b/ext/herb/extension.c
index d04974370..fb7790c28 100644
--- a/ext/herb/extension.c
+++ b/ext/herb/extension.c
@@ -1,10 +1,15 @@
 #include <ruby.h>

+#include "arena.h"
 #include "error_helpers.h"
 #include "extension.h"
 #include "extension_helpers.h"
 #include "nodes.h"

+#include "../../src/include/macros.h"
+#include "../../src/include/util/hb_arena.h"
+#include "../../src/include/util/hb_arena_debug.h"
+
 VALUE mHerb;
 VALUE cPosition;
 VALUE cLocation;
@@ -14,26 +19,67 @@ VALUE cResult;
 VALUE cLexResult;
 VALUE cParseResult;

-static VALUE Herb_lex(VALUE self, VALUE source) {
+static VALUE Herb_lex(int argc, VALUE* argv, VALUE self) {
+  VALUE source, options;
+  rb_scan_args(argc, argv, "1:", &source, &options);
+
   char* string = (char*) check_string(source);
+  bool print_arena_stats = false;
+
+  if (!NIL_P(options)) {
+    VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats"));
+    if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); }
+    if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; }
+  }
+
+  arena_context_T context;
+  if (!setup_arena_context(get_arena_option_from_hash(options), &context)) { return Qnil; }
+
+  herb_lex_result_T* lex_result = herb_lex(string, context.arena);

-  hb_array_T* tokens = herb_lex(string);
+  if (!lex_result) {
+    cleanup_arena_context(&context);
+    return Qnil;
+  }
+
+  VALUE result = create_lex_result(lex_result->tokens, source);

-  VALUE result = create_lex_result(tokens, source);
+  if (print_arena_stats) { hb_arena_print_stats(context.arena); }

-  herb_free_tokens(&tokens);
+  herb_free_lex_result(&lex_result);

   return result;
 }

-static VALUE Herb_lex_file(VALUE self, VALUE path) {
+static VALUE Herb_lex_file(int argc, VALUE* argv, VALUE self) {
+  VALUE path, options;
+  rb_scan_args(argc, argv, "1:", &path, &options);
+
   char* file_path = (char*) check_string(path);
-  hb_array_T* tokens = herb_lex_file(file_path);
+  bool print_arena_stats = false;
+
+  if (!NIL_P(options)) {
+    VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats"));
+    if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); }
+    if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; }
+  }
+
+  arena_context_T context;
+  if (!setup_arena_context(get_arena_option_from_hash(options), &context)) { return Qnil; }
+
+  herb_lex_result_T* lex_result = herb_lex_file(file_path, context.arena);
+
+  if (!lex_result) {
+    cleanup_arena_context(&context);
+    return Qnil;
+  }

   VALUE source_value = read_file_to_ruby_string(file_path);
-  VALUE result = create_lex_result(tokens, source_value);
+  VALUE result = create_lex_result(lex_result->tokens, source_value);

-  herb_free_tokens(&tokens);
+  if (print_arena_stats) { hb_arena_print_stats(context.arena); }
+
+  herb_free_lex_result(&lex_result);

   return result;
 }
@@ -45,6 +91,7 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) {
   char* string = (char*) check_string(source);

   parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS;
+  bool print_arena_stats = false;

   if (!NIL_P(options)) {
     VALUE track_whitespace = rb_hash_lookup(options, rb_utf8_str_new_cstr("track_whitespace"));
@@ -58,12 +105,28 @@ static VALUE Herb_parse(int argc, VALUE* argv, VALUE self) {
     VALUE strict = rb_hash_lookup(options, rb_utf8_str_new_cstr("strict"));
     if (NIL_P(strict)) { strict = rb_hash_lookup(options, ID2SYM(rb_intern("strict"))); }
     if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); }
+
+    VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats"));
+    if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); }
+    if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; }
+  }
+
+  arena_context_T context;
+  if (!setup_arena_context(get_arena_option_from_hash(options), &context)) { return Qnil; }
+
+  AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, context.arena);
+
+  if (!root) {
+    cleanup_arena_context(&context);
+    return Qnil;
   }

-  AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options);
+  root->owns_arena = context.owns_arena;

   VALUE result = create_parse_result(root, source);

+  if (print_arena_stats) { hb_arena_print_stats(context.arena); }
+
   ast_node_free((AST_NODE_T*) root);

   return result;
@@ -79,6 +142,7 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) {
   char* string = (char*) check_string(source_value);

   parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS;
+  bool print_arena_stats = false;

   if (!NIL_P(options)) {
     VALUE track_whitespace = rb_hash_lookup(options, rb_utf8_str_new_cstr("track_whitespace"));
@@ -92,12 +156,28 @@ static VALUE Herb_parse_file(int argc, VALUE* argv, VALUE self) {
     VALUE strict = rb_hash_lookup(options, rb_utf8_str_new_cstr("strict"));
     if (NIL_P(strict)) { strict = rb_hash_lookup(options, ID2SYM(rb_intern("strict"))); }
     if (!NIL_P(strict)) { parser_options.strict = RTEST(strict); }
+
+    VALUE arena_stats = rb_hash_lookup(options, rb_utf8_str_new_cstr("arena_stats"));
+    if (NIL_P(arena_stats)) { arena_stats = rb_hash_lookup(options, ID2SYM(rb_intern("arena_stats"))); }
+    if (!NIL_P(arena_stats) && RTEST(arena_stats)) { print_arena_stats = true; }
+  }
+
+  arena_context_T context;
+  if (!setup_arena_context(get_arena_option_from_hash(options), &context)) { return Qnil; }
+
+  AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, context.arena);
+
+  if (!root) {
+    cleanup_arena_context(&context);
+    return Qnil;
   }

-  AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options);
+  root->owns_arena = context.owns_arena;

   VALUE result = create_parse_result(root, source_value);

+  if (print_arena_stats) { hb_arena_print_stats(context.arena); }
+
   ast_node_free((AST_NODE_T*) root);

   return result;
@@ -171,10 +251,12 @@ __attribute__((__visibility__("default"))) void Init_herb(void) {
   cLexResult = rb_define_class_under(mHerb, "LexResult", cResult);
   cParseResult = rb_define_class_under(mHerb, "ParseResult", cResult);

+  Init_herb_arena(mHerb);
+
   rb_define_singleton_method(mHerb, "parse", Herb_parse, -1);
-  rb_define_singleton_method(mHerb, "lex", Herb_lex, 1);
+  rb_define_singleton_method(mHerb, "lex", Herb_lex, -1);
   rb_define_singleton_method(mHerb, "parse_file", Herb_parse_file, -1);
-  rb_define_singleton_method(mHerb, "lex_file", Herb_lex_file, 1);
+  rb_define_singleton_method(mHerb, "lex_file", Herb_lex_file, -1);
   rb_define_singleton_method(mHerb, "extract_ruby", Herb_extract_ruby, -1);
   rb_define_singleton_method(mHerb, "extract_html", Herb_extract_html, 1);
   rb_define_singleton_method(mHerb, "version", Herb_version, 0);
diff --git a/java/herb_jni.c b/java/herb_jni.c
index f6acb6dfe..ba7d68d0e 100644
--- a/java/herb_jni.c
+++ b/java/herb_jni.c
@@ -3,6 +3,8 @@

 #include "../../src/include/extract.h"
 #include "../../src/include/herb.h"
+#include "../../src/include/macros.h"
+#include "../../src/include/util/hb_arena.h"
 #include "../../src/include/util/hb_buffer.h"

 #include
@@ -61,7 +63,28 @@ Java_org_herb_Herb_parse(JNIEnv* env, jclass clazz, jstring source, jobject opti
     }
   }

-  AST_DOCUMENT_NODE_T* ast = herb_parse(src, &parser_options);
+  hb_arena_T* arena = malloc(sizeof(hb_arena_T));
+
+  if (!arena) {
+    (*env)->ReleaseStringUTFChars(env, source, src);
+    return NULL;
+  }
+
+  if (!hb_arena_init(arena, KB(512))) {
+    free(arena);
+    (*env)->ReleaseStringUTFChars(env, source, src);
+
+    return NULL;
+  }
+
+  AST_DOCUMENT_NODE_T* ast = herb_parse(src, &parser_options, arena);
+
+  if (!ast) {
+    hb_arena_free(arena);
+    free(arena);
+    (*env)->ReleaseStringUTFChars(env, source, src);
+    return NULL;
+  }

   jobject result = CreateParseResult(env, ast, source);

@@ -75,11 +98,30 @@ JNIEXPORT jobject JNICALL
 Java_org_herb_Herb_lex(JNIEnv* env, jclass clazz, jstring source) {
   const char* src = (*env)->GetStringUTFChars(env, source, 0);

-  hb_array_T* tokens = herb_lex(src);
+  hb_arena_T* arena = malloc(sizeof(hb_arena_T));
+  if (!arena) {
+    (*env)->ReleaseStringUTFChars(env, source, src);
+    return NULL;
+  }
+
+  if (!hb_arena_init(arena, KB(512))) {
+    free(arena);
+    (*env)->ReleaseStringUTFChars(env, source, src);
+    return NULL;
+  }
+
+  herb_lex_result_T* lex_result = herb_lex(src, arena);
+
+  if (!lex_result) {
+    hb_arena_free(arena);
+    free(arena);
+    (*env)->ReleaseStringUTFChars(env, source, src);
+    return NULL;
+  }

-  jobject result = CreateLexResult(env, tokens, source);
+  jobject result = CreateLexResult(env, lex_result->tokens, source);

-  herb_free_tokens(&tokens);
+  herb_free_lex_result(&lex_result);
   (*env)->ReleaseStringUTFChars(env, source, src);

   return result;
diff --git a/javascript/packages/browser/src/wasm-backend.ts b/javascript/packages/browser/src/wasm-backend.ts
index 3c0b53af0..61d3c9173 100644
--- a/javascript/packages/browser/src/wasm-backend.ts
+++ b/javascript/packages/browser/src/wasm-backend.ts
@@ -1,6 +1,37 @@
 import { name, version } from "../package.json"

-import { HerbBackend } from "@herb-tools/core"
+import { HerbBackend, Arena } from "@herb-tools/core"
+import type { ArenaBackend, CreateArenaOptions } from "@herb-tools/core"
+
+class WASMArenaBackend implements ArenaBackend {
+  private backend: any
+  private arenaId: number
+
+  constructor(backend: any, arenaId: number) {
+    this.backend = backend
+    this.arenaId = arenaId
+  }
+
+  get position(): number {
+    return this.backend.arenaPosition(this.arenaId)
+  }
+
+  get capacity(): number {
+    return this.backend.arenaCapacity(this.arenaId)
+  }
+
+  reset(): void {
+    this.backend.resetArena(this.arenaId)
+  }
+
+  free(): void {
+    this.backend.freeArena(this.arenaId)
+  }
+
+  toBackendOption(): { arenaId: number } {
+    return { arenaId: this.arenaId }
+  }
+}

 export class HerbBackendWASM extends HerbBackend {
   lexFile(): never {
@@ -14,4 +45,10 @@ export class HerbBackendWASM extends HerbBackend {
   backendVersion(): string {
     return `${name}@${version}`
   }
+
+  createArena(options?: CreateArenaOptions): Arena {
+    this.ensureBackend()
+    const arenaId = (this.backend as any).createArena(options?.size ?? 0)
+    return new Arena(new WASMArenaBackend(this.backend, arenaId))
+  }
 }
diff --git a/javascript/packages/browser/test/browser.test.ts b/javascript/packages/browser/test/browser.test.ts
index c4edd044d..8bfe2f25e 100644
--- a/javascript/packages/browser/test/browser.test.ts
+++ b/javascript/packages/browser/test/browser.test.ts
@@ -10,6 +10,164 @@ describe("@herb-tools/browser", () => {
     expect(Herb).toBeDefined()
   })

+  describe("Arena", () => {
+    test("createArena returns an Arena", () => {
+      const arena = Herb.createArena()
+      expect(arena).toBeDefined()
+      expect(arena.capacity).toBeGreaterThan(0)
+      arena.free()
+    })
+
+    test("creating an arena with custom size", () => {
+      const arena = Herb.createArena({ size: 1024 * 1024 })
+      expect(arena).toBeDefined()
+      expect(arena.capacity).toBeGreaterThanOrEqual(1024 * 1024)
+      arena.free()
+    })
+
+    test("arena position starts at zero", () => {
+      const arena = Herb.createArena()
+      expect(arena.position).toBe(0)
+      arena.free()
+    })
+
+    test("arena position increases after parsing", () => {
+      const arena = Herb.createArena()
+      const initialPosition = arena.position
+
+      Herb.parse("<div>hello</div>", { arena })
+
+      expect(arena.position).toBeGreaterThan(initialPosition)
+      arena.free()
+    })
+
+    test("arena can be reused for multiple parse calls", () => {
+      const arena = Herb.createArena()
+
+      const result1 = Herb.parse("<div>first</div>", { arena })
+      const positionAfterFirst = arena.position
+
+      const result2 = Herb.parse("second", { arena })
+      const positionAfterSecond = arena.position
+
+      expect(result1).toBeDefined()
+      expect(result2).toBeDefined()
+      expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst)
+      arena.free()
+    })
+
+    test("arena reset returns position to zero", () => {
+      const arena = Herb.createArena()
+
+      Herb.parse("<div>hello</div>", { arena })
+      expect(arena.position).toBeGreaterThan(0)
+
+      arena.reset()
+      expect(arena.position).toBe(0)
+      arena.free()
+    })
+
+    test("arena can be reused after reset", () => {
+      const arena = Herb.createArena()
+
+      const result1 = Herb.parse("<div>first</div>", { arena })
+      arena.reset()
+
+      const result2 = Herb.parse("second", { arena })
+
+      expect(result1).toBeDefined()
+      expect(result2).toBeDefined()
+      arena.free()
+    })
+
+    test("multiple arenas can be used independently", () => {
+      const arena1 = Herb.createArena()
+      const arena2 = Herb.createArena()
+
+      Herb.parse("<div>first</div>", { arena: arena1 })
+      const position1 = arena1.position
+
+      Herb.parse("second", { arena: arena2 })
+      const position2 = arena2.position
+
+      expect(position1).toBeGreaterThan(0)
+      expect(position2).toBeGreaterThan(0)
+      expect(arena1.position).toBe(position1)
+
+      arena1.free()
+      arena2.free()
+    })
+
+    test("parsing many templates with shared arena", () => {
+      const arena = Herb.createArena()
+
+      for (let i = 0; i < 100; i++) {
+        const result = Herb.parse(`<div>template ${i}</div>`, { arena })
+        expect(result).toBeDefined()
+      }
+
+      expect(arena.position).toBeGreaterThan(0)
+      arena.free()
+    })
+
+    test("arena reset allows reuse for batch processing", () => {
+      const arena = Herb.createArena()
+
+      for (let batch = 0; batch < 3; batch++) {
+        for (let i = 0; i < 10; i++) {
+          const result = Herb.parse(`<div>batch ${batch} item ${i}</div>`, { arena })
+          expect(result).toBeDefined()
+        }
+        arena.reset()
+        expect(arena.position).toBe(0)
+      }
+
+      arena.free()
+    })
+
+    test("arena free releases resources", () => {
+      const arena = Herb.createArena()
+      Herb.parse("<div>hello</div>", { arena })
+      arena.free()
+    })
+
+    test("arena works with lex", () => {
+      const arena = Herb.createArena()
+
+      const result = Herb.lex("<div>hello</div>", { arena })
+
+      expect(result).toBeDefined()
+      expect(result.value.tokens.length).toBeGreaterThan(0)
+      arena.free()
+    })
+
+    test("arena can be reused for multiple lex calls", () => {
+      const arena = Herb.createArena()
+
+      const result1 = Herb.lex("<div>first</div>", { arena })
+      const result2 = Herb.lex("second", { arena })
+
+      expect(result1).toBeDefined()
+      expect(result2).toBeDefined()
+      expect(result1.value.tokens.length).toBeGreaterThan(0)
+      expect(result2.value.tokens.length).toBeGreaterThan(0)
+      arena.free()
+    })
+
+    test("arena can be used for both parse and lex", () => {
+      const arena = Herb.createArena()
+
+      const parseResult = Herb.parse("<div>parsed</div>", { arena })
+      const lexResult = Herb.lex("lexed", { arena })
+
+      expect(parseResult).toBeDefined()
+      expect(lexResult).toBeDefined()
+      expect(parseResult.value).toBeDefined()
+      expect(lexResult.value.tokens.length).toBeGreaterThan(0)
+      arena.free()
+    })
+  })
+
   test("Herb export is of instance HerbBackend", () => {
     expect(Herb instanceof HerbBackend).toBeTruthy()
   })
diff --git a/javascript/packages/core/src/arena.ts b/javascript/packages/core/src/arena.ts
new file mode 100644
index 000000000..6562912bd
--- /dev/null
+++ b/javascript/packages/core/src/arena.ts
@@ -0,0 +1,36 @@
+export interface ArenaBackend {
+  position: number
+  capacity: number
+  reset(): void
+  free(): void
+  toBackendOption(): unknown
+}
+
+export class Arena {
+  private backend: ArenaBackend
+
+  constructor(backend: ArenaBackend) {
+    this.backend = backend
+  }
+
+  get position(): number {
+    return this.backend.position
+  }
+
+  get capacity(): number {
+    return this.backend.capacity
+  }
+
+  reset(): this {
+    this.backend.reset()
+    return this
+  }
+
+  free(): void {
+    this.backend.free()
+  }
+
+  toBackendOption(): unknown {
+    return this.backend.toBackendOption()
+  }
+}
diff --git a/javascript/packages/core/src/backend.ts b/javascript/packages/core/src/backend.ts
index a00ca103d..a5b4701f4 100644
--- a/javascript/packages/core/src/backend.ts
+++ b/javascript/packages/core/src/backend.ts
@@ -1,14 +1,13 @@
 import type { SerializedParseResult } from "./parse-result.js"
 import type { SerializedLexResult } from "./lex-result.js"
-import type { ParserOptions } from "./parser-options.js"
 import type { ExtractRubyOptions } from "./extract-ruby-options.js"

 interface LibHerbBackendFunctions {
-  lex: (source: string) => SerializedLexResult
-  lexFile: (path: string) => SerializedLexResult
+  lex: (source: string, options?: Record<string, unknown>) => SerializedLexResult
+  lexFile: (path: string, options?: Record<string, unknown>) => SerializedLexResult

-  parse: (source: string, options?: ParserOptions) => SerializedParseResult
-  parseFile: (path: string) => SerializedParseResult
+  parse: (source: string, options?: Record<string, unknown>) => SerializedParseResult
+  parseFile: (path: string, options?: Record<string, unknown>) => SerializedParseResult

   extractRuby: (source: string, options?: ExtractRubyOptions) => string
   extractHTML: (source: string) => string
diff --git a/javascript/packages/core/src/herb-backend.ts b/javascript/packages/core/src/herb-backend.ts
index a6a77c13d..2afb65bd9 100644
--- a/javascript/packages/core/src/herb-backend.ts
+++ b/javascript/packages/core/src/herb-backend.ts
@@ -6,10 +6,16 @@ import { ParseResult } from "./parse-result.js"
 import { DEFAULT_PARSER_OPTIONS } from "./parser-options.js"
 import { DEFAULT_EXTRACT_RUBY_OPTIONS } from "./extract-ruby-options.js"

+import type { Arena } from "./arena.js"
 import type { LibHerbBackend, BackendPromise } from "./backend.js"
 import type { ParserOptions } from "./parser-options.js"
+import type { LexOptions } from "./lex-options.js"
 import type { ExtractRubyOptions } from "./extract-ruby-options.js"

+export interface CreateArenaOptions {
+  size?: number
+}
+
 /**
  * The main Herb parser interface, providing methods to lex and parse input.
  */
@@ -44,25 +50,41 @@ export abstract class HerbBackend {
   /**
    * Lexes the given source string into a `LexResult`.
    * @param source - The source code to lex.
+   * @param options - Optional lexing options.
    * @returns A `LexResult` instance.
    * @throws Error if the backend is not loaded.
    */
-  lex(source: string): LexResult {
+  lex(source: string, options?: LexOptions): LexResult {
     this.ensureBackend()

-    return LexResult.from(this.backend.lex(ensureString(source)))
+    const { arena, ...restOptions } = options || {}
+    const mergedOptions: Record<string, unknown> = { ...restOptions }
+
+    if (arena) {
+      Object.assign(mergedOptions, arena.toBackendOption())
+    }
+
+    return LexResult.from(this.backend.lex(ensureString(source), mergedOptions))
   }

   /**
    * Lexes a file.
    * @param path - The file path to lex.
+   * @param options - Optional lexing options.
    * @returns A `LexResult` instance.
    * @throws Error if the backend is not loaded.
    */
-  lexFile(path: string): LexResult {
+  lexFile(path: string, options?: LexOptions): LexResult {
     this.ensureBackend()

-    return LexResult.from(this.backend.lexFile(ensureString(path)))
+    const { arena, ...restOptions } = options || {}
+    const mergedOptions: Record<string, unknown> = { ...restOptions }
+
+    if (arena) {
+      Object.assign(mergedOptions, arena.toBackendOption())
+    }
+
+    return LexResult.from(this.backend.lexFile(ensureString(path), mergedOptions))
   }

   /**
@@ -75,7 +97,12 @@ export abstract class HerbBackend {
   parse(source: string, options?: ParserOptions): ParseResult {
     this.ensureBackend()

-    const mergedOptions = { ...DEFAULT_PARSER_OPTIONS, ...options }
+    const { arena, ...restOptions } = options || {}
+    const mergedOptions: Record<string, unknown> = { ...DEFAULT_PARSER_OPTIONS, ...restOptions }
+
+    if (arena) {
+      Object.assign(mergedOptions, arena.toBackendOption())
+    }

     return ParseResult.from(this.backend.parse(ensureString(source), mergedOptions))
   }
@@ -83,13 +110,21 @@ export abstract class HerbBackend {
   /**
    * Parses a file.
    * @param path - The file path to parse.
+   * @param options - Optional parsing options.
    * @returns A `ParseResult` instance.
    * @throws Error if the backend is not loaded.
    */
-  parseFile(path: string): ParseResult {
+  parseFile(path: string, options?: ParserOptions): ParseResult {
     this.ensureBackend()

-    return ParseResult.from(this.backend.parseFile(ensureString(path)))
+    const { arena, ...restOptions } = options || {}
+    const mergedOptions: Record<string, unknown> = { ...DEFAULT_PARSER_OPTIONS, ...restOptions }
+
+    if (arena) {
+      Object.assign(mergedOptions, arena.toBackendOption())
+    }
+
+    return ParseResult.from(this.backend.parseFile(ensureString(path), mergedOptions))
   }

   /**
@@ -159,4 +194,12 @@ export abstract class HerbBackend {
    * @returns A string representing the backend version.
    */
   abstract backendVersion(): string
+
+  /**
+   * Creates a new Arena for memory allocation during parsing.
+   * @param options - Optional arena creation options.
+   * @returns An Arena instance.
+   * @throws Error if the backend is not loaded.
+   */
+  abstract createArena(options?: CreateArenaOptions): Arena
 }
diff --git a/javascript/packages/core/src/index.ts b/javascript/packages/core/src/index.ts
index b8c2d488f..188c678d9 100644
--- a/javascript/packages/core/src/index.ts
+++ b/javascript/packages/core/src/index.ts
@@ -1,3 +1,4 @@
+export * from "./arena.js"
 export * from "./ast-utils.js"
 export * from "./backend.js"
 export * from "./diagnostic.js"
@@ -6,6 +7,7 @@ export * from "./errors.js"
 export * from "./extract-ruby-options.js"
 export * from "./herb-backend.js"
 export * from "./levenshtein.js"
+export * from "./lex-options.js"
 export * from "./lex-result.js"
 export * from "./location.js"
 export * from "./node-type-guards.js"
diff --git a/javascript/packages/core/src/lex-options.ts b/javascript/packages/core/src/lex-options.ts
new file mode 100644
index 000000000..cf2ba211b
--- /dev/null
+++ b/javascript/packages/core/src/lex-options.ts
@@ -0,0 +1,5 @@
+import type { Arena } from "./arena.js"
+
+export interface LexOptions {
+  arena?: Arena
+}
diff --git a/javascript/packages/core/src/parser-options.ts b/javascript/packages/core/src/parser-options.ts
index 4da90ab1c..01e69b665 100644
--- a/javascript/packages/core/src/parser-options.ts
+++ b/javascript/packages/core/src/parser-options.ts
@@ -1,10 +1,13 @@
+import type { Arena } from "./arena.js"
+
 export interface ParserOptions {
   track_whitespace?: boolean
   analyze?: boolean
   strict?: boolean
+  arena?: Arena
 }

-export const DEFAULT_PARSER_OPTIONS: ParserOptions = {
+export const DEFAULT_PARSER_OPTIONS: Omit<ParserOptions, "arena"> = {
   track_whitespace: false,
   analyze: true,
   strict: true,
diff --git a/javascript/packages/node-wasm/src/wasm-backend.ts b/javascript/packages/node-wasm/src/wasm-backend.ts
index 36e414816..2104e85c6 100644
--- a/javascript/packages/node-wasm/src/wasm-backend.ts
+++ b/javascript/packages/node-wasm/src/wasm-backend.ts
@@ -1,9 +1,46 @@
 import { name, version } from "../package.json"

-import { HerbBackend } from "@herb-tools/core"
+import { HerbBackend, Arena } from "@herb-tools/core"
+import type { ArenaBackend, CreateArenaOptions } from "@herb-tools/core"
+
+class WASMArenaBackend implements ArenaBackend {
+  private backend: any
+  private arenaId: number
+
+  constructor(backend: any, arenaId: number) {
+    this.backend = backend
+    this.arenaId = arenaId
+  }
+
+  get position(): number {
+    return this.backend.arenaPosition(this.arenaId)
+  }
+
+  get capacity(): number {
+    return this.backend.arenaCapacity(this.arenaId)
+  }
+
+  reset(): void {
+    this.backend.resetArena(this.arenaId)
+  }
+
+  free(): void {
+    this.backend.freeArena(this.arenaId)
+  }
+
+  toBackendOption(): { arenaId: number } {
+    return { arenaId: this.arenaId }
+  }
+}

 export class HerbBackendNodeWASM extends HerbBackend {
   backendVersion(): string {
     return `${name}@${version}`
   }
+
+  createArena(options?: CreateArenaOptions): Arena {
+    this.ensureBackend()
+    const arenaId = (this.backend as any).createArena(options?.size ?? 0)
+    return new Arena(new WASMArenaBackend(this.backend, arenaId))
+  }
 }
diff --git a/javascript/packages/node-wasm/test/node-wasm.test.ts b/javascript/packages/node-wasm/test/node-wasm.test.ts
index 5732bf6b2..c4c827d98 100644
--- a/javascript/packages/node-wasm/test/node-wasm.test.ts
+++ b/javascript/packages/node-wasm/test/node-wasm.test.ts
@@ -13,6 +13,164 @@ describe("@herb-tools/node-wasm", () => {
     expect(Herb).toBeDefined()
   })

+  describe("Arena", () => {
+    test("createArena returns an Arena", () => {
+      const arena = Herb.createArena()
+      expect(arena).toBeDefined()
+      expect(arena.capacity).toBeGreaterThan(0)
+      arena.free()
+    })
+
+    test("creating an arena with custom size", () => {
+      const arena = Herb.createArena({ size: 1024 * 1024 })
+      expect(arena).toBeDefined()
+      expect(arena.capacity).toBeGreaterThanOrEqual(1024 * 1024)
+      arena.free()
+    })
+
+    test("arena position starts at zero", () => {
+      const arena = Herb.createArena()
+      expect(arena.position).toBe(0)
+      arena.free()
+    })
+
+    test("arena position increases after parsing", () => {
+      const arena = Herb.createArena()
+      const initialPosition = arena.position
+
+      Herb.parse("<div>hello</div>", { arena })
+
+      expect(arena.position).toBeGreaterThan(initialPosition)
+      arena.free()
+    })
+
+    test("arena can be reused for multiple parse calls", () => {
+      const arena = Herb.createArena()
+
+      const result1 = Herb.parse("<div>first</div>", { arena })
+      const positionAfterFirst = arena.position
+
+      const result2 = Herb.parse("second", { arena })
+      const positionAfterSecond = arena.position
+
+      expect(result1).toBeDefined()
+      expect(result2).toBeDefined()
+      expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst)
+      arena.free()
+    })
+
+    test("arena reset returns position to zero", () => {
+      const arena = Herb.createArena()
+
+      Herb.parse("<div>hello</div>", { arena })
+      expect(arena.position).toBeGreaterThan(0)
+
+      arena.reset()
+      expect(arena.position).toBe(0)
+      arena.free()
+    })
+
+    test("arena can be reused after reset", () => {
+      const arena = Herb.createArena()
+
+      const result1 = Herb.parse("<div>first</div>", { arena })
+      arena.reset()
+
+      const result2 = Herb.parse("second", { arena })
+
+      expect(result1).toBeDefined()
+      expect(result2).toBeDefined()
+      arena.free()
+    })
+
+    test("multiple arenas can be used independently", () => {
+      const arena1 = Herb.createArena()
+      const arena2 = Herb.createArena()
+
+      Herb.parse("<div>first</div>", { arena: arena1 })
+      const position1 = arena1.position
+
+      Herb.parse("second", { arena: arena2 })
+      const position2 = arena2.position
+
+      expect(position1).toBeGreaterThan(0)
+      expect(position2).toBeGreaterThan(0)
+      expect(arena1.position).toBe(position1)
+
+      arena1.free()
+      arena2.free()
+    })
+
+    test("parsing many templates with shared arena", () => {
+      const arena = Herb.createArena()
+
+      for (let i = 0; i < 100; i++) {
+        const result = Herb.parse(`<div>template ${i}</div>`, { arena })
+        expect(result).toBeDefined()
+      }
+
+      expect(arena.position).toBeGreaterThan(0)
+      arena.free()
+    })
+
+    test("arena reset allows reuse for batch processing", () => {
+      const arena = Herb.createArena()
+
+      for (let batch = 0; batch < 3; batch++) {
+        for (let i = 0; i < 10; i++) {
+          const result = Herb.parse(`<div>batch ${batch} item ${i}</div>`, { arena })
+          expect(result).toBeDefined()
+        }
+        arena.reset()
+        expect(arena.position).toBe(0)
+      }
+
+      arena.free()
+    })
+
+    test("arena free releases resources", () => {
+      const arena = Herb.createArena()
+      Herb.parse("<div>hello</div>", { arena })
+      arena.free()
+    })
+
+    test("arena works with lex", () => {
+      const arena = Herb.createArena()
+
+      const result = Herb.lex("<div>hello</div>", { arena })
+
+      expect(result).toBeDefined()
+      expect(result.value.tokens.length).toBeGreaterThan(0)
+      arena.free()
+    })
+
+    test("arena can be reused for multiple lex calls", () => {
+      const arena = Herb.createArena()
+
+      const result1 = Herb.lex("<div>first</div>", { arena })
+      const result2 = Herb.lex("second", { arena })
+
+      expect(result1).toBeDefined()
+      expect(result2).toBeDefined()
+      expect(result1.value.tokens.length).toBeGreaterThan(0)
+      expect(result2.value.tokens.length).toBeGreaterThan(0)
+      arena.free()
+    })
+
+    test("arena can be used for both parse and lex", () => {
+      const arena = Herb.createArena()
+
+      const parseResult = Herb.parse("<div>parsed</div>", { arena })
+      const lexResult = Herb.lex("lexed", { arena })
+
+      expect(parseResult).toBeDefined()
+      expect(lexResult).toBeDefined()
+      expect(parseResult.value).toBeDefined()
+      expect(lexResult.value.tokens.length).toBeGreaterThan(0)
+      arena.free()
+    })
+  })
+
   test("Herb export is of instance HerbBackend", () => {
     expect(Herb instanceof HerbBackend).toBeTruthy()
   })
diff --git a/javascript/packages/node/binding.gyp b/javascript/packages/node/binding.gyp
index 68ea9aa43..300a7fca2 100644
--- a/javascript/packages/node/binding.gyp
+++ b/javascript/packages/node/binding.gyp
@@ -4,6 +4,7 @@
       "target_name": "<(module_name)",
       "product_dir": "<(module_path)",
       "sources": [
+        "./extension/arena.cpp",
         "./extension/error_helpers.cpp",
         "./extension/extension_helpers.cpp",
         "./extension/herb.cpp",
diff --git a/javascript/packages/node/extension/arena.cpp b/javascript/packages/node/extension/arena.cpp
new file mode 100644
index 000000000..0ece9ae0f
--- /dev/null
+++ b/javascript/packages/node/extension/arena.cpp
@@ -0,0 +1,235 @@
+#include "arena.h"
+
+#include
+
+extern "C" {
+#include "../extension/libherb/include/macros.h"
+#include "../extension/libherb/include/util/hb_arena.h"
+}
+
+napi_ref arena_constructor_ref = nullptr;
+
+typedef struct {
+  hb_arena_T* arena;
+  bool initialized;
+} herb_arena_wrapper_T;
+
+static void Arena_destructor(napi_env env, void* finalize_data, void* finalize_hint) {
+  herb_arena_wrapper_T* wrapper = (herb_arena_wrapper_T*) finalize_data;
+  if (wrapper->arena && wrapper->initialized) {
+    hb_arena_free(wrapper->arena);
+    free(wrapper->arena);
+  }
+  free(wrapper);
+}
+
+napi_value Arena_constructor(napi_env env, napi_callback_info info) {
+  size_t argc = 1;
+  napi_value args[1];
+  napi_value this_val;
+  napi_get_cb_info(env, info, &argc, args, &this_val, nullptr);
+
+  size_t initial_size = KB(512);
+
+  if (argc >= 1) {
+    napi_valuetype valuetype;
+    napi_typeof(env, args[0], &valuetype);
+
+    if (valuetype == napi_object) {
+      bool has_size_prop;
+
napi_has_named_property(env, args[0], "size", &has_size_prop); + + if (has_size_prop) { + napi_value size_prop; + napi_get_named_property(env, args[0], "size", &size_prop); + uint32_t size_value; + napi_get_value_uint32(env, size_prop, &size_value); + initial_size = (size_t) size_value; + } + } + } + + herb_arena_wrapper_T* wrapper = (herb_arena_wrapper_T*) malloc(sizeof(herb_arena_wrapper_T)); + if (!wrapper) { + napi_throw_error(env, nullptr, "Failed to allocate arena wrapper"); + return nullptr; + } + + wrapper->arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + if (!wrapper->arena) { + free(wrapper); + napi_throw_error(env, nullptr, "Failed to allocate arena"); + return nullptr; + } + + if (!hb_arena_init(wrapper->arena, initial_size)) { + free(wrapper->arena); + free(wrapper); + napi_throw_error(env, nullptr, "Failed to initialize arena"); + return nullptr; + } + + wrapper->initialized = true; + + napi_wrap(env, this_val, wrapper, Arena_destructor, nullptr, nullptr); + + return this_val; +} + +napi_value Arena_reset(napi_env env, napi_callback_info info) { + napi_value this_val; + napi_get_cb_info(env, info, nullptr, nullptr, &this_val, nullptr); + + herb_arena_wrapper_T* wrapper; + napi_unwrap(env, this_val, (void**) &wrapper); + + if (!wrapper || !wrapper->arena || !wrapper->initialized) { + napi_throw_error(env, nullptr, "Arena not initialized"); + return nullptr; + } + + hb_arena_reset(wrapper->arena); + + return this_val; +} + +napi_value Arena_get_position(napi_env env, napi_callback_info info) { + napi_value this_val; + napi_get_cb_info(env, info, nullptr, nullptr, &this_val, nullptr); + + herb_arena_wrapper_T* wrapper; + napi_unwrap(env, this_val, (void**) &wrapper); + + if (!wrapper || !wrapper->arena || !wrapper->initialized) { + napi_throw_error(env, nullptr, "Arena not initialized"); + return nullptr; + } + + napi_value result; + napi_create_uint32(env, (uint32_t) hb_arena_position(wrapper->arena), &result); + return result; +} + +napi_value 
Arena_get_capacity(napi_env env, napi_callback_info info) { + napi_value this_val; + napi_get_cb_info(env, info, nullptr, nullptr, &this_val, nullptr); + + herb_arena_wrapper_T* wrapper; + napi_unwrap(env, this_val, (void**) &wrapper); + + if (!wrapper || !wrapper->arena || !wrapper->initialized) { + napi_throw_error(env, nullptr, "Arena not initialized"); + return nullptr; + } + + napi_value result; + napi_create_uint32(env, (uint32_t) hb_arena_capacity(wrapper->arena), &result); + return result; +} + +napi_value Arena_free(napi_env env, napi_callback_info info) { + napi_value this_val; + napi_get_cb_info(env, info, nullptr, nullptr, &this_val, nullptr); + + herb_arena_wrapper_T* wrapper; + napi_unwrap(env, this_val, (void**) &wrapper); + + if (!wrapper || !wrapper->arena || !wrapper->initialized) { + napi_value undefined; + napi_get_undefined(env, &undefined); + return undefined; + } + + hb_arena_free(wrapper->arena); + free(wrapper->arena); + wrapper->arena = nullptr; + wrapper->initialized = false; + + napi_value undefined; + napi_get_undefined(env, &undefined); + return undefined; +} + +hb_arena_T* get_arena_from_value(napi_env env, napi_value arena_val) { + if (!arena_val) return nullptr; + + napi_valuetype valuetype; + napi_typeof(env, arena_val, &valuetype); + if (valuetype != napi_object) return nullptr; + + herb_arena_wrapper_T* wrapper; + napi_status status = napi_unwrap(env, arena_val, (void**) &wrapper); + if (status != napi_ok) return nullptr; + + if (!wrapper || !wrapper->arena || !wrapper->initialized) return nullptr; + + return wrapper->arena; +} + +hb_arena_T* get_arena_option_from_object(napi_env env, napi_value options) { + if (!options) return nullptr; + + napi_valuetype valuetype; + napi_typeof(env, options, &valuetype); + if (valuetype != napi_object) return nullptr; + + bool has_arena_prop; + napi_has_named_property(env, options, "arena", &has_arena_prop); + if (!has_arena_prop) return nullptr; + + napi_value arena_prop; + 
napi_get_named_property(env, options, "arena", &arena_prop); + return get_arena_from_value(env, arena_prop); +} + +bool setup_arena_context(napi_env env, hb_arena_T* external_arena, arena_context_T* context) { + if (external_arena) { + context->arena = external_arena; + context->owns_arena = false; + return true; + } + + context->arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + if (!context->arena) { return false; } + + if (!hb_arena_init(context->arena, KB(512))) { + free(context->arena); + context->arena = nullptr; + return false; + } + + context->owns_arena = true; + return true; +} + +void cleanup_arena_context(arena_context_T* context) { + if (context->owns_arena && context->arena) { + hb_arena_free(context->arena); + free(context->arena); + context->arena = nullptr; + } +} + +void Init_herb_arena(napi_env env, napi_value exports) { + napi_property_descriptor arena_properties[] = { + { "reset", nullptr, Arena_reset, nullptr, nullptr, nullptr, napi_default, nullptr }, + { "free", nullptr, Arena_free, nullptr, nullptr, nullptr, napi_default, nullptr }, + { "position", nullptr, nullptr, Arena_get_position, nullptr, nullptr, napi_default, nullptr }, + { "capacity", nullptr, nullptr, Arena_get_capacity, nullptr, nullptr, napi_default, nullptr }, + }; + + napi_value arena_class; + napi_define_class( + env, + "Arena", + NAPI_AUTO_LENGTH, + Arena_constructor, + nullptr, + sizeof(arena_properties) / sizeof(arena_properties[0]), + arena_properties, + &arena_class + ); + + napi_create_reference(env, arena_class, 1, &arena_constructor_ref); + napi_set_named_property(env, exports, "Arena", arena_class); +} diff --git a/javascript/packages/node/extension/arena.h b/javascript/packages/node/extension/arena.h new file mode 100644 index 000000000..1b26550f7 --- /dev/null +++ b/javascript/packages/node/extension/arena.h @@ -0,0 +1,31 @@ +#ifndef HERB_NODE_ARENA_H +#define HERB_NODE_ARENA_H + +#include + +extern "C" { +#include "../extension/libherb/include/util/hb_arena.h" +} 
+ +extern napi_ref arena_constructor_ref; + +napi_value Arena_constructor(napi_env env, napi_callback_info info); +napi_value Arena_reset(napi_env env, napi_callback_info info); +napi_value Arena_get_position(napi_env env, napi_callback_info info); +napi_value Arena_get_capacity(napi_env env, napi_callback_info info); +napi_value Arena_free(napi_env env, napi_callback_info info); + +hb_arena_T* get_arena_from_value(napi_env env, napi_value arena_val); + +typedef struct { + hb_arena_T* arena; + bool owns_arena; +} arena_context_T; + +hb_arena_T* get_arena_option_from_object(napi_env env, napi_value options); +bool setup_arena_context(napi_env env, hb_arena_T* external_arena, arena_context_T* context); +void cleanup_arena_context(arena_context_T* context); + +void Init_herb_arena(napi_env env, napi_value exports); + +#endif diff --git a/javascript/packages/node/extension/herb.cpp b/javascript/packages/node/extension/herb.cpp index 9ecd66ee2..3900db161 100644 --- a/javascript/packages/node/extension/herb.cpp +++ b/javascript/packages/node/extension/herb.cpp @@ -3,12 +3,15 @@ extern "C" { #include "../extension/libherb/include/extract.h" #include "../extension/libherb/include/herb.h" #include "../extension/libherb/include/location.h" +#include "../extension/libherb/include/macros.h" #include "../extension/libherb/include/range.h" #include "../extension/libherb/include/token.h" +#include "../extension/libherb/include/util/hb_arena.h" #include "../extension/libherb/include/util/hb_array.h" #include "../extension/libherb/include/util/hb_buffer.h" } +#include "arena.h" #include "error_helpers.h" #include "extension_helpers.h" #include "nodes.h" @@ -19,8 +22,8 @@ extern "C" { #include napi_value Herb_lex(napi_env env, napi_callback_info info) { - size_t argc = 1; - napi_value args[1]; + size_t argc = 2; + napi_value args[2]; napi_get_cb_info(env, info, &argc, args, nullptr, nullptr); if (argc < 1) { @@ -31,18 +34,33 @@ napi_value Herb_lex(napi_env env, napi_callback_info 
info) { char* string = CheckString(env, args[0]); if (!string) { return nullptr; } - hb_array_T* tokens = herb_lex(string); - napi_value result = CreateLexResult(env, tokens, args[0]); + hb_arena_T* external_arena = (argc >= 2) ? get_arena_option_from_object(env, args[1]) : nullptr; + + arena_context_T context; + if (!setup_arena_context(env, external_arena, &context)) { + free(string); + return nullptr; + } + + herb_lex_result_T* lex_result = herb_lex(string, context.arena); + + if (!lex_result) { + cleanup_arena_context(&context); + free(string); + return nullptr; + } + + napi_value result = CreateLexResult(env, lex_result->tokens, args[0]); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); free(string); return result; } napi_value Herb_lex_file(napi_env env, napi_callback_info info) { - size_t argc = 1; - napi_value args[1]; + size_t argc = 2; + napi_value args[2]; napi_get_cb_info(env, info, &argc, args, nullptr, nullptr); if (argc < 1) { @@ -53,11 +71,26 @@ napi_value Herb_lex_file(napi_env env, napi_callback_info info) { char* file_path = CheckString(env, args[0]); if (!file_path) { return nullptr; } - hb_array_T* tokens = herb_lex_file(file_path); + hb_arena_T* external_arena = (argc >= 2) ? 
get_arena_option_from_object(env, args[1]) : nullptr; + + arena_context_T context; + if (!setup_arena_context(env, external_arena, &context)) { + free(file_path); + return nullptr; + } + + herb_lex_result_T* lex_result = herb_lex_file(file_path, context.arena); + + if (!lex_result) { + cleanup_arena_context(&context); + free(file_path); + return nullptr; + } + napi_value source_value = ReadFileToString(env, file_path); - napi_value result = CreateLexResult(env, tokens, source_value); + napi_value result = CreateLexResult(env, lex_result->tokens, source_value); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); free(file_path); return result; @@ -77,6 +110,7 @@ napi_value Herb_parse(napi_env env, napi_callback_info info) { if (!string) { return nullptr; } parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; + hb_arena_T* external_arena = nullptr; if (argc >= 2) { napi_valuetype valuetype; @@ -121,10 +155,27 @@ napi_value Herb_parse(napi_env env, napi_callback_info info) { napi_get_value_bool(env, strict_prop, &strict_value); parser_options.strict = strict_value; } + + external_arena = get_arena_option_from_object(env, args[1]); } } - AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options); + arena_context_T context; + if (!setup_arena_context(env, external_arena, &context)) { + free(string); + return nullptr; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, context.arena); + + if (!root) { + cleanup_arena_context(&context); + free(string); + return nullptr; + } + + root->owns_arena = context.owns_arena; + napi_value result = CreateParseResult(env, root, args[0]); ast_node_free((AST_NODE_T *) root); @@ -134,8 +185,8 @@ napi_value Herb_parse(napi_env env, napi_callback_info info) { } napi_value Herb_parse_file(napi_env env, napi_callback_info info) { - size_t argc = 1; - napi_value args[1]; + size_t argc = 2; + napi_value args[2]; napi_get_cb_info(env, info, &argc, args, nullptr, nullptr); if (argc < 1) { @@ 
-154,7 +205,75 @@ napi_value Herb_parse_file(napi_env env, napi_callback_info info) { return nullptr; } - AST_DOCUMENT_NODE_T* root = herb_parse(string, nullptr); + parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; + hb_arena_T* external_arena = nullptr; + + if (argc >= 2) { + napi_valuetype valuetype; + napi_typeof(env, args[1], &valuetype); + + if (valuetype == napi_object) { + napi_value track_whitespace_prop; + bool has_track_whitespace_prop; + napi_has_named_property(env, args[1], "track_whitespace", &has_track_whitespace_prop); + + if (has_track_whitespace_prop) { + napi_get_named_property(env, args[1], "track_whitespace", &track_whitespace_prop); + bool track_whitespace_value; + napi_get_value_bool(env, track_whitespace_prop, &track_whitespace_value); + + if (track_whitespace_value) { + parser_options.track_whitespace = true; + } + } + + napi_value analyze_prop; + bool has_analyze_prop; + napi_has_named_property(env, args[1], "analyze", &has_analyze_prop); + + if (has_analyze_prop) { + napi_get_named_property(env, args[1], "analyze", &analyze_prop); + bool analyze_value; + napi_get_value_bool(env, analyze_prop, &analyze_value); + + if (!analyze_value) { + parser_options.analyze = false; + } + } + + napi_value strict_prop; + bool has_strict_prop; + napi_has_named_property(env, args[1], "strict", &has_strict_prop); + + if (has_strict_prop) { + napi_get_named_property(env, args[1], "strict", &strict_prop); + bool strict_value; + napi_get_value_bool(env, strict_prop, &strict_value); + parser_options.strict = strict_value; + } + + external_arena = get_arena_option_from_object(env, args[1]); + } + } + + arena_context_T context; + if (!setup_arena_context(env, external_arena, &context)) { + free(file_path); + free(string); + return nullptr; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(string, &parser_options, context.arena); + + if (!root) { + cleanup_arena_context(&context); + free(file_path); + free(string); + return nullptr; + } + + 
root->owns_arena = context.owns_arena; + napi_value result = CreateParseResult(env, root, source_value); ast_node_free((AST_NODE_T *) root); @@ -274,6 +393,8 @@ napi_value Herb_version(napi_env env, napi_callback_info info) { } napi_value Init(napi_env env, napi_value exports) { + Init_herb_arena(env, exports); + napi_property_descriptor descriptors[] = { { "parse", nullptr, Herb_parse, nullptr, nullptr, nullptr, napi_default, nullptr }, { "lex", nullptr, Herb_lex, nullptr, nullptr, nullptr, napi_default, nullptr }, diff --git a/javascript/packages/node/src/node-backend.ts b/javascript/packages/node/src/node-backend.ts index 57206b342..aabb67b3f 100644 --- a/javascript/packages/node/src/node-backend.ts +++ b/javascript/packages/node/src/node-backend.ts @@ -1,9 +1,44 @@ import packageJSON from "../package.json" with { type: "json" } -import { HerbBackend } from "@herb-tools/core" +import { HerbBackend, Arena } from "@herb-tools/core" +import type { ArenaBackend, CreateArenaOptions } from "@herb-tools/core" + +class NodeArenaBackend implements ArenaBackend { + private nativeArena: any + + constructor(nativeArena: any) { + this.nativeArena = nativeArena + } + + get position(): number { + return this.nativeArena.position + } + + get capacity(): number { + return this.nativeArena.capacity + } + + reset(): void { + this.nativeArena.reset() + } + + free(): void { + this.nativeArena.free() + } + + toBackendOption(): { arena: any } { + return { arena: this.nativeArena } + } +} export class HerbBackendNode extends HerbBackend { backendVersion(): string { return `${packageJSON.name}@${packageJSON.version}` } + + createArena(options?: CreateArenaOptions): Arena { + this.ensureBackend() + const nativeArena = new (this.backend as any).Arena(options) + return new Arena(new NodeArenaBackend(nativeArena)) + } } diff --git a/javascript/packages/node/test/node.test.ts b/javascript/packages/node/test/node.test.ts index fe3fdd6dc..06ffecc4a 100644 --- 
a/javascript/packages/node/test/node.test.ts +++ b/javascript/packages/node/test/node.test.ts @@ -11,6 +11,164 @@ describe("@herb-tools/node", () => { expect(Herb).toBeDefined() }) + describe("Arena", () => { + test("createArena returns an Arena", () => { + const arena = Herb.createArena() + expect(arena).toBeDefined() + expect(arena.capacity).toBeGreaterThan(0) + arena.free() + }) + + test("creating an arena with custom size", () => { + const arena = Herb.createArena({ size: 1024 * 1024 }) + expect(arena).toBeDefined() + expect(arena.capacity).toBeGreaterThanOrEqual(1024 * 1024) + arena.free() + }) + + test("arena position starts at zero", () => { + const arena = Herb.createArena() + expect(arena.position).toBe(0) + arena.free() + }) + + test("arena position increases after parsing", () => { + const arena = Herb.createArena() + const initialPosition = arena.position + + Herb.parse("
hello
", { arena }) + + expect(arena.position).toBeGreaterThan(initialPosition) + arena.free() + }) + + test("arena can be reused for multiple parse calls", () => { + const arena = Herb.createArena() + + const result1 = Herb.parse("
first
", { arena }) + const positionAfterFirst = arena.position + + const result2 = Herb.parse("second", { arena }) + const positionAfterSecond = arena.position + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + expect(positionAfterSecond).toBeGreaterThan(positionAfterFirst) + arena.free() + }) + + test("arena reset returns position to zero", () => { + const arena = Herb.createArena() + + Herb.parse("
hello
", { arena }) + expect(arena.position).toBeGreaterThan(0) + + arena.reset() + expect(arena.position).toBe(0) + arena.free() + }) + + test("arena can be reused after reset", () => { + const arena = Herb.createArena() + + const result1 = Herb.parse("
first
", { arena }) + arena.reset() + + const result2 = Herb.parse("second", { arena }) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + arena.free() + }) + + test("multiple arenas can be used independently", () => { + const arena1 = Herb.createArena() + const arena2 = Herb.createArena() + + Herb.parse("
first
", { arena: arena1 }) + const position1 = arena1.position + + Herb.parse("second", { arena: arena2 }) + const position2 = arena2.position + + expect(position1).toBeGreaterThan(0) + expect(position2).toBeGreaterThan(0) + expect(arena1.position).toBe(position1) + + arena1.free() + arena2.free() + }) + + test("parsing many templates with shared arena", () => { + const arena = Herb.createArena() + + for (let i = 0; i < 100; i++) { + const result = Herb.parse(`
template ${i}
`, { arena }) + expect(result).toBeDefined() + } + + expect(arena.position).toBeGreaterThan(0) + arena.free() + }) + + test("arena reset allows reuse for batch processing", () => { + const arena = Herb.createArena() + + for (let batch = 0; batch < 3; batch++) { + for (let i = 0; i < 10; i++) { + const result = Herb.parse(`
batch ${batch} item ${i}
`, { arena }) + expect(result).toBeDefined() + } + arena.reset() + expect(arena.position).toBe(0) + } + + arena.free() + }) + + test("arena free releases resources", () => { + const arena = Herb.createArena() + Herb.parse("
hello
", { arena }) + arena.free() + }) + + test("arena works with lex", () => { + const arena = Herb.createArena() + + const result = Herb.lex("
hello
", { arena }) + + expect(result).toBeDefined() + expect(result.value.tokens.length).toBeGreaterThan(0) + arena.free() + }) + + test("arena can be reused for multiple lex calls", () => { + const arena = Herb.createArena() + + const result1 = Herb.lex("
first
", { arena }) + const result2 = Herb.lex("second", { arena }) + + expect(result1).toBeDefined() + expect(result2).toBeDefined() + expect(result1.value.tokens.length).toBeGreaterThan(0) + expect(result2.value.tokens.length).toBeGreaterThan(0) + arena.free() + }) + + test("arena can be used for both parse and lex", () => { + const arena = Herb.createArena() + + const parseResult = Herb.parse("
parsed
", { arena }) + const lexResult = Herb.lex("lexed", { arena }) + + expect(parseResult).toBeDefined() + expect(lexResult).toBeDefined() + expect(parseResult.value).toBeDefined() + expect(lexResult.value.tokens.length).toBeGreaterThan(0) + arena.free() + }) + }) + test("Herb export is of instance HerbBackend", () => { expect(Herb instanceof HerbBackend).toBeTruthy() }) diff --git a/lib/herb/cli.rb b/lib/herb/cli.rb index 4a793843b..6320ab001 100644 --- a/lib/herb/cli.rb +++ b/lib/herb/cli.rb @@ -8,7 +8,7 @@ class Herb::CLI include Herb::Colors - attr_accessor :json, :silent, :no_interactive, :no_log_file, :no_timing, :local, :escape, :no_escape, :freeze, :debug, :tool, :strict + attr_accessor :json, :silent, :no_interactive, :no_log_file, :no_timing, :local, :escape, :no_escape, :freeze, :debug, :tool, :strict, :arena_stats def initialize(args) @args = args @@ -139,13 +139,13 @@ def result show_config exit(0) when "parse" - Herb.parse(file_content, strict: strict.nil? || strict) + Herb.parse(file_content, strict: strict.nil? 
|| strict, arena_stats: arena_stats) when "compile" compile_template when "render" render_template when "lex" - Herb.lex(file_content) + Herb.lex(file_content, arena_stats: arena_stats) when "ruby" puts Herb.extract_ruby(file_content) exit(0) @@ -249,6 +249,10 @@ def option_parser parser.on("--tool TOOL", "Show config for specific tool: linter, formatter (for config command)") do |t| self.tool = t.to_sym end + + parser.on("--arena-stats", "Print arena memory statistics (for lex/parse commands)") do + self.arena_stats = true + end end end diff --git a/rust/build.rs b/rust/build.rs index 0c4cd0cb0..d97988d61 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -89,6 +89,7 @@ fn main() { .clang_arg(format!("-I{}", include_dir.display())) .clang_arg(format!("-I{}", prism_include.display())) .allowlist_function("herb_.*") + .allowlist_function("hb_arena_.*") .allowlist_function("hb_array_.*") .allowlist_function("hb_buffer_.*") .allowlist_function("token_type_to_string") @@ -100,6 +101,7 @@ fn main() { .allowlist_type("element_source_t") .allowlist_type("ast_node_type_T") .allowlist_type("error_type_T") + .allowlist_type("hb_arena_T") .allowlist_type("hb_array_T") .allowlist_type("hb_buffer_T") .allowlist_type("hb_string_T") diff --git a/rust/src/ffi.rs b/rust/src/ffi.rs index 191f7a9ba..d530cfc4b 100644 --- a/rust/src/ffi.rs +++ b/rust/src/ffi.rs @@ -1,5 +1,6 @@ pub use crate::bindings::{ - ast_node_free, element_source_to_string, hb_array_get, hb_array_size, hb_buffer_init, - hb_buffer_value, hb_string_T, herb_extract, herb_extract_ruby_to_buffer_with_options, - herb_free_tokens, herb_lex, herb_parse, herb_prism_version, herb_version, token_type_to_string, + ast_node_free, element_source_to_string, hb_arena_free, hb_arena_init, hb_array_get, + hb_array_size, hb_buffer_init, hb_buffer_value, hb_string_T, herb_extract, + herb_extract_ruby_to_buffer_with_options, herb_free_lex_result, herb_lex, herb_parse, + herb_prism_version, herb_version, token_type_to_string, }; diff 
--git a/rust/src/herb.rs b/rust/src/herb.rs index ab9c70f11..7400abacc 100644 --- a/rust/src/herb.rs +++ b/rust/src/herb.rs @@ -1,4 +1,4 @@ -use crate::bindings::{hb_array_T, hb_buffer_T, token_T}; +use crate::bindings::{hb_arena_T, hb_buffer_T, herb_lex_result_T, token_T}; use crate::convert::token_from_c; use crate::{LexResult, ParseResult}; use std::ffi::CString; @@ -40,12 +40,29 @@ impl Default for ExtractRubyOptions { pub fn lex(source: &str) -> Result { unsafe { let c_source = CString::new(source).map_err(|e| e.to_string())?; - let c_tokens = crate::ffi::herb_lex(c_source.as_ptr()); - if c_tokens.is_null() { + let arena = libc::malloc(std::mem::size_of::()) as *mut hb_arena_T; + + if arena.is_null() { + return Err("Failed to allocate arena".to_string()); + } + + if !crate::ffi::hb_arena_init(arena, 512 * 1024) { + libc::free(arena as *mut std::ffi::c_void); + + return Err("Failed to initialize arena".to_string()); + } + + let lex_result = crate::ffi::herb_lex(c_source.as_ptr(), arena); + + if lex_result.is_null() { + crate::ffi::hb_arena_free(arena); + libc::free(arena as *mut std::ffi::c_void); + return Err("Failed to lex source".to_string()); } + let c_tokens = (*lex_result).tokens; let array_size = crate::ffi::hb_array_size(c_tokens); let mut tokens = Vec::with_capacity(array_size); @@ -57,8 +74,8 @@ pub fn lex(source: &str) -> Result { } } - let mut c_tokens_ptr = c_tokens; - crate::ffi::herb_free_tokens(&mut c_tokens_ptr as *mut *mut hb_array_T); + let mut lex_result_ptr = lex_result; + crate::ffi::herb_free_lex_result(&mut lex_result_ptr as *mut *mut herb_lex_result_T); Ok(LexResult::new(tokens)) } @@ -71,6 +88,17 @@ pub fn parse(source: &str) -> Result { pub fn parse_with_options(source: &str, options: &ParserOptions) -> Result { unsafe { let c_source = CString::new(source).map_err(|e| e.to_string())?; + let arena = libc::malloc(std::mem::size_of::()) as *mut hb_arena_T; + + if arena.is_null() { + return Err("Failed to allocate arena".to_string()); + } 
+ + if !crate::ffi::hb_arena_init(arena, 512 * 1024) { + libc::free(arena as *mut std::ffi::c_void); + + return Err("Failed to initialize arena".to_string()); + } let c_parser_options = crate::bindings::parser_options_T { track_whitespace: options.track_whitespace, @@ -78,9 +106,12 @@ pub fn parse_with_options(source: &str, options: &ParserOptions) -> Resulttype == AST_ERB_CONTENT_NODE) { AST_ERB_CONTENT_NODE_T* erb_content_node = (AST_ERB_CONTENT_NODE_T*) node; @@ -83,7 +90,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) { append_erb_multiple_blocks_in_tag_error( erb_content_node->base.location.start, erb_content_node->base.location.end, - erb_content_node->base.errors + erb_content_node->base.errors, + context->arena ); } @@ -93,7 +101,8 @@ static bool analyze_erb_content(const AST_NODE_T* node, void* data) { append_erb_case_with_conditions_error( erb_content_node->base.location.start, erb_content_node->base.location.end, - erb_content_node->base.errors + erb_content_node->base.errors, + context->arena ); } } else { @@ -341,7 +350,8 @@ static AST_NODE_T* create_control_node( hb_array_T* children, AST_NODE_T* subsequent, AST_ERB_END_NODE_T* end_node, - control_type_t control_type + control_type_t control_type, + hb_arena_T* arena ) { hb_array_T* errors = erb_node->base.errors; erb_node->base.errors = NULL; @@ -390,6 +400,11 @@ static AST_NODE_T* create_control_node( } } + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + switch (control_type) { case CONTROL_TYPE_IF: case CONTROL_TYPE_ELSIF: { @@ -403,14 +418,22 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_ELSE: { - return ( - AST_NODE_T* - ) ast_erb_else_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors); + return (AST_NODE_T*) ast_erb_else_node_init( + tag_opening, + content, + 
tag_closing, + children, + start_position, + end_position, + errors, + arena + ); } case CONTROL_TYPE_CASE: @@ -449,7 +472,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } else { hb_array_free(&in_conditions); @@ -464,7 +488,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } } @@ -478,7 +503,8 @@ static AST_NODE_T* create_control_node( children, start_position, end_position, - errors + errors, + arena ); } @@ -491,7 +517,8 @@ static AST_NODE_T* create_control_node( children, start_position, end_position, - errors + errors, + arena ); } @@ -521,7 +548,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -538,14 +566,22 @@ static AST_NODE_T* create_control_node( rescue_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_ENSURE: { - return ( - AST_NODE_T* - ) ast_erb_ensure_node_init(tag_opening, content, tag_closing, children, start_position, end_position, errors); + return (AST_NODE_T*) ast_erb_ensure_node_init( + tag_opening, + content, + tag_closing, + children, + start_position, + end_position, + errors, + arena + ); } case CONTROL_TYPE_UNLESS: { @@ -563,7 +599,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -576,7 +613,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -589,7 +627,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -602,7 +641,8 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } @@ -615,14 +655,15 @@ static AST_NODE_T* create_control_node( end_node, start_position, end_position, - errors + errors, + arena ); } case CONTROL_TYPE_YIELD: { return ( 
AST_NODE_T* - ) ast_erb_yield_node_init(tag_opening, content, tag_closing, start_position, end_position, errors); + ) ast_erb_yield_node_init(tag_opening, content, tag_closing, start_position, end_position, errors, arena); } default: return NULL; @@ -686,6 +727,11 @@ static size_t process_control_structure( hb_array_T* when_errors = erb_content->base.errors; erb_content->base.errors = NULL; + if (erb_content->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content->analyzed_ruby); + erb_content->analyzed_ruby = NULL; + } + location_T* then_keyword = NULL; const char* source = erb_content->content ? erb_content->content->value : NULL; @@ -712,11 +758,10 @@ static size_t process_control_structure( when_statements, erb_content->tag_opening->location.start, when_end_position, - when_errors + when_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_content); - hb_array_append(when_conditions, (AST_NODE_T*) when_node); continue; @@ -729,6 +774,11 @@ static size_t process_control_structure( hb_array_T* in_errors = erb_content->base.errors; erb_content->base.errors = NULL; + if (erb_content->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_content->analyzed_ruby); + erb_content->analyzed_ruby = NULL; + } + location_T* in_then_keyword = NULL; const char* in_source = erb_content->content ? 
erb_content->content->value : NULL; @@ -755,11 +805,10 @@ static size_t process_control_structure( in_statements, erb_content->tag_opening->location.start, in_end_position, - in_errors + in_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_content); - hb_array_append(in_conditions, (AST_NODE_T*) in_node); continue; @@ -790,6 +839,11 @@ static size_t process_control_structure( hb_array_T* else_errors = next_erb->base.errors; next_erb->base.errors = NULL; + if (next_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(next_erb->analyzed_ruby); + next_erb->analyzed_ruby = NULL; + } + else_clause = ast_erb_else_node_init( next_erb->tag_opening, next_erb->content, @@ -797,10 +851,9 @@ static size_t process_control_structure( else_children, next_erb->tag_opening->location.start, erb_content_end_position(next_erb), - else_errors + else_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -817,17 +870,21 @@ static size_t process_control_structure( hb_array_T* end_errors = end_erb->base.errors; end_erb->base.errors = NULL; + if (end_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(end_erb->analyzed_ruby); + end_erb->analyzed_ruby = NULL; + } + end_node = ast_erb_end_node_init( end_erb->tag_opening, end_erb->content, end_erb->tag_closing, end_erb->tag_opening->location.start, erb_content_end_position(end_erb), - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } @@ -852,6 +909,11 @@ static size_t process_control_structure( hb_array_T* case_match_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_CASE_MATCH_NODE_T* case_match_node = ast_erb_case_match_node_init( erb_node->tag_opening, erb_node->content, @@ -862,11 +924,10 @@ static size_t process_control_structure( end_node, start_position, end_position, - case_match_errors + case_match_errors, 
+ context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) case_match_node); hb_array_free(&when_conditions); hb_array_free(&children); @@ -877,6 +938,11 @@ static size_t process_control_structure( hb_array_T* case_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_CASE_NODE_T* case_node = ast_erb_case_node_init( erb_node->tag_opening, erb_node->content, @@ -887,11 +953,10 @@ static size_t process_control_structure( end_node, start_position, end_position, - case_errors + case_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) case_node); hb_array_free(&in_conditions); hb_array_free(&children); @@ -938,6 +1003,11 @@ static size_t process_control_structure( hb_array_T* else_errors = next_erb->base.errors; next_erb->base.errors = NULL; + if (next_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(next_erb->analyzed_ruby); + next_erb->analyzed_ruby = NULL; + } + else_clause = ast_erb_else_node_init( next_erb->tag_opening, next_erb->content, @@ -945,10 +1015,9 @@ static size_t process_control_structure( else_children, next_erb->tag_opening->location.start, erb_content_end_position(next_erb), - else_errors + else_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -984,6 +1053,11 @@ static size_t process_control_structure( hb_array_T* ensure_errors = next_erb->base.errors; next_erb->base.errors = NULL; + if (next_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(next_erb->analyzed_ruby); + next_erb->analyzed_ruby = NULL; + } + ensure_clause = ast_erb_ensure_node_init( next_erb->tag_opening, next_erb->content, @@ -991,10 +1065,9 @@ static size_t process_control_structure( ensure_children, next_erb->tag_opening->location.start, erb_content_end_position(next_erb), - ensure_errors + 
ensure_errors, + context->arena ); - - ast_node_free((AST_NODE_T*) next_erb); } } } @@ -1011,17 +1084,21 @@ static size_t process_control_structure( hb_array_T* end_errors = end_erb->base.errors; end_erb->base.errors = NULL; + if (end_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(end_erb->analyzed_ruby); + end_erb->analyzed_ruby = NULL; + } + end_node = ast_erb_end_node_init( end_erb->tag_opening, end_erb->content, end_erb->tag_closing, end_erb->tag_opening->location.start, erb_content_end_position(end_erb), - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } @@ -1043,6 +1120,11 @@ static size_t process_control_structure( hb_array_T* begin_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_BEGIN_NODE_T* begin_node = ast_erb_begin_node_init( erb_node->tag_opening, erb_node->content, @@ -1054,12 +1136,12 @@ static size_t process_control_structure( end_node, start_position, end_position, - begin_errors + begin_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) begin_node); + return index; } @@ -1081,17 +1163,21 @@ static size_t process_control_structure( position_T close_end_pos = erb_content_end_position(close_erb); + if (close_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(close_erb->analyzed_ruby); + close_erb->analyzed_ruby = NULL; + } + end_node = ast_erb_end_node_init( close_erb->tag_opening, close_erb->content, close_erb->tag_closing, close_erb->tag_opening->location.start, close_end_pos, - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) close_erb); - index++; } } @@ -1110,6 +1196,11 @@ static size_t process_control_structure( hb_array_T* block_errors = erb_node->base.errors; erb_node->base.errors = NULL; + if (erb_node->analyzed_ruby != NULL) { + 
free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + AST_ERB_BLOCK_NODE_T* block_node = ast_erb_block_node_init( erb_node->tag_opening, erb_node->content, @@ -1118,12 +1209,12 @@ static size_t process_control_structure( end_node, start_position, end_position, - block_errors + block_errors, + context->arena ); - ast_node_free((AST_NODE_T*) erb_node); - hb_array_append(output_array, (AST_NODE_T*) block_node); + return index; } @@ -1157,26 +1248,30 @@ static size_t process_control_structure( position_T end_erb_final_pos = erb_content_end_position(end_erb); + if (end_erb->analyzed_ruby != NULL) { + free_analyzed_ruby(end_erb->analyzed_ruby); + end_erb->analyzed_ruby = NULL; + } + end_node = ast_erb_end_node_init( end_erb->tag_opening, end_erb->content, end_erb->tag_closing, end_erb->tag_opening->location.start, end_erb_final_pos, - end_errors + end_errors, + context->arena ); - ast_node_free((AST_NODE_T*) end_erb); - index++; } } } - AST_NODE_T* control_node = create_control_node(erb_node, children, subsequent, end_node, initial_type); + AST_NODE_T* control_node = + create_control_node(erb_node, children, subsequent, end_node, initial_type, context->arena); if (control_node) { - ast_node_free((AST_NODE_T*) erb_node); hb_array_append(output_array, control_node); } else { hb_array_free(&children); @@ -1201,10 +1296,15 @@ static size_t process_subsequent_block( index = process_block_children(node, array, index, children, context, parent_type); - AST_NODE_T* subsequent_node = create_control_node(erb_node, children, NULL, NULL, type); + if (erb_node->analyzed_ruby != NULL) { + free_analyzed_ruby(erb_node->analyzed_ruby); + erb_node->analyzed_ruby = NULL; + } + + AST_NODE_T* subsequent_node = create_control_node(erb_node, children, NULL, NULL, type, context->arena); if (subsequent_node) { - ast_node_free((AST_NODE_T*) erb_node); + // no-op } else { hb_array_free(&children); } @@ -1336,10 +1436,9 @@ hb_array_T* rewrite_node_array(AST_NODE_T* 
node, hb_array_T* array, analyze_ruby continue; case CONTROL_TYPE_YIELD: { - AST_NODE_T* yield_node = create_control_node(erb_node, NULL, NULL, NULL, type); + AST_NODE_T* yield_node = create_control_node(erb_node, NULL, NULL, NULL, type, context->arena); if (yield_node) { - ast_node_free((AST_NODE_T*) erb_node); hb_array_append(new_array, yield_node); } else { hb_array_append(new_array, item); @@ -1432,7 +1531,13 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { if (keyword == NULL) { keyword = erb_keyword_from_analyzed_ruby(analyzed); } if (keyword != NULL && !token_value_empty(content_node->tag_closing)) { - append_erb_control_flow_scope_error(keyword, node->location.start, node->location.end, node->errors); + append_erb_control_flow_scope_error( + keyword, + node->location.start, + node->location.end, + node->errors, + context->arena + ); } } } @@ -1440,7 +1545,7 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { if (node->type == AST_ERB_IF_NODE) { const AST_ERB_IF_NODE_T* if_node = (const AST_ERB_IF_NODE_T*) node; - if (if_node->end_node == NULL) { check_erb_node_for_missing_end(node); } + if (if_node->end_node == NULL) { check_erb_node_for_missing_end(node, context->arena); } if (if_node->statements != NULL) { for (size_t i = 0; i < hb_array_size(if_node->statements); i++) { @@ -1465,7 +1570,8 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { keyword, subsequent->location.start, subsequent->location.end, - subsequent->errors + subsequent->errors, + context->arena ); } } @@ -1510,7 +1616,7 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { if (node->type == AST_ERB_UNLESS_NODE || node->type == AST_ERB_WHILE_NODE || node->type == AST_ERB_UNTIL_NODE || node->type == AST_ERB_FOR_NODE || node->type == AST_ERB_CASE_NODE || node->type == AST_ERB_CASE_MATCH_NODE || node->type == AST_ERB_BEGIN_NODE || node->type == AST_ERB_BLOCK_NODE || 
node->type == AST_ERB_ELSE_NODE) { - check_erb_node_for_missing_end(node); + check_erb_node_for_missing_end(node, context->arena); if (is_loop_node) { context->loop_depth--; } if (is_begin_node) { context->rescue_depth--; } @@ -1534,12 +1640,14 @@ static bool detect_invalid_erb_structures(const AST_NODE_T* node, void* data) { } void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source, bool strict) { - herb_visit_node((AST_NODE_T*) document, analyze_erb_content, NULL); + analyze_erb_content_context_T erb_content_context = { .arena = document->arena }; + herb_visit_node((AST_NODE_T*) document, analyze_erb_content, &erb_content_context); analyze_ruby_context_T* context = malloc(sizeof(analyze_ruby_context_T)); context->document = document; context->parent = NULL; context->ruby_context_stack = hb_array_init(8); + context->arena = document->arena; herb_visit_node((AST_NODE_T*) document, transform_erb_nodes, context); herb_transform_conditional_elements(document); @@ -1548,6 +1656,7 @@ void herb_analyze_parse_tree(AST_DOCUMENT_NODE_T* document, const char* source, invalid_erb_context_T* invalid_context = malloc(sizeof(invalid_erb_context_T)); invalid_context->loop_depth = 0; invalid_context->rescue_depth = 0; + invalid_context->arena = document->arena; herb_visit_node((AST_NODE_T*) document, detect_invalid_erb_structures, invalid_context); diff --git a/src/analyze_conditional_elements.c b/src/analyze_conditional_elements.c index 20c3f5bd5..2d8e8552d 100644 --- a/src/analyze_conditional_elements.c +++ b/src/analyze_conditional_elements.c @@ -213,7 +213,14 @@ typedef struct { bool is_if; } conditional_open_tag_T; -static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document_errors) { +typedef struct { + hb_array_T* errors; + hb_arena_T* arena; +} conditional_transform_context_T; + +static void rewrite_conditional_elements(hb_array_T* nodes, conditional_transform_context_T* context) { + hb_array_T* document_errors = context->errors; 
+ hb_arena_T* arena = context->arena; if (!nodes || hb_array_size(nodes) == 0) { return; } if (!document_errors) { return; } @@ -256,7 +263,8 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document open_node->location.start.line, open_node->location.start.column, open_node->location.start, - open_node->location.end + open_node->location.end, + arena ); hb_array_append(document_errors, multiple_tags_error); @@ -373,7 +381,8 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document node->location.start.line, node->location.start.column, mismatched_open->open_conditional->location.start, - node->location.end + node->location.end, + arena ); hb_array_append(document_errors, mismatch_error); @@ -409,7 +418,8 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document ELEMENT_SOURCE_HTML, start_position, end_position, - errors + errors, + arena ); free(condition_copy); @@ -488,39 +498,39 @@ static void rewrite_conditional_elements(hb_array_T* nodes, hb_array_T* document static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* data); -static void transform_conditional_elements_in_array(hb_array_T* array, hb_array_T* document_errors) { +static void transform_conditional_elements_in_array(hb_array_T* array, conditional_transform_context_T* context) { if (!array) { return; } for (size_t i = 0; i < hb_array_size(array); i++) { AST_NODE_T* child = (AST_NODE_T*) hb_array_get(array, i); - if (child) { herb_visit_node(child, transform_conditional_elements_visitor, document_errors); } + if (child) { herb_visit_node(child, transform_conditional_elements_visitor, context); } } - rewrite_conditional_elements(array, document_errors); + rewrite_conditional_elements(array, context); } static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* data) { if (!node) { return false; } - hb_array_T* document_errors = (hb_array_T*) data; + conditional_transform_context_T* 
context = (conditional_transform_context_T*) data; switch (node->type) { case AST_DOCUMENT_NODE: { AST_DOCUMENT_NODE_T* doc = (AST_DOCUMENT_NODE_T*) node; - transform_conditional_elements_in_array(doc->children, document_errors); + transform_conditional_elements_in_array(doc->children, context); return false; } case AST_HTML_ELEMENT_NODE: { AST_HTML_ELEMENT_NODE_T* element = (AST_HTML_ELEMENT_NODE_T*) node; - transform_conditional_elements_in_array(element->body, document_errors); + transform_conditional_elements_in_array(element->body, context); return false; } case AST_ERB_IF_NODE: { AST_ERB_IF_NODE_T* if_node = (AST_ERB_IF_NODE_T*) node; - transform_conditional_elements_in_array(if_node->statements, document_errors); + transform_conditional_elements_in_array(if_node->statements, context); if (if_node->subsequent) { herb_visit_node(if_node->subsequent, transform_conditional_elements_visitor, data); } @@ -529,13 +539,13 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_ELSE_NODE: { AST_ERB_ELSE_NODE_T* else_node = (AST_ERB_ELSE_NODE_T*) node; - transform_conditional_elements_in_array(else_node->statements, document_errors); + transform_conditional_elements_in_array(else_node->statements, context); return false; } case AST_ERB_UNLESS_NODE: { AST_ERB_UNLESS_NODE_T* unless_node = (AST_ERB_UNLESS_NODE_T*) node; - transform_conditional_elements_in_array(unless_node->statements, document_errors); + transform_conditional_elements_in_array(unless_node->statements, context); if (unless_node->else_clause) { herb_visit_node((AST_NODE_T*) unless_node->else_clause, transform_conditional_elements_visitor, data); @@ -546,31 +556,31 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_BLOCK_NODE: { AST_ERB_BLOCK_NODE_T* block_node = (AST_ERB_BLOCK_NODE_T*) node; - transform_conditional_elements_in_array(block_node->body, document_errors); + 
transform_conditional_elements_in_array(block_node->body, context); return false; } case AST_ERB_WHILE_NODE: { AST_ERB_WHILE_NODE_T* while_node = (AST_ERB_WHILE_NODE_T*) node; - transform_conditional_elements_in_array(while_node->statements, document_errors); + transform_conditional_elements_in_array(while_node->statements, context); return false; } case AST_ERB_UNTIL_NODE: { AST_ERB_UNTIL_NODE_T* until_node = (AST_ERB_UNTIL_NODE_T*) node; - transform_conditional_elements_in_array(until_node->statements, document_errors); + transform_conditional_elements_in_array(until_node->statements, context); return false; } case AST_ERB_FOR_NODE: { AST_ERB_FOR_NODE_T* for_node = (AST_ERB_FOR_NODE_T*) node; - transform_conditional_elements_in_array(for_node->statements, document_errors); + transform_conditional_elements_in_array(for_node->statements, context); return false; } case AST_ERB_CASE_NODE: { AST_ERB_CASE_NODE_T* case_node = (AST_ERB_CASE_NODE_T*) node; - transform_conditional_elements_in_array(case_node->children, document_errors); + transform_conditional_elements_in_array(case_node->children, context); for (size_t i = 0; i < hb_array_size(case_node->conditions); i++) { AST_NODE_T* when = (AST_NODE_T*) hb_array_get(case_node->conditions, i); @@ -586,13 +596,13 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_WHEN_NODE: { AST_ERB_WHEN_NODE_T* when_node = (AST_ERB_WHEN_NODE_T*) node; - transform_conditional_elements_in_array(when_node->statements, document_errors); + transform_conditional_elements_in_array(when_node->statements, context); return false; } case AST_ERB_BEGIN_NODE: { AST_ERB_BEGIN_NODE_T* begin_node = (AST_ERB_BEGIN_NODE_T*) node; - transform_conditional_elements_in_array(begin_node->statements, document_errors); + transform_conditional_elements_in_array(begin_node->statements, context); if (begin_node->rescue_clause) { herb_visit_node((AST_NODE_T*) begin_node->rescue_clause, 
transform_conditional_elements_visitor, data); @@ -611,7 +621,7 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_RESCUE_NODE: { AST_ERB_RESCUE_NODE_T* rescue_node = (AST_ERB_RESCUE_NODE_T*) node; - transform_conditional_elements_in_array(rescue_node->statements, document_errors); + transform_conditional_elements_in_array(rescue_node->statements, context); if (rescue_node->subsequent) { herb_visit_node((AST_NODE_T*) rescue_node->subsequent, transform_conditional_elements_visitor, data); @@ -622,7 +632,7 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* case AST_ERB_ENSURE_NODE: { AST_ERB_ENSURE_NODE_T* ensure_node = (AST_ERB_ENSURE_NODE_T*) node; - transform_conditional_elements_in_array(ensure_node->statements, document_errors); + transform_conditional_elements_in_array(ensure_node->statements, context); return false; } @@ -631,5 +641,7 @@ static bool transform_conditional_elements_visitor(const AST_NODE_T* node, void* } void herb_transform_conditional_elements(AST_DOCUMENT_NODE_T* document) { - herb_visit_node((AST_NODE_T*) document, transform_conditional_elements_visitor, document->base.errors); + conditional_transform_context_T context = { .errors = document->base.errors, .arena = document->arena }; + + herb_visit_node((AST_NODE_T*) document, transform_conditional_elements_visitor, &context); } diff --git a/src/analyze_conditional_open_tags.c b/src/analyze_conditional_open_tags.c index b6b002d1d..c151c6de8 100644 --- a/src/analyze_conditional_open_tags.c +++ b/src/analyze_conditional_open_tags.c @@ -12,8 +12,13 @@ #include #include +typedef struct { + hb_array_T* errors; + hb_arena_T* arena; +} conditional_open_tags_context_T; + static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void* data); -static void transform_conditional_open_tags_in_array(hb_array_T* array, hb_array_T* document_errors); +static void transform_conditional_open_tags_in_array(hb_array_T* 
array, conditional_open_tags_context_T* context); static bool is_non_void_open_tag(AST_NODE_T* node) { if (!node || node->type != AST_HTML_OPEN_TAG_NODE) { return false; } @@ -194,14 +199,19 @@ static token_T* get_first_branch_tag_name_token_unless(AST_ERB_UNLESS_NODE_T* un return result.tag ? result.tag->tag_name : NULL; } -static void add_multiple_tags_error_to_erb_node(AST_NODE_T* erb_node, AST_HTML_OPEN_TAG_NODE_T* second_tag) { +static void add_multiple_tags_error_to_erb_node( + AST_NODE_T* erb_node, + AST_HTML_OPEN_TAG_NODE_T* second_tag, + hb_arena_T* arena +) { if (!erb_node || !second_tag) { return; } CONDITIONAL_ELEMENT_MULTIPLE_TAGS_ERROR_T* error = conditional_element_multiple_tags_error_init( second_tag->base.location.start.line, second_tag->base.location.start.column, erb_node->location.start, - erb_node->location.end + erb_node->location.end, + arena ); if (!erb_node->errors) { erb_node->errors = hb_array_init(1); } @@ -209,13 +219,13 @@ static void add_multiple_tags_error_to_erb_node(AST_NODE_T* erb_node, AST_HTML_O hb_array_append(erb_node->errors, error); } -static void check_and_report_multiple_tags_in_if(AST_ERB_IF_NODE_T* if_node) { +static void check_and_report_multiple_tags_in_if(AST_ERB_IF_NODE_T* if_node, hb_arena_T* arena) { if (!if_node || !if_node->subsequent) { return; } single_open_tag_result_T if_result = get_single_open_tag_from_statements(if_node->statements); if (if_result.has_multiple_tags) { - add_multiple_tags_error_to_erb_node((AST_NODE_T*) if_node, if_result.second_tag); + add_multiple_tags_error_to_erb_node((AST_NODE_T*) if_node, if_result.second_tag, arena); return; } @@ -243,7 +253,7 @@ static void check_and_report_multiple_tags_in_if(AST_ERB_IF_NODE_T* if_node) { single_open_tag_result_T branch_result = get_single_open_tag_from_statements(branch_statements); if (branch_result.has_multiple_tags) { - add_multiple_tags_error_to_erb_node(current, branch_result.second_tag); + add_multiple_tags_error_to_erb_node(current, 
branch_result.second_tag, arena); return; } if (!branch_result.tag) { return; } @@ -254,13 +264,13 @@ static void check_and_report_multiple_tags_in_if(AST_ERB_IF_NODE_T* if_node) { (void) ends_with_else; } -static void check_and_report_multiple_tags_in_unless(AST_ERB_UNLESS_NODE_T* unless_node) { +static void check_and_report_multiple_tags_in_unless(AST_ERB_UNLESS_NODE_T* unless_node, hb_arena_T* arena) { if (!unless_node || !unless_node->else_clause) { return; } single_open_tag_result_T unless_result = get_single_open_tag_from_statements(unless_node->statements); if (unless_result.has_multiple_tags) { - add_multiple_tags_error_to_erb_node((AST_NODE_T*) unless_node, unless_result.second_tag); + add_multiple_tags_error_to_erb_node((AST_NODE_T*) unless_node, unless_result.second_tag, arena); return; } @@ -269,13 +279,13 @@ static void check_and_report_multiple_tags_in_unless(AST_ERB_UNLESS_NODE_T* unle single_open_tag_result_T else_result = get_single_open_tag_from_statements(unless_node->else_clause->statements); if (else_result.has_multiple_tags) { - add_multiple_tags_error_to_erb_node((AST_NODE_T*) unless_node->else_clause, else_result.second_tag); + add_multiple_tags_error_to_erb_node((AST_NODE_T*) unless_node->else_clause, else_result.second_tag, arena); return; } } -static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* document_errors) { - (void) document_errors; +static void rewrite_conditional_open_tags(hb_array_T* nodes, conditional_open_tags_context_T* context) { + hb_arena_T* arena = context->arena; if (!nodes || hb_array_size(nodes) == 0) { return; } @@ -297,7 +307,7 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen conditional_node = node; tag_name_token = get_first_branch_tag_name_token(if_node); } else { - check_and_report_multiple_tags_in_if(if_node); + check_and_report_multiple_tags_in_if(if_node, arena); } } else if (node->type == AST_ERB_UNLESS_NODE) { AST_ERB_UNLESS_NODE_T* unless_node = 
(AST_ERB_UNLESS_NODE_T*) node; @@ -307,7 +317,7 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen conditional_node = node; tag_name_token = get_first_branch_tag_name_token_unless(unless_node); } else { - check_and_report_multiple_tags_in_unless(unless_node); + check_and_report_multiple_tags_in_unless(unless_node, arena); } } @@ -336,7 +346,8 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen false, conditional_node->location.start, conditional_node->location.end, - conditional_open_tag_errors + conditional_open_tag_errors, + arena ); hb_array_T* element_errors = hb_array_init(1); @@ -350,7 +361,8 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen ELEMENT_SOURCE_HTML, start_position, end_position, - element_errors + element_errors, + arena ); hb_array_set(nodes, i, element); @@ -400,44 +412,44 @@ static void rewrite_conditional_open_tags(hb_array_T* nodes, hb_array_T* documen hb_array_free(&consumed_indices); } -static void transform_conditional_open_tags_in_array(hb_array_T* array, hb_array_T* document_errors) { +static void transform_conditional_open_tags_in_array(hb_array_T* array, conditional_open_tags_context_T* context) { if (!array) { return; } for (size_t i = 0; i < hb_array_size(array); i++) { AST_NODE_T* child = (AST_NODE_T*) hb_array_get(array, i); - if (child) { herb_visit_node(child, transform_conditional_open_tags_visitor, document_errors); } + if (child) { herb_visit_node(child, transform_conditional_open_tags_visitor, context); } } - rewrite_conditional_open_tags(array, document_errors); + rewrite_conditional_open_tags(array, context); } static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void* data) { if (!node) { return false; } - hb_array_T* document_errors = (hb_array_T*) data; + conditional_open_tags_context_T* context = (conditional_open_tags_context_T*) data; switch (node->type) { case AST_DOCUMENT_NODE: { 
AST_DOCUMENT_NODE_T* doc = (AST_DOCUMENT_NODE_T*) node; - transform_conditional_open_tags_in_array(doc->children, document_errors); + transform_conditional_open_tags_in_array(doc->children, context); return false; } case AST_HTML_ELEMENT_NODE: { AST_HTML_ELEMENT_NODE_T* element = (AST_HTML_ELEMENT_NODE_T*) node; - transform_conditional_open_tags_in_array(element->body, document_errors); + transform_conditional_open_tags_in_array(element->body, context); return false; } case AST_HTML_CONDITIONAL_ELEMENT_NODE: { AST_HTML_CONDITIONAL_ELEMENT_NODE_T* conditional = (AST_HTML_CONDITIONAL_ELEMENT_NODE_T*) node; - transform_conditional_open_tags_in_array(conditional->body, document_errors); + transform_conditional_open_tags_in_array(conditional->body, context); return false; } case AST_ERB_IF_NODE: { AST_ERB_IF_NODE_T* if_node = (AST_ERB_IF_NODE_T*) node; - transform_conditional_open_tags_in_array(if_node->statements, document_errors); + transform_conditional_open_tags_in_array(if_node->statements, context); if (if_node->subsequent) { herb_visit_node(if_node->subsequent, transform_conditional_open_tags_visitor, data); } @@ -446,13 +458,13 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_ELSE_NODE: { AST_ERB_ELSE_NODE_T* else_node = (AST_ERB_ELSE_NODE_T*) node; - transform_conditional_open_tags_in_array(else_node->statements, document_errors); + transform_conditional_open_tags_in_array(else_node->statements, context); return false; } case AST_ERB_UNLESS_NODE: { AST_ERB_UNLESS_NODE_T* unless_node = (AST_ERB_UNLESS_NODE_T*) node; - transform_conditional_open_tags_in_array(unless_node->statements, document_errors); + transform_conditional_open_tags_in_array(unless_node->statements, context); if (unless_node->else_clause) { herb_visit_node((AST_NODE_T*) unless_node->else_clause, transform_conditional_open_tags_visitor, data); @@ -462,31 +474,31 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void 
case AST_ERB_BLOCK_NODE: { AST_ERB_BLOCK_NODE_T* block_node = (AST_ERB_BLOCK_NODE_T*) node; - transform_conditional_open_tags_in_array(block_node->body, document_errors); + transform_conditional_open_tags_in_array(block_node->body, context); return false; } case AST_ERB_WHILE_NODE: { AST_ERB_WHILE_NODE_T* while_node = (AST_ERB_WHILE_NODE_T*) node; - transform_conditional_open_tags_in_array(while_node->statements, document_errors); + transform_conditional_open_tags_in_array(while_node->statements, context); return false; } case AST_ERB_UNTIL_NODE: { AST_ERB_UNTIL_NODE_T* until_node = (AST_ERB_UNTIL_NODE_T*) node; - transform_conditional_open_tags_in_array(until_node->statements, document_errors); + transform_conditional_open_tags_in_array(until_node->statements, context); return false; } case AST_ERB_FOR_NODE: { AST_ERB_FOR_NODE_T* for_node = (AST_ERB_FOR_NODE_T*) node; - transform_conditional_open_tags_in_array(for_node->statements, document_errors); + transform_conditional_open_tags_in_array(for_node->statements, context); return false; } case AST_ERB_CASE_NODE: { AST_ERB_CASE_NODE_T* case_node = (AST_ERB_CASE_NODE_T*) node; - transform_conditional_open_tags_in_array(case_node->children, document_errors); + transform_conditional_open_tags_in_array(case_node->children, context); for (size_t i = 0; i < hb_array_size(case_node->conditions); i++) { AST_NODE_T* when_node = (AST_NODE_T*) hb_array_get(case_node->conditions, i); @@ -502,7 +514,7 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_CASE_MATCH_NODE: { AST_ERB_CASE_MATCH_NODE_T* case_match_node = (AST_ERB_CASE_MATCH_NODE_T*) node; - transform_conditional_open_tags_in_array(case_match_node->children, document_errors); + transform_conditional_open_tags_in_array(case_match_node->children, context); for (size_t i = 0; i < hb_array_size(case_match_node->conditions); i++) { AST_NODE_T* in_node = (AST_NODE_T*) hb_array_get(case_match_node->conditions, i); @@ -518,19 
+530,19 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_WHEN_NODE: { AST_ERB_WHEN_NODE_T* when_node = (AST_ERB_WHEN_NODE_T*) node; - transform_conditional_open_tags_in_array(when_node->statements, document_errors); + transform_conditional_open_tags_in_array(when_node->statements, context); return false; } case AST_ERB_IN_NODE: { AST_ERB_IN_NODE_T* in_node = (AST_ERB_IN_NODE_T*) node; - transform_conditional_open_tags_in_array(in_node->statements, document_errors); + transform_conditional_open_tags_in_array(in_node->statements, context); return false; } case AST_ERB_BEGIN_NODE: { AST_ERB_BEGIN_NODE_T* begin_node = (AST_ERB_BEGIN_NODE_T*) node; - transform_conditional_open_tags_in_array(begin_node->statements, document_errors); + transform_conditional_open_tags_in_array(begin_node->statements, context); if (begin_node->rescue_clause) { herb_visit_node((AST_NODE_T*) begin_node->rescue_clause, transform_conditional_open_tags_visitor, data); @@ -549,7 +561,7 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_RESCUE_NODE: { AST_ERB_RESCUE_NODE_T* rescue_node = (AST_ERB_RESCUE_NODE_T*) node; - transform_conditional_open_tags_in_array(rescue_node->statements, document_errors); + transform_conditional_open_tags_in_array(rescue_node->statements, context); if (rescue_node->subsequent) { herb_visit_node((AST_NODE_T*) rescue_node->subsequent, transform_conditional_open_tags_visitor, data); @@ -560,7 +572,7 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void case AST_ERB_ENSURE_NODE: { AST_ERB_ENSURE_NODE_T* ensure_node = (AST_ERB_ENSURE_NODE_T*) node; - transform_conditional_open_tags_in_array(ensure_node->statements, document_errors); + transform_conditional_open_tags_in_array(ensure_node->statements, context); return false; } @@ -569,5 +581,7 @@ static bool transform_conditional_open_tags_visitor(const AST_NODE_T* node, void } void 
herb_transform_conditional_open_tags(AST_DOCUMENT_NODE_T* document) { - herb_visit_node((AST_NODE_T*) document, transform_conditional_open_tags_visitor, document->base.errors); + conditional_open_tags_context_T context = { .errors = document->base.errors, .arena = document->arena }; + + herb_visit_node((AST_NODE_T*) document, transform_conditional_open_tags_visitor, &context); } diff --git a/src/ast_node.c b/src/ast_node.c index be8dcd6ca..688802ba6 100644 --- a/src/ast_node.c +++ b/src/ast_node.c @@ -27,12 +27,12 @@ void ast_node_init(AST_NODE_T* node, const ast_node_type_T type, position_T star } } -AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) { - AST_LITERAL_NODE_T* literal = malloc(sizeof(AST_LITERAL_NODE_T)); +AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token, hb_arena_T* arena) { + AST_LITERAL_NODE_T* literal = hb_arena_alloc(arena, sizeof(AST_LITERAL_NODE_T)); ast_node_init(&literal->base, AST_LITERAL_NODE, token->location.start, token->location.end, NULL); - literal->content = herb_strdup(token->value); + literal->content = hb_arena_strdup(arena, token->value); return literal; } diff --git a/src/extract.c b/src/extract.c index 0c67cd6d7..3a8c4dfc3 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1,5 +1,7 @@ #include "include/herb.h" #include "include/io.h" +#include "include/macros.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" #include "include/util/hb_buffer.h" #include "include/util/string.h" @@ -19,7 +21,24 @@ void herb_extract_ruby_to_buffer_with_options( ) { herb_extract_ruby_options_T extract_options = options ? 
*options : HERB_EXTRACT_RUBY_DEFAULT_OPTIONS; - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } + + hb_array_T* tokens = result->tokens; bool skip_erb_content = false; bool is_comment_tag = false; bool is_erb_comment_tag = false; @@ -136,7 +155,7 @@ void herb_extract_ruby_to_buffer_with_options( } } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); } void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) { @@ -144,7 +163,24 @@ void herb_extract_ruby_to_buffer(const char* source, hb_buffer_T* output) { } void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) { - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } + + hb_array_T* tokens = result->tokens; for (size_t i = 0; i < hb_array_size(tokens); i++) { const token_T* token = hb_array_get(tokens, i); @@ -157,7 +193,7 @@ void herb_extract_html_to_buffer(const char* source, hb_buffer_T* output) { } } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); } char* herb_extract_ruby_with_semicolons(const char* source) { diff --git a/src/herb.c b/src/herb.c index cd07317fc..321c3f040 100644 --- a/src/herb.c +++ b/src/herb.c @@ -2,8 +2,10 @@ #include "include/analyze.h" #include "include/io.h" #include "include/lexer.h" +#include "include/macros.h" #include "include/parser.h" #include "include/token.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" #include "include/util/hb_buffer.h" #include "include/version.h" @@ 
-11,9 +13,11 @@ #include #include -HERB_EXPORTED_FUNCTION hb_array_T* herb_lex(const char* source) { +HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex(const char* source, hb_arena_T* arena) { + if (!arena) { return NULL; } + lexer_T lexer = { 0 }; - lexer_init(&lexer, source); + lexer_init(&lexer, source, arena); token_T* token = NULL; hb_array_T* tokens = hb_array_init(128); @@ -24,14 +28,28 @@ HERB_EXPORTED_FUNCTION hb_array_T* herb_lex(const char* source) { hb_array_append(tokens, token); - return tokens; + herb_lex_result_T* result = malloc(sizeof(herb_lex_result_T)); + if (!result) { + hb_array_free(&tokens); + return NULL; + } + + result->tokens = tokens; + result->arena = arena; + + return result; } -HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const parser_options_T* options) { +HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse( + const char* source, + const parser_options_T* options, + hb_arena_T* arena +) { if (!source) { source = ""; } + if (!arena) { return NULL; } lexer_T lexer = { 0 }; - lexer_init(&lexer, source); + lexer_init(&lexer, source, arena); parser_T parser = { 0 }; parser_options_T parser_options = HERB_DEFAULT_PARSER_OPTIONS; @@ -49,20 +67,34 @@ HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const return document; } -HERB_EXPORTED_FUNCTION hb_array_T* herb_lex_file(const char* path) { +HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex_file(const char* path, hb_arena_T* arena) { char* source = herb_read_file(path); - hb_array_T* tokens = herb_lex(source); + herb_lex_result_T* result = herb_lex(source, arena); free(source); - return tokens; + return result; } HERB_EXPORTED_FUNCTION void herb_lex_to_buffer(const char* source, hb_buffer_T* output) { - hb_array_T* tokens = herb_lex(source); + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + if (!arena) { return; } + + if (!hb_arena_init(arena, KB(512))) { + free(arena); + return; + } + + herb_lex_result_T* result = 
herb_lex(source, arena); + + if (!result) { + hb_arena_free(arena); + free(arena); + return; + } - for (size_t i = 0; i < hb_array_size(tokens); i++) { - token_T* token = hb_array_get(tokens, i); + for (size_t i = 0; i < hb_array_size(result->tokens); i++) { + token_T* token = hb_array_get(result->tokens, i); hb_string_T type = token_to_string(token); hb_buffer_append_string(output, type); @@ -71,7 +103,23 @@ HERB_EXPORTED_FUNCTION void herb_lex_to_buffer(const char* source, hb_buffer_T* hb_buffer_append(output, "\n"); } - herb_free_tokens(&tokens); + herb_free_lex_result(&result); +} + +void herb_free_lex_result(herb_lex_result_T** result) { + if (!result || !*result) { return; } + + herb_lex_result_T* r = *result; + + if (r->tokens) { hb_array_free(&r->tokens); } + + if (r->arena) { + hb_arena_free(r->arena); + free(r->arena); + } + + free(r); + *result = NULL; } HERB_EXPORTED_FUNCTION void herb_free_tokens(hb_array_T** tokens) { diff --git a/src/include/analyze.h b/src/include/analyze.h index ac8d7e73b..b9b945695 100644 --- a/src/include/analyze.h +++ b/src/include/analyze.h @@ -9,6 +9,7 @@ typedef struct ANALYZE_RUBY_CONTEXT_STRUCT { AST_DOCUMENT_NODE_T* document; AST_NODE_T* parent; hb_array_T* ruby_context_stack; + hb_arena_T* arena; } analyze_ruby_context_T; typedef enum { @@ -36,6 +37,7 @@ typedef enum { typedef struct { int loop_depth; int rescue_depth; + hb_arena_T* arena; } invalid_erb_context_T; void herb_analyze_parse_errors(AST_DOCUMENT_NODE_T* document, const char* source); diff --git a/src/include/analyze_helpers.h b/src/include/analyze_helpers.h index 8c4f94c61..285ab2756 100644 --- a/src/include/analyze_helpers.h +++ b/src/include/analyze_helpers.h @@ -6,6 +6,7 @@ #include "analyzed_ruby.h" #include "ast_node.h" +#include "util/hb_arena.h" bool has_if_node(analyzed_ruby_T* analyzed); bool has_elsif_node(analyzed_ruby_T* analyzed); @@ -58,6 +59,6 @@ bool search_unexpected_in_nodes(analyzed_ruby_T* analyzed); bool 
search_unexpected_rescue_nodes(analyzed_ruby_T* analyzed); bool search_unexpected_when_nodes(analyzed_ruby_T* analyzed); -void check_erb_node_for_missing_end(const AST_NODE_T* node); +void check_erb_node_for_missing_end(const AST_NODE_T* node, hb_arena_T* arena); #endif diff --git a/src/include/ast_node.h b/src/include/ast_node.h index 98b3801b0..2bfb59b81 100644 --- a/src/include/ast_node.h +++ b/src/include/ast_node.h @@ -5,11 +5,12 @@ #include "errors.h" #include "position.h" #include "token_struct.h" +#include "util/hb_arena.h" void ast_node_init(AST_NODE_T* node, ast_node_type_T type, position_T start, position_T end, hb_array_T* errors); void ast_node_free(AST_NODE_T* node); -AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token); +AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token, hb_arena_T* arena); size_t ast_node_sizeof(void); size_t ast_node_child_count(AST_NODE_T* node); diff --git a/src/include/herb.h b/src/include/herb.h index 3e6876dd7..bd120cfd5 100644 --- a/src/include/herb.h +++ b/src/include/herb.h @@ -5,6 +5,7 @@ #include "extract.h" #include "macros.h" #include "parser.h" +#include "util/hb_arena.h" #include "util/hb_array.h" #include "util/hb_buffer.h" @@ -14,16 +15,26 @@ extern "C" { #endif +typedef struct { + hb_array_T* tokens; + hb_arena_T* arena; +} herb_lex_result_T; + HERB_EXPORTED_FUNCTION void herb_lex_to_buffer(const char* source, hb_buffer_T* output); -HERB_EXPORTED_FUNCTION hb_array_T* herb_lex(const char* source); -HERB_EXPORTED_FUNCTION hb_array_T* herb_lex_file(const char* path); +HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex(const char* source, hb_arena_T* arena); +HERB_EXPORTED_FUNCTION herb_lex_result_T* herb_lex_file(const char* path, hb_arena_T* arena); -HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse(const char* source, const parser_options_T* options); +HERB_EXPORTED_FUNCTION AST_DOCUMENT_NODE_T* herb_parse( + const char* source, + const parser_options_T* options, + 
hb_arena_T* arena +); HERB_EXPORTED_FUNCTION const char* herb_version(void); HERB_EXPORTED_FUNCTION const char* herb_prism_version(void); +void herb_free_lex_result(herb_lex_result_T** result); HERB_EXPORTED_FUNCTION void herb_free_tokens(hb_array_T** tokens); #ifdef __cplusplus diff --git a/src/include/lexer.h b/src/include/lexer.h index 142f3fb1c..adb13935e 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -3,8 +3,9 @@ #include "lexer_struct.h" #include "token_struct.h" +#include "util/hb_arena.h" -void lexer_init(lexer_T* lexer, const char* source); +void lexer_init(lexer_T* lexer, const char* source, hb_arena_T* arena); token_T* lexer_next_token(lexer_T* lexer); token_T* lexer_error(lexer_T* lexer, const char* message); diff --git a/src/include/lexer_struct.h b/src/include/lexer_struct.h index 94b132559..46e6a492a 100644 --- a/src/include/lexer_struct.h +++ b/src/include/lexer_struct.h @@ -1,6 +1,7 @@ #ifndef HERB_LEXER_STRUCT_H #define HERB_LEXER_STRUCT_H +#include "util/hb_arena.h" #include "util/hb_string.h" #include @@ -29,6 +30,8 @@ typedef struct LEXER_STRUCT { uint32_t stall_counter; uint32_t last_position; bool stalled; + + hb_arena_T* arena; } lexer_T; #endif diff --git a/src/include/parser.h b/src/include/parser.h index 81b34d8d2..f1b276d68 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -3,6 +3,7 @@ #include "ast_node.h" #include "lexer.h" +#include "util/hb_arena.h" #include "util/hb_array.h" typedef enum { @@ -24,6 +25,7 @@ typedef struct PARSER_OPTIONS_STRUCT { typedef struct MATCH_TAGS_CONTEXT_STRUCT { hb_array_T* errors; bool strict; + hb_arena_T* arena; } match_tags_context_T; extern const parser_options_T HERB_DEFAULT_PARSER_OPTIONS; @@ -37,6 +39,7 @@ typedef struct PARSER_STRUCT { parser_options_T options; size_t consecutive_error_count; bool in_recovery_mode; + hb_arena_T* arena; } parser_T; size_t parser_sizeof(void); @@ -48,7 +51,7 @@ AST_DOCUMENT_NODE_T* herb_parser_parse(parser_T* parser); void 
herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document, bool strict); void herb_parser_deinit(parser_T* parser); -void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict); +void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict, hb_arena_T* arena); bool match_tags_visitor(const AST_NODE_T* node, void* data); #endif diff --git a/src/include/parser_helpers.h b/src/include/parser_helpers.h index 537f7a5bc..b0b90c6c8 100644 --- a/src/include/parser_helpers.h +++ b/src/include/parser_helpers.h @@ -2,9 +2,7 @@ #define HERB_PARSER_HELPERS_H #include "ast_nodes.h" -#include "errors.h" #include "parser.h" -#include "token.h" #include "util/hb_array.h" #include "util/hb_buffer.h" #include "util/hb_string.h" @@ -44,6 +42,7 @@ token_T* parser_consume_if_present(parser_T* parser, token_type_T type); token_T* parser_consume_expected(parser_T* parser, token_type_T type, hb_array_T* array); AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( + const parser_T* parser, AST_HTML_OPEN_TAG_NODE_T* open_tag, hb_array_T* body, hb_array_T* errors diff --git a/src/include/token.h b/src/include/token.h index 9680bbf6f..34b8850d5 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -2,15 +2,20 @@ #define HERB_TOKEN_H #include "lexer_struct.h" -#include "position.h" #include "token_struct.h" +#include "util/hb_arena.h" #include "util/hb_string.h" token_T* token_init(hb_string_T value, token_type_T type, lexer_T* lexer); hb_string_T token_to_string(const token_T* token); const char* token_type_to_string(token_type_T type); -token_T* token_copy(token_T* token); +char* token_value(const token_T* token); +int token_type(const token_T* token); + +size_t token_sizeof(void); + +token_T* token_copy(token_T* token, hb_arena_T* arena); void token_free(token_T* token); diff --git a/src/include/token_struct.h b/src/include/token_struct.h index 8ff44f04c..b6b9cd5a7 100644 --- a/src/include/token_struct.h +++ 
b/src/include/token_struct.h @@ -1,6 +1,8 @@ #ifndef HERB_TOKEN_STRUCT_H #define HERB_TOKEN_STRUCT_H +#include + #include "location.h" #include "range.h" @@ -54,6 +56,7 @@ typedef struct TOKEN_STRUCT { range_T range; location_T location; token_type_T type; + bool arena_allocated; } token_T; #endif diff --git a/src/include/util/hb_arena.h b/src/include/util/hb_arena.h index 8c89be6d5..8333781ca 100644 --- a/src/include/util/hb_arena.h +++ b/src/include/util/hb_arena.h @@ -22,6 +22,8 @@ typedef struct HB_ARENA_STRUCT { bool hb_arena_init(hb_arena_T* allocator, size_t initial_size); void* hb_arena_alloc(hb_arena_T* allocator, size_t size); +char* hb_arena_strdup(hb_arena_T* allocator, const char* string); +char* hb_arena_strndup(hb_arena_T* allocator, const char* string, size_t length); size_t hb_arena_position(hb_arena_T* allocator); size_t hb_arena_capacity(hb_arena_T* allocator); void hb_arena_reset(hb_arena_T* allocator); diff --git a/src/lexer.c b/src/lexer.c index 60a0c6194..0e7be6b35 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -30,7 +30,7 @@ static bool lexer_stalled(lexer_T* lexer) { return lexer->stalled; } -void lexer_init(lexer_T* lexer, const char* source) { +void lexer_init(lexer_T* lexer, const char* source, hb_arena_T* arena) { if (source != NULL) { lexer->source = hb_string(source); } else { @@ -51,6 +51,8 @@ void lexer_init(lexer_T* lexer, const char* source) { lexer->stall_counter = 0; lexer->last_position = 0; lexer->stalled = false; + + lexer->arena = arena; } token_T* lexer_error(lexer_T* lexer, const char* message) { diff --git a/src/lexer_peek_helpers.c b/src/lexer_peek_helpers.c index 3b4862a3b..5a07f1897 100644 --- a/src/lexer_peek_helpers.c +++ b/src/lexer_peek_helpers.c @@ -2,7 +2,6 @@ #include "include/lexer.h" #include "include/lexer_struct.h" #include "include/macros.h" -#include "include/token.h" #include "include/util/hb_string.h" #include @@ -92,14 +91,11 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T 
tok token_T* token = lexer_next_token(lexer); while (token && (token->type == TOKEN_WHITESPACE || token->type == TOKEN_NEWLINE)) { - token_free(token); token = lexer_next_token(lexer); } bool result = (token && token->type == token_type); - if (token) { token_free(token); } - lexer->current_position = saved_position; lexer->current_line = saved_line; lexer->current_column = saved_column; diff --git a/src/main.c b/src/main.c index b45caa5d7..8bb48d595 100644 --- a/src/main.c +++ b/src/main.c @@ -6,7 +6,11 @@ #include "include/extract.h" #include "include/herb.h" #include "include/io.h" +#include "include/macros.h" #include "include/ruby_parser.h" +#include "include/token.h" +#include "include/util/hb_arena.h" +#include "include/util/hb_arena_debug.h" #include "include/util/hb_buffer.h" #include "include/util/string.h" @@ -33,6 +37,23 @@ void print_time_diff(const struct timespec start, const struct timespec end, con printf(" %8.6f s\n\n", s); } +static hb_arena_T* allocate_arena(void) { + hb_arena_T* arena = malloc(sizeof(hb_arena_T)); + + if (!arena) { + fprintf(stderr, "Failed to allocate arena\n"); + return NULL; + } + + if (!hb_arena_init(arena, KB(16))) { + fprintf(stderr, "Failed to initialize arena\n"); + free(arena); + return NULL; + } + + return arena; +} + int main(const int argc, char* argv[]) { if (argc < 2) { puts("./herb [command] [options]\n"); @@ -62,13 +83,59 @@ int main(const int argc, char* argv[]) { struct timespec start, end; clock_gettime(CLOCK_MONOTONIC, &start); + if (string_equals(argv[1], "visit")) { + hb_arena_T* arena = allocate_arena(); + if (!arena) { + free(source); + return 1; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL, arena); + clock_gettime(CLOCK_MONOTONIC, &end); + + ast_pretty_print_node((AST_NODE_T*) root, 0, 0, &output); + printf("%s\n", output.value); + + print_time_diff(start, end, "visiting"); + + ast_node_free((AST_NODE_T*) root); + free(output.value); + free(source); + + return 0; + } + if 
(string_equals(argv[1], "lex")) { - herb_lex_to_buffer(source, &output); + hb_arena_T* arena = allocate_arena(); + if (!arena) { + free(source); + return 1; + } + + herb_lex_result_T* result = herb_lex(source, arena); clock_gettime(CLOCK_MONOTONIC, &end); - puts(output.value); - print_time_diff(start, end, "lexing"); + int silent = 0; + if (argc > 3 && string_equals(argv[3], "--silent")) { silent = 1; } + + if (!silent) { + for (size_t i = 0; i < hb_array_size(result->tokens); i++) { + token_T* token = hb_array_get(result->tokens, i); + hb_string_T type = token_to_string(token); + hb_buffer_append_string(&output, type); + free(type.data); + hb_buffer_append(&output, "\n"); + } + + puts(output.value); + print_time_diff(start, end, "lexing"); + + printf("\n"); + hb_arena_print_stats(arena); + } + hb_arena_free(arena); + free(arena); free(output.value); free(source); @@ -76,7 +143,13 @@ int main(const int argc, char* argv[]) { } if (string_equals(argv[1], "parse")) { - AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL); + hb_arena_T* arena = allocate_arena(); + if (!arena) { + free(source); + return 1; + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source, NULL, arena); clock_gettime(CLOCK_MONOTONIC, &end); @@ -88,6 +161,9 @@ int main(const int argc, char* argv[]) { puts(output.value); print_time_diff(start, end, "parsing"); + + printf("\n"); + hb_arena_print_stats(arena); } ast_node_free((AST_NODE_T*) root); diff --git a/src/parser.c b/src/parser.c index b415bb5a3..9683d5066 100644 --- a/src/parser.c +++ b/src/parser.c @@ -48,6 +48,7 @@ void herb_parser_init(parser_T* parser, lexer_T* lexer, parser_options_T options parser->options = options; parser->consecutive_error_count = 0; parser->in_recovery_mode = false; + parser->arena = lexer->arena; } static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) { @@ -82,7 +83,8 @@ static AST_CDATA_NODE_T* parser_parse_cdata(parser_T* parser) { tag_closing, tag_opening->location.start, tag_closing->location.end, - errors 
+ errors, + parser->arena ); free(content.value); @@ -128,7 +130,8 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) { comment_end, comment_end->location.start, comment_end->location.end, - errors + errors, + parser->arena ); } else { comment_end = parser_consume_expected(parser, TOKEN_HTML_COMMENT_END, errors); @@ -140,7 +143,8 @@ static AST_HTML_COMMENT_NODE_T* parser_parse_html_comment(parser_T* parser) { comment_end, comment_start->location.start, comment_end->location.end, - errors + errors, + parser->arena ); free(comment.value); @@ -185,7 +189,8 @@ static AST_HTML_DOCTYPE_NODE_T* parser_parse_html_doctype(parser_T* parser) { tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -232,7 +237,8 @@ static AST_XML_DECLARATION_NODE_T* parser_parse_xml_declaration(parser_T* parser tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -267,7 +273,8 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra token->value, token->location.start, token->location.end, - document_errors + document_errors, + parser->arena ); token_free(token); @@ -285,10 +292,15 @@ static AST_HTML_TEXT_NODE_T* parser_parse_text_content(parser_T* parser, hb_arra AST_HTML_TEXT_NODE_T* text_node = NULL; if (hb_buffer_length(&content) > 0) { - text_node = - ast_html_text_node_init(hb_buffer_value(&content), start, parser->current_token->location.start, errors); + text_node = ast_html_text_node_init( + hb_buffer_value(&content), + start, + parser->current_token->location.start, + errors, + parser->arena + ); } else { - text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors); + text_node = ast_html_text_node_init("", start, parser->current_token->location.start, errors, parser->arena); } free(content.value); @@ -355,7 +367,7 @@ static 
AST_HTML_ATTRIBUTE_NAME_NODE_T* parser_parse_html_attribute_name(parser_T } AST_HTML_ATTRIBUTE_NAME_NODE_T* attribute_name = - ast_html_attribute_name_node_init(children, node_start, node_end, errors); + ast_html_attribute_name_node_init(children, node_start, node_end, errors, parser->arena); free(buffer.value); @@ -410,7 +422,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value opening_quote, opening_quote->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); parser_append_literal_node_from_buffer(parser, &buffer, children, start); @@ -423,7 +436,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value true, opening_quote->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); token_free(opening_quote); @@ -453,7 +467,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value opening_quote, opening_quote->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); parser_append_literal_node_from_buffer(parser, &buffer, children, start); @@ -466,7 +481,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value true, opening_quote->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); token_free(opening_quote); @@ -527,7 +543,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value opening_quote->value, potential_closing->location.start, potential_closing->location.end, - errors + errors, + parser->arena ); lexer_restore_state(parser->lexer, saved_state); @@ -579,7 +596,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_quoted_html_attribute_value true, opening_quote->location.start, closing_quote->location.end, - errors + errors, + parser->arena ); token_free(opening_quote); @@ -604,14 +622,15 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* 
parser_parse_html_attribute_value(parser false, erb_node->base.location.start, erb_node->base.location.end, - errors + errors, + parser->arena ); } //
if (token_is(parser, TOKEN_IDENTIFIER)) { token_T* identifier = parser_consume_expected(parser, TOKEN_IDENTIFIER, errors); - AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier); + AST_LITERAL_NODE_T* literal = ast_literal_node_init_from_token(identifier, parser->arena); token_free(identifier); hb_array_append(children, literal); @@ -623,7 +642,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser false, literal->base.location.start, literal->base.location.end, - errors + errors, + parser->arena ); } @@ -641,11 +661,12 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser "backtick (`)", start, end, - errors + errors, + parser->arena ); AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = - ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors); + ast_html_attribute_value_node_init(NULL, children, NULL, false, start, end, errors, parser->arena); token_free(token); @@ -658,7 +679,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser token_type_to_string(parser->current_token->type), parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); AST_HTML_ATTRIBUTE_VALUE_NODE_T* value = ast_html_attribute_value_node_init( @@ -668,7 +690,8 @@ static AST_HTML_ATTRIBUTE_VALUE_NODE_T* parser_parse_html_attribute_value(parser false, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); return value; @@ -724,11 +747,13 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) token_free(whitespace); } - token_T* equals_with_whitespace = calloc(1, sizeof(token_T)); + token_T* equals_with_whitespace = hb_arena_alloc(parser->arena, sizeof(token_T)); equals_with_whitespace->type = TOKEN_EQUALS; - equals_with_whitespace->value = herb_strdup(equals_buffer.value); + + equals_with_whitespace->value = 
hb_arena_strdup(parser->arena, equals_buffer.value); equals_with_whitespace->location = (location_T) { .start = equals_start, .end = equals_end }; equals_with_whitespace->range = (range_T) { .from = range_start, .to = range_end }; + equals_with_whitespace->arena_allocated = true; free(equals_buffer.value); @@ -740,7 +765,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) attribute_value, attribute_name->base.location.start, attribute_value->base.location.end, - NULL + NULL, + parser->arena ); } else { return ast_html_attribute_node_init( @@ -749,7 +775,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) NULL, attribute_name->base.location.start, attribute_name->base.location.end, - NULL + NULL, + parser->arena ); } } else { @@ -769,18 +796,19 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) if (hb_array_size(attribute_name->children) > 0) { AST_LITERAL_NODE_T* first_child = (AST_LITERAL_NODE_T*) hb_array_get(attribute_name->children, 0); - if (first_child && first_child->content) { attribute_name_string = herb_strdup(first_child->content); } + if (first_child && first_child->content) { + attribute_name_string = hb_arena_strdup(parser->arena, first_child->content); + } } append_missing_attribute_value_error( attribute_name_string ? 
attribute_name_string : "unknown", equals->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); - if (attribute_name_string) { free(attribute_name_string); } - AST_HTML_ATTRIBUTE_VALUE_NODE_T* empty_value = ast_html_attribute_value_node_init( NULL, hb_array_init(8), @@ -788,7 +816,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) false, equals->location.end, parser->current_token->location.start, - errors + errors, + parser->arena ); AST_HTML_ATTRIBUTE_NODE_T* attribute_node = ast_html_attribute_node_init( @@ -797,7 +826,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) empty_value, attribute_name->base.location.start, parser->current_token->location.start, - NULL + NULL, + parser->arena ); token_free(equals); @@ -813,7 +843,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) attribute_value, attribute_name->base.location.start, attribute_value->base.location.end, - NULL + NULL, + parser->arena ); token_free(equals); @@ -827,7 +858,8 @@ static AST_HTML_ATTRIBUTE_NODE_T* parser_parse_html_attribute(parser_T* parser) NULL, attribute_name->base.location.start, attribute_name->base.location.end, - NULL + NULL, + parser->arena ); } @@ -959,7 +991,13 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { while (token_is_none_of(parser, TOKEN_HTML_TAG_END, TOKEN_HTML_TAG_SELF_CLOSE, TOKEN_EOF)) { if (token_is_any_of(parser, TOKEN_HTML_TAG_START, TOKEN_HTML_TAG_START_CLOSE)) { - append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors); + append_unclosed_open_tag_error( + tag_name, + tag_name->location.start, + parser->current_token->location.start, + errors, + parser->arena + ); AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init( tag_start, @@ -969,7 +1007,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* 
parser) { false, tag_start->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); token_free(tag_start); @@ -1021,7 +1060,13 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { } if (token_is(parser, TOKEN_EOF)) { - append_unclosed_open_tag_error(tag_name, tag_name->location.start, parser->current_token->location.start, errors); + append_unclosed_open_tag_error( + tag_name, + tag_name->location.start, + parser->current_token->location.start, + errors, + parser->arena + ); AST_HTML_OPEN_TAG_NODE_T* open_tag_node = ast_html_open_tag_node_init( tag_start, @@ -1031,7 +1076,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { false, tag_start->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); token_free(tag_start); @@ -1068,7 +1114,8 @@ static AST_HTML_OPEN_TAG_NODE_T* parser_parse_html_open_tag(parser_T* parser) { is_self_closing, tag_start->location.start, tag_end->location.end, - errors + errors, + parser->arena ); token_free(tag_start); @@ -1102,7 +1149,8 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) got.data, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); free(expected.data); @@ -1116,7 +1164,8 @@ static AST_HTML_CLOSE_TAG_NODE_T* parser_parse_html_close_tag(parser_T* parser) tag_closing, tag_opening->location.start, tag_closing->location.end, - errors + errors, + parser->arena ); token_free(tag_opening); @@ -1140,7 +1189,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_self_closing_element( ELEMENT_SOURCE_HTML, open_tag->base.location.start, open_tag->base.location.end, - NULL + NULL, + parser->arena ); } @@ -1161,7 +1211,9 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( parser_parse_in_data_state(parser, body, errors); } - if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { return 
parser_handle_missing_close_tag(open_tag, body, errors); } + if (!token_is(parser, TOKEN_HTML_TAG_START_CLOSE)) { + return parser_handle_missing_close_tag(parser, open_tag, body, errors); + } AST_HTML_CLOSE_TAG_NODE_T* close_tag = parser_parse_html_close_tag(parser); @@ -1183,7 +1235,13 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( token_T* unclosed = parser_pop_open_tag(parser); if (unclosed != NULL) { - append_missing_closing_tag_error(unclosed, unclosed->location.start, unclosed->location.end, errors); + append_missing_closing_tag_error( + unclosed, + unclosed->location.start, + unclosed->location.end, + errors, + parser->arena + ); token_free(unclosed); } } @@ -1203,7 +1261,8 @@ static AST_HTML_ELEMENT_NODE_T* parser_parse_html_regular_element( ELEMENT_SOURCE_HTML, open_tag->base.location.start, close_tag->base.location.end, - errors + errors, + parser->arena ); } @@ -1248,7 +1307,8 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) { parser->current_token->location.start.column, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); end_position = parser->current_token->location.start; } else { @@ -1256,7 +1316,8 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) { opening_tag, opening_tag->location.start, parser->current_token->location.start, - errors + errors, + parser->arena ); end_position = parser->current_token->location.start; } @@ -1270,7 +1331,8 @@ static AST_ERB_CONTENT_NODE_T* parser_parse_erb_tag(parser_T* parser) { false, opening_tag->location.start, end_position, - errors + errors, + parser->arena ); token_free(opening_tag); @@ -1481,9 +1543,19 @@ static size_t find_implicit_close_index(hb_array_T* nodes, size_t start_idx, hb_ return hb_array_size(nodes); } -static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors, bool strict); - -static hb_array_T* 
parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T* errors, bool strict) { +static hb_array_T* parser_build_elements_from_tags( + hb_array_T* nodes, + hb_array_T* errors, + bool strict, + hb_arena_T* arena +); + +static hb_array_T* parser_build_elements_from_tags( + hb_array_T* nodes, + hb_array_T* errors, + bool strict, + hb_arena_T* arena +) { hb_array_T* result = hb_array_init(hb_array_size(nodes)); for (size_t index = 0; index < hb_array_size(nodes); index++) { @@ -1506,7 +1578,7 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T hb_array_append(body, hb_array_get(nodes, j)); } - hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, strict); + hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, strict, arena); hb_array_free(&body); position_T end_position = open_tag->base.location.end; @@ -1524,12 +1596,18 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T end_position, open_tag->base.location.start, open_tag->base.location.end, - element_errors + element_errors, + arena ); } - AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = - ast_html_omitted_close_tag_node_init(open_tag->tag_name, end_position, end_position, hb_array_init(8)); + AST_HTML_OMITTED_CLOSE_TAG_NODE_T* omitted_close_tag = ast_html_omitted_close_tag_node_init( + open_tag->tag_name, + end_position, + end_position, + hb_array_init(8), + arena + ); AST_HTML_ELEMENT_NODE_T* element = ast_html_element_node_init( (AST_NODE_T*) open_tag, @@ -1540,7 +1618,8 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T ELEMENT_SOURCE_HTML, open_tag->base.location.start, end_position, - element_errors + element_errors, + arena ); hb_array_append(result, element); @@ -1552,7 +1631,8 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T open_tag->tag_name, open_tag->base.location.start, open_tag->base.location.end, - 
open_tag->base.errors + open_tag->base.errors, + arena ); } @@ -1567,7 +1647,7 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T hb_array_append(body, hb_array_get(nodes, j)); } - hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, strict); + hb_array_T* processed_body = parser_build_elements_from_tags(body, errors, strict, arena); hb_array_free(&body); hb_array_T* element_errors = hb_array_init(8); @@ -1581,7 +1661,8 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T ELEMENT_SOURCE_HTML, open_tag->base.location.start, close_tag->base.location.end, - element_errors + element_errors, + arena ); hb_array_append(result, element); @@ -1597,7 +1678,8 @@ static hb_array_T* parser_build_elements_from_tags(hb_array_T* nodes, hb_array_T close_tag->tag_name, close_tag->base.location.start, close_tag->base.location.end, - close_tag->base.errors + close_tag->base.errors, + arena ); } } @@ -1620,7 +1702,8 @@ static AST_DOCUMENT_NODE_T* parser_parse_document(parser_T* parser) { token_T* eof = parser_consume_expected(parser, TOKEN_EOF, errors); - AST_DOCUMENT_NODE_T* document_node = ast_document_node_init(children, start, eof->location.end, errors); + AST_DOCUMENT_NODE_T* document_node = + ast_document_node_init(children, start, eof->location.end, errors, parser->arena); token_free(eof); @@ -1638,7 +1721,8 @@ static void parser_handle_whitespace(parser_T* parser, token_T* whitespace_token whitespace_token, whitespace_token->location.start, whitespace_token->location.end, - errors + errors, + parser->arena ); hb_array_append(children, whitespace_node); } @@ -1665,10 +1749,10 @@ void herb_parser_deinit(parser_T* parser) { if (parser->open_tags_stack != NULL) { hb_array_free(&parser->open_tags_stack); } } -void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict) { +void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict, hb_arena_T* arena) { if 
(nodes == NULL || hb_array_size(nodes) == 0) { return; } - hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, strict); + hb_array_T* processed = parser_build_elements_from_tags(nodes, errors, strict, arena); nodes->size = 0; @@ -1678,7 +1762,7 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict hb_array_free(&processed); - match_tags_context_T context = { .errors = errors, .strict = strict }; + match_tags_context_T context = { .errors = errors, .strict = strict, .arena = arena }; for (size_t i = 0; i < hb_array_size(nodes); i++) { AST_NODE_T* node = (AST_NODE_T*) hb_array_get(nodes, i); @@ -1691,5 +1775,5 @@ void match_tags_in_node_array(hb_array_T* nodes, hb_array_T* errors, bool strict void herb_parser_match_html_tags_post_analyze(AST_DOCUMENT_NODE_T* document, bool strict) { if (document == NULL) { return; } - match_tags_in_node_array(document->children, document->base.errors, strict); + match_tags_in_node_array(document->children, document->base.errors, strict, document->arena); } diff --git a/src/parser_helpers.c b/src/parser_helpers.c index b6a76f58c..3b941c9c6 100644 --- a/src/parser_helpers.c +++ b/src/parser_helpers.c @@ -11,7 +11,7 @@ #include void parser_push_open_tag(const parser_T* parser, token_T* tag_name) { - token_T* copy = token_copy(tag_name); + token_T* copy = token_copy(tag_name, parser->arena); hb_array_push(parser->open_tags_stack, copy); } @@ -104,7 +104,8 @@ void parser_append_unexpected_error( token_type_to_string(token->type), token->location.start, token->location.end, - errors + errors, + parser->arena ); token_free(token); @@ -116,7 +117,8 @@ void parser_append_unexpected_token_error(parser_T* parser, token_type_T expecte parser->current_token, parser->current_token->location.start, parser->current_token->location.end, - errors + errors, + parser->arena ); } @@ -129,7 +131,7 @@ void parser_append_literal_node_from_buffer( if (hb_buffer_length(buffer) == 0) { return; } 
AST_LITERAL_NODE_T* literal = - ast_literal_node_init(hb_buffer_value(buffer), start, parser->current_token->location.start, NULL); + ast_literal_node_init(hb_buffer_value(buffer), start, parser->current_token->location.start, NULL, parser->arena); if (children != NULL) { hb_array_append(children, literal); } hb_buffer_clear(buffer); @@ -152,13 +154,21 @@ token_T* parser_consume_expected(parser_T* parser, const token_type_T expected_t if (token == NULL) { token = parser_advance(parser); - append_unexpected_token_error(expected_type, token, token->location.start, token->location.end, array); + append_unexpected_token_error( + expected_type, + token, + token->location.start, + token->location.end, + array, + parser->arena + ); } return token; } AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( + const parser_T* parser, AST_HTML_OPEN_TAG_NODE_T* open_tag, hb_array_T* body, hb_array_T* errors @@ -167,7 +177,8 @@ AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( open_tag->tag_name, open_tag->tag_name->location.start, open_tag->tag_name->location.end, - errors + errors, + parser->arena ); return ast_html_element_node_init( @@ -179,7 +190,8 @@ AST_HTML_ELEMENT_NODE_T* parser_handle_missing_close_tag( ELEMENT_SOURCE_HTML, open_tag->base.location.start, open_tag->base.location.end, - errors + errors, + parser->arena ); } @@ -197,14 +209,16 @@ void parser_handle_mismatched_tags( actual_tag, actual_tag->location.start, actual_tag->location.end, - errors + errors, + parser->arena ); } else { append_missing_opening_tag_error( close_tag->tag_name, close_tag->tag_name->location.start, close_tag->tag_name->location.end, - errors + errors, + parser->arena ); } } diff --git a/src/prism_helpers.c b/src/prism_helpers.c index c7b76b0a3..7da4e54dd 100644 --- a/src/prism_helpers.c +++ b/src/prism_helpers.c @@ -36,7 +36,8 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error( pm_diagnostic_id_human(error->diag_id), pm_error_level_to_string(error->level), start, - end + 
end, + NULL ); } @@ -50,7 +51,8 @@ RUBY_PARSE_ERROR_T* ruby_parse_error_from_prism_error_with_positions( pm_diagnostic_id_human(error->diag_id), pm_error_level_to_string(error->level), start, - end + end, + NULL ); } diff --git a/src/token.c b/src/token.c index 932ae498e..45af9a120 100644 --- a/src/token.c +++ b/src/token.c @@ -9,17 +9,22 @@ #include #include +size_t token_sizeof(void) { + return sizeof(struct TOKEN_STRUCT); +} + token_T* token_init(hb_string_T value, const token_type_T type, lexer_T* lexer) { - token_T* token = calloc(1, sizeof(token_T)); + token_T* token = hb_arena_alloc(lexer->arena, token_sizeof()); if (type == TOKEN_NEWLINE) { lexer->current_line++; lexer->current_column = 0; } - token->value = hb_string_to_c_string_using_malloc(value); + token->value = value.data ? hb_arena_strndup(lexer->arena, value.data, value.length) : NULL; token->type = type; + token->arena_allocated = true; token->range = (range_T) { .from = lexer->previous_position, .to = lexer->current_position }; location_from( @@ -112,19 +117,23 @@ hb_string_T token_to_string(const token_T* token) { return hb_string(string); } -token_T* token_copy(token_T* token) { +token_T* token_copy(token_T* token, hb_arena_T* arena) { if (!token) { return NULL; } - token_T* new_token = calloc(1, sizeof(token_T)); + token_T* new_token = arena ? 
hb_arena_alloc(arena, token_sizeof()) : calloc(1, token_sizeof()); if (!new_token) { return NULL; } if (token->value) { - new_token->value = herb_strdup(token->value); - - if (!new_token->value) { - free(new_token); - return NULL; + if (arena) { + new_token->value = hb_arena_strdup(arena, token->value); + } else { + new_token->value = herb_strdup(token->value); + + if (!new_token->value) { + free(new_token); + return NULL; + } } } else { new_token->value = NULL; @@ -133,6 +142,7 @@ token_T* token_copy(token_T* token) { new_token->type = token->type; new_token->range = token->range; new_token->location = token->location; + new_token->arena_allocated = arena != NULL; return new_token; } @@ -144,7 +154,9 @@ bool token_value_empty(const token_T* token) { void token_free(token_T* token) { if (!token) { return; } - if (token->value != NULL) { free(token->value); } + if (!token->arena_allocated) { + if (token->value != NULL) { free(token->value); } - free(token); + free(token); + } } diff --git a/src/util/hb_arena.c b/src/util/hb_arena.c index 7c503df0a..bacc09e8f 100644 --- a/src/util/hb_arena.c +++ b/src/util/hb_arena.c @@ -7,7 +7,7 @@ #include #include -#define hb_arena_for_each_page(allocator, page) \ +#define hb_arena_for_each_page(allocator, _page) \ for (hb_arena_page_T* page = (allocator)->head; page != NULL; page = page->next) static inline size_t hb_arena_align_size(size_t size, size_t alignment) { @@ -109,6 +109,37 @@ void* hb_arena_alloc(hb_arena_T* allocator, size_t size) { return hb_arena_page_alloc_from(allocator->tail, required_size); } +char* hb_arena_strdup(hb_arena_T* allocator, const char* string) { + assert(allocator != NULL); + + if (string == NULL) { return NULL; } + + size_t length = strlen(string); + char* copy = hb_arena_alloc(allocator, length + 1); + + if (copy != NULL) { + memcpy(copy, string, length); + copy[length] = '\0'; + } + + return copy; +} + +char* hb_arena_strndup(hb_arena_T* allocator, const char* string, size_t length) { + 
assert(allocator != NULL); + + if (string == NULL) { return NULL; } + + char* copy = hb_arena_alloc(allocator, length + 1); + + if (copy != NULL) { + memcpy(copy, string, length); + copy[length] = '\0'; + } + + return copy; +} + size_t hb_arena_position(hb_arena_T* allocator) { size_t total = 0; diff --git a/templates/src/analyze_missing_end.c.erb b/templates/src/analyze_missing_end.c.erb index 291b89e4d..1ed9925e0 100644 --- a/templates/src/analyze_missing_end.c.erb +++ b/templates/src/analyze_missing_end.c.erb @@ -7,7 +7,7 @@ end -%> -void check_erb_node_for_missing_end(const AST_NODE_T* node) { +void check_erb_node_for_missing_end(const AST_NODE_T* node, hb_arena_T* arena) { switch (node->type) { <%- nodes_with_end_node.each do |node| -%> <%- keyword = node.name.gsub(/^ERB/, '').gsub(/Match|Node$/, '').downcase -%> @@ -23,7 +23,8 @@ void check_erb_node_for_missing_end(const AST_NODE_T* node) { <%- end -%> <%= node.human %>->tag_opening->location.start, <%= node.human %>->tag_closing->location.end, - node->errors + node->errors, + arena ); } diff --git a/templates/src/ast_nodes.c.erb b/templates/src/ast_nodes.c.erb index 4ec348db6..07fe68f7c 100644 --- a/templates/src/ast_nodes.c.erb +++ b/templates/src/ast_nodes.c.erb @@ -1,5 +1,6 @@ #include #include +#include #include @@ -9,21 +10,22 @@ #include "include/errors.h" #include "include/token.h" #include "include/util.h" +#include "include/util/hb_arena.h" #include "include/util/hb_array.h" <%- nodes.each do |node| -%> <%- node_arguments = node.fields.any? ? 
node.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors"] -%> +<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors", "hb_arena_T* arena"] -%> <%= node.struct_type %>* ast_<%= node.human %>_init(<%= arguments.join(", ") %>) { - <%= node.struct_type %>* <%= node.human %> = malloc(sizeof(<%= node.struct_type %>)); + <%= node.struct_type %>* <%= node.human %> = arena ? hb_arena_alloc(arena, sizeof(<%= node.struct_type %>)) : malloc(sizeof(<%= node.struct_type %>)); ast_node_init(&<%= node.human %>->base, <%= node.type %>, start_position, end_position, errors); <%- node.fields.each do |field| -%> <%- case field -%> <%- when Herb::Template::TokenField -%> - <%= node.human %>-><%= field.name %> = token_copy(<%= field.name %>); + <%= node.human %>-><%= field.name %> = token_copy(<%= field.name %>, arena); <%- when Herb::Template::NodeField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::ArrayField -%> @@ -35,7 +37,13 @@ <%- when Herb::Template::PrismNodeField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - <%= node.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + if (arena) { + char* temp = (char*) hb_arena_alloc(arena, strlen(<%= field.name %>) + 1); + strcpy(temp, <%= field.name %>); + <%= node.human %>-><%= field.name %> = temp; + } else { + <%= node.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + } <%- when Herb::Template::AnalyzedRubyField -%> <%= node.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::VoidPointerField -%> @@ -47,6 +55,11 @@ <%- end -%> <%- end -%> + <%- if node.human == "document_node" -%> + <%= node.human %>->arena = arena; + <%= node.human %>->owns_arena = true; + <%- end -%> + return <%= node.human %>; } <%- end -%> 
@@ -72,20 +85,60 @@ hb_string_T ast_node_human_type(AST_NODE_T* node) { } void ast_free_base_node(AST_NODE_T* node) { - if (node == NULL) { return; } + // Base node cleanup is intentionally empty in the arena-based implementation. + // The node itself is arena-allocated and freed when the arena is destroyed. + // The errors array is freed by ast_free_arrays_recursive() before arena cleanup. + // This function is kept as an extension point for any future non-arena base node cleanup. +} + +<%- nodes.each do |node| -%> +static void ast_free_arrays_<%= node.human %>(<%= node.struct_type %>* <%= node.human %>); +<%- end -%> + +static void ast_free_arrays_recursive(AST_NODE_T* node) { + if (!node) { return; } + + switch (node->type) { + <%- nodes.each do |node| -%> + case <%= node.type %>: ast_free_arrays_<%= node.human %>((<%= node.struct_type %>*) node); break; + <%- end -%> + } +} + +<%- nodes.each do |node| -%> + +static void ast_free_arrays_<%= node.human %>(<%= node.struct_type %>* <%= node.human %>) { + if (!<%= node.human %>) { return; } - if (node->errors) { - for (size_t i = 0; i < hb_array_size(node->errors); i++) { - ERROR_T* child = hb_array_get(node->errors, i); - if (child != NULL) { error_free(child); } + <%- node.fields.each do |field| -%> + <%- case field -%> + <%- when Herb::Template::NodeField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + ast_free_arrays_recursive((AST_NODE_T*) <%= node.human %>-><%= field.name %>); + } + <%- when Herb::Template::ArrayField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + for (size_t i = 0; i < hb_array_size(<%= node.human %>-><%= field.name %>); i++) { + AST_NODE_T* child = (AST_NODE_T*) hb_array_get(<%= node.human %>-><%= field.name %>, i); + ast_free_arrays_recursive(child); } - hb_array_free(&node->errors); + hb_array_free(&<%= node.human %>-><%= field.name %>); } + <%- when Herb::Template::AnalyzedRubyField -%> + if (<%= node.human %>-><%= field.name %> != NULL) { + free_analyzed_ruby(<%= 
node.human %>-><%= field.name %>); + } + <%- end -%> + <%- end -%> - free(node); + if (<%= node.human %>->base.errors != NULL) { + hb_array_free(&<%= node.human %>->base.errors); + } } +<%- end -%> + <%- nodes.each do |node| -%> <%- arguments = node.fields.any? ? node.fields.map { |field| [field.c_type, " ", field.name].join }.join(", ") : "void" -%> @@ -96,22 +149,15 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % <%- node.fields.each do |field| -%> <%- case field -%> <%- when Herb::Template::TokenField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { token_free(<%= node.human %>-><%= field.name %>); } + // Token is arena-allocated, will be freed with arena <%- when Herb::Template::BorrowedNodeField -%> /* <%= field.name %> is a borrowed reference, not freed here (owned by another field) */ <%- when Herb::Template::NodeField -%> - ast_node_free((AST_NODE_T*) <%= node.human %>-><%= field.name %>); + // Node is arena-allocated, will be freed with arena <%- when Herb::Template::ArrayField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { - for (size_t i = 0; i < hb_array_size(<%= node.human %>-><%= field.name %>); i++) { - AST_NODE_T* child = hb_array_get(<%= node.human %>-><%= field.name %>, i); - if (child) { ast_node_free(child); } - } - - hb_array_free(&<%= node.human %>-><%= field.name %>); - } + // Array freed by ast_free_arrays_recursive() before arena cleanup <%- when Herb::Template::StringField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { free((char*) <%= node.human %>-><%= field.name %>); } + // String is arena-allocated, will be freed with arena <%- when Herb::Template::PrismNodeField -%> if (<%= node.human %>-><%= field.name %> != NULL) { // The first argument to `pm_node_destroy` is a `pm_parser_t`, but it's currently unused: @@ -120,9 +166,7 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % pm_node_destroy(NULL, <%= node.human %>-><%= field.name %>); } 
<%- when Herb::Template::AnalyzedRubyField -%> - if (<%= node.human %>-><%= field.name %> != NULL) { - free_analyzed_ruby(<%= node.human %>-><%= field.name %>); - } + // AnalyzedRuby freed by ast_free_arrays_recursive() before arena cleanup <%- when Herb::Template::VoidPointerField -%> free(<%= node.human %>-><%= field.name %>); <%- when Herb::Template::BooleanField -%> @@ -134,7 +178,21 @@ static void ast_free_<%= node.human %>(<%= node.struct_type %>* <%= node.human % <%- end -%> <%- end -%> + <%- if node.human == "document_node" -%> + ast_free_arrays_recursive((AST_NODE_T*)<%= node.human %>); + + hb_arena_T* arena = <%= node.human %>->arena; + bool owns_arena = <%= node.human %>->owns_arena; + + ast_free_base_node(&<%= node.human %>->base); + + if (arena != NULL && owns_arena) { + hb_arena_free(arena); + free(arena); + } + <%- else -%> ast_free_base_node(&<%= node.human %>->base); + <%- end -%> } <%- end -%> diff --git a/templates/src/errors.c.erb b/templates/src/errors.c.erb index a75aad5a0..7c09d9e9d 100644 --- a/templates/src/errors.c.erb +++ b/templates/src/errors.c.erb @@ -5,6 +5,7 @@ #include "include/token.h" #include "include/util.h" #include "include/util/hb_array.h" +#include "include/util/hb_arena.h" #include #include @@ -26,10 +27,10 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit } <%- errors.each do |error| -%> <%- error_arguments = error.fields.any? ? error.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = error_arguments + ["position_T start", "position_T end"] -%> +<%- arguments = error_arguments + ["position_T start", "position_T end", "hb_arena_T* arena"] -%> <%= error.struct_type %>* <%= error.human %>_init(<%= arguments.join(", ") %>) { - <%= error.struct_type %>* <%= error.human %> = malloc(sizeof(<%= error.struct_type %>)); + <%= error.struct_type %>* <%= error.human %> = arena ? 
hb_arena_alloc(arena, sizeof(<%= error.struct_type %>)) : malloc(sizeof(<%= error.struct_type %>)); error_init(&<%= error.human %>->base, <%= error.type %>, start, end); @@ -37,7 +38,7 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit const char* message_template = "<%= error.message_template %>"; size_t message_size = <%= Herb::Template::PrintfMessageTemplate.estimate_buffer_size(error.message_template) %>; - char* message = (char*) malloc(message_size); + char* message = arena ? (char*) hb_arena_alloc(arena, message_size) : (char*) malloc(message_size); if (message) { <%- error.message_arguments.each_with_index do |argument, i| -%> @@ -61,13 +62,25 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit <%- end -%> ); - <%= error.human %>->base.message = herb_strdup(message); - free(message); + if (arena) { + <%= error.human %>->base.message = hb_arena_strdup(arena, message); + } else { + <%= error.human %>->base.message = herb_strdup(message); + free(message); + } } else { - <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + if (arena) { + <%= error.human %>->base.message = hb_arena_strdup(arena, "<%= error.message_template %>"); + } else { + <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + } } <%- else -%> - <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + if (arena) { + <%= error.human %>->base.message = hb_arena_strdup(arena, "<%= error.message_template %>"); + } else { + <%= error.human %>->base.message = herb_strdup("<%= error.message_template %>"); + } <%- end -%> <%- error.fields.each do |field| -%> @@ -75,13 +88,13 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit <%- when Herb::Template::PositionField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::TokenField -%> - <%= error.human %>-><%= field.name %> = token_copy(<%= 
field.name %>); + <%= error.human %>-><%= field.name %> = token_copy(<%= field.name %>, arena); <%- when Herb::Template::TokenTypeField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::SizeTField -%> <%= error.human %>-><%= field.name %> = <%= field.name %>; <%- when Herb::Template::StringField -%> - <%= error.human %>-><%= field.name %> = herb_strdup(<%= field.name %>); + <%= error.human %>-><%= field.name %> = arena ? hb_arena_strdup(arena, <%= field.name %>) : herb_strdup(<%= field.name %>); <%- else -%> <%= field.inspect %> <%- end -%> @@ -89,8 +102,8 @@ void error_init(ERROR_T* error, const error_type_T type, position_T start, posit return <%= error.human %>; } -void append_<%= error.human %>(<%= (arguments + ["hb_array_T* errors"]).join(", ") %>) { - hb_array_append(errors, <%= error.human %>_init(<%= arguments.map { |argument| argument.split(" ").last.strip }.join(", ") %>)); +void append_<%= error.human %>(<%= (error_arguments + ["position_T start", "position_T end", "hb_array_T* errors", "hb_arena_T* arena"]).join(", ") %>) { + hb_array_append(errors, <%= error.human %>_init(<%= (error_arguments.map { |arg| arg.split(" ").last.strip } + ["start", "end", "arena"]).join(", ") %>)); } <%- end -%> diff --git a/templates/src/include/ast_nodes.h.erb b/templates/src/include/ast_nodes.h.erb index 3958eb1b5..cd73455c9 100644 --- a/templates/src/include/ast_nodes.h.erb +++ b/templates/src/include/ast_nodes.h.erb @@ -9,6 +9,7 @@ #include "location.h" #include "position.h" #include "token_struct.h" +#include "util/hb_arena.h" #include "util/hb_array.h" #include "util/hb_buffer.h" #include "util/hb_string.h" @@ -32,12 +33,16 @@ typedef struct AST_NODE_STRUCT { typedef struct <%= node.struct_name %> { AST_NODE_T base; <%= arguments %> + <%- if node.human == "document_node" -%> + hb_arena_T* arena; + bool owns_arena; + <%- end -%> } <%= node.struct_type %>; <%- end -%> <%- nodes.each do |node| -%> <%- node_arguments = 
node.fields.any? ? node.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors"] -%> +<%- arguments = node_arguments + ["position_T start_position", "position_T end_position", "hb_array_T* errors", "hb_arena_T* arena"] -%> <%= node.struct_type %>* ast_<%= node.human %>_init(<%= arguments.join(", ") %>); <%- end -%> diff --git a/templates/src/include/errors.h.erb b/templates/src/include/errors.h.erb index 4676c72d3..9f7549f64 100644 --- a/templates/src/include/errors.h.erb +++ b/templates/src/include/errors.h.erb @@ -6,6 +6,7 @@ #include "position.h" #include "token.h" #include "util/hb_array.h" +#include "util/hb_arena.h" #include "util/hb_buffer.h" typedef enum { @@ -31,9 +32,10 @@ typedef struct { <%- errors.each do |error| -%> <%- error_arguments = error.fields.any? ? error.fields.map { |field| [field.c_type, " ", field.name].join } : [] -%> -<%- arguments = error_arguments + ["position_T start", "position_T end"] -%> -<%= error.struct_type %>* <%= error.human %>_init(<%= arguments.join(", ") %>); -void append_<%= error.human %>(<%= (arguments << "hb_array_T* errors").join(", ") %>); +<%- init_arguments = error_arguments + ["position_T start", "position_T end", "hb_arena_T* arena"] -%> +<%- append_arguments = error_arguments + ["position_T start", "position_T end", "hb_array_T* errors", "hb_arena_T* arena"] -%> +<%= error.struct_type %>* <%= error.human %>_init(<%= init_arguments.join(", ") %>); +void append_<%= error.human %>(<%= append_arguments.join(", ") %>); <%- end -%> void error_init(ERROR_T* error, error_type_T type, position_T start, position_T end); diff --git a/templates/src/parser_match_tags.c.erb b/templates/src/parser_match_tags.c.erb index 237d98949..4308457a5 100644 --- a/templates/src/parser_match_tags.c.erb +++ b/templates/src/parser_match_tags.c.erb @@ -25,7 +25,7 @@ bool match_tags_visitor(const AST_NODE_T* node, void* 
data) { <%- array_fields.each do |field| -%> if (<%= node.human %>-><%= field.name %> != NULL) { - match_tags_in_node_array(<%= node.human %>-><%= field.name %>, context->errors, context->strict); + match_tags_in_node_array(<%= node.human %>-><%= field.name %>, context->errors, context->strict, context->arena); } <%- end -%> <%- single_node_fields.each do |field| -%> diff --git a/test/arena_test.rb b/test/arena_test.rb new file mode 100644 index 000000000..f0d0789fa --- /dev/null +++ b/test/arena_test.rb @@ -0,0 +1,123 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +class ArenaTest < Minitest::Spec + test "Arena class exists" do + assert defined?(Herb::Arena) + end + + test "creating an arena with default size" do + arena = Herb::Arena.new + assert_instance_of Herb::Arena, arena + assert arena.capacity.positive? + end + + test "creating an arena with custom size" do + arena = Herb::Arena.new(size: 1024 * 1024) + assert_instance_of Herb::Arena, arena + assert arena.capacity >= 1024 * 1024 + end + + test "arena position starts at zero" do + arena = Herb::Arena.new + assert_equal 0, arena.position + end + + test "arena position increases after parsing" do + arena = Herb::Arena.new + initial_position = arena.position + + Herb.parse("
hello
", arena: arena) + + assert arena.position > initial_position + end + + test "arena can be reused for multiple parse calls" do + arena = Herb::Arena.new + + result1 = Herb.parse("
first
", arena: arena) + position_after_first = arena.position + + result2 = Herb.parse("second", arena: arena) + position_after_second = arena.position + + assert result1 + assert result2 + assert position_after_second > position_after_first + end + + test "arena reset returns position to zero" do + arena = Herb::Arena.new + + Herb.parse("
hello
", arena: arena) + assert arena.position.positive? + + arena.reset + assert_equal 0, arena.position + end + + test "arena can be reused after reset" do + arena = Herb::Arena.new + + result1 = Herb.parse("
first
", arena: arena) + arena.reset + + result2 = Herb.parse("second", arena: arena) + + assert result1 + assert result2 + end + + test "arena stats prints stats and returns nil" do + arena = Herb::Arena.new + result = arena.stats + assert_nil result + end + + test "multiple arenas can be used independently" do + arena1 = Herb::Arena.new + arena2 = Herb::Arena.new + + Herb.parse("
first
", arena: arena1) + position1 = arena1.position + + Herb.parse("second", arena: arena2) + position2 = arena2.position + + assert position1.positive? + assert position2.positive? + assert_equal position1, arena1.position + end + + test "parsing without arena still works" do + result = Herb.parse("
hello
") + assert result + assert result.value + end + + test "parsing many templates with shared arena" do + arena = Herb::Arena.new + + 100.times do |i| + result = Herb.parse("
template #{i}
", arena: arena) + assert result + assert result.value + end + + assert arena.position.positive? + end + + test "arena reset allows reuse for batch processing" do + arena = Herb::Arena.new + + 3.times do |batch| + 10.times do |i| + result = Herb.parse("
batch #{batch} item #{i}
", arena: arena) + assert result + end + arena.reset + assert_equal 0, arena.position + end + end +end diff --git a/wasm/arena.cpp b/wasm/arena.cpp new file mode 100644 index 000000000..e27f85283 --- /dev/null +++ b/wasm/arena.cpp @@ -0,0 +1,107 @@ +#include "arena.h" + +#include +#include + +extern "C" { +#include "../src/include/macros.h" +} + +static std::map arena_registry; +static int next_arena_id = 1; + +int Herb_createArena(int initial_size) { + hb_arena_T* arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + if (!arena) return -1; + + size_t size = initial_size > 0 ? (size_t) initial_size : KB(512); + if (!hb_arena_init(arena, size)) { + free(arena); + return -1; + } + + int id = next_arena_id++; + arena_registry[id] = arena; + return id; +} + +void Herb_resetArena(int arena_id) { + auto it = arena_registry.find(arena_id); + + if (it != arena_registry.end() && it->second) { + hb_arena_reset(it->second); + } +} + +void Herb_freeArena(int arena_id) { + auto it = arena_registry.find(arena_id); + + if (it != arena_registry.end() && it->second) { + hb_arena_free(it->second); + free(it->second); + arena_registry.erase(it); + } +} + +int Herb_arenaPosition(int arena_id) { + auto it = arena_registry.find(arena_id); + + if (it != arena_registry.end() && it->second) { + return (int) hb_arena_position(it->second); + } + + return -1; +} + +int Herb_arenaCapacity(int arena_id) { + auto it = arena_registry.find(arena_id); + if (it != arena_registry.end() && it->second) { + return (int) hb_arena_capacity(it->second); + } + return -1; +} + +hb_arena_T* get_arena_by_id(int arena_id) { + auto it = arena_registry.find(arena_id); + if (it != arena_registry.end() && it->second) { + return it->second; + } + return nullptr; +} + +hb_arena_T* get_arena_option_from_object(emscripten::val options) { + if (options.isUndefined() || options.isNull()) return nullptr; + if (options.typeOf().as() != "object") return nullptr; + if (!options.hasOwnProperty("arenaId")) return nullptr; + + 
int arena_id = options["arenaId"].as(); + return get_arena_by_id(arena_id); +} + +bool setup_arena_context(hb_arena_T* external_arena, arena_context_T* context) { + if (external_arena) { + context->arena = external_arena; + context->owns_arena = false; + return true; + } + + context->arena = (hb_arena_T*) malloc(sizeof(hb_arena_T)); + if (!context->arena) { return false; } + + if (!hb_arena_init(context->arena, KB(512))) { + free(context->arena); + context->arena = nullptr; + return false; + } + + context->owns_arena = true; + return true; +} + +void cleanup_arena_context(arena_context_T* context) { + if (context->owns_arena && context->arena) { + hb_arena_free(context->arena); + free(context->arena); + context->arena = nullptr; + } +} diff --git a/wasm/arena.h b/wasm/arena.h new file mode 100644 index 000000000..e1711327a --- /dev/null +++ b/wasm/arena.h @@ -0,0 +1,27 @@ +#ifndef HERB_WASM_ARENA_H +#define HERB_WASM_ARENA_H + +#include + +extern "C" { +#include "../src/include/util/hb_arena.h" +} + +int Herb_createArena(int initial_size); +void Herb_resetArena(int arena_id); +void Herb_freeArena(int arena_id); +int Herb_arenaPosition(int arena_id); +int Herb_arenaCapacity(int arena_id); + +hb_arena_T* get_arena_by_id(int arena_id); + +typedef struct { + hb_arena_T* arena; + bool owns_arena; +} arena_context_T; + +hb_arena_T* get_arena_option_from_object(emscripten::val options); +bool setup_arena_context(hb_arena_T* external_arena, arena_context_T* context); +void cleanup_arena_context(arena_context_T* context); + +#endif diff --git a/wasm/herb-wasm.cpp b/wasm/herb-wasm.cpp index 3baf167e8..70b13543f 100644 --- a/wasm/herb-wasm.cpp +++ b/wasm/herb-wasm.cpp @@ -4,9 +4,12 @@ #include #include +#include "arena.h" #include "extension_helpers.h" extern "C" { +#include "../src/include/macros.h" +#include "../src/include/util/hb_arena.h" #include "../src/include/util/hb_array.h" #include "../src/include/ast_node.h" #include "../src/include/ast_nodes.h" @@ -23,12 +26,24 
@@ extern "C" { using namespace emscripten; -val Herb_lex(const std::string& source) { - hb_array_T* tokens = herb_lex(source.c_str()); +val Herb_lex(const std::string& source, val options) { + hb_arena_T* external_arena = get_arena_option_from_object(options); - val result = CreateLexResult(tokens, source); + arena_context_T context; + if (!setup_arena_context(external_arena, &context)) { + return val::null(); + } + + herb_lex_result_T* lex_result = herb_lex(source.c_str(), context.arena); + + if (!lex_result) { + cleanup_arena_context(&context); + return val::null(); + } + + val result = CreateLexResult(lex_result->tokens, source); - herb_free_tokens(&tokens); + herb_free_lex_result(&lex_result); return result; } @@ -56,7 +71,21 @@ val Herb_parse(const std::string& source, val options) { } } - AST_DOCUMENT_NODE_T* root = herb_parse(source.c_str(), &parser_options); + hb_arena_T* external_arena = get_arena_option_from_object(options); + + arena_context_T context; + if (!setup_arena_context(external_arena, &context)) { + return val::null(); + } + + AST_DOCUMENT_NODE_T* root = herb_parse(source.c_str(), &parser_options, context.arena); + + if (!root) { + cleanup_arena_context(&context); + return val::null(); + } + + root->owns_arena = context.owns_arena; val result = CreateParseResult(root, source); @@ -115,4 +144,10 @@ EMSCRIPTEN_BINDINGS(herb_module) { function("extractRuby", &Herb_extract_ruby); function("extractHTML", &Herb_extract_html); function("version", &Herb_version); + + function("createArena", &Herb_createArena); + function("resetArena", &Herb_resetArena); + function("freeArena", &Herb_freeArena); + function("arenaPosition", &Herb_arenaPosition); + function("arenaCapacity", &Herb_arenaCapacity); }