@@ -1557,6 +1557,36 @@ static std::unordered_set<std::string> collect_reachable_rules(
15571557
15581558// GBNF generation implementation
15591559void common_peg_arena::build_grammar (const common_grammar_builder & builder, bool lazy) const {
1560+ auto schema_delegates = [](const common_peg_schema_parser & s) -> bool {
1561+ if (!s.schema ) {
1562+ return true ;
1563+ }
1564+ if (s.raw && s.schema ->contains (" type" ) && s.schema ->at (" type" ).is_string () && s.schema ->at (" type" ) == " string" ) {
1565+ return true ;
1566+ }
1567+ return false ;
1568+ };
1569+
1570+ // Unwrap the parser so we can properly check if it's a sequence or choice
1571+ auto effective_parser = [&](common_peg_parser_id id) -> const common_peg_parser_variant & {
1572+ while (true ) {
1573+ const auto & p = parsers_.at (id);
1574+ if (const auto * tag = std::get_if<common_peg_tag_parser>(&p)) {
1575+ id = tag->child ;
1576+ } else if (const auto * atomic = std::get_if<common_peg_atomic_parser>(&p)) {
1577+ id = atomic->child ;
1578+ } else if (const auto * schema = std::get_if<common_peg_schema_parser>(&p)) {
1579+ if (schema_delegates (*schema)) {
1580+ id = schema->child ;
1581+ } else {
1582+ return p;
1583+ }
1584+ } else {
1585+ return p;
1586+ }
1587+ }
1588+ };
1589+
15601590 // Generate GBNF for a parser
15611591 std::function<std::string (common_peg_parser_id)> to_gbnf = [&](common_peg_parser_id id) -> std::string {
15621592 const auto & parser = parsers_.at (id);
@@ -1577,7 +1607,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
15771607 s += " " ;
15781608 }
15791609 auto child_gbnf = to_gbnf (child);
1580- const auto & child_parser = parsers_. at (child);
1610+ const auto & child_parser = effective_parser (child);
15811611 if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
15821612 std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
15831613 s += " (" + child_gbnf + " )" ;
@@ -1593,7 +1623,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
15931623 s += " | " ;
15941624 }
15951625 auto child_gbnf = to_gbnf (child);
1596- const auto & child_parser = parsers_. at (child);
1626+ const auto & child_parser = effective_parser (child);
15971627 if (std::holds_alternative<common_peg_choice_parser>(child_parser)) {
15981628 s += " (" + child_gbnf + " )" ;
15991629 } else {
@@ -1603,7 +1633,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
16031633 return s;
16041634 } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
16051635 auto child_gbnf = to_gbnf (p.child );
1606- const auto & child_parser = parsers_. at (p.child );
1636+ const auto & child_parser = effective_parser (p.child );
16071637 if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
16081638 std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
16091639 child_gbnf = " (" + child_gbnf + " )" ;
@@ -1663,15 +1693,10 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
16631693 }
16641694 return gbnf_excluding_pattern (p.delimiters );
16651695 } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
1666- if (p.schema ) {
1667- if (p.raw && p.schema ->contains (" type" ) && p.schema ->at (" type" ).is_string () && p.schema ->at (" type" ) == " string" ) {
1668- // TODO: Implement more comprehensive grammar generation for raw strings.
1669- // For now, use the grammar emitted from the underlying parser.
1670- return to_gbnf (p.child );
1671- }
1672- return builder.add_schema (p.name , *p.schema );
1696+ if (schema_delegates (p)) {
1697+ return to_gbnf (p.child );
16731698 }
1674- return to_gbnf (p.child );
1699+ return builder. add_schema (p.name , *p. schema );
16751700 } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
16761701 return p.name ;
16771702 } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
0 commit comments