@@ -17,10 +17,12 @@ use std::{
1717use bincode:: Encode ;
1818use cfgrammar:: {
1919 header:: {
20- GrmtoolsSectionParser , HeaderError , HeaderErrorKind , HeaderValue , Namespaced , Setting ,
21- Value ,
20+ GrmtoolsSectionParser , Header , HeaderError , HeaderErrorKind , HeaderValue , Namespaced ,
21+ Setting , Value ,
2222 } ,
23+ markmap:: MergeBehavior ,
2324 newlinecache:: NewlineCache ,
25+ span:: Location ,
2426 Spanned ,
2527} ;
2628use lazy_static:: lazy_static;
@@ -30,7 +32,7 @@ use proc_macro2::TokenStream;
3032use quote:: { format_ident, quote, ToTokens , TokenStreamExt } ;
3133use regex:: Regex ;
3234
33- use crate :: { DefaultLexerTypes , LRNonStreamingLexerDef , LexFlags , LexerDef , UNSPECIFIED_LEX_FLAGS } ;
35+ use crate :: { DefaultLexerTypes , LRNonStreamingLexerDef , LexFlags , LexerDef } ;
3436
3537const RUST_FILE_EXT : & str = "rs" ;
3638
@@ -206,8 +208,7 @@ where
206208 rule_ids_map : Option < HashMap < String , LexerTypesT :: StorageT > > ,
207209 allow_missing_terms_in_lexer : bool ,
208210 allow_missing_tokens_in_parser : bool ,
209- force_lex_flags : LexFlags ,
210- default_lex_flags : LexFlags ,
211+ header : Header < Location > ,
211212 #[ cfg( test) ]
212213 inspect_lexerkind_cb : Option < Box < dyn Fn ( LexerKind ) -> Result < ( ) , Box < dyn Error > > > > ,
213214}
@@ -242,6 +243,8 @@ where
242243 /// .build()?;
243244 /// ```
244245 pub fn new_with_lexemet ( ) -> Self {
246+ let mut header = Header :: new ( ) ;
247+ header. set_default_merge_behavior ( MergeBehavior :: Ours ) ;
245248 CTLexerBuilder {
246249 lrpar_config : None ,
247250 lexer_path : None ,
@@ -253,8 +256,7 @@ where
253256 rule_ids_map : None ,
254257 allow_missing_terms_in_lexer : false ,
255258 allow_missing_tokens_in_parser : true ,
256- force_lex_flags : UNSPECIFIED_LEX_FLAGS ,
257- default_lex_flags : UNSPECIFIED_LEX_FLAGS ,
259+ header,
258260 #[ cfg( test) ]
259261 inspect_lexerkind_cb : None ,
260262 }
@@ -443,14 +445,17 @@ where
443445 }
444446 let lex_src = read_to_string ( lexerp)
445447 . map_err ( |e| format ! ( "When reading '{}': {e}" , lexerp. display( ) ) ) ?;
446- let ( header, _) = GrmtoolsSectionParser :: new ( & lex_src, false )
448+ let mut header = self . header ;
449+ let ( parsed_header, _) = GrmtoolsSectionParser :: new ( & lex_src, false )
447450 . parse ( )
448451 . map_err ( |es| {
449452 es. iter ( )
450453 . map ( |e| e. to_string ( ) )
451454 . collect :: < Vec < _ > > ( )
452455 . join ( "\n " )
453456 } ) ?;
457+ header. merge_from ( parsed_header) ?;
458+ header. mark_used ( & "lexerkind" . to_string ( ) ) ;
454459 let lexerkind = match self . lexerkind {
455460 Some ( lexerkind) => lexerkind,
456461 None => {
@@ -469,30 +474,38 @@ where
469474 let ( mut lexerdef, lex_flags) : ( Box < dyn LexerDef < LexerTypesT > > , LexFlags ) = match lexerkind
470475 {
471476 LexerKind :: LRNonStreamingLexer => {
472- let lexerdef = LRNonStreamingLexerDef :: < LexerTypesT > :: new_with_options (
473- & lex_src ,
474- self . force_lex_flags . clone ( ) ,
475- self . default_lex_flags . clone ( ) ,
476- )
477- . map_err ( |errs | {
478- errs . iter ( )
479- . map ( |e| {
480- if let Some ( ( line , column ) ) = line_cache . byte_to_line_num_and_col_num (
481- & lex_src ,
482- e . spans ( ) . first ( ) . unwrap ( ) . start ( ) ,
483- ) {
484- format ! ( "{} at line {line} column {column}" , e)
485- } else {
486- format ! ( "{}" , e)
487- }
488- } )
489- . collect :: < Vec < _ > > ( )
490- . join ( "\n " )
491- } ) ?;
477+ let lex_flags = LexFlags :: try_from ( & mut header ) ? ;
478+ let lexerdef =
479+ LRNonStreamingLexerDef :: < LexerTypesT > :: new_with_options ( & lex_src , lex_flags )
480+ . map_err ( |errs| {
481+ errs . iter ( )
482+ . map ( |e | {
483+ if let Some ( ( line , column ) ) = line_cache
484+ . byte_to_line_num_and_col_num (
485+ & lex_src ,
486+ e . spans ( ) . first ( ) . unwrap ( ) . start ( ) ,
487+ )
488+ {
489+ format ! ( "{} at line {line} column {column}" , e)
490+ } else {
491+ format ! ( "{}" , e)
492+ }
493+ } )
494+ . collect :: < Vec < _ > > ( )
495+ . join ( "\n " )
496+ } ) ?;
492497 let lex_flags = lexerdef. lex_flags ( ) . cloned ( ) ;
493498 ( Box :: new ( lexerdef) , lex_flags. unwrap ( ) )
494499 }
495500 } ;
501+
502+ let unused_header_values = header. unused ( ) ;
503+ if !unused_header_values. is_empty ( ) {
504+ return Err (
505+ format ! ( "Unused header values: {}" , unused_header_values. join( ", " ) ) . into ( ) ,
506+ ) ;
507+ }
508+
496509 let ( missing_from_lexer, missing_from_parser) = match self . rule_ids_map {
497510 Some ( ref rim) => {
498511 // Convert from HashMap<String, _> to HashMap<&str, _>
@@ -583,18 +596,18 @@ where
583596 // Code gen for the lexerdef() `lex_flags` variable.
584597 quote ! {
585598 let mut lex_flags = :: lrlex:: DEFAULT_LEX_FLAGS ;
586- lex_flags. allow_wholeline_comments = #allow_wholeline_comments;
587- lex_flags. dot_matches_new_line = #dot_matches_new_line;
588- lex_flags. multi_line = #multi_line;
589- lex_flags. octal = #octal;
590- lex_flags. posix_escapes = #posix_escapes;
591- lex_flags. case_insensitive = #case_insensitive;
592- lex_flags. unicode = #unicode;
593- lex_flags. swap_greed = #swap_greed;
594- lex_flags. ignore_whitespace = #ignore_whitespace;
595- lex_flags. size_limit = #size_limit;
596- lex_flags. dfa_size_limit = #dfa_size_limit;
597- lex_flags. nest_limit = #nest_limit;
599+ lex_flags. allow_wholeline_comments = #allow_wholeline_comments. or ( :: lrlex :: DEFAULT_LEX_FLAGS . allow_wholeline_comments ) ;
600+ lex_flags. dot_matches_new_line = #dot_matches_new_line. or ( :: lrlex :: DEFAULT_LEX_FLAGS . dot_matches_new_line ) ;
601+ lex_flags. multi_line = #multi_line. or ( :: lrlex :: DEFAULT_LEX_FLAGS . multi_line ) ;
602+ lex_flags. octal = #octal. or ( :: lrlex :: DEFAULT_LEX_FLAGS . octal ) ;
603+ lex_flags. posix_escapes = #posix_escapes. or ( :: lrlex :: DEFAULT_LEX_FLAGS . posix_escapes ) ;
604+ lex_flags. case_insensitive = #case_insensitive. or ( :: lrlex :: DEFAULT_LEX_FLAGS . case_insensitive ) ;
605+ lex_flags. unicode = #unicode. or ( :: lrlex :: DEFAULT_LEX_FLAGS . unicode ) ;
606+ lex_flags. swap_greed = #swap_greed. or ( :: lrlex :: DEFAULT_LEX_FLAGS . swap_greed ) ;
607+ lex_flags. ignore_whitespace = #ignore_whitespace. or ( :: lrlex :: DEFAULT_LEX_FLAGS . ignore_whitespace ) ;
608+ lex_flags. size_limit = #size_limit. or ( :: lrlex :: DEFAULT_LEX_FLAGS . size_limit ) ;
609+ lex_flags. dfa_size_limit = #dfa_size_limit. or ( :: lrlex :: DEFAULT_LEX_FLAGS . dfa_size_limit ) ;
610+ lex_flags. nest_limit = #nest_limit. or ( :: lrlex :: DEFAULT_LEX_FLAGS . nest_limit ) ;
598611 let lex_flags = lex_flags;
599612 }
600613 } ;
@@ -785,7 +798,14 @@ where
785798 ///
786799 /// Setting this flag will override the same flag within a `%grmtools` section.
787800 pub fn allow_wholeline_comments ( mut self , flag : bool ) -> Self {
788- self . force_lex_flags . allow_wholeline_comments = Some ( flag) ;
801+ let key = "allow_wholeline_comments" . to_string ( ) ;
802+ self . header . insert (
803+ key,
804+ HeaderValue (
805+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
806+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
807+ ) ,
808+ ) ;
789809 self
790810 }
791811
@@ -794,7 +814,14 @@ where
794814 ///
795815 /// Setting this flag will override the same flag within a `%grmtools` section.
796816 pub fn dot_matches_new_line ( mut self , flag : bool ) -> Self {
797- self . force_lex_flags . dot_matches_new_line = Some ( flag) ;
817+ let key = "dot_matches_new_line" . to_string ( ) ;
818+ self . header . insert (
819+ key,
820+ HeaderValue (
821+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
822+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
823+ ) ,
824+ ) ;
798825 self
799826 }
800827
@@ -803,7 +830,14 @@ where
803830 ///
804831 /// Setting this flag will override the same flag within a `%grmtools` section.
805832 pub fn multi_line ( mut self , flag : bool ) -> Self {
806- self . force_lex_flags . multi_line = Some ( flag) ;
833+ let key = "multi_line" . to_string ( ) ;
834+ self . header . insert (
835+ key,
836+ HeaderValue (
837+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
838+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
839+ ) ,
840+ ) ;
807841 self
808842 }
809843
@@ -812,7 +846,14 @@ where
812846 ///
813847 /// Setting this flag will override the same flag within a `%grmtools` section.
814848 pub fn posix_escapes ( mut self , flag : bool ) -> Self {
815- self . force_lex_flags . posix_escapes = Some ( flag) ;
849+ let key = "posix_escapes" . to_string ( ) ;
850+ self . header . insert (
851+ key,
852+ HeaderValue (
853+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
854+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
855+ ) ,
856+ ) ;
816857 self
817858 }
818859
@@ -821,7 +862,14 @@ where
821862 ///
822863 /// Setting this flag will override the same flag within a `%grmtools` section.
823864 pub fn octal ( mut self , flag : bool ) -> Self {
824- self . force_lex_flags . octal = Some ( flag) ;
865+ let key = "octal" . to_string ( ) ;
866+ self . header . insert (
867+ key,
868+ HeaderValue (
869+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
870+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
871+ ) ,
872+ ) ;
825873 self
826874 }
827875
@@ -830,7 +878,14 @@ where
830878 ///
831879 /// Setting this flag will override the same flag within a `%grmtools` section.
832880 pub fn swap_greed ( mut self , flag : bool ) -> Self {
833- self . force_lex_flags . swap_greed = Some ( flag) ;
881+ let key = "swap_greed" . to_string ( ) ;
882+ self . header . insert (
883+ key,
884+ HeaderValue (
885+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
886+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
887+ ) ,
888+ ) ;
834889 self
835890 }
836891
@@ -839,7 +894,14 @@ where
839894 ///
840895 /// Setting this flag will override the same flag within a `%grmtools` section.
841896 pub fn ignore_whitespace ( mut self , flag : bool ) -> Self {
842- self . force_lex_flags . ignore_whitespace = Some ( flag) ;
897+ let key = "ignore_whitespace" . to_string ( ) ;
898+ self . header . insert (
899+ key,
900+ HeaderValue (
901+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
902+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
903+ ) ,
904+ ) ;
843905 self
844906 }
845907
@@ -848,7 +910,14 @@ where
848910 ///
849911 /// Setting this flag will override the same flag within a `%grmtools` section.
850912 pub fn unicode ( mut self , flag : bool ) -> Self {
851- self . force_lex_flags . unicode = Some ( flag) ;
913+ let key = "unicode" . to_string ( ) ;
914+ self . header . insert (
915+ key,
916+ HeaderValue (
917+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
918+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
919+ ) ,
920+ ) ;
852921 self
853922 }
854923
@@ -857,7 +926,14 @@ where
857926 ///
858927 /// Setting this flag will override the same flag within a `%grmtools` section.
859928 pub fn case_insensitive ( mut self , flag : bool ) -> Self {
860- self . force_lex_flags . case_insensitive = Some ( flag) ;
929+ let key = "case_insensitive" . to_string ( ) ;
930+ self . header . insert (
931+ key,
932+ HeaderValue (
933+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
934+ Value :: Flag ( flag, Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ) ,
935+ ) ,
936+ ) ;
861937 self
862938 }
863939
@@ -866,7 +942,17 @@ where
866942 ///
867943 /// Setting this flag will override the same flag within a `%grmtools` section.
868944 pub fn size_limit ( mut self , sz : usize ) -> Self {
869- self . force_lex_flags . size_limit = Some ( sz) ;
945+ let key = "size_limit" . to_string ( ) ;
946+ self . header . insert (
947+ key,
948+ HeaderValue (
949+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
950+ Value :: Setting ( Setting :: Num (
951+ sz as u64 ,
952+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
953+ ) ) ,
954+ ) ,
955+ ) ;
870956 self
871957 }
872958
@@ -875,7 +961,17 @@ where
875961 ///
876962 /// Setting this flag will override the same flag within a `%grmtools` section.
877963 pub fn dfa_size_limit ( mut self , sz : usize ) -> Self {
878- self . force_lex_flags . dfa_size_limit = Some ( sz) ;
964+ let key = "dfa_size_limit" . to_string ( ) ;
965+ self . header . insert (
966+ key,
967+ HeaderValue (
968+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
969+ Value :: Setting ( Setting :: Num (
970+ sz as u64 ,
971+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
972+ ) ) ,
973+ ) ,
974+ ) ;
879975 self
880976 }
881977
@@ -884,15 +980,17 @@ where
884980 ///
885981 /// Setting this flag will override the same flag within a `%grmtools` section.
886982 pub fn nest_limit ( mut self , lim : u32 ) -> Self {
887- self . force_lex_flags . nest_limit = Some ( lim) ;
888- self
889- }
890-
891- /// `Some` values in the specified `flags` will be used as a default value
892- /// unless the specified value has already been specified previously via `CTLexerBuilder`
893- /// or was specified in the `%grmtools` section of a *.l* file.
894- pub fn default_lex_flags ( mut self , flags : LexFlags ) -> Self {
895- self . default_lex_flags = flags;
983+ let key = "nest_limit" . to_string ( ) ;
984+ self . header . insert (
985+ key,
986+ HeaderValue (
987+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
988+ Value :: Setting ( Setting :: Num (
989+ lim as u64 ,
990+ Location :: Other ( "CTLexerBuilder" . to_string ( ) ) ,
991+ ) ) ,
992+ ) ,
993+ ) ;
896994 self
897995 }
898996
0 commit comments