@@ -573,7 +573,7 @@ fn translate_position(input: &str, index: usize) -> (&str, usize, usize) {
573573#[ cfg( test) ]
574574mod tests {
575575 use crate :: parser:: { parse_grammar, translate_position} ;
576- use crate :: { ExpressionKind , Grammar , RangeLimit } ;
576+ use crate :: { Character , Characters , ExpressionKind , Grammar , RangeLimit } ;
577577 use std:: path:: Path ;
578578
579579 #[ test]
@@ -778,4 +778,333 @@ mod tests {
778778 assert_eq ! ( max, Some ( 1 ) ) ;
779779 assert ! ( matches!( limit, RangeLimit :: HalfOpen ) ) ;
780780 }
781+
782+ // --- Negative lookahead tests ---
783+
/// A production consisting solely of `!Foo` is expected to parse to a
/// negative lookahead whose operand is the nonterminal `Foo`.
#[test]
fn lookahead_simple_nonterminal() {
    let parsed = parse("Rule -> !Foo").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let inner = match &production.expression.kind {
        ExpressionKind::NegativeLookahead(inner) => inner,
        other => panic!("expected NegativeLookahead, got {:?}", other),
    };
    assert!(matches!(&inner.kind, ExpressionKind::Nt(n) if n == "Foo"));
}
794+
/// `!` followed by a backtick-quoted terminal, then a nonterminal, is
/// expected to yield a two-element sequence: the lookahead over the
/// terminal `'`, then `Foo`.
#[test]
fn lookahead_terminal() {
    let parsed = parse("Rule -> !`'` Foo").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let seq = match &production.expression.kind {
        ExpressionKind::Sequence(seq) => seq,
        other => panic!("expected Sequence, got {:?}", other),
    };
    assert_eq!(seq.len(), 2);
    let inner = match &seq[0].kind {
        ExpressionKind::NegativeLookahead(inner) => inner,
        other => panic!("expected NegativeLookahead, got {:?}", other),
    };
    assert!(matches!(&inner.kind, ExpressionKind::Terminal(t) if t == "'"));
    assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "Foo"));
}
810+
/// A negative lookahead applied to a charset of two terminals, followed by
/// a nonterminal.
///
/// Expects a two-element sequence: a lookahead wrapping the charset
/// ``[`e` `E`]``, then the nonterminal `SUFFIX`.
#[test]
fn lookahead_charset() {
    let input = "Rule -> ![`e` `E`] SUFFIX";
    let grammar = parse(input).unwrap();
    let rule = grammar.productions.get("Rule").unwrap();
    let ExpressionKind::Sequence(seq) = &rule.expression.kind else {
        panic!("expected Sequence, got {:?}", rule.expression.kind);
    };
    assert_eq!(seq.len(), 2);
    let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
        panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
    };
    let ExpressionKind::Charset(chars) = &inner.kind else {
        panic!("expected Charset inside lookahead, got {:?}", inner.kind);
    };
    assert_eq!(chars.len(), 2);
    assert!(matches!(&chars[0], Characters::Terminal(t) if t == "e"));
    assert!(matches!(&chars[1], Characters::Terminal(t) if t == "E"));
    // The element after the lookahead must survive intact as well; without
    // this check the lookahead could swallow or mangle it unnoticed.
    assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "SUFFIX"));
}
830+
/// A negative lookahead over a parenthesised alternation is expected to
/// nest as lookahead -> group -> three-way alternation.
#[test]
fn lookahead_grouped() {
    let parsed = parse("Rule -> !(`.` | `_` | XID_Start)").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let inner = match &production.expression.kind {
        ExpressionKind::NegativeLookahead(inner) => inner,
        other => panic!("expected NegativeLookahead, got {:?}", other),
    };
    let grouped = match &inner.kind {
        ExpressionKind::Grouped(grouped) => grouped,
        other => panic!("expected Grouped inside lookahead, got {:?}", other),
    };
    let alts = match &grouped.kind {
        ExpressionKind::Alt(alts) => alts,
        other => panic!("expected Alt inside Grouped, got {:?}", other),
    };
    assert_eq!(alts.len(), 3);
    assert!(matches!(&alts[0].kind, ExpressionKind::Terminal(t) if t == "."));
    assert!(matches!(&alts[1].kind, ExpressionKind::Terminal(t) if t == "_"));
    assert!(matches!(&alts[2].kind, ExpressionKind::Nt(n) if n == "XID_Start"));
}
850+
/// A lookahead between two nonterminals is expected to stay in place as the
/// middle element of a three-element sequence.
#[test]
fn lookahead_in_sequence_middle() {
    let parsed = parse("Rule -> A !B C").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let seq = match &production.expression.kind {
        ExpressionKind::Sequence(seq) => seq,
        other => panic!("expected Sequence, got {:?}", other),
    };
    assert_eq!(seq.len(), 3);
    assert!(matches!(&seq[0].kind, ExpressionKind::Nt(n) if n == "A"));
    let inner = match &seq[1].kind {
        ExpressionKind::NegativeLookahead(inner) => inner,
        other => panic!("expected NegativeLookahead, got {:?}", other),
    };
    assert!(matches!(&inner.kind, ExpressionKind::Nt(n) if n == "B"));
    assert!(matches!(&seq[2].kind, ExpressionKind::Nt(n) if n == "C"));
}
867+
/// A lookahead inside a repeated group.
///
/// Expects `(!A B)*` to nest as repeat -> group -> two-element sequence,
/// where the first element is a lookahead over the nonterminal `A` and the
/// second is the nonterminal `B`.
#[test]
fn lookahead_in_repetition() {
    let input = "Rule -> (!A B)*";
    let grammar = parse(input).unwrap();
    let rule = grammar.productions.get("Rule").unwrap();
    let ExpressionKind::Repeat(rep) = &rule.expression.kind else {
        panic!("expected Repeat, got {:?}", rule.expression.kind);
    };
    let ExpressionKind::Grouped(grouped) = &rep.kind else {
        panic!("expected Grouped inside Repeat, got {:?}", rep.kind);
    };
    let ExpressionKind::Sequence(seq) = &grouped.kind else {
        panic!("expected Sequence inside Grouped, got {:?}", grouped.kind);
    };
    assert_eq!(seq.len(), 2);
    // Check the lookahead's operand too: a wildcard match would accept a
    // lookahead that captured the wrong expression (e.g. `A B` as a whole).
    let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
        panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
    };
    assert!(matches!(&inner.kind, ExpressionKind::Nt(n) if n == "A"));
    assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "B"));
}
886+
/// A lookahead in the first arm of an alternation.
///
/// Expects `!A B | C` to be a two-arm alternation whose first arm is the
/// sequence (lookahead over `A`, then `B`) and whose second arm is `C`.
#[test]
fn lookahead_in_alternation() {
    let input = "Rule -> !A B | C";
    let grammar = parse(input).unwrap();
    let rule = grammar.productions.get("Rule").unwrap();
    let ExpressionKind::Alt(alts) = &rule.expression.kind else {
        panic!("expected Alt, got {:?}", rule.expression.kind);
    };
    assert_eq!(alts.len(), 2);
    let ExpressionKind::Sequence(seq) = &alts[0].kind else {
        panic!("expected Sequence, got {:?}", alts[0].kind);
    };
    assert_eq!(seq.len(), 2);
    // Check the lookahead's operand too: a wildcard match would not notice
    // a lookahead that bound to something other than `A`.
    let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
        panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
    };
    assert!(matches!(&inner.kind, ExpressionKind::Nt(n) if n == "A"));
    assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "B"));
    assert!(matches!(&alts[1].kind, ExpressionKind::Nt(n) if n == "C"));
}
904+
/// A `!` with nothing after it is expected to be rejected with an error
/// mentioning the missing expression.
#[test]
fn lookahead_fail_trailing() {
    let err = parse("Rule -> !").unwrap_err();
    assert!(err.contains("expected expression after !"));
}
911+
912+ // --- Unicode tests ---
913+
/// A four-digit `U+` escape is expected to produce a `Unicode` expression
/// carrying both the decoded character and the digits as written.
#[test]
fn unicode_4_digit() {
    let parsed = parse("Rule -> U+0009").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let (ch, s) = match &production.expression.kind {
        ExpressionKind::Unicode((ch, s)) => (ch, s),
        other => panic!("expected Unicode, got {:?}", other),
    };
    assert_eq!(*ch, '\t');
    assert_eq!(s, "0009");
}
925+
/// A five-digit `U+` escape is expected to produce a `Unicode` expression
/// carrying both the decoded character and the digits as written.
#[test]
fn unicode_5_digit() {
    let parsed = parse("Rule -> U+E0000").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let (ch, s) = match &production.expression.kind {
        ExpressionKind::Unicode((ch, s)) => (ch, s),
        other => panic!("expected Unicode, got {:?}", other),
    };
    assert_eq!(*ch, '\u{E0000}');
    assert_eq!(s, "E0000");
}
937+
/// A six-digit `U+` escape (the largest scalar value, `U+10FFFF`) is
/// expected to produce a `Unicode` expression carrying both the decoded
/// character and the digits as written.
#[test]
fn unicode_6_digit() {
    let parsed = parse("Rule -> U+10FFFF").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let (ch, s) = match &production.expression.kind {
        ExpressionKind::Unicode((ch, s)) => (ch, s),
        other => panic!("expected Unicode, got {:?}", other),
    };
    assert_eq!(*ch, '\u{10FFFF}');
    assert_eq!(s, "10FFFF");
}
949+
/// Two `U+` escapes separated by `|` are expected to become a two-arm
/// alternation of `Unicode` expressions (tab, then line feed).
#[test]
fn unicode_in_alternation() {
    let parsed = parse("Rule -> U+0009 | U+000A").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let alts = match &production.expression.kind {
        ExpressionKind::Alt(alts) => alts,
        other => panic!("expected Alt, got {:?}", other),
    };
    assert_eq!(alts.len(), 2);
    assert!(matches!(&alts[0].kind, ExpressionKind::Unicode((ch, _)) if *ch == '\t'));
    assert!(matches!(&alts[1].kind, ExpressionKind::Unicode((ch, _)) if *ch == '\n'));
}
968+
969+ // --- Character / charset range tests ---
970+
/// A charset holding a single `U+`-to-`U+` range is expected to contain one
/// `Range` whose endpoints are both `Unicode` characters.
#[test]
fn charset_unicode_range() {
    let parsed = parse("Rule -> [U+0000-U+007F]").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let chars = match &production.expression.kind {
        ExpressionKind::Charset(chars) => chars,
        other => panic!("expected Charset, got {:?}", other),
    };
    assert_eq!(chars.len(), 1);
    let (a, b) = match &chars[0] {
        Characters::Range(a, b) => (a, b),
        other => panic!("expected Range, got {:?}", other),
    };
    assert!(matches!(a, Character::Unicode((ch, _)) if *ch == '\0'));
    assert!(matches!(b, Character::Unicode((ch, _)) if *ch == '\u{7F}'));
}
989+
/// A charset holding a range between two backtick-quoted characters is
/// expected to contain one `Range` whose endpoints are both `Char`.
#[test]
fn charset_char_range() {
    let parsed = parse("Rule -> [`a`-`z`]").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let chars = match &production.expression.kind {
        ExpressionKind::Charset(chars) => chars,
        other => panic!("expected Charset, got {:?}", other),
    };
    assert_eq!(chars.len(), 1);
    let (a, b) = match &chars[0] {
        Characters::Range(a, b) => (a, b),
        other => panic!("expected Range, got {:?}", other),
    };
    assert!(matches!(a, Character::Char(ch) if *ch == 'a'));
    assert!(matches!(b, Character::Char(ch) if *ch == 'z'));
}
1005+
/// A range may mix endpoint notations: a quoted character on the left and a
/// `U+` escape on the right are expected to yield `Char` and `Unicode`
/// endpoints respectively.
#[test]
fn charset_mixed_range() {
    let parsed = parse("Rule -> [`a`-U+007A]").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let chars = match &production.expression.kind {
        ExpressionKind::Charset(chars) => chars,
        other => panic!("expected Charset, got {:?}", other),
    };
    assert_eq!(chars.len(), 1);
    let (a, b) = match &chars[0] {
        Characters::Range(a, b) => (a, b),
        other => panic!("expected Range, got {:?}", other),
    };
    assert!(matches!(a, Character::Char(ch) if *ch == 'a'));
    assert!(matches!(b, Character::Unicode((ch, _)) if *ch == 'z'));
}
1024+
/// Two space-separated `U+` ranges inside one charset are expected to be
/// kept as two distinct `Range` entries, in source order.
#[test]
fn charset_multiple_unicode_ranges() {
    let parsed = parse("Rule -> [U+0000-U+D7FF U+E000-U+10FFFF]").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let chars = match &production.expression.kind {
        ExpressionKind::Charset(chars) => chars,
        other => panic!("expected Charset, got {:?}", other),
    };
    assert_eq!(chars.len(), 2);

    // First range: U+0000 ..= U+D7FF.
    let (a1, b1) = match &chars[0] {
        Characters::Range(a1, b1) => (a1, b1),
        other => panic!("expected Range, got {:?}", other),
    };
    assert!(matches!(a1, Character::Unicode((ch, _)) if *ch == '\0'));
    assert!(matches!(b1, Character::Unicode((ch, _)) if *ch == '\u{D7FF}'));

    // Second range: U+E000 ..= U+10FFFF.
    let (a2, b2) = match &chars[1] {
        Characters::Range(a2, b2) => (a2, b2),
        other => panic!("expected Range, got {:?}", other),
    };
    assert!(matches!(a2, Character::Unicode((ch, _)) if *ch == '\u{E000}'));
    assert!(matches!(b2, Character::Unicode((ch, _)) if *ch == '\u{10FFFF}'));
}
1045+
/// A charset mixing quoted terminals with a bare name is expected to hold
/// `Terminal` entries for the former and a `Named` entry for the latter.
#[test]
fn charset_terminals_and_named() {
    let parsed = parse("Rule -> [`a` `b` Foo]").unwrap();
    let production = parsed.productions.get("Rule").unwrap();
    let chars = match &production.expression.kind {
        ExpressionKind::Charset(chars) => chars,
        other => panic!("expected Charset, got {:?}", other),
    };
    assert_eq!(chars.len(), 3);
    assert!(matches!(&chars[0], Characters::Terminal(t) if t == "a"));
    assert!(matches!(&chars[1], Characters::Terminal(t) if t == "b"));
    assert!(matches!(&chars[2], Characters::Named(n) if n == "Foo"));
}
1059+
1060+ // --- Negative lookahead combined with charset ---
1061+
/// A negative lookahead over a charset that mixes quoted terminals with a
/// named entry, followed by a nonterminal.
///
/// Expects a two-element sequence: a lookahead wrapping a three-entry
/// charset (`x`, `y`, `LF`), then the nonterminal `Foo`.
#[test]
fn lookahead_charset_with_named_and_terminals() {
    // Pattern from tokens.md: ![`'` `\` LF CR TAB] ASCII
    let input = "Rule -> ![`x` `y` LF] Foo";
    let grammar = parse(input).unwrap();
    let rule = grammar.productions.get("Rule").unwrap();
    let ExpressionKind::Sequence(seq) = &rule.expression.kind else {
        panic!("expected Sequence, got {:?}", rule.expression.kind);
    };
    assert_eq!(seq.len(), 2);
    let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
        panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
    };
    let ExpressionKind::Charset(chars) = &inner.kind else {
        panic!("expected Charset, got {:?}", inner.kind);
    };
    assert_eq!(chars.len(), 3);
    assert!(matches!(&chars[0], Characters::Terminal(t) if t == "x"));
    assert!(matches!(&chars[1], Characters::Terminal(t) if t == "y"));
    assert!(matches!(&chars[2], Characters::Named(n) if n == "LF"));
    // The element after the lookahead must be preserved as written.
    assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "Foo"));
}
1083+
1084+ // --- Negative lookahead combined with Unicode ---
1085+
/// A negative lookahead over a charset containing a `U+` range, followed by
/// a nonterminal.
///
/// Expects a two-element sequence: a lookahead wrapping a one-entry charset
/// (the range `U+0000`-`U+007F`), then the nonterminal `Foo`.
#[test]
fn lookahead_charset_with_unicode_range() {
    let input = "Rule -> ![U+0000-U+007F] Foo";
    let grammar = parse(input).unwrap();
    let rule = grammar.productions.get("Rule").unwrap();
    let ExpressionKind::Sequence(seq) = &rule.expression.kind else {
        panic!("expected Sequence, got {:?}", rule.expression.kind);
    };
    // Pin the sequence shape before indexing, as the sibling lookahead
    // tests do; otherwise extra or missing elements would go unnoticed.
    assert_eq!(seq.len(), 2);
    let ExpressionKind::NegativeLookahead(inner) = &seq[0].kind else {
        panic!("expected NegativeLookahead, got {:?}", seq[0].kind);
    };
    let ExpressionKind::Charset(chars) = &inner.kind else {
        panic!("expected Charset, got {:?}", inner.kind);
    };
    assert_eq!(chars.len(), 1);
    let Characters::Range(a, b) = &chars[0] else {
        panic!("expected Range, got {:?}", chars[0]);
    };
    assert!(matches!(a, Character::Unicode((ch, _)) if *ch == '\0'));
    assert!(matches!(
        b,
        Character::Unicode((ch, _)) if *ch == '\u{7F}'
    ));
    // The element after the lookahead must be preserved as written.
    assert!(matches!(&seq[1].kind, ExpressionKind::Nt(n) if n == "Foo"));
}
7811110}
0 commit comments