@@ -29,7 +29,8 @@ allows using the same regex engine for matching both types of patterns.
2929[Hir]: regex_syntax::hir::Hir
3030*/
3131
32- use std:: collections:: Bound ;
32+ use std:: collections:: btree_map:: Entry ;
33+ use std:: collections:: { BTreeMap , Bound } ;
3334use std:: fmt:: { Debug , Formatter } ;
3435use std:: hash:: { Hash , Hasher } ;
3536use std:: mem;
@@ -51,7 +52,7 @@ use crate::compiler::ir::dfs::{
5152 DFSIter , DFSWithScopeIter , Event , EventContext , dfs_common,
5253} ;
5354
54- use crate :: compiler:: { FilesizeBounds , RegexSetId } ;
55+ use crate :: compiler:: { FilesizeBounds , HeaderConstraint , RegexSetId } ;
5556use crate :: re;
5657use crate :: symbols:: Symbol ;
5758use crate :: types:: Value :: Const ;
@@ -310,6 +311,17 @@ impl Pattern {
310311 }
311312 }
312313 }
314+
315+ pub fn set_header_constraints ( & mut self , constraints : & HeaderConstraint ) {
316+ match self {
317+ Pattern :: Text ( literal) => {
318+ literal. header_constraints = constraints. clone ( ) ;
319+ }
320+ Pattern :: Regexp ( regexp) | Pattern :: Hex ( regexp) => {
321+ regexp. header_constraints = constraints. clone ( ) ;
322+ }
323+ }
324+ }
313325}
314326
315327#[ derive( Clone , Eq , Hash , PartialEq ) ]
@@ -321,6 +333,7 @@ pub(crate) struct LiteralPattern {
321333 pub base64_alphabet : Option < String > ,
322334 pub base64wide_alphabet : Option < String > ,
323335 pub filesize_bounds : FilesizeBounds ,
336+ pub header_constraints : HeaderConstraint ,
324337}
325338
326339#[ derive( Clone , Eq , Hash , PartialEq ) ]
@@ -329,6 +342,7 @@ pub(crate) struct RegexpPattern {
329342 pub hir : re:: hir:: Hir ,
330343 pub anchored_at : Option < usize > ,
331344 pub filesize_bounds : FilesizeBounds ,
345+ pub header_constraints : HeaderConstraint ,
332346}
333347
334348/// The index of a pattern in the rule that declares it.
@@ -992,6 +1006,251 @@ impl IR {
9921006
9931007 result
9941008 }
1009+
1010+ pub fn header_constraints (
1011+ & self ,
1012+ pattern_prefix_lookup : impl Fn ( PatternIdx ) -> Option < Vec < u8 > > ,
1013+ ) -> HeaderConstraint {
1014+ let mut constrained_bytes = BTreeMap :: new ( ) ;
1015+ let mut unsatisfiable = false ;
1016+ let mut dfs = self . dfs_iter ( self . root . unwrap ( ) ) ;
1017+
1018+ while let Some ( evt) = dfs. next ( ) {
1019+ let expr = match evt {
1020+ Event :: Enter ( ( _, expr, _) ) => expr,
1021+ _ => continue ,
1022+ } ;
1023+ match expr {
1024+ Expr :: Eq { lhs, rhs } => {
1025+ self . extract_header_constraints_from_eq (
1026+ * lhs,
1027+ * rhs,
1028+ & mut constrained_bytes,
1029+ & mut unsatisfiable,
1030+ ) ;
1031+ }
1032+ Expr :: PatternMatch { pattern, anchor } => {
1033+ if let MatchAnchor :: At ( offset_expr) = anchor
1034+ && let Some ( 0 ) =
1035+ self . get ( * offset_expr) . try_as_const_integer ( )
1036+ && let Some ( prefix_bytes) =
1037+ pattern_prefix_lookup ( * pattern)
1038+ {
1039+ for ( i, & b) in prefix_bytes. iter ( ) . enumerate ( ) {
1040+ match constrained_bytes. entry ( i) {
1041+ Entry :: Occupied ( entry) => {
1042+ if * entry. get ( ) != b {
1043+ unsatisfiable = true ;
1044+ break ;
1045+ }
1046+ }
1047+ Entry :: Vacant ( entry) => {
1048+ entry. insert ( b) ;
1049+ }
1050+ }
1051+ }
1052+ }
1053+ }
1054+ _ => { }
1055+ }
1056+ if unsatisfiable {
1057+ break ;
1058+ }
1059+ if !matches ! ( expr, Expr :: And { .. } ) {
1060+ dfs. prune ( ) ;
1061+ }
1062+ }
1063+
1064+ if unsatisfiable {
1065+ return HeaderConstraint :: Unsatisfiable ;
1066+ }
1067+
1068+ // If the first byte in `constrained_bytes` is at offset 0, we can
1069+ // return HeaderConstraint::Constrained.
1070+ if let Some ( ( 0 , _) ) = constrained_bytes. first_key_value ( ) {
1071+ HeaderConstraint :: Constrained (
1072+ // Take only the bytes at consecutive offsets starting at 0.
1073+ constrained_bytes
1074+ . into_iter ( )
1075+ . enumerate ( )
1076+ . map_while (
1077+ |( i, ( offset, byte) ) | {
1078+ if i == offset { Some ( byte) } else { None }
1079+ } ,
1080+ )
1081+ . collect ( ) ,
1082+ )
1083+ } else {
1084+ HeaderConstraint :: Unconstrained
1085+ }
1086+ }
1087+
1088+ fn extract_header_constraints_from_eq (
1089+ & self ,
1090+ lhs : ExprId ,
1091+ rhs : ExprId ,
1092+ constrained_bytes : & mut BTreeMap < usize , u8 > ,
1093+ unsatisfiable : & mut bool ,
1094+ ) {
1095+ if let Some ( val) = self . get ( rhs) . try_as_const_integer ( )
1096+ && self . apply_int_read_constraint (
1097+ constrained_bytes,
1098+ unsatisfiable,
1099+ lhs,
1100+ val,
1101+ )
1102+ {
1103+ return ;
1104+ }
1105+ if let Some ( val) = self . get ( lhs) . try_as_const_integer ( ) {
1106+ self . apply_int_read_constraint (
1107+ constrained_bytes,
1108+ unsatisfiable,
1109+ rhs,
1110+ val,
1111+ ) ;
1112+ }
1113+ }
1114+
1115+ fn add_constraint (
1116+ & self ,
1117+ constrained_bytes : & mut BTreeMap < usize , u8 > ,
1118+ unsatisfiable : & mut bool ,
1119+ offset : usize ,
1120+ value : u8 ,
1121+ ) {
1122+ if * unsatisfiable {
1123+ return ;
1124+ }
1125+ match constrained_bytes. entry ( offset) {
1126+ Entry :: Occupied ( entry) => {
1127+ if * entry. get ( ) != value {
1128+ * unsatisfiable = true ;
1129+ }
1130+ }
1131+ Entry :: Vacant ( entry) => {
1132+ entry. insert ( value) ;
1133+ }
1134+ }
1135+ }
1136+
1137+ fn apply_int_read_constraint (
1138+ & self ,
1139+ constrained_bytes : & mut BTreeMap < usize , u8 > ,
1140+ unsatisfiable : & mut bool ,
1141+ expr_id : ExprId ,
1142+ val : i64 ,
1143+ ) -> bool {
1144+ let func_call = match self . get ( expr_id) {
1145+ Expr :: FuncCall ( func_call) => func_call,
1146+ _ => return false ,
1147+ } ;
1148+
1149+ if let Some ( offset) = func_call
1150+ . args
1151+ . first ( )
1152+ . and_then ( |arg| self . get ( * arg) . try_as_const_integer ( ) )
1153+ && offset >= 0
1154+ {
1155+ match func_call. plain_name ( ) {
1156+ "uint8" | "int8" | "uint8be" | "int8be" => {
1157+ self . add_constraint (
1158+ constrained_bytes,
1159+ unsatisfiable,
1160+ offset as usize ,
1161+ val as u8 ,
1162+ ) ;
1163+ return true ;
1164+ }
1165+ "uint16" | "int16" => {
1166+ self . add_constraint (
1167+ constrained_bytes,
1168+ unsatisfiable,
1169+ offset as usize ,
1170+ ( val as u16 & 0xff ) as u8 ,
1171+ ) ;
1172+ self . add_constraint (
1173+ constrained_bytes,
1174+ unsatisfiable,
1175+ offset as usize + 1 ,
1176+ ( ( val as u16 >> 8 ) & 0xff ) as u8 ,
1177+ ) ;
1178+ return true ;
1179+ }
1180+ "uint16be" | "int16be" => {
1181+ self . add_constraint (
1182+ constrained_bytes,
1183+ unsatisfiable,
1184+ offset as usize ,
1185+ ( ( val as u16 >> 8 ) & 0xff ) as u8 ,
1186+ ) ;
1187+ self . add_constraint (
1188+ constrained_bytes,
1189+ unsatisfiable,
1190+ offset as usize + 1 ,
1191+ ( val as u16 & 0xff ) as u8 ,
1192+ ) ;
1193+ return true ;
1194+ }
1195+ "uint32" | "int32" => {
1196+ self . add_constraint (
1197+ constrained_bytes,
1198+ unsatisfiable,
1199+ offset as usize ,
1200+ ( val as u32 & 0xff ) as u8 ,
1201+ ) ;
1202+ self . add_constraint (
1203+ constrained_bytes,
1204+ unsatisfiable,
1205+ offset as usize + 1 ,
1206+ ( ( val as u32 >> 8 ) & 0xff ) as u8 ,
1207+ ) ;
1208+ self . add_constraint (
1209+ constrained_bytes,
1210+ unsatisfiable,
1211+ offset as usize + 2 ,
1212+ ( ( val as u32 >> 16 ) & 0xff ) as u8 ,
1213+ ) ;
1214+ self . add_constraint (
1215+ constrained_bytes,
1216+ unsatisfiable,
1217+ offset as usize + 3 ,
1218+ ( ( val as u32 >> 24 ) & 0xff ) as u8 ,
1219+ ) ;
1220+ return true ;
1221+ }
1222+ "uint32be" | "int32be" => {
1223+ self . add_constraint (
1224+ constrained_bytes,
1225+ unsatisfiable,
1226+ offset as usize ,
1227+ ( ( val as u32 >> 24 ) & 0xff ) as u8 ,
1228+ ) ;
1229+ self . add_constraint (
1230+ constrained_bytes,
1231+ unsatisfiable,
1232+ offset as usize + 1 ,
1233+ ( ( val as u32 >> 16 ) & 0xff ) as u8 ,
1234+ ) ;
1235+ self . add_constraint (
1236+ constrained_bytes,
1237+ unsatisfiable,
1238+ offset as usize + 2 ,
1239+ ( ( val as u32 >> 8 ) & 0xff ) as u8 ,
1240+ ) ;
1241+ self . add_constraint (
1242+ constrained_bytes,
1243+ unsatisfiable,
1244+ offset as usize + 3 ,
1245+ ( val as u32 & 0xff ) as u8 ,
1246+ ) ;
1247+ return true ;
1248+ }
1249+ _ => { }
1250+ }
1251+ }
1252+ false
1253+ }
9951254}
9961255
9971256impl IR {
@@ -2367,6 +2626,12 @@ impl FuncCall {
23672626 pub fn mangled_name ( & self ) -> & str {
23682627 self . signature ( ) . mangled_name . as_str ( )
23692628 }
2629+
2630+ /// Returns the plain function name, without argument or return type
2631+ /// information (i.e: everything before the `@` in the name).
2632+ pub fn plain_name ( & self ) -> & str {
2633+ self . signature ( ) . mangled_name . plain_name ( )
2634+ }
23702635}
23712636
23722637/// An `of` expression with a tuple of expressions (e.g. `1 of (true, false)`).
0 commit comments