Skip to content

Commit d7ff1fa

Browse files
committed
Add regex-automata back, fall back to PCRE2 for incompatible regexes
This is generally about a 50% performance improvement over using PCRE2 alone, even though it is slower than the original path (which didn't support look-arounds and blew up on certain SELinux policies). Signed-off-by: Dallas Strouse <dallas.strouse2007@gmail.com>
1 parent a4c1ca7 commit d7ff1fa

2 files changed

Lines changed: 66 additions & 16 deletions

File tree

crates/composefs-boot/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ anyhow = { version = "1.0.87", default-features = false }
1515
fn-error-context = "0.2"
1616
composefs = { workspace = true }
1717
hex = { version = "0.4.0", default-features = false, features = ["std"] }
18+
regex-automata = "0.4.14"
1819
pcre2 = "0.2.11"
1920
rustix = { version = "1.0.0", default-features = false, features = ["fs", "std"] }
2021
thiserror = { version = "2.0.0", default-features = false }

crates/composefs-boot/src/selabel.rs

Lines changed: 65 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,15 @@ use std::{
1414
path::{Path, PathBuf},
1515
};
1616

17-
use anyhow::{Context, Result, bail, ensure};
17+
use anyhow::{Context, Result, anyhow, bail, ensure};
1818
use fn_error_context::context;
19-
use pcre2::bytes::Regex;
19+
use pcre2::bytes::Regex as PcreRegex;
20+
// The meta regex engine dispatches to whichever underlying engine suits the
21+
// pattern, so it can sometimes beat pinning one specific engine up front;
22+
// that makes it a reasonable default here.
23+
// It is also fairly easy to swap out if desired, since only
24+
// two methods need to be supported.
25+
use regex_automata::meta::Regex as AutomataRegex;
2026
use rustix::{
2127
fd::AsFd,
2228
fs::{Mode, OFlags, openat},
@@ -101,10 +107,47 @@ fn process_spec_file(
101107
Ok(())
102108
}
103109

110+
/* Most Rust regex crates don't support look-arounds and some other PCRE2 features, but they
111+
 * perform a lot better on patterns that don't need them. So we first try to compile each
112+
 * pattern with the Rust regex engine, and fall back to PCRE2 only if that fails. */
113+
114+
enum RegexImpl {
115+
Automata(AutomataRegex),
116+
Pcre2(PcreRegex),
117+
}
118+
119+
impl RegexImpl {
120+
#[inline(always)]
121+
fn new(pattern: &str) -> Result<Self> {
122+
AutomataRegex::new(pattern)
123+
.map(RegexImpl::Automata)
124+
.or_else(|automata_err| {
125+
PcreRegex::new(pattern)
126+
.map(RegexImpl::Pcre2)
127+
.map_err(|pcre_err| {
128+
anyhow!(
129+
"Failed to compile regex `{}` with regex_automata: {}; PCRE2 fallback failed: {}",
130+
pattern,
131+
automata_err,
132+
pcre_err
133+
)
134+
})
135+
})
136+
}
137+
138+
#[inline(always)]
139+
fn is_match(&self, input: &[u8]) -> bool {
140+
match self {
141+
RegexImpl::Automata(re) => re.is_match(input),
142+
RegexImpl::Pcre2(re) => re.is_match(input).unwrap_or(false),
143+
}
144+
}
145+
}
146+
104147
struct Policy {
105148
aliases: HashMap<OsString, OsString>,
106-
regexes: Vec<Regex>,
107-
contexts: Vec<String>,
149+
// (regex, context) pairs, checked in order; the first matching regex wins
150+
rules: Vec<(RegexImpl, String)>,
108151
}
109152

110153
/// Open a file in the composefs store, handling inline vs external files.
@@ -172,29 +215,28 @@ impl Policy {
172215
regexps.reverse();
173216
contexts.reverse();
174217

175-
let mut compiled = Vec::with_capacity(regexps.len());
176-
for r in &regexps {
177-
compiled.push(Regex::new(r).with_context(|| format!("Compiling PCRE2 regex: {r}"))?);
218+
let mut rules = Vec::with_capacity(regexps.len());
219+
for (re_src, context) in regexps.into_iter().zip(contexts.into_iter()) {
220+
let regex = RegexImpl::new(&re_src)
221+
.with_context(|| format!("Compiling regex: {re_src}"))?;
222+
rules.push((regex, context));
178223
}
179224

180-
Ok(Policy {
181-
aliases,
182-
regexes: compiled,
183-
contexts,
184-
})
225+
Ok(Policy { aliases, rules })
185226
}
186227

228+
187229
pub fn check_aliased(&self, filename: &OsStr) -> Option<&OsStr> {
188230
self.aliases.get(filename).map(|x| x.as_os_str())
189231
}
190232

191233
pub fn lookup(&self, filename: &OsStr, ifmt: u8) -> Option<&str> {
192234
let key = &[filename.as_bytes(), &[ifmt]].concat();
193235

194-
for (i, re) in self.regexes.iter().enumerate() {
195-
if re.is_match(key).unwrap_or(false) {
196-
let ctx = self.contexts[i].as_str();
197-
return (ctx != "<<none>>").then_some(ctx);
236+
for rule in &self.rules {
237+
if rule.0.is_match(&key) {
238+
let context = rule.1.as_str();
239+
return (context != "<<none>>").then_some(context);
198240
}
199241
}
200242
None
@@ -613,6 +655,13 @@ mod tests {
613655
dumpfile_to_filesystem(&dumpfile).unwrap()
614656
}
615657

658+
/// A look-ahead pattern cannot be compiled by `regex_automata`, so it must be
/// compiled by the PCRE2 fallback and still match correctly.
#[test]
fn selabel_regex_falls_back_to_pcre2_for_lookarounds() {
    let regex = RegexImpl::new(r"foo(?=bar)").expect("fallback should compile");
    // Check the engine actually selected, so the test cannot pass without
    // exercising the fallback path its name refers to.
    assert!(matches!(regex, RegexImpl::Pcre2(_)));
    assert!(regex.is_match(b"foobar"));
    assert!(!regex.is_match(b"fooqux"));
}
664+
616665
/// Verify that selabel() applies the correct SELinux contexts from
617666
/// an in-memory filesystem's embedded policy files.
618667
#[test]

0 commit comments

Comments
 (0)