Skip to content

Commit a4c1ca7

Browse files
committed
selabel: Use pcre2 for regex
SELinux uses pcre2 for its regex library, and there's a Rust crate for it available, so we might as well use it for compatibility. The regular regex crate also doesn't support look-arounds, so it's better to move away from it anyway. Fixes #317 Signed-off-by: Dallas Strouse <dallas.strouse2007@gmail.com>
1 parent 0ea5d52 commit a4c1ca7

2 files changed

Lines changed: 17 additions & 54 deletions

File tree

crates/composefs-boot/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ anyhow = { version = "1.0.87", default-features = false }
1515
fn-error-context = "0.2"
1616
composefs = { workspace = true }
1717
hex = { version = "0.4.0", default-features = false, features = ["std"] }
18-
regex-automata = { version = "0.4.4", default-features = false, features=["hybrid", "std", "syntax"] }
18+
pcre2 = "0.2.11"
1919
rustix = { version = "1.0.0", default-features = false, features = ["fs", "std"] }
2020
thiserror = { version = "2.0.0", default-features = false }
2121
zerocopy = { version = "0.8.0", default-features = false, features = ["derive"] }

crates/composefs-boot/src/selabel.rs

Lines changed: 16 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
//! This module implements SELinux policy parsing and file labeling functionality.
44
//! It reads SELinux policy files (file_contexts, file_contexts.subs, etc.) and applies
55
//! appropriate security.selinux extended attributes to filesystem nodes. The implementation
6-
//! uses regex automata for efficient pattern matching against file paths and types.
6+
//! uses pcre2 for compatibility with SELinux regexes (the same library SELinux uses).
77
88
use std::{
99
collections::HashMap,
@@ -16,7 +16,7 @@ use std::{
1616

1717
use anyhow::{Context, Result, bail, ensure};
1818
use fn_error_context::context;
19-
use regex_automata::{Anchored, Input, hybrid::dfa, util::syntax};
19+
use pcre2::bytes::Regex;
2020
use rustix::{
2121
fd::AsFd,
2222
fs::{Mode, OFlags, openat},
@@ -36,25 +36,6 @@ use composefs::{
3636
/// that should be stripped or regenerated based on the target system's policy.
3737
pub const XATTR_SECURITY_SELINUX: &str = "security.selinux";
3838

39-
/* We build the entire SELinux policy into a single "lazy DFA" such that:
40-
*
41-
* - the input string is the filename plus a single character representing the type of the file,
42-
* using the 'file type' codes listed in selabel_file(5): 'b', 'c', 'd', 'p', 'l', 's', and '-'
43-
*
44-
* - the output pattern ID is the index of the selected context
45-
*
46-
* The 'subs' mapping is handled as a hash table. We consult it each time we enter a directory and
47-
* perform the substitution a single time at that point instead of doing it for each contained
48-
* file.
49-
*
50-
* We could maybe add a string table to deduplicate contexts to save memory (as they are often
51-
* repeated). It's not an order-of-magnitude kind of gain, though, and it would increase code
52-
* complexity, and slightly decrease efficiency.
53-
*
54-
* Note: we are not 100% compatible with PCRE here, so it's theoretically possible that someone
55-
* could write a policy that we can't properly handle...
56-
*/
57-
5839
#[context("Processing SELinux substitutions file")]
5940
fn process_subs_file(file: impl Read, aliases: &mut HashMap<OsString, OsString>) -> Result<()> {
6041
// r"\s*([^\s]+)\s+([^\s]+)\s*";
@@ -122,8 +103,7 @@ fn process_spec_file(
122103

123104
struct Policy {
124105
aliases: HashMap<OsString, OsString>,
125-
dfa: dfa::DFA,
126-
cache: dfa::Cache,
106+
regexes: Vec<Regex>,
127107
contexts: Vec<String>,
128108
}
129109

@@ -188,29 +168,18 @@ impl Policy {
188168
}
189169
}
190170

191-
// The DFA matches the first-found. We want to match the last-found.
171+
// Lookup returns the first matching pattern, but we want the last-found entry to win, so reverse both lists.
192172
regexps.reverse();
193173
contexts.reverse();
194174

195-
let mut builder = dfa::Builder::new();
196-
builder.syntax(
197-
syntax::Config::new()
198-
.unicode(false)
199-
.utf8(false)
200-
.line_terminator(0),
201-
);
202-
builder.configure(
203-
dfa::Config::new()
204-
.cache_capacity(10_000_000)
205-
.skip_cache_capacity_check(true),
206-
);
207-
let dfa = builder.build_many(&regexps)?;
208-
let cache = dfa.create_cache();
175+
let mut compiled = Vec::with_capacity(regexps.len());
176+
for r in &regexps {
177+
compiled.push(Regex::new(r).with_context(|| format!("Compiling PCRE2 regex: {r}"))?);
178+
}
209179

210180
Ok(Policy {
211181
aliases,
212-
dfa,
213-
cache,
182+
regexes: compiled,
214183
contexts,
215184
})
216185
}
@@ -219,22 +188,16 @@ impl Policy {
219188
self.aliases.get(filename).map(|x| x.as_os_str())
220189
}
221190

222-
// mut because it touches the cache
223-
pub fn lookup(&mut self, filename: &OsStr, ifmt: u8) -> Option<&str> {
191+
pub fn lookup(&self, filename: &OsStr, ifmt: u8) -> Option<&str> {
224192
let key = &[filename.as_bytes(), &[ifmt]].concat();
225-
let input = Input::new(&key).anchored(Anchored::Yes);
226193

227-
match self
228-
.dfa
229-
.try_search_fwd(&mut self.cache, &input)
230-
.expect("regex troubles")
231-
{
232-
Some(halfmatch) => match self.contexts[halfmatch.pattern()].as_str() {
233-
"<<none>>" => None,
234-
ctx => Some(ctx),
235-
},
236-
None => None,
194+
for (i, re) in self.regexes.iter().enumerate() {
195+
if re.is_match(key).unwrap_or(false) {
196+
let ctx = self.contexts[i].as_str();
197+
return (ctx != "<<none>>").then_some(ctx);
198+
}
237199
}
200+
None
238201
}
239202
}
240203

0 commit comments

Comments
 (0)