Skip to content

Commit 7c330db

Browse files
committed
refactor(policy): apply trevarj parser feedback
- Drop BNF grammar from doc comment per Andrew's note that it is self-explanatory. - Wrap parser state in MathParser struct over Peekable<CharIndices> to remove byte-indexing and UTF-8 magic numbers. - Allow arbitrary whitespace between tokens, except inside the digraph #{ which remains atomic. - Replace match_sep helper with From<char> for Op (unreachable! in the default arm because the caller verifies the char first). - Derive PartialEq/Eq on Kind to use == directly. - Extract consume_while(predicate) helper for digit and identifier loops in both parse_math_policy and parse_terminal.
1 parent 1384586 commit 7c330db

1 file changed

Lines changed: 160 additions & 124 deletions

File tree

src/policy/semantic.rs

Lines changed: 160 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -364,123 +364,162 @@ impl<Pk: FromStrKey> str::FromStr for Policy<Pk> {
364364
}
365365
}
366366

367-
/// Grammar:
368-
///
369-
/// ```text
370-
/// policy ::= terminal | "(" group ")" | "#{" thresh_body "}" " = " number
371-
/// terminal ::= "UNSATISFIABLE" | "TRIVIAL" | name "(" arg ")"
372-
/// name ::= "pk" | "after" | "older" | "sha256" | "hash256"
373-
/// | "ripemd160" | "hash160"
374-
/// group ::= policy ((" ∧ " policy)+ | (" ∨ " policy)+)
375-
/// thresh_body ::= policy ("," " " policy)+
376-
/// ```
377-
fn parse_math_policy<Pk: FromStrKey>(s: &str) -> Result<Policy<Pk>, Error> {
378-
#[derive(Copy, Clone, PartialEq, Eq)]
379-
enum Op {
380-
And,
381-
Or,
367+
struct MathParser<'a> {
368+
s: &'a str,
369+
iter: core::iter::Peekable<core::str::CharIndices<'a>>,
370+
}
371+
372+
impl<'a> MathParser<'a> {
373+
fn new(s: &'a str) -> Self { Self { s, iter: s.char_indices().peekable() } }
374+
375+
fn peek(&mut self) -> Option<char> { self.iter.peek().map(|&(_, c)| c) }
376+
377+
fn bump(&mut self) -> Option<char> { self.iter.next().map(|(_, c)| c) }
378+
379+
fn pos(&mut self) -> usize { self.iter.peek().map(|&(i, _)| i).unwrap_or(self.s.len()) }
380+
381+
fn skip_ws(&mut self) {
382+
while matches!(self.peek(), Some(c) if c.is_whitespace()) {
383+
self.bump();
384+
}
382385
}
383-
enum Kind {
384-
Group(Option<Op>),
385-
Thresh,
386+
387+
fn try_consume(&mut self, want: char) -> bool {
388+
if self.peek() == Some(want) {
389+
self.bump();
390+
true
391+
} else {
392+
false
393+
}
386394
}
387-
struct Frame<Pk: MiniscriptKey> {
388-
subs: Vec<Arc<Policy<Pk>>>,
389-
kind: Kind,
395+
396+
fn consume_while(&mut self, pred: impl Fn(char) -> bool) -> &'a str {
397+
let start = self.pos();
398+
while matches!(self.peek(), Some(c) if pred(c)) {
399+
self.bump();
400+
}
401+
&self.s[start..self.pos()]
390402
}
403+
}
391404

392-
fn match_sep(s: &str) -> Option<Op> {
393-
if s.starts_with(" ∧ ") {
394-
Some(Op::And)
395-
} else if s.starts_with(" ∨ ") {
396-
Some(Op::Or)
397-
} else {
398-
None
405+
#[derive(Copy, Clone, PartialEq, Eq)]
406+
enum Op {
407+
And,
408+
Or,
409+
}
410+
411+
impl From<char> for Op {
412+
fn from(c: char) -> Self {
413+
match c {
414+
'∧' => Op::And,
415+
'∨' => Op::Or,
416+
_ => unreachable!(),
399417
}
400418
}
419+
}
401420

402-
let bytes = s.as_bytes();
421+
#[derive(PartialEq, Eq)]
422+
enum Kind {
423+
Group(Option<Op>),
424+
Thresh,
425+
}
426+
427+
struct Frame<Pk: MiniscriptKey> {
428+
subs: Vec<Arc<Policy<Pk>>>,
429+
kind: Kind,
430+
}
431+
432+
/// Parses the mathematical-notation form produced by `Display`.
433+
fn parse_math_policy<Pk: FromStrKey>(s: &str) -> Result<Policy<Pk>, Error> {
434+
let mut parser = MathParser::new(s);
403435
let mut frames: Vec<Frame<Pk>> = Vec::new();
404436
let mut cur: Option<Arc<Policy<Pk>>> = None;
405-
let mut i = 0;
406437

407-
while i < bytes.len() {
438+
loop {
439+
parser.skip_ws();
440+
let Some(c) = parser.peek() else { break };
441+
408442
if cur.is_none() {
409-
// Expect: '(', '#{', or a terminal atom.
410-
if bytes[i] == b'(' {
411-
frames.push(Frame { subs: Vec::new(), kind: Kind::Group(None) });
412-
i += 1;
413-
} else if s[i..].starts_with("#{") {
414-
frames.push(Frame { subs: Vec::new(), kind: Kind::Thresh });
415-
i += 2;
416-
} else {
417-
let (policy, end) = parse_terminal(s, bytes, i)?;
418-
cur = Some(Arc::new(policy));
419-
i = end;
443+
match c {
444+
'(' => {
445+
parser.bump();
446+
frames.push(Frame { subs: Vec::new(), kind: Kind::Group(None) });
447+
}
448+
'#' => {
449+
parser.bump();
450+
if !parser.try_consume('{') {
451+
return Err(malformed_math());
452+
}
453+
frames.push(Frame { subs: Vec::new(), kind: Kind::Thresh });
454+
}
455+
_ => cur = Some(Arc::new(parse_terminal(&mut parser)?)),
420456
}
421457
continue;
422458
}
423459

424460
let frame = frames.last_mut().ok_or_else(malformed_math)?;
425-
if let Some(new_op) = match_sep(&s[i..]) {
426-
match &mut frame.kind {
427-
Kind::Group(slot @ None) => *slot = Some(new_op),
428-
Kind::Group(Some(prev)) if *prev == new_op => {}
429-
_ => return Err(malformed_math()),
430-
}
431-
frame.subs.push(cur.take().unwrap());
432-
i += 5; // " ∧ " / " ∨ " in UTF-8
433-
} else if bytes[i] == b',' && bytes.get(i + 1) == Some(&b' ') {
434-
if !matches!(frame.kind, Kind::Thresh) {
435-
return Err(malformed_math());
436-
}
437-
frame.subs.push(cur.take().unwrap());
438-
i += 2;
439-
} else if bytes[i] == b')' {
440-
let mut frame = frames.pop().unwrap();
441-
let op = match frame.kind {
442-
Kind::Group(op) => op.ok_or_else(malformed_math)?,
443-
Kind::Thresh => return Err(malformed_math()),
444-
};
445-
frame.subs.push(cur.take().unwrap());
446-
if frame.subs.len() < 2 {
447-
return Err(malformed_math());
448-
}
449-
let k = if op == Op::And { frame.subs.len() } else { 1 };
450-
cur = Some(Arc::new(Policy::Thresh(
451-
Threshold::new(k, frame.subs).map_err(Error::Threshold)?,
452-
)));
453-
i += 1;
454-
} else if bytes[i] == b'}' {
455-
let mut frame = match frames.pop() {
456-
Some(f) if matches!(f.kind, Kind::Thresh) => f,
457-
_ => return Err(malformed_math()),
458-
};
459-
frame.subs.push(cur.take().unwrap());
460-
i += 1;
461-
if !s[i..].starts_with(" = ") {
462-
return Err(malformed_math());
463-
}
464-
i += 3;
465-
let k_start = i;
466-
while i < bytes.len() && bytes[i].is_ascii_digit() {
467-
i += 1;
461+
match c {
462+
'∧' | '∨' => {
463+
parser.bump();
464+
let op = Op::from(c);
465+
match &mut frame.kind {
466+
Kind::Group(slot @ None) => *slot = Some(op),
467+
Kind::Group(Some(prev)) if *prev == op => {}
468+
_ => return Err(malformed_math()),
469+
}
470+
frame.subs.push(cur.take().unwrap());
468471
}
469-
if i == k_start {
470-
return Err(malformed_math());
472+
',' => {
473+
if frame.kind != Kind::Thresh {
474+
return Err(malformed_math());
475+
}
476+
parser.bump();
477+
frame.subs.push(cur.take().unwrap());
471478
}
472-
let k = expression::parse_num(&s[k_start..i]).map_err(|_| malformed_math())? as usize;
473-
let thresh = Threshold::new(k, frame.subs).map_err(Error::Threshold)?;
474-
// k=1 must be `∨`; k=n must be `∧`.
475-
if thresh.is_or() {
476-
return Err(Error::ParseThreshold(crate::ParseThresholdError::IllegalOr));
479+
')' => {
480+
parser.bump();
481+
let mut frame = frames.pop().unwrap();
482+
let op = match frame.kind {
483+
Kind::Group(op) => op.ok_or_else(malformed_math)?,
484+
Kind::Thresh => return Err(malformed_math()),
485+
};
486+
frame.subs.push(cur.take().unwrap());
487+
if frame.subs.len() < 2 {
488+
return Err(malformed_math());
489+
}
490+
let k = if op == Op::And { frame.subs.len() } else { 1 };
491+
cur = Some(Arc::new(Policy::Thresh(
492+
Threshold::new(k, frame.subs).map_err(Error::Threshold)?,
493+
)));
477494
}
478-
if thresh.is_and() {
479-
return Err(Error::ParseThreshold(crate::ParseThresholdError::IllegalAnd));
495+
'}' => {
496+
parser.bump();
497+
let mut frame = match frames.pop() {
498+
Some(f) if f.kind == Kind::Thresh => f,
499+
_ => return Err(malformed_math()),
500+
};
501+
frame.subs.push(cur.take().unwrap());
502+
parser.skip_ws();
503+
if !parser.try_consume('=') {
504+
return Err(malformed_math());
505+
}
506+
parser.skip_ws();
507+
let k_str = parser.consume_while(|c| c.is_ascii_digit());
508+
if k_str.is_empty() {
509+
return Err(malformed_math());
510+
}
511+
let k = expression::parse_num(k_str).map_err(|_| malformed_math())? as usize;
512+
let thresh = Threshold::new(k, frame.subs).map_err(Error::Threshold)?;
513+
// k=1 must be `∨`; k=n must be `∧`.
514+
if thresh.is_or() {
515+
return Err(Error::ParseThreshold(crate::ParseThresholdError::IllegalOr));
516+
}
517+
if thresh.is_and() {
518+
return Err(Error::ParseThreshold(crate::ParseThresholdError::IllegalAnd));
519+
}
520+
cur = Some(Arc::new(Policy::Thresh(thresh)));
480521
}
481-
cur = Some(Arc::new(Policy::Thresh(thresh)));
482-
} else {
483-
return Err(malformed_math());
522+
_ => return Err(malformed_math()),
484523
}
485524
}
486525

@@ -491,40 +530,26 @@ fn parse_math_policy<Pk: FromStrKey>(s: &str) -> Result<Policy<Pk>, Error> {
491530
Ok(Arc::try_unwrap(root).unwrap_or_else(|arc| (*arc).clone()))
492531
}
493532

494-
/// Parses a single terminal starting at `start` and returns the policy and
495-
/// the byte index immediately after it.
496-
fn parse_terminal<Pk: FromStrKey>(
497-
s: &str,
498-
bytes: &[u8],
499-
start: usize,
500-
) -> Result<(Policy<Pk>, usize), Error> {
501-
let mut i = start;
502-
while i < bytes.len() && bytes[i].is_ascii_alphanumeric() {
503-
i += 1;
504-
}
505-
if i == start {
533+
fn parse_terminal<Pk: FromStrKey>(parser: &mut MathParser<'_>) -> Result<Policy<Pk>, Error> {
534+
let name = parser.consume_while(|c| c.is_ascii_alphanumeric());
535+
if name.is_empty() {
506536
return Err(malformed_math());
507537
}
508-
let name = &s[start..i];
509538

510-
if i >= bytes.len() || bytes[i] != b'(' {
539+
if !parser.try_consume('(') {
511540
return match name {
512-
"UNSATISFIABLE" => Ok((Policy::Unsatisfiable, i)),
513-
"TRIVIAL" => Ok((Policy::Trivial, i)),
541+
"UNSATISFIABLE" => Ok(Policy::Unsatisfiable),
542+
"TRIVIAL" => Ok(Policy::Trivial),
514543
_ => Err(malformed_math()),
515544
};
516545
}
517546

518-
let arg_start = i + 1;
519-
let arg_end = bytes[arg_start..]
520-
.iter()
521-
.position(|&b| b == b')')
522-
.map(|p| p + arg_start)
523-
.ok_or_else(malformed_math)?;
524-
let arg = &s[arg_start..arg_end];
525-
let end = arg_end + 1;
547+
let arg = parser.consume_while(|c| c != ')');
548+
if !parser.try_consume(')') {
549+
return Err(malformed_math());
550+
}
526551

527-
let policy = match name {
552+
Ok(match name {
528553
"pk" => Policy::Key(parse_arg(arg)?),
529554
"after" => {
530555
let n = expression::parse_num(arg)
@@ -551,8 +576,7 @@ fn parse_terminal<Pk: FromStrKey>(
551576
"ripemd160" => Policy::Ripemd160(parse_arg(arg)?),
552577
"hash160" => Policy::Hash160(parse_arg(arg)?),
553578
_ => return Err(malformed_math()),
554-
};
555-
Ok((policy, end))
579+
})
556580
}
557581

558582
fn parse_arg<T: core::str::FromStr>(arg: &str) -> Result<T, Error>
@@ -1016,6 +1040,18 @@ mod tests {
10161040
assert!(StringPolicy::from_str("(pk(A) ∧ pk(B)").is_err());
10171041
assert!(StringPolicy::from_str("#{pk(A), pk(B)} = 1").is_err());
10181042
assert!(StringPolicy::from_str("(and(pk(A),pk(B)) ∧ pk(C))").is_err());
1043+
1044+
// Whitespace flexibility: any amount of whitespace between tokens.
1045+
let canonical = StringPolicy::from_str("(pk(A) ∧ pk(B))").unwrap();
1046+
assert_eq!(StringPolicy::from_str("(pk(A)∧pk(B))").unwrap(), canonical);
1047+
assert_eq!(StringPolicy::from_str("( pk(A) ∧ pk(B) )").unwrap(), canonical);
1048+
1049+
let thresh = StringPolicy::from_str("#{pk(A), pk(B), pk(C)} = 2").unwrap();
1050+
assert_eq!(StringPolicy::from_str("#{pk(A),pk(B),pk(C)}=2").unwrap(), thresh);
1051+
assert_eq!(StringPolicy::from_str("#{ pk(A) , pk(B) , pk(C) } = 2").unwrap(), thresh);
1052+
1053+
// `#{` is a digraph; whitespace between `#` and `{` is rejected.
1054+
assert!(StringPolicy::from_str("# {pk(A), pk(B), pk(C)} = 2").is_err());
10191055
}
10201056

10211057
#[test]

0 commit comments

Comments
 (0)