refactor(policy): rewrite semantic policy FromStr as direct single-pass parser

febyeji · febyeji · commit a927af6b4eba · 2026-04-23T19:05:18.000+09:00
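Replace the recursive `rewrite_math_notation` that rescanned the input at every depth with a single linear pass that builds `Policy<Pk>` directly using an explicit frame stack. Matches `expression::Tree`'s non-recursive conventions. Addresses apoelstra's review on rust-bitcoin#914. Note that the new parser is stricter than the rewriter it replaces: it no longer trims whitespace around the input or around operands, it requires the exact ` = ` spelling before a threshold's k, and it rejects a parenthesized group that contains no `∧`/`∨` operator (the old code silently unwrapped such redundant parentheses).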
diff --git a/src/policy/semantic.rs b/src/policy/semantic.rs
@@ -358,140 +358,178 @@ impl<Pk: MiniscriptKey> Policy<Pk> {
 
 impl<Pk: FromStrKey> str::FromStr for Policy<Pk> {
     type Err = Error;
-    fn from_str(s: &str) -> Result<Policy<Pk>, Error> {
-        let normalized;
-        let input = if s.contains('∧') || s.contains('∨') || s.contains("#{") {
-            normalized = rewrite_math_notation(s).map_err(Error::Parse)?;
-            normalized.as_str()
-        } else {
-            s
-        };
-        let tree = expression::Tree::from_str(input)?;
-        expression::FromTree::from_tree(tree.root())
-    }
-}
-
-/// Rewrites Display's mathematical form into the function-call form consumed by `expression::Tree`.
-fn rewrite_math_notation(s: &str) -> Result<String, crate::ParseError> {
-    let mut out = String::with_capacity(s.len());
-    rewrite_expr(s.trim(), &mut out)?;
-    Ok(out)
+    fn from_str(s: &str) -> Result<Policy<Pk>, Error> { parse_policy(s) }
 }
 
-fn rewrite_expr(s: &str, out: &mut String) -> Result<(), crate::ParseError> {
-    let s = s.trim();
-    if let Some(body) = s.strip_prefix("#{") {
-        let close = find_matching_close(body, b'}')?;
-        let inside = &body[..close];
-        let k_str = body[close + 1..]
-            .trim_start()
-            .strip_prefix('=')
-            .ok_or_else(malformed_math)?
-            .trim();
-        if k_str.is_empty() || !k_str.bytes().all(|b| b.is_ascii_digit()) {
-            return Err(malformed_math());
-        }
-        out.push_str("thresh(");
-        out.push_str(k_str);
-        for child in split_top_level(inside, ", ") {
-            out.push(',');
-            rewrite_expr(child, out)?;
-        }
-        out.push(')');
-        Ok(())
-    } else if let Some(body) = s.strip_prefix('(') {
-        let close = find_matching_close(body, b')')?;
-        let inside = &body[..close];
-        if !body[close + 1..].is_empty() {
-            return Err(malformed_math());
-        }
-        let (op, sep) = if has_top_level(inside, " ∧ ") {
-            ("and", " ∧ ")
-        } else if has_top_level(inside, " ∨ ") {
-            ("or", " ∨ ")
-        } else {
-            return rewrite_expr(inside, out);
-        };
-        out.push_str(op);
-        out.push('(');
-        for (i, child) in split_top_level(inside, sep).into_iter().enumerate() {
-            if i > 0 {
-                out.push(',');
-            }
-            rewrite_expr(child, out)?;
-        }
-        out.push(')');
-        Ok(())
-    } else {
-        out.push_str(s);
-        Ok(())
+/// Parses a semantic policy from either the mathematical Display form
+/// (`(a ∧ b)`, `(a ∨ b)`, `#{a, b, ..} = k`) or the legacy function-call
+/// form (`and(..)`, `or(..)`, `thresh(k, ..)`).
+///
+/// This is a single linear pass: each byte of input is visited at most
+/// twice (once by the outer scan and, for bytes that belong to a terminal
+/// or to a nested function-call atom, once by [`expression::Tree::from_str`]
+/// which is itself non-recursive). The parser uses explicit frame and
+/// operand stacks rather than recursion.
+fn parse_policy<Pk: FromStrKey>(s: &str) -> Result<Policy<Pk>, Error> {
+    // UTF-8 byte sequences for the mathematical operators.
+    const AND_SEP: &[u8] = b" \xE2\x88\xA7 ";
+    const OR_SEP: &[u8] = b" \xE2\x88\xA8 ";
+
+    #[derive(Copy, Clone, PartialEq, Eq)]
+    enum Op {
+        And,
+        Or,
+    }
+    struct Frame<Pk: MiniscriptKey> {
+        subs: Vec<Arc<Policy<Pk>>>,
+        op: Option<Op>,
+        is_thresh: bool,
     }
-}
 
-fn find_matching_close(s: &str, expected: u8) -> Result<usize, crate::ParseError> {
     let bytes = s.as_bytes();
-    let mut depth: i32 = 1;
-    for (i, &ch) in bytes.iter().enumerate() {
-        match ch {
-            b'(' | b'{' => depth += 1,
-            b')' | b'}' => {
-                depth -= 1;
-                if depth == 0 {
-                    return if ch == expected {
-                        Ok(i)
-                    } else {
-                        Err(malformed_math())
-                    };
-                }
+    let n = bytes.len();
+    let mut frames: Vec<Frame<Pk>> = Vec::new();
+    let mut cur: Option<Arc<Policy<Pk>>> = None;
+    let mut i = 0;
+
+    while i < n {
+        let b = bytes[i];
+        if cur.is_none() {
+            // Expecting a fresh operand: a math group `(`, a threshold `#{`,
+            // or an atom like `pk(..)`, `UNSATISFIABLE`, etc.
+            if b == b'(' {
+                frames.push(Frame { subs: Vec::new(), op: None, is_thresh: false });
+                i += 1;
+            } else if b == b'#' && bytes.get(i + 1) == Some(&b'{') {
+                frames.push(Frame { subs: Vec::new(), op: None, is_thresh: true });
+                i += 2;
+            } else {
+                let end = scan_atom(bytes, i).ok_or_else(|| Error::Parse(malformed_math()))?;
+                let atom = &s[i..end];
+                let tree = expression::Tree::from_str(atom)?;
+                let policy: Policy<Pk> = expression::FromTree::from_tree(tree.root())?;
+                cur = Some(Arc::new(policy));
+                i = end;
+            }
+        } else if bytes[i..].starts_with(AND_SEP) || bytes[i..].starts_with(OR_SEP) {
+            let new_op = if bytes[i..].starts_with(AND_SEP) {
+                Op::And
+            } else {
+                Op::Or
+            };
+            let frame = frames
+                .last_mut()
+                .ok_or_else(|| Error::Parse(malformed_math()))?;
+            if frame.is_thresh {
+                return Err(Error::Parse(malformed_math()));
+            }
+            match frame.op {
+                None => frame.op = Some(new_op),
+                Some(existing) if existing == new_op => {}
+                Some(_) => return Err(Error::Parse(malformed_math())),
+            }
+            frame.subs.push(cur.take().unwrap());
+            i += AND_SEP.len();
+        } else if b == b',' && bytes.get(i + 1) == Some(&b' ') {
+            let frame = frames
+                .last_mut()
+                .ok_or_else(|| Error::Parse(malformed_math()))?;
+            if !frame.is_thresh {
+                return Err(Error::Parse(malformed_math()));
+            }
+            frame.subs.push(cur.take().unwrap());
+            i += 2;
+        } else if b == b')' {
+            let mut frame = frames.pop().ok_or_else(|| Error::Parse(malformed_math()))?;
+            if frame.is_thresh {
+                return Err(Error::Parse(malformed_math()));
+            }
+            frame.subs.push(cur.take().unwrap());
+            if frame.subs.len() < 2 {
+                return Err(Error::Parse(malformed_math()));
+            }
+            let op = frame.op.ok_or_else(|| Error::Parse(malformed_math()))?;
+            let k = match op {
+                Op::And => frame.subs.len(),
+                Op::Or => 1,
+            };
+            let thresh = Threshold::new(k, frame.subs).map_err(Error::Threshold)?;
+            cur = Some(Arc::new(Policy::Thresh(thresh)));
+            i += 1;
+        } else if b == b'}' {
+            let mut frame = frames.pop().ok_or_else(|| Error::Parse(malformed_math()))?;
+            if !frame.is_thresh {
+                return Err(Error::Parse(malformed_math()));
             }
-            _ => {}
+            frame.subs.push(cur.take().unwrap());
+            i += 1;
+            if bytes.get(i..i + 3) != Some(b" = ") {
+                return Err(Error::Parse(malformed_math()));
+            }
+            i += 3;
+            let k_start = i;
+            while i < n && bytes[i].is_ascii_digit() {
+                i += 1;
+            }
+            if i == k_start {
+                return Err(Error::Parse(malformed_math()));
+            }
+            let k = expression::parse_num(&s[k_start..i])
+                .map_err(|_| Error::Parse(malformed_math()))? as usize;
+            let thresh = Threshold::new(k, frame.subs).map_err(Error::Threshold)?;
+            // In semantic policies we reserve `#{..} = k` for k strictly between
+            // 1 and n; k=1 must be spelled `∨` and k=n must be spelled `∧`.
+            if thresh.is_or() {
+                return Err(Error::ParseThreshold(crate::ParseThresholdError::IllegalOr));
+            }
+            if thresh.is_and() {
+                return Err(Error::ParseThreshold(crate::ParseThresholdError::IllegalAnd));
+            }
+            cur = Some(Arc::new(Policy::Thresh(thresh)));
+        } else {
+            return Err(Error::Parse(malformed_math()));
         }
     }
-    Err(malformed_math())
-}
 
-fn has_top_level(s: &str, sep: &str) -> bool {
-    let bytes = s.as_bytes();
-    let sep_bytes = sep.as_bytes();
-    let mut depth: i32 = 0;
-    let mut i = 0;
-    while i < bytes.len() {
-        if depth == 0 && bytes[i..].starts_with(sep_bytes) {
-            return true;
-        }
-        match bytes[i] {
-            b'(' | b'{' => depth += 1,
-            b')' | b'}' => depth -= 1,
-            _ => {}
-        }
-        i += 1;
+    if !frames.is_empty() {
+        return Err(Error::Parse(malformed_math()));
     }
-    false
+    let root = cur.ok_or_else(|| Error::Parse(malformed_math()))?;
+    // `cur` is the sole outstanding reference to `root`, so `try_unwrap` always succeeds.
+    Ok(Arc::try_unwrap(root).unwrap_or_else(|arc| (*arc).clone()))
 }
 
-fn split_top_level<'a>(s: &'a str, sep: &str) -> Vec<&'a str> {
-    let bytes = s.as_bytes();
-    let sep_bytes = sep.as_bytes();
-    let mut depth: i32 = 0;
-    let mut start = 0;
-    let mut i = 0;
-    let mut out = Vec::new();
-    while i < bytes.len() {
-        if depth == 0 && bytes[i..].starts_with(sep_bytes) {
-            out.push(&s[start..i]);
-            i += sep_bytes.len();
-            start = i;
-            continue;
+/// Finds the end of a single atom starting at `start`.
+///
+/// An atom is a name (any run of non-delimiter bytes) optionally followed
+/// by a balanced `(...)` argument list. Returns `None` if the atom is
+/// empty or its parentheses are unbalanced.
+fn scan_atom(bytes: &[u8], start: usize) -> Option<usize> {
+    fn is_delim(b: u8) -> bool { matches!(b, b' ' | b',' | b'(' | b')' | b'{' | b'}' | b'#') }
+
+    let mut i = start;
+    while i < bytes.len() && !is_delim(bytes[i]) {
+        i += 1;
+    }
+    if i == start {
+        return None;
+    }
+    if i < bytes.len() && bytes[i] == b'(' {
+        let mut depth: u32 = 1;
+        i += 1;
+        while i < bytes.len() && depth > 0 {
+            match bytes[i] {
+                b'(' => depth += 1,
+                b')' => depth -= 1,
+                _ => {}
+            }
+            i += 1;
         }
-        match bytes[i] {
-            b'(' | b'{' => depth += 1,
-            b')' | b'}' => depth -= 1,
-            _ => {}
+        if depth != 0 {
+            return None;
         }
-        i += 1;
     }
-    out.push(&s[start..]);
-    out
+    Some(i)
 }
 
 fn malformed_math() -> crate::ParseError {
@@ -931,6 +969,39 @@ mod tests {
         .is_ok());
     }
 
+    #[test]
+    fn parse_math_notation() {
+        // Nested math groups parse to the same policy as the function-call form.
+        let a = StringPolicy::from_str("((pk(A) ∧ pk(B)) ∨ pk(C))").unwrap();
+        let b = StringPolicy::from_str("or(and(pk(A),pk(B)),pk(C))").unwrap();
+        assert_eq!(a, b);
+
+        // `#{..} = k` with k strictly between 1 and n parses like `thresh(k,..)`.
+        let a = StringPolicy::from_str("#{pk(A), pk(B), pk(C), pk(D)} = 3").unwrap();
+        let b = StringPolicy::from_str("thresh(3,pk(A),pk(B),pk(C),pk(D))").unwrap();
+        assert_eq!(a, b);
+
+        // A threshold may appear as an operand inside a math group.
+        let a = StringPolicy::from_str("(pk(A) ∧ #{pk(B), pk(C), pk(D)} = 2)").unwrap();
+        let b = StringPolicy::from_str("and(pk(A),thresh(2,pk(B),pk(C),pk(D)))").unwrap();
+        assert_eq!(a, b);
+
+        // Top-level terminals still parse.
+        assert_eq!(StringPolicy::from_str("UNSATISFIABLE").unwrap(), Policy::Unsatisfiable);
+        assert_eq!(StringPolicy::from_str("TRIVIAL").unwrap(), Policy::Trivial);
+
+        // Mixing ∧ and ∨ in the same group is not valid (Display never emits it).
+        assert!(StringPolicy::from_str("(pk(A) ∧ pk(B) ∨ pk(C))").is_err());
+        // An unterminated math group and a threshold missing its k are both errors.
+        assert!(StringPolicy::from_str("(pk(A) ∧ pk(B)").is_err());
+        assert!(StringPolicy::from_str("#{pk(A), pk(B)} = ").is_err());
+        // k=1 and k=n spelled as thresh must be rejected in semantic policies.
+        assert!(StringPolicy::from_str("#{pk(A), pk(B)} = 1").is_err());
+        assert!(StringPolicy::from_str("#{pk(A), pk(B)} = 2").is_err());
+        // Trailing garbage after a valid expression is an error.
+        assert!(StringPolicy::from_str("pk(A)xyz").is_err());
+    }
+
     #[test]
     fn semantic_analysis() {
         let policy = StringPolicy::from_str("pk()").unwrap();