Convert comment prefix handling from char to String

Manuel Mendez · Manuel Mendez · commit d759ac6552b5 · 2025-03-31T16:44:00.000-04:00
Git has supported multi-character comment prefixes since v2.45, so the
comment prefix must be represented as a String instead of a char.
diff --git a/src/commitmsgfmt.rs b/src/commitmsgfmt.rs
@@ -7,20 +7,20 @@ use unicode_segmentation::UnicodeSegmentation;
 pub struct CommitMsgFmt {
     /// Max width of the message body; not used for the subject line.
     width: usize,
-    /// The character that identifies a comment when used in column 0 of a line.
-    comment_char: char,
+    /// The string that identifies a comment when started in column 0 of a line.
+    comment_string: String,
 }
 
 impl CommitMsgFmt {
-    pub fn new(width: usize, comment_char: char) -> CommitMsgFmt {
+    pub fn new(width: usize, comment_string: &str) -> CommitMsgFmt {
         CommitMsgFmt {
             width,
-            comment_char,
+            comment_string: comment_string.into(),
         }
     }
 
     pub fn filter(&self, input: &str) -> String {
-        let msg = parse(input, self.comment_char);
+        let msg = parse(input, &self.comment_string);
         // The output size can be less than the input size only if the input contains characters
         // that will be trimmed, such as leading whitespace, which is improbable. It is more likely
         // the output size will exceed the input size due to injected linefeeds and continuation
@@ -76,7 +76,7 @@ impl CommitMsgFmt {
             None => self.width,
         };
         let mut cur_line_len = 0;
-        for word in WordIter::new(paragraph, self.comment_char) {
+        for word in WordIter::new(paragraph, &self.comment_string) {
             let word_len = word.graphemes(true).count();
 
             // Not a new line so we need to fiddle with whitespace.
@@ -105,7 +105,7 @@ mod tests {
     use pretty_assertions::assert_eq;
 
     fn filter(w: usize, s: &str) -> String {
-        CommitMsgFmt::new(w, '#').filter(s)
+        CommitMsgFmt::new(w, "#").filter(s)
     }
 
     #[test]
@@ -657,7 +657,7 @@ foo
     }
 
     #[test]
-    fn preserves_scissored_content_with_custom_comment_char() {
+    fn preserves_scissored_content_with_custom_comment_string() {
         let input = "
 foo
 
@@ -683,7 +683,7 @@ preserve
 
 content
 ";
-        let fmt = CommitMsgFmt::new(72, ';');
+        let fmt = CommitMsgFmt::new(72, ";");
         assert_eq!(fmt.filter(input), expected);
     }
 
diff --git a/src/main.rs b/src/main.rs
@@ -144,7 +144,7 @@ enum ConfigArgument<'a> {
 #[derive(Debug, Eq, PartialEq)]
 pub struct Config {
     width: usize,
-    comment_char: char,
+    comment_string: String,
 }
 
 impl Config {
@@ -167,7 +167,7 @@ impl Config {
 
         let cfg = Config {
             width: width as usize,
-            comment_char: parse_git_config_commentchar(git_config_commentchar()),
+            comment_string: parse_git_config_commentchar(git_config_commentchar()),
         };
 
         Ok(cfg)
@@ -181,16 +181,16 @@ fn git_config_commentchar() -> Result<Vec<u8>, io::Error> {
         .map(|o| o.stdout)
 }
 
-fn parse_git_config_commentchar(git_output: Result<Vec<u8>, io::Error>) -> char {
+fn parse_git_config_commentchar(git_output: Result<Vec<u8>, io::Error>) -> String {
     let output: Vec<u8> = git_output.unwrap_or_else(|_| "#".into());
 
     // The setting is either unset, "auto", or precisely 1 ASCII character;
     // Git won't commit with an invalid configuration value. "auto" support
     // can be added on-demand, it requires at least 2 passes.
     if output.is_empty() || output == b"auto" {
-        '#'
+        "#".into()
     } else {
-        output[0].into()
+        (output[0] as char).into()
     }
 }
 
@@ -202,7 +202,7 @@ fn main() -> ExitCode {
     }
     let cfg = cfg.unwrap();
 
-    let commitmsgfmt = commitmsgfmt::CommitMsgFmt::new(cfg.width, cfg.comment_char);
+    let commitmsgfmt = commitmsgfmt::CommitMsgFmt::new(cfg.width, &cfg.comment_string);
 
     let result = read_all_bytes_from_stdin()
         .and_then(to_utf8)
@@ -503,49 +503,49 @@ mod tests {
             vec!["binary"],
             Ok(Config {
                 width: 72,
-                comment_char: '#',
+                comment_string: "#".into(),
             }),
         ));
         matrix.push((
             vec!["binary", "--width"],
             Ok(Config {
                 width: 72,
-                comment_char: '#',
+                comment_string: "#".into(),
             }),
         ));
         matrix.push((
             vec!["binary", "--width", "10"],
             Ok(Config {
                 width: 10,
-                comment_char: '#',
+                comment_string: "#".into(),
             }),
         ));
         matrix.push((
             vec!["binary", "--width=21"],
             Ok(Config {
                 width: 21,
-                comment_char: '#',
+                comment_string: "#".into(),
             }),
         ));
         matrix.push((
             vec!["binary", "-w"],
             Ok(Config {
                 width: 72,
-                comment_char: '#',
+                comment_string: "#".into(),
             }),
         ));
         matrix.push((
             vec!["binary", "-w37"],
             Ok(Config {
                 width: 37,
-                comment_char: '#',
+                comment_string: "#".into(),
             }),
         ));
         matrix.push((
             vec!["binary", "-w37", "-w42"],
             Ok(Config {
                 width: 42,
-                comment_char: '#',
+                comment_string: "#".into(),
             }),
         ));
         matrix.push((
@@ -620,18 +620,18 @@ mod tests {
     #[test]
     fn parses_git_config_commentchar() {
         let matrix = vec![
-            (Ok("".into()), '#'),
-            (Ok("auto".into()), '#'),
-            (Ok("#".into()), '#'),
-            (Ok("xy".into()), 'x'),
-            (Err(io::Error::from(io::ErrorKind::PermissionDenied)), '#'),
+            (Ok("".into()), "#"),
+            (Ok("auto".into()), "#"),
+            (Ok("#".into()), "#"),
+            (Ok("xy".into()), "x"),
+            (Err(io::Error::from(io::ErrorKind::PermissionDenied)), "#"),
         ];
         let (actual, expected): (Vec<_>, Vec<_>) = matrix
             .into_iter()
             .map(|(input, expected)| {
                 let x = format!("{:?}", &input);
                 let actual = parse_git_config_commentchar(input);
-                ((x.clone(), actual), (x, expected))
+                ((x.clone(), actual), (x, expected.into()))
             })
             .unzip();
         assert_eq!(expected, actual);
diff --git a/src/parser.rs b/src/parser.rs
@@ -46,7 +46,7 @@ impl CodeFence<'_> {
     }
 }
 
-pub fn parse(input: &str, comment_char: char) -> Vec<Token> {
+pub fn parse<'a>(input: &'a str, comment_string: &str) -> Vec<Token<'a>> {
     let mut toks = Vec::new();
 
     let mut has_subject = false;
@@ -65,7 +65,7 @@ pub fn parse(input: &str, comment_char: char) -> Vec<Token> {
         } else if let Some(fence) = line_as_code_fence(line) {
             toks.push(Token::FencedCodeBlock(line));
             in_code_fence = Some(fence);
-        } else if line.starts_with(comment_char) {
+        } else if line.starts_with(comment_string) {
             let t = if &line[1..] == " ------------------------ >8 ------------------------" {
                 has_scissors = true;
                 Token::Scissored(line)
@@ -419,7 +419,7 @@ mod tests {
     use pretty_assertions::assert_eq;
 
     fn parse(s: &str) -> Vec<Token> {
-        super::parse(s, '#')
+        super::parse(s, "#")
     }
 
     #[test]
@@ -503,13 +503,13 @@ mod tests {
 
     #[test]
     fn parses_default_comment() {
-        assert_eq!(super::parse("# foo", '#'), [Comment("# foo")]);
+        assert_eq!(super::parse("# foo", "#"), [Comment("# foo")]);
     }
 
     #[test]
     fn parses_custom_comment() {
-        assert_eq!(super::parse("@ foo", '@'), [Comment("@ foo")]);
-        assert_eq!(super::parse("# foo", '@'), [Subject("# foo")]);
+        assert_eq!(super::parse("@ foo", "@"), [Comment("@ foo")]);
+        assert_eq!(super::parse("# foo", "@"), [Subject("# foo")]);
     }
 
     #[test]
@@ -1713,7 +1713,7 @@ do
     }
 
     #[test]
-    fn parses_scissored_content_with_custom_comment_char() {
+    fn parses_scissored_content_with_custom_comment_string() {
         assert_eq!(
             super::parse(
                 "
@@ -1727,7 +1727,7 @@ $ ------------------------ >8 ------------------------
 do
  not
   format
-", '$'
+", "$"
             ),
             [
                 VerticalSpace,
diff --git a/src/worditer.rs b/src/worditer.rs
@@ -5,9 +5,9 @@ use std::borrow::Cow;
 /// definition and the exact details are unspecified. Rather, the iterator promises to produce
 /// results that wrap safely and sensibly.
 pub(crate) struct WordIter<'text> {
-    /// The commit message comment character, to avoid creating a word that starts with the comment
-    /// character lest that word degenerates into a comment.
-    comment_char: char,
+    /// The commit message comment string, to avoid creating a word that starts with the comment
+    /// string lest that word degenerates into a comment.
+    comment_string: String,
     /// The "next word", if present, may be considered the head of a list whose rest is
     /// [`Self::naive_words`] before the next invocation of [`Self::next()`]. It was the last word
     /// extracted from `naive_words` on the previous invocation of `next()` that was determined to
@@ -19,16 +19,16 @@ pub(crate) struct WordIter<'text> {
 }
 
 impl<'text> WordIter<'text> {
-    pub fn new(text: &'text str, comment_char: char) -> Self {
+    pub fn new(text: &'text str, comment_string: &'text str) -> Self {
         WordIter {
-            comment_char,
+            comment_string: comment_string.into(),
             next_word: None,
             naive_words: text.split(' '),
         }
     }
 
     fn is_non_breaking_word(&self, word: &str) -> bool {
-        word.starts_with(self.comment_char)
+        word.starts_with(&self.comment_string)
             || match WordIter::describe_word(word) {
                 WordJoinerState::FootnoteRefUnseen => true,
                 WordJoinerState::FootnoteRefOpen => false,
@@ -121,7 +121,7 @@ mod tests {
     type Item<'text> = <WordIter<'text> as Iterator>::Item;
 
     fn iter(text: &str) -> WordIter {
-        WordIter::new(text, '#')
+        WordIter::new(text, "#")
     }
 
     fn collect(it: WordIter) -> Vec<Item> {
@@ -132,9 +132,12 @@ mod tests {
         collect(iter(text))
     }
 
-    fn some_comment_char() -> char {
-        let some_comment_chars = ['#', ';', '!', '%'];
-        *some_comment_chars.choose(&mut rand::thread_rng()).unwrap()
+    fn some_comment_string() -> String {
+        let some_comment_strings = ["#", ";", "!", "%"].map(|c| c.to_string());
+        some_comment_strings
+            .choose(&mut rand::thread_rng())
+            .unwrap()
+            .clone()
     }
 
     #[test]
@@ -190,15 +193,15 @@ mod tests {
     }
 
     #[test]
-    fn merges_comment_char() {
-        let comment_char = some_comment_char();
+    fn merges_comment_string() {
+        let comment_string = some_comment_string();
 
-        let text = format!("a {}1b d", comment_char);
+        let text = format!("a {}1b d", comment_string);
         let res = {
-            let it = WordIter::new(&text, comment_char);
+            let it = WordIter::new(&text, &comment_string);
             collect(it)
         };
-        let expect = [&format!("a {}1b", comment_char), "d"];
+        let expect = [&format!("a {}1b", comment_string), "d"];
 
         assert_eq!(res, expect);
     }
@@ -212,29 +215,29 @@ mod tests {
     }
 
     #[test]
-    fn lone_comment_char_binds_left() {
+    fn lone_comment_string_binds_left() {
         // This is a limitation of the text analysis heuristic. The test case is
-        // a special-case of "merges_comment_char()" made explicit.
+        // a special-case of "merges_comment_string()" made explicit.
         //
-        // The first space after a comment character marks the end of a word,
-        // that comment character being the last part of the word.
+        // The first space after a comment string marks the end of a word,
+        // that comment string being the last part of the word.
         // This means that "a #1" and "a # 1" behave differently, producing
-        // 1 respectively 2 words, the comment character always in the same word
+        // 1 respectively 2 words, the comment string always in the same word
         // as the preceding token:
-        // - The comment character must join the preceding token to avoid being
+        // - The comment string must join the preceding token to avoid being
         //   pushed onto its own line and accidentally degrading into a comment.
-        // - The token after the comment character cannot join the preceding
+        // - The token after the comment string cannot join the preceding
         //   token because we have no way to determine that that token or any of
         //   the subsequent tokens should be individual words or parts of the
         //   first token -- this is the least surprising heuristic we can apply.
-        let comment_char = some_comment_char();
+        let comment_string = some_comment_string();
 
-        let text = format!("a {} b", comment_char);
+        let text = format!("a {} b", comment_string);
         let res = {
-            let it = WordIter::new(&text, comment_char);
+            let it = WordIter::new(&text, &comment_string);
             collect(it)
         };
-        let expect = [&format!("a {}", comment_char), "b"];
+        let expect = [&format!("a {}", comment_string), "b"];
 
         assert_eq!(res, expect);
     }