@@ -35,6 +35,7 @@ type Spelling struct {
3535 Dicpath string
3636 Threshold int
3737 exceptRe * regexp2.Regexp
38+ phraseRe * regexp2.Regexp
3839 gs * spell.Checker
3940 Custom bool
4041 Append bool
@@ -85,6 +86,12 @@ func addExceptions(s *Spelling, generic baseCheck, cfg *core.Config) error { //n
8586 ignoreCase + strings .Join (s .Exceptions , "|" ))
8687 }
8788
89+ // A multi-word term (e.g. `mea culpa`) is accepted only as a phrase; its
90+ // component words are still spell-checked on their own. We mask each
91+ // occurrence of such a phrase in `Run` via `phraseRe`, built from the same vocabulary as every other
92+ // Vocab-aware rule. See #1035.
93+ s .phraseRe = buildPhraseRe (nil , cfg .AcceptedTokens , true )
94+
8895 return nil
8996}
9097
@@ -173,16 +180,30 @@ func (s Spelling) Run(blk nlp.Block, _ *core.File, _ *core.Config) ([]core.Alert
173180 // See https://github.com/errata-ai/vale/v2/issues/148.
174181 txt = s .gs .Convert (txt )
175182
183+ // Mask any accepted multi-word phrases (e.g. `mea culpa`) so their
184+ // component words aren't spell-checked individually, while the same words
185+ // elsewhere still are. We replace each match with an equal-length run of
186+ // spaces, which preserves the byte offsets of every other word. See #1035.
187+ checkTxt := txt
188+ if s .phraseRe != nil {
189+ masked , err := s .phraseRe .ReplaceFunc (txt , func (m regexp2.Match ) string {
190+ return strings .Repeat (" " , len (m .String ()))
191+ }, - 1 , - 1 )
192+ if err == nil {
193+ checkTxt = masked
194+ }
195+ }
196+
176197OUTER:
177- for _ , word := range nlp .WordTokenizer .Tokenize (txt ) {
198+ for _ , word := range nlp .WordTokenizer .Tokenize (checkTxt ) {
178199 for _ , filter := range s .Filters {
179200 if filter .MatchString (word ) {
180201 continue OUTER
181202 }
182203 }
183204
184205 if ! s .gs .Spell (word ) && ! isMatch (s .exceptRe , word ) {
185- offset := strings .Index (txt , word )
206+ offset := strings .Index (checkTxt , word )
186207 loc := []int {offset , offset + len (word )}
187208
188209 a := core.Alert {Check : s .Name , Severity : s .Level , Span : loc ,
0 commit comments