@@ -2,12 +2,16 @@ package routes
22
33import (
44 "context"
5+ "encoding/json"
56 "errors"
7+ "fmt"
68 "regexp"
9+ "strconv"
10+ "strings"
711 "unicode/utf8"
812
9- f1mspellchecker "github.com/f1monkey/spellchecker"
1013 "github.com/f1monkey/spellchecker-web/internal/spellchecker"
14+ f1mspellchecker "github.com/f1monkey/spellchecker/v2"
1115 "github.com/swaggest/usecase"
1216 "github.com/swaggest/usecase/status"
1317)
@@ -17,10 +21,12 @@ type dictionaryGetter interface {
1721}
1822
1923type DictionaryFixRequest struct {
20- Code string `path:"code" minLength:"1"`
21-
22- Text string `json:"text" description:"Phrase to be checked"`
23- Limit int `json:"limit" default:"5" desciption:"Max suggestions per word"`
24+ Code string `path:"code" minLength:"1" description:"Dictionary code to use for spellchecking."`
25+ Text string `json:"text" description:"Input text to be checked and corrected."`
26+ Limit int `json:"limit" default:"5" description:"Maximum number of suggestions to return per word."`
27+ MaxErrors int `json:"maxErrors" default:"2" description:"Maximum number of bit-level differences allowed between the input word and a dictionary word. Examples: deletion=1 bit (proble→problem), insertion=1 bit (problemm→problem), substitution=2 bits (problam→problem), transposition=0 bits (problme→problem). Not recommended to set higher than 2, as it can impact performance."`
28+ Fuzziness FuzzinessValue `json:"fuzziness" description:"Maximum allowed Levenshtein edit distance. Allowed values: '0','1','2'... (fixed distance), 'AUTO' (auto by word length, default AUTO:3,6), 'AUTO:low,high' (custom range). See: https://www.elastic.co/docs/reference/elasticsearch/rest-apis/common-options#fuzziness"`
29+ SimilarityThreshold float64 `json:"similarityThreshold" minimum:"0" maximum:"1" description:"Required similarity ratio between input word and candidate suggestion (0.0–1.0). Example: 0.6 = candidate must be at least 60% similar to input."`
2430}
2531
2632type DictionaryFixResponse struct {
@@ -51,72 +57,122 @@ func dictionaryFix(registry dictionaryGetter, splitter *regexp.Regexp) usecase.I
5157 errorInvalidWord = "invalid_word"
5258 )
5359
54- u := usecase .NewInteractor (func (ctx context.Context , input DictionaryFixRequest , output * DictionaryFixResponse ) error {
55- sc , err := registry .Get (input .Code )
56- if errors .Is (spellchecker .ErrNotFound , err ) {
57- return status .Wrap (err , status .NotFound )
58- } else if err != nil {
59- return status .Wrap (err , status .Internal )
60- }
61-
62- if input .Text == "" {
63- output .Fixes = make ([]Fix , 0 )
64- return nil
65- }
66-
67- matches := splitter .FindAllStringIndex (input .Text , - 1 )
68- fixes := make ([]Fix , 0 , len (matches ))
69- correct := make ([]Correct , 0 , len (matches ))
60+ u := usecase .NewInteractor (
61+ func (ctx context.Context , input DictionaryFixRequest , output * DictionaryFixResponse ) error {
62+ sc , err := registry .Get (input .Code )
63+ if errors .Is (spellchecker .ErrNotFound , err ) {
64+ return status .Wrap (err , status .NotFound )
65+ } else if err != nil {
66+ return status .Wrap (err , status .Internal )
67+ }
7068
71- for _ , match := range matches {
72- startByte , endByte := match [ 0 ], match [ 1 ]
73- startRune := utf8 . RuneCountInString ( input . Text [: startByte ] )
74- endRune := startRune + utf8 . RuneCountInString ( input . Text [ startByte : endByte ])
69+ fuzziness , err := input . Fuzziness . Parse ()
70+ if err != nil {
71+ return status . Wrap ( err , status . InvalidArgument )
72+ }
7573
76- fix := Fix {
77- Start : startRune ,
78- End : endRune ,
74+ if input . Text == "" {
75+ output . Fixes = make ([] Fix , 0 )
76+ return nil
7977 }
8078
81- word := input .Text [startByte :endByte ]
79+ matches := splitter .FindAllStringIndex (input .Text , - 1 )
80+ fixes := make ([]Fix , 0 , len (matches ))
81+ correct := make ([]Correct , 0 , len (matches ))
8282
83- suggestions := sc .SuggestScore (word , input .Limit )
83+ for _ , match := range matches {
84+ startByte , endByte := match [0 ], match [1 ]
85+ startRune := utf8 .RuneCountInString (input .Text [:startByte ])
86+ endRune := startRune + utf8 .RuneCountInString (input .Text [startByte :endByte ])
8487
85- if suggestions .ExactMatch {
86- correct = append (correct , Correct {
88+ fix := Fix {
8789 Start : startRune ,
8890 End : endRune ,
89- })
91+ }
9092
91- continue
92- }
93+ word := input .Text [startByte :endByte ]
9394
94- if len (suggestions .Suggestions ) == 0 {
95- fix .Error = errorUnknownWord
96- } else {
97- fix .Error = errorInvalidWord
98- fix .Suggestions = make ([]SpellcheckerSuggestion , 0 , len (suggestions .Suggestions ))
95+ suggestions := sc .Suggest (& f1mspellchecker.SearchOptions {
96+ MaxErrors : input .MaxErrors ,
97+ FilterFunc : spellchecker .ScoringFunc (fuzziness , input .SimilarityThreshold ),
98+ }, word , input .Limit )
9999
100- for _ , s := range suggestions .Suggestions {
101- fix . Suggestions = append (fix . Suggestions , SpellcheckerSuggestion {
102- Text : s . Value ,
103- Score : s . Score ,
100+ if suggestions .ExactMatch {
101+ correct = append (correct , Correct {
102+ Start : startRune ,
103+ End : endRune ,
104104 })
105+
106+ continue
105107 }
106- }
107108
108- fixes = append (fixes , fix )
109- }
109+ if len (suggestions .Suggestions ) == 0 {
110+ fix .Error = errorUnknownWord
111+ } else {
112+ fix .Error = errorInvalidWord
113+ fix .Suggestions = make ([]SpellcheckerSuggestion , 0 , len (suggestions .Suggestions ))
114+
115+ for _ , s := range suggestions .Suggestions {
116+ fix .Suggestions = append (fix .Suggestions , SpellcheckerSuggestion {
117+ Text : s .Value ,
118+ Score : s .Score ,
119+ })
120+ }
121+ }
110122
111- output .Fixes = fixes
112- output .Correct = correct
123+ fixes = append (fixes , fix )
124+ }
125+
126+ output .Fixes = fixes
127+ output .Correct = correct
113128
114- return nil
115- })
129+ return nil
130+ },
131+ )
116132
117133 u .SetTitle ("Fix text" )
118- u .SetDescription ("Performs spellchecking on the given input text. Returns misspelled words along with suggested corrections, up to the specified limit per word." )
134+ u .SetDescription (
135+ "Performs spellchecking on the given input text. Returns misspelled words along with suggested corrections, up to the specified limit per word." ,
136+ )
119137 u .SetExpectedErrors (status .Internal , status .NotFound )
120138
121139 return u
122140}
141+
142+ type FuzzinessValue string
143+
144+ // Parse converts FuzzinessValue into spellchecker.Fuzziness.
145+ func (fv FuzzinessValue ) Parse () (spellchecker.Fuzziness , error ) {
146+ raw := strings .TrimSpace (strings .ToUpper (string (fv )))
147+
148+ switch {
149+ case raw == "" , raw == "AUTO" :
150+ return spellchecker.AutoFuzziness {Low : 3 , High : 6 }, nil
151+ case strings .HasPrefix (raw , "AUTO:" ):
152+ parts := strings .Split (strings .TrimPrefix (raw , "AUTO:" ), "," )
153+ if len (parts ) != 2 {
154+ return nil , fmt .Errorf ("invalid AUTO fuzziness format: %q" , raw )
155+ }
156+ low , err1 := strconv .Atoi (parts [0 ])
157+ high , err2 := strconv .Atoi (parts [1 ])
158+ if err1 != nil || err2 != nil {
159+ return nil , fmt .Errorf ("invalid AUTO fuzziness values: %q" , raw )
160+ }
161+ return spellchecker.AutoFuzziness {Low : low , High : high }, nil
162+ default :
163+ if n , err := strconv .Atoi (raw ); err == nil {
164+ return spellchecker .FixedFuzziness (n ), nil
165+ }
166+ return nil , fmt .Errorf ("unknown fuzziness value: %q" , raw )
167+ }
168+ }
169+
170+ // UnmarshalJSON validates that fuzziness is passed as a JSON string.
171+ func (fv * FuzzinessValue ) UnmarshalJSON (data []byte ) error {
172+ var raw string
173+ if err := json .Unmarshal (data , & raw ); err != nil {
174+ return fmt .Errorf ("fuzziness must be a string: %w" , err )
175+ }
176+ * fv = FuzzinessValue (raw )
177+ return nil
178+ }
0 commit comments