Skip to content

Commit b8c61cc

Browse files
authored
Merge pull request #3 from f1monkey/feature/improve-scoring
Feature/improve scoring
2 parents c89033d + 002513e commit b8c61cc

13 files changed

Lines changed: 224 additions & 108 deletions

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ module github.com/f1monkey/spellchecker-web
33
go 1.24
44

55
require (
6-
github.com/f1monkey/spellchecker v1.2.0
6+
github.com/agext/levenshtein v1.2.3
7+
github.com/f1monkey/spellchecker/v2 v2.0.1
78
github.com/go-chi/chi/v5 v5.2.2
89
github.com/stretchr/testify v1.8.4
910
github.com/swaggest/openapi-go v0.2.59
@@ -13,7 +14,6 @@ require (
1314
)
1415

1516
require (
16-
github.com/agnivade/levenshtein v1.2.1 // indirect
1717
github.com/davecgh/go-spew v1.1.1 // indirect
1818
github.com/f1monkey/bitmap v1.4.0 // indirect
1919
github.com/pmezard/go-difflib v1.0.0 // indirect

go.sum

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
2-
github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
1+
github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo=
2+
github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
33
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
44
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
5-
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
6-
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
75
github.com/bool64/dev v0.2.25/go.mod h1:iJbh1y/HkunEPhgebWRNcs8wfGq7sjvJ6W5iabL8ACg=
86
github.com/bool64/dev v0.2.40 h1:LUSD+Aq+WB3KwVntqXstevJ0wB12ig1bEgoG8ZafsZU=
97
github.com/bool64/dev v0.2.40/go.mod h1:iJbh1y/HkunEPhgebWRNcs8wfGq7sjvJ6W5iabL8ACg=
@@ -12,12 +10,10 @@ github.com/bool64/shared v0.1.5/go.mod h1:081yz68YC9jeFB3+Bbmno2RFWvGKv1lPKkMP6M
1210
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
1311
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
1412
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
15-
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo=
16-
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
1713
github.com/f1monkey/bitmap v1.4.0 h1:Is1PqZWrTawUowD/qE7Vnlh9fzXrEs/qxJHDQ47jZ3g=
1814
github.com/f1monkey/bitmap v1.4.0/go.mod h1:qOc9q5FQxdvMyjVDnmvfJxUtz8JIryqOGxpg4Vtg4nY=
19-
github.com/f1monkey/spellchecker v1.2.0 h1:kGQyLp8ZpVikKVSPkVG/Xpc5FRWXCqD0PUZ9ZMhPy8I=
20-
github.com/f1monkey/spellchecker v1.2.0/go.mod h1:uryb3bLmUmHcPeHIze8Joq4Dq2/ApYfeWn6SQ28URKI=
15+
github.com/f1monkey/spellchecker/v2 v2.0.1 h1:bUJmlmgn/75koynPY4yIRXHC27kHDUb83fZkQpauho4=
16+
github.com/f1monkey/spellchecker/v2 v2.0.1/go.mod h1:fuw+e0Ibat071gwjzQBjVSrQ3+5uq3v2Ovqy5engZ+k=
2117
github.com/go-chi/chi/v5 v5.2.2 h1:CMwsvRVTbXVytCk1Wd72Zy1LAsAh9GxMmSNWLHCG618=
2218
github.com/go-chi/chi/v5 v5.2.2/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
2319
github.com/iancoleman/orderedmap v0.3.0 h1:5cbR2grmZR/DiVt+VJopEhtVs9YGInGIxAoMJn+Ichc=

internal/routes/dictionary_create.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ import (
44
"context"
55
"errors"
66

7-
f1mspellchecker "github.com/f1monkey/spellchecker"
87
"github.com/f1monkey/spellchecker-web/internal/spellchecker"
8+
f1mspellchecker "github.com/f1monkey/spellchecker/v2"
99
"github.com/swaggest/usecase"
1010
"github.com/swaggest/usecase/status"
1111
)
@@ -17,16 +17,15 @@ type registryAdder interface {
1717
type DictionaryCreateRequest struct {
1818
Code string `path:"code" minLength:"1"`
1919

20-
Alphabet string `json:"alphabet" minLength:"1"`
21-
MaxErrors uint `json:"maxErrors" minimum:"0" maximum:"5"`
20+
Alphabet string `json:"alphabet" minLength:"1"`
2221
}
2322

2423
func dictionaryCreate(registry registryAdder) usecase.Interactor {
2524
u := usecase.NewInteractor(func(ctx context.Context, input DictionaryCreateRequest, output *Empty) error {
2625
_, err := registry.Add(input.Code, spellchecker.Options{
27-
Alphabet: input.Alphabet,
28-
MaxErrors: input.MaxErrors,
26+
Alphabet: input.Alphabet,
2927
})
28+
3029
if errors.Is(spellchecker.ErrAlreadyExists, err) {
3130
return status.Wrap(err, status.AlreadyExists)
3231
} else if err != nil {

internal/routes/dictionary_create_test.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ import (
55
"errors"
66
"testing"
77

8-
f1mspellchecker "github.com/f1monkey/spellchecker"
98
"github.com/f1monkey/spellchecker-web/internal/spellchecker"
9+
f1mspellchecker "github.com/f1monkey/spellchecker/v2"
1010
"github.com/stretchr/testify/require"
1111
"github.com/swaggest/usecase/status"
1212
)
@@ -37,7 +37,6 @@ func Test_DictionaryCreate(t *testing.T) {
3737
input: DictionaryCreateRequest{
3838
Code: "en",
3939
Alphabet: "abcdefghijklmnopqrstuvwxyz",
40-
MaxErrors: 2,
4140
},
4241
wantErr: false,
4342
wantCode: status.OK,
@@ -50,7 +49,6 @@ func Test_DictionaryCreate(t *testing.T) {
5049
input: DictionaryCreateRequest{
5150
Code: "en",
5251
Alphabet: "abcdefghijklmnopqrstuvwxyz",
53-
MaxErrors: 2,
5452
},
5553
wantErr: true,
5654
wantCode: status.AlreadyExists,
@@ -63,7 +61,6 @@ func Test_DictionaryCreate(t *testing.T) {
6361
input: DictionaryCreateRequest{
6462
Code: "fr",
6563
Alphabet: "abcdefghijklmnopqrstuvwxyz",
66-
MaxErrors: 2,
6764
},
6865
wantErr: true,
6966
wantCode: status.Internal,

internal/routes/dictionary_fix.go

Lines changed: 108 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@ package routes
22

33
import (
44
"context"
5+
"encoding/json"
56
"errors"
7+
"fmt"
68
"regexp"
9+
"strconv"
10+
"strings"
711
"unicode/utf8"
812

9-
f1mspellchecker "github.com/f1monkey/spellchecker"
1013
"github.com/f1monkey/spellchecker-web/internal/spellchecker"
14+
f1mspellchecker "github.com/f1monkey/spellchecker/v2"
1115
"github.com/swaggest/usecase"
1216
"github.com/swaggest/usecase/status"
1317
)
@@ -17,10 +21,12 @@ type dictionaryGetter interface {
1721
}
1822

1923
type DictionaryFixRequest struct {
20-
Code string `path:"code" minLength:"1"`
21-
22-
Text string `json:"text" description:"Phrase to be checked"`
23-
Limit int `json:"limit" default:"5" desciption:"Max suggestions per word"`
24+
Code string `path:"code" minLength:"1" description:"Dictionary code to use for spellchecking."`
25+
Text string `json:"text" description:"Input text to be checked and corrected."`
26+
Limit int `json:"limit" default:"5" description:"Maximum number of suggestions to return per word."`
27+
MaxErrors int `json:"maxErrors" default:"2" description:"Maximum number of bit-level differences allowed between the input word and a dictionary word. Examples: deletion=1 bit (proble→problem), insertion=1 bit (problemm→problem), substitution=2 bits (problam→problem), transposition=0 bits (problme→problem). Not recommended to set higher than 2, as it can impact performance."`
28+
Fuzziness FuzzinessValue `json:"fuzziness" description:"Maximum allowed Levenshtein edit distance. Allowed values: '0','1','2'... (fixed distance), 'AUTO' (auto by word length, default AUTO:3,6), 'AUTO:low,high' (custom range). See: https://www.elastic.co/docs/reference/elasticsearch/rest-apis/common-options#fuzziness"`
29+
SimilarityThreshold float64 `json:"similarityThreshold" minimum:"0" maximum:"1" description:"Required similarity ratio between input word and candidate suggestion (0.0–1.0). Example: 0.6 = candidate must be at least 60% similar to input."`
2430
}
2531

2632
type DictionaryFixResponse struct {
@@ -51,72 +57,122 @@ func dictionaryFix(registry dictionaryGetter, splitter *regexp.Regexp) usecase.I
5157
errorInvalidWord = "invalid_word"
5258
)
5359

54-
u := usecase.NewInteractor(func(ctx context.Context, input DictionaryFixRequest, output *DictionaryFixResponse) error {
55-
sc, err := registry.Get(input.Code)
56-
if errors.Is(spellchecker.ErrNotFound, err) {
57-
return status.Wrap(err, status.NotFound)
58-
} else if err != nil {
59-
return status.Wrap(err, status.Internal)
60-
}
61-
62-
if input.Text == "" {
63-
output.Fixes = make([]Fix, 0)
64-
return nil
65-
}
66-
67-
matches := splitter.FindAllStringIndex(input.Text, -1)
68-
fixes := make([]Fix, 0, len(matches))
69-
correct := make([]Correct, 0, len(matches))
60+
u := usecase.NewInteractor(
61+
func(ctx context.Context, input DictionaryFixRequest, output *DictionaryFixResponse) error {
62+
sc, err := registry.Get(input.Code)
63+
if errors.Is(spellchecker.ErrNotFound, err) {
64+
return status.Wrap(err, status.NotFound)
65+
} else if err != nil {
66+
return status.Wrap(err, status.Internal)
67+
}
7068

71-
for _, match := range matches {
72-
startByte, endByte := match[0], match[1]
73-
startRune := utf8.RuneCountInString(input.Text[:startByte])
74-
endRune := startRune + utf8.RuneCountInString(input.Text[startByte:endByte])
69+
fuzziness, err := input.Fuzziness.Parse()
70+
if err != nil {
71+
return status.Wrap(err, status.InvalidArgument)
72+
}
7573

76-
fix := Fix{
77-
Start: startRune,
78-
End: endRune,
74+
if input.Text == "" {
75+
output.Fixes = make([]Fix, 0)
76+
return nil
7977
}
8078

81-
word := input.Text[startByte:endByte]
79+
matches := splitter.FindAllStringIndex(input.Text, -1)
80+
fixes := make([]Fix, 0, len(matches))
81+
correct := make([]Correct, 0, len(matches))
8282

83-
suggestions := sc.SuggestScore(word, input.Limit)
83+
for _, match := range matches {
84+
startByte, endByte := match[0], match[1]
85+
startRune := utf8.RuneCountInString(input.Text[:startByte])
86+
endRune := startRune + utf8.RuneCountInString(input.Text[startByte:endByte])
8487

85-
if suggestions.ExactMatch {
86-
correct = append(correct, Correct{
88+
fix := Fix{
8789
Start: startRune,
8890
End: endRune,
89-
})
91+
}
9092

91-
continue
92-
}
93+
word := input.Text[startByte:endByte]
9394

94-
if len(suggestions.Suggestions) == 0 {
95-
fix.Error = errorUnknownWord
96-
} else {
97-
fix.Error = errorInvalidWord
98-
fix.Suggestions = make([]SpellcheckerSuggestion, 0, len(suggestions.Suggestions))
95+
suggestions := sc.Suggest(&f1mspellchecker.SearchOptions{
96+
MaxErrors: input.MaxErrors,
97+
FilterFunc: spellchecker.ScoringFunc(fuzziness, input.SimilarityThreshold),
98+
}, word, input.Limit)
9999

100-
for _, s := range suggestions.Suggestions {
101-
fix.Suggestions = append(fix.Suggestions, SpellcheckerSuggestion{
102-
Text: s.Value,
103-
Score: s.Score,
100+
if suggestions.ExactMatch {
101+
correct = append(correct, Correct{
102+
Start: startRune,
103+
End: endRune,
104104
})
105+
106+
continue
105107
}
106-
}
107108

108-
fixes = append(fixes, fix)
109-
}
109+
if len(suggestions.Suggestions) == 0 {
110+
fix.Error = errorUnknownWord
111+
} else {
112+
fix.Error = errorInvalidWord
113+
fix.Suggestions = make([]SpellcheckerSuggestion, 0, len(suggestions.Suggestions))
114+
115+
for _, s := range suggestions.Suggestions {
116+
fix.Suggestions = append(fix.Suggestions, SpellcheckerSuggestion{
117+
Text: s.Value,
118+
Score: s.Score,
119+
})
120+
}
121+
}
110122

111-
output.Fixes = fixes
112-
output.Correct = correct
123+
fixes = append(fixes, fix)
124+
}
125+
126+
output.Fixes = fixes
127+
output.Correct = correct
113128

114-
return nil
115-
})
129+
return nil
130+
},
131+
)
116132

117133
u.SetTitle("Fix text")
118-
u.SetDescription("Performs spellchecking on the given input text. Returns misspelled words along with suggested corrections, up to the specified limit per word.")
134+
u.SetDescription(
135+
"Performs spellchecking on the given input text. Returns misspelled words along with suggested corrections, up to the specified limit per word.",
136+
)
119137
u.SetExpectedErrors(status.Internal, status.NotFound)
120138

121139
return u
122140
}
141+
142+
type FuzzinessValue string
143+
144+
// Parse converts FuzzinessValue into spellchecker.Fuzziness.
145+
func (fv FuzzinessValue) Parse() (spellchecker.Fuzziness, error) {
146+
raw := strings.TrimSpace(strings.ToUpper(string(fv)))
147+
148+
switch {
149+
case raw == "", raw == "AUTO":
150+
return spellchecker.AutoFuzziness{Low: 3, High: 6}, nil
151+
case strings.HasPrefix(raw, "AUTO:"):
152+
parts := strings.Split(strings.TrimPrefix(raw, "AUTO:"), ",")
153+
if len(parts) != 2 {
154+
return nil, fmt.Errorf("invalid AUTO fuzziness format: %q", raw)
155+
}
156+
low, err1 := strconv.Atoi(parts[0])
157+
high, err2 := strconv.Atoi(parts[1])
158+
if err1 != nil || err2 != nil {
159+
return nil, fmt.Errorf("invalid AUTO fuzziness values: %q", raw)
160+
}
161+
return spellchecker.AutoFuzziness{Low: low, High: high}, nil
162+
default:
163+
if n, err := strconv.Atoi(raw); err == nil {
164+
return spellchecker.FixedFuzziness(n), nil
165+
}
166+
return nil, fmt.Errorf("unknown fuzziness value: %q", raw)
167+
}
168+
}
169+
170+
// UnmarshalJSON validates that fuzziness is passed as a JSON string.
171+
func (fv *FuzzinessValue) UnmarshalJSON(data []byte) error {
172+
var raw string
173+
if err := json.Unmarshal(data, &raw); err != nil {
174+
return fmt.Errorf("fuzziness must be a string: %w", err)
175+
}
176+
*fv = FuzzinessValue(raw)
177+
return nil
178+
}

internal/routes/dictionary_fix_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ import (
66
"regexp"
77
"testing"
88

9-
f1mspellchecker "github.com/f1monkey/spellchecker"
109
"github.com/f1monkey/spellchecker-web/internal/spellchecker"
10+
f1mspellchecker "github.com/f1monkey/spellchecker/v2"
1111
"github.com/stretchr/testify/assert"
1212
"github.com/stretchr/testify/require"
1313
"github.com/swaggest/usecase/status"
@@ -34,7 +34,7 @@ func Test_DictionaryFix(t *testing.T) {
3434
sc, err := f1mspellchecker.New(f1mspellchecker.DefaultAlphabet)
3535
require.NoError(t, err)
3636

37-
sc.Add("hello")
37+
sc.Add(nil, "hello")
3838

3939
tests := []struct {
4040
name string

0 commit comments

Comments
 (0)