Skip to content

Commit bab96b7

Browse files
committed
Allow setting a custom regex to split phrases into words
1 parent c5f649b commit bab96b7

8 files changed

Lines changed: 109 additions & 64 deletions

File tree

README.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66

77
Before running the service, set the following environment variables:
88

9-
|Variable | Description | Example |
10-
|----------------|------------- |---------|
11-
|SPELLCHECKER_DIR| Directory to store dictionaries | /tmp/spellchecker |
12-
|SPELLCHECKER_AUTOSAVE_INTERVAL| Auto-save interval (Go time.Duration) | 5m |
13-
|HTTP_ADDR| HTTP server address and port | localhost:8011 |
14-
|LOG_LEVEL| Logging level | info, debug |
9+
|Variable | Description | Example | Default value | Required |
10+
|----------------|------------- |---------|---------------|----------|
11+
|SPELLCHECKER_DIR| Directory to store dictionaries | /tmp/spellchecker | none | yes |
12+
|SPELLCHECKER_AUTOSAVE_INTERVAL| Auto-save interval (Go time.Duration) | 5m | none | no |
13+
|SPELLCHECKER_WORD_SPLIT_REGEXP| Regular expression used to split phrases into words | ['\pL]+ | ['\pL]+| no |
14+
|SPELLCHECKER_HTTP_ADDR| HTTP server address and port | localhost:8011 | localhost:8011 | no |
15+
|SPELLCHECKER_LOG_LEVEL| Logging level | error | info | no |
1516

1617
## Swagger Docs
1718

cmd/web/main.go

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"net/http"
88
"os"
99
"os/signal"
10+
"regexp"
1011
"syscall"
1112
"time"
1213

@@ -33,7 +34,7 @@ func main() {
3334

3435
ctx = logger.WithContext(
3536
ctx,
36-
logger.New(GitCommit, os.Getenv("LOG_LEVEL")),
37+
logger.New(GitCommit, os.Getenv("SPELLCHECKER_LOG_LEVEL")),
3738
)
3839

3940
registry, err := initRegistry(ctx)
@@ -42,12 +43,20 @@ func main() {
4243
os.Exit(1)
4344
}
4445

46+
splitter, err := initWordSpliter()
47+
if err != nil {
48+
logger.FromContext(ctx).Error("init spellchecker error", "error", err)
49+
os.Exit(1)
50+
}
51+
52+
fmt.Println(splitter)
53+
4554
defer registry.SaveAll(ctx)
4655

47-
server := server.NewServer(ctx, registry)
56+
server := server.NewServer(ctx, registry, splitter)
4857

4958
addr := defaultServerAddr
50-
if a := os.Getenv("HTTP_ADDR"); a != "" {
59+
if a := os.Getenv("SPELLCHECKER_HTTP_ADDR"); a != "" {
5160
addr = a
5261
}
5362

@@ -116,3 +125,19 @@ func initRegistry(ctx context.Context) (*spellchecker.Registry, error) {
116125

117126
return result, nil
118127
}
128+
129+
var defaultRegexp = regexp.MustCompile(`['\pL]+`)
130+
131+
func initWordSpliter() (*regexp.Regexp, error) {
132+
value := os.Getenv("SPELLCHECKER_WORD_SPLIT_REGEXP")
133+
if value == "" {
134+
return defaultRegexp, nil
135+
}
136+
137+
result, err := regexp.Compile(value)
138+
if err != nil {
139+
return nil, fmt.Errorf("invalid SPELLCHECKER_WORD_SPLIT_REGEXP: %w", err)
140+
}
141+
142+
return result, nil
143+
}

internal/routes/dictionary_fix.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@ type SpellcheckerSuggestion struct {
3434
Score float64 `json:"score" description:"Confidence score of the suggestion."`
3535
}
3636

37-
var wordSymbols = regexp.MustCompile(`[-\pL]+`)
38-
39-
func dictionaryFix(registry *spellchecker.Registry) usecase.Interactor {
37+
func dictionaryFix(registry *spellchecker.Registry, splitter *regexp.Regexp) usecase.Interactor {
4038
const (
4139
errorUnknownWord = "unknown_word"
4240
errorInvalidWord = "invalid_word"
@@ -54,7 +52,7 @@ func dictionaryFix(registry *spellchecker.Registry) usecase.Interactor {
5452
return nil
5553
}
5654

57-
matches := wordSymbols.FindAllStringIndex(input.Text, -1)
55+
matches := splitter.FindAllStringIndex(input.Text, -1)
5856
fixes := make([]Fix, 0, len(matches))
5957

6058
for _, match := range matches {

internal/routes/dictionary_item.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package routes
33
import (
44
"context"
55
"errors"
6+
"regexp"
67

78
"github.com/f1monkey/spellchecker-web/internal/spellchecker"
89
"github.com/swaggest/usecase"
@@ -24,7 +25,7 @@ type DictionaryItemAddResponse struct {
2425
Words int `json:"words" description:"Number of phrases successfully added."`
2526
}
2627

27-
func dictionaryItemAdd(registry *spellchecker.Registry) usecase.Interactor {
28+
func dictionaryItemAdd(registry *spellchecker.Registry, splitter *regexp.Regexp) usecase.Interactor {
2829
u := usecase.NewInteractor(func(ctx context.Context, input DictionaryItemAddRequest, output *DictionaryItemAddResponse) error {
2930
sc, err := registry.Get(input.Code)
3031
if errors.Is(spellchecker.ErrNotFound, err) {
@@ -37,7 +38,7 @@ func dictionaryItemAdd(registry *spellchecker.Registry) usecase.Interactor {
3738

3839
for i := range input.Phrases {
3940

40-
words := wordSymbols.FindAllString(input.Phrases[i].Text, -1)
41+
words := splitter.FindAllString(input.Phrases[i].Text, -1)
4142
if len(words) == 0 {
4243
continue
4344
}

internal/routes/routes.go

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package routes
22

33
import (
44
"net/http"
5+
"regexp"
56

67
"github.com/f1monkey/spellchecker-web/internal/spellchecker"
78
"github.com/go-chi/chi/v5"
@@ -10,32 +11,32 @@ import (
1011

1112
type EmptyResponse struct{}
1213

13-
func Routes(registry *spellchecker.Registry) func(r chi.Router) {
14+
func Routes(registry *spellchecker.Registry, splitter *regexp.Regexp) func(r chi.Router) {
1415
return func(r chi.Router) {
15-
r.Route("/dictionaries", dictionaryRoutes(registry))
16+
r.Route("/dictionaries", dictionaryRoutes(registry, splitter))
1617
}
1718
}
1819

19-
func dictionaryRoutes(registry *spellchecker.Registry) func(r chi.Router) {
20+
func dictionaryRoutes(registry *spellchecker.Registry, splitter *regexp.Regexp) func(r chi.Router) {
2021
return func(r chi.Router) {
2122
r.Method(http.MethodPost, "/{code}", nethttp.NewHandler(
22-
dictionaryCreate(registry)),
23-
)
23+
dictionaryCreate(registry),
24+
))
2425

2526
r.Method(http.MethodDelete, "/{code}", nethttp.NewHandler(
26-
dictionaryDelete(registry)),
27-
)
27+
dictionaryDelete(registry),
28+
))
2829

2930
r.Method(http.MethodPost, "/{code}/save", nethttp.NewHandler(
30-
dictionarySave(registry)),
31-
)
31+
dictionarySave(registry),
32+
))
3233

3334
r.Method(http.MethodPost, "/{code}/add", nethttp.NewHandler(
34-
dictionaryItemAdd(registry)),
35-
)
35+
dictionaryItemAdd(registry, splitter),
36+
))
3637

3738
r.Method(http.MethodPost, "/{code}/fix", nethttp.NewHandler(
38-
dictionaryFix(registry)),
39-
)
39+
dictionaryFix(registry, splitter),
40+
))
4041
}
4142
}

internal/spellchecker/registry.go

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,33 +36,26 @@ func NewRegistry(ctx context.Context, dir string) (*Registry, error) {
3636
return nil, err
3737
}
3838

39-
items := make(map[string]RegistryItem)
39+
result := &Registry{
40+
dir: dir,
41+
items: make(map[string]RegistryItem),
42+
}
4043

4144
for _, f := range files {
42-
buf, err := os.ReadFile(path.Join(dir, f.Name()))
43-
if err != nil {
44-
logger.FromContext(ctx).Error("registry: read file err", "file", f, "error", err)
45-
continue
46-
}
47-
48-
var item RegistryItem
45+
code, _ := strings.CutSuffix(f.Name(), extension)
4946

50-
if err := json.Unmarshal(buf, &item); err != nil {
51-
logger.FromContext(ctx).Error("registry: unable to initalize registry item", "file", f, "error", err)
47+
item, err := result.doLoad(code)
48+
if err != nil {
49+
logger.FromContext(ctx).Error("registry: dictionary load error", "code", code, "error", err)
5250
continue
5351
}
5452

55-
code, _ := strings.CutSuffix(f.Name(), extension)
56-
5753
logger.FromContext(ctx).Info("registry: loaded dictionary", "dictionary", code)
5854

59-
items[code] = item
55+
result.items[code] = item
6056
}
6157

62-
return &Registry{
63-
dir: dir,
64-
items: items,
65-
}, nil
58+
return result, nil
6659
}
6760

6861
func (r *Registry) AutoSave(ctx context.Context, interval time.Duration) {
@@ -182,6 +175,21 @@ func (r *Registry) doSave(code string) error {
182175
return nil
183176
}
184177

178+
func (r *Registry) doLoad(code string) (RegistryItem, error) {
179+
buf, err := os.ReadFile(fullPath(r.dir, code))
180+
if err != nil {
181+
return RegistryItem{}, err
182+
}
183+
184+
var item RegistryItem
185+
186+
if err := json.Unmarshal(buf, &item); err != nil {
187+
return RegistryItem{}, err
188+
}
189+
190+
return item, nil
191+
}
192+
185193
func findDictionaries(dir string) ([]fs.DirEntry, error) {
186194
files, err := os.ReadDir(dir)
187195
if err != nil {

internal/spellchecker/registry_test.go

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,9 @@ import (
1414
func Test_NewRegistry(t *testing.T) {
1515
t.Parallel()
1616

17-
createTestFile := func(t *testing.T, dir string, name string) {
18-
t.Helper()
19-
20-
sc, err := spellchecker.New("abc")
21-
require.NoError(t, err)
22-
23-
item := RegistryItem{
24-
Spellchecker: sc,
25-
}
26-
27-
data, err := json.Marshal(&item)
28-
require.NoError(t, err)
29-
30-
err = os.WriteFile(path.Join(dir, fileName(name)), data, 0755)
31-
require.NoError(t, err)
32-
}
33-
3417
t.Run("no files", func(t *testing.T) {
18+
t.Parallel()
19+
3520
dir := t.TempDir()
3621

3722
result, err := NewRegistry(context.Background(), dir)
@@ -41,6 +26,8 @@ func Test_NewRegistry(t *testing.T) {
4126
})
4227

4328
t.Run("one file", func(t *testing.T) {
29+
t.Parallel()
30+
4431
dir := t.TempDir()
4532

4633
createTestFile(t, dir, "code")
@@ -52,6 +39,8 @@ func Test_NewRegistry(t *testing.T) {
5239
})
5340

5441
t.Run("two files, ok", func(t *testing.T) {
42+
t.Parallel()
43+
5544
dir := t.TempDir()
5645

5746
createTestFile(t, dir, "code1")
@@ -65,6 +54,8 @@ func Test_NewRegistry(t *testing.T) {
6554
})
6655

6756
t.Run("two files, one has invalid extension", func(t *testing.T) {
57+
t.Parallel()
58+
6859
dir := t.TempDir()
6960

7061
createTestFile(t, dir, "code1")
@@ -81,6 +72,8 @@ func Test_NewRegistry(t *testing.T) {
8172
})
8273

8374
t.Run("two files, one is corrupted", func(t *testing.T) {
75+
t.Parallel()
76+
8477
dir := t.TempDir()
8578

8679
createTestFile(t, dir, "code1")
@@ -261,3 +254,20 @@ func Test_Registry_Save(t *testing.T) {
261254
require.Contains(t, r2.items, code)
262255
})
263256
}
257+
258+
func createTestFile(t *testing.T, dir string, name string) {
259+
t.Helper()
260+
261+
sc, err := spellchecker.New("abc")
262+
require.NoError(t, err)
263+
264+
item := RegistryItem{
265+
Spellchecker: sc,
266+
}
267+
268+
data, err := json.Marshal(&item)
269+
require.NoError(t, err)
270+
271+
err = os.WriteFile(path.Join(dir, fileName(name)), data, 0755)
272+
require.NoError(t, err)
273+
}

server.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package web
22

33
import (
44
"context"
5+
"regexp"
56

67
"github.com/f1monkey/spellchecker-web/internal/routes"
78
"github.com/f1monkey/spellchecker-web/internal/spellchecker"
@@ -10,14 +11,14 @@ import (
1011
swgui "github.com/swaggest/swgui/v5emb"
1112
)
1213

13-
func NewServer(appCtx context.Context, registry *spellchecker.Registry) *web.Service {
14+
func NewServer(appCtx context.Context, registry *spellchecker.Registry, splitter *regexp.Regexp) *web.Service {
1415
s := web.NewService(openapi31.NewReflector())
1516

1617
s.OpenAPISchema().SetTitle("Spellchecker")
1718
s.OpenAPISchema().SetDescription("To fix words")
1819
s.OpenAPISchema().SetVersion("v1")
1920

20-
s.Route("/v1", routes.Routes(registry))
21+
s.Route("/v1", routes.Routes(registry, splitter))
2122

2223
// Swagger UI endpoint at /docs.
2324
s.Docs("/docs", swgui.New)

0 commit comments

Comments
 (0)