Skip to content

Commit 0cab112

Browse files
committed
MEDIUM: load acceptable words from embedded dictionary files
Replace the hardcoded acceptable words map with embedded text files using go:embed. This change introduces an init function that automatically parses categorized text-based dictionaries (e.g., cloud, security) at runtime. Moving away from a hardcoded Go map makes it significantly easier to maintain, extend, and categorize the allowed technical vocabulary for spell checking without cluttering the source code.
1 parent b401fdb commit 0cab112

File tree

17 files changed

+2137
-114
lines changed

17 files changed

+2137
-114
lines changed

aspell/aspell.go

Lines changed: 116 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -24,40 +24,18 @@ type RemoteFile struct {
2424
}
2525

2626
// Aspell holds the spell-check settings. The struct tags name the YAML key each
// field is read from; HelpText is tagged "-" and so has no YAML key.
//
// From the code visible in this file: Mode selects what Check examines,
// IgnoreFiles holds the filters that isIgnoredFile matches file names against,
// and NoIgnoreIdentifiers turns off identifier collection in collectIdentifiers.
// NOTE(review): how RemoteFile, AllowedWords and MinLength are consumed is not
// visible here — confirm against checkSingle and the config loader.
type Aspell struct {
27-
RemoteFile RemoteFile `yaml:"remote_file"`
28-
Mode mode `yaml:"mode"`
29-
HelpText string `yaml:"-"`
30-
IgnoreFiles []string `yaml:"ignore_files"`
31-
AllowedWords []string `yaml:"allowed"`
32-
MinLength int `yaml:"min_length"`
33-
NoIgnoreIdentifiers bool `yaml:"no_ignore_identifiers"`
27+
RemoteFile RemoteFile `yaml:"remote_file"`
28+
Mode mode `yaml:"mode"`
29+
HelpText string `yaml:"-"`
30+
IgnoreFiles []string `yaml:"ignore_files"`
31+
AllowedWords []string `yaml:"allowed"`
32+
MinLength int `yaml:"min_length"`
33+
NoIgnoreIdentifiers bool `yaml:"no_ignore_identifiers"`
3434
}
3535

3636
// Package-level word sets, keyed by word with empty-struct values.
//
// acceptableWordsGlobal used to be a hardcoded literal; it now starts empty and
// is filled by the package init function in dictionaries.go, which stores the
// lowercased words found in the embedded dictionaries/*.txt files.
// NOTE(review): where badWordsGlobal is populated, and where both sets are read,
// is not visible in this view — presumably checkSingle; confirm.
var (
37-
acceptableWordsGlobal = map[string]struct{}{
38-
"haproxy": {},
39-
"golang": {},
40-
"ascii": {},
41-
"api": {},
42-
"goreleaser": {},
43-
"github": {},
44-
"gitlab": {},
45-
"yaml": {},
46-
"env": {},
47-
"config": {},
48-
"workdir": {},
49-
"entrypoint": {},
50-
"sudo": {},
51-
"dockerfile": {},
52-
"ghcr": {},
53-
"sed": {},
54-
"stdin": {},
55-
"args": {},
56-
"arg": {},
57-
"dev": {},
58-
"vcs": {},
59-
}
60-
badWordsGlobal = map[string]struct{}{}
37+
acceptableWordsGlobal = map[string]struct{}{}
38+
badWordsGlobal = map[string]struct{}{}
6139
)
6240

6341
func (a Aspell) checkSingle(data string, allowedWords []string) error {
@@ -124,119 +102,144 @@ func (a Aspell) checkSingle(data string, allowedWords []string) error {
124102
}
125103

126104
func (a Aspell) Check(subjects []string, commitsFull []string, content []map[string]string, junitSuite junit.Interface, gitHashes map[string]struct{}) error {
105+
commitsFullData := a.prepareCommits(commitsFull, gitHashes)
106+
identifierWords := a.collectIdentifiers(content)
107+
108+
var response strings.Builder
109+
switch a.Mode {
110+
case modeDisabled:
111+
return nil
112+
case modeSubject:
113+
a.checkSubjects(subjects, junitSuite, &response)
114+
case modeCommit, modeAll:
115+
if a.Mode == modeAll {
116+
a.checkFiles(content, identifierWords, junitSuite, &response)
117+
}
118+
a.checkCommitMessages(commitsFullData, identifierWords, junitSuite, &response)
119+
}
120+
121+
if len(response.String()) > 0 {
122+
return fmt.Errorf("%s", response.String())
123+
}
124+
return nil
125+
}
126+
127+
// prepareCommits returns a cleaned copy of every message in commitsFull.
//
// Each message is split on "\n"; a line is dropped when, after trimming
// surrounding whitespace, isSignatureLine reports it as a trailer such as
// "Signed-off-by:". Kept lines are re-joined untrimmed. When gitHashes is
// non-empty, each cleaned message is additionally passed through
// removeKnownHashesFromBody. The result has one entry per input message, in the
// same order, and is nil when commitsFull is empty.
func (Aspell) prepareCommits(commitsFull []string, gitHashes map[string]struct{}) []string {
127128
var commitsFullData []string
128129
for _, c := range commitsFull {
129130
commit := []string{}
130-
lines := strings.SplitSeq(c, "\n")
131-
for l := range lines {
131+
for l := range strings.SplitSeq(c, "\n") {
132132
c2 := strings.TrimSpace(l)
133-
if strings.HasPrefix(c2, "Signed-off-by:") ||
134-
strings.HasPrefix(c2, "Reviewed-by:") ||
135-
strings.HasPrefix(c2, "Tested-by:") ||
136-
strings.HasPrefix(c2, "Helped-by:") ||
137-
strings.HasPrefix(c2, "Reported-by:") ||
138-
strings.HasPrefix(c2, "Author:") ||
139-
strings.HasPrefix(c2, "Co-authored-by:") {
133+
if isSignatureLine(c2) {
140134
continue
141135
}
142-
143136
commit = append(commit, l)
144137
}
145138
commitsFullData = append(commitsFullData, strings.Join(commit, "\n"))
146139
}
147-
148-
// Remove known git commit hashes from body portions of commit messages
149-
// so they are not flagged by spell checking. Subject lines are preserved.
150140
if len(gitHashes) > 0 {
151141
for i, c := range commitsFullData {
152142
commitsFullData[i] = removeKnownHashesFromBody(c, gitHashes)
153143
}
154144
}
145+
return commitsFullData
146+
}
147+
148+
// isSignatureLine reports whether line begins with one of the attribution
// trailers that are excluded from spell checking ("Signed-off-by:",
// "Reviewed-by:", "Tested-by:", "Helped-by:", "Reported-by:", "Author:",
// "Co-authored-by:"). The match is case-sensitive and anchored at the very
// start of line, so callers must trim leading whitespace themselves.
func isSignatureLine(line string) bool {
	// None of the trailer keys contains a colon, so "line starts with <key>:"
	// is the same as "the text before the first colon equals <key>".
	key, _, hasColon := strings.Cut(line, ":")
	if !hasColon {
		return false
	}
	switch key {
	case "Signed-off-by",
		"Reviewed-by",
		"Tested-by",
		"Helped-by",
		"Reported-by",
		"Author",
		"Co-authored-by":
		return true
	default:
		return false
	}
}
155165

156-
// Collect identifiers (function names, variable names, etc.) from diff
157-
// content so they can be ignored during spell checking.
166+
// collectIdentifiers gathers the words that match.GetIdentifiersFromContent
// returns for every (file name, file content) pair in content, keeping only the
// first occurrence of each word. Words appear in first-seen order; because the
// inner maps are ranged over, the order across entries of one map is not fixed.
//
// It returns nil without inspecting content when a.NoIgnoreIdentifiers is set.
// When at least one word was collected, the count is written to the log.
func (a Aspell) collectIdentifiers(content []map[string]string) []string {
167+
if a.NoIgnoreIdentifiers {
168+
return nil
169+
}
158170
var identifierWords []string
159-
if !a.NoIgnoreIdentifiers {
160-
seen := map[string]struct{}{}
161-
for _, file := range content {
162-
for name, v := range file {
163-
for _, word := range match.GetIdentifiersFromContent(name, v) {
164-
if _, ok := seen[word]; !ok {
165-
seen[word] = struct{}{}
166-
identifierWords = append(identifierWords, word)
167-
}
171+
seen := map[string]struct{}{}
172+
for _, file := range content {
173+
for name, v := range file {
174+
for _, word := range match.GetIdentifiersFromContent(name, v) {
175+
if _, ok := seen[word]; !ok {
176+
seen[word] = struct{}{}
177+
identifierWords = append(identifierWords, word)
168178
}
169179
}
170180
}
171-
if len(identifierWords) > 0 {
172-
log.Printf("collected %d identifiers from diff content for spell check filtering", len(identifierWords))
181+
}
182+
if len(identifierWords) > 0 {
183+
log.Printf("collected %d identifiers from diff content for spell check filtering", len(identifierWords))
184+
}
185+
return identifierWords
186+
}
187+
188+
func (a Aspell) checkSubjects(subjects []string, junitSuite junit.Interface, response *strings.Builder) {
189+
for _, subject := range subjects {
190+
if err := a.checkSingle(subject, []string{}); err != nil {
191+
junitSuite.AddMessageFailed("commit message", "aspell check failed", err.Error())
192+
log.Println("commit message", err.Error())
193+
_, _ = fmt.Fprintf(response, "%s\n", err)
173194
}
174195
}
196+
}
175197

176-
var response strings.Builder
177-
switch a.Mode {
178-
case modeDisabled:
179-
return nil
180-
case modeSubject:
181-
for _, subject := range subjects {
182-
if err := a.checkSingle(subject, []string{}); err != nil {
183-
junitSuite.AddMessageFailed("commit message", "aspell check failed", err.Error())
184-
log.Println("commit message", err.Error())
185-
response.WriteString(fmt.Sprintf("%s\n", err))
186-
}
198+
// isIgnoredFile reports whether name matches at least one of the filters in
// a.IgnoreFiles, as decided by match.MatchFilter. It stops at the first match
// and returns false when a.IgnoreFiles is empty.
func (a Aspell) isIgnoredFile(name string) bool {
199+
for _, filter := range a.IgnoreFiles {
200+
if match.MatchFilter(name, filter) {
201+
return true
187202
}
188-
case modeCommit, modeAll:
189-
if a.Mode == modeAll {
190-
for _, file := range content {
191-
for name, v := range file {
192-
nextFile := false
193-
for _, filter := range a.IgnoreFiles {
194-
if match.MatchFilter(name, filter) {
195-
// log.Println("File", name, "in ignore list")
196-
nextFile = true
197-
continue
198-
}
199-
}
200-
if nextFile {
201-
continue
202-
}
203-
var imports []string
204-
if strings.HasSuffix(name, ".go") {
205-
imports = match.GetImportWordsFromGoFile(name)
206-
}
207-
imports = append(imports, identifierWords...)
208-
if err := a.checkSingle(v, imports); err != nil {
209-
junitSuite.AddMessageFailed(name, "aspell check failed", err.Error())
210-
log.Println(name, err.Error())
211-
response.WriteString(fmt.Sprintf("%s\n", err))
212-
}
213-
}
203+
}
204+
return false
205+
}
206+
207+
// checkFiles spell-checks the content of every file in content whose name is
// not matched by isIgnoredFile.
//
// The allowed-word list passed to checkSingle for a file is identifierWords,
// preceded — for names ending in ".go" — by the words returned from
// match.GetImportWordsFromGoFile. Each failure is reported to junitSuite under
// the file name, written to the log, and appended to response as its own line.
func (a Aspell) checkFiles(content []map[string]string, identifierWords []string, junitSuite junit.Interface, response *strings.Builder) {
208+
for _, file := range content {
209+
for name, v := range file {
210+
if a.isIgnoredFile(name) {
211+
continue
214212
}
215-
}
216-
// Check commit messages: subject without identifiers, body with identifiers
217-
for _, msg := range commitsFullData {
218-
parts := strings.SplitN(msg, "\n\n", 2)
219-
// Subject — no identifier filtering (same as hash behavior)
220-
if err := a.checkSingle(parts[0], []string{}); err != nil {
221-
junitSuite.AddMessageFailed("commit message", "aspell check failed", err.Error())
222-
log.Println("commit message", err.Error())
223-
response.WriteString(fmt.Sprintf("%s\n", err))
213+
var imports []string
214+
if strings.HasSuffix(name, ".go") {
215+
imports = match.GetImportWordsFromGoFile(name)
224216
}
225-
// Body — identifier filtering allowed
226-
if len(parts) > 1 {
227-
if err := a.checkSingle(parts[1], identifierWords); err != nil {
228-
junitSuite.AddMessageFailed("commit message", "aspell check failed", err.Error())
229-
log.Println("commit message body", err.Error())
230-
response.WriteString(fmt.Sprintf("%s\n", err))
231-
}
217+
imports = append(imports, identifierWords...)
218+
if err := a.checkSingle(v, imports); err != nil {
219+
junitSuite.AddMessageFailed(name, "aspell check failed", err.Error())
220+
log.Println(name, err.Error())
221+
_, _ = fmt.Fprintf(response, "%s\n", err)
232222
}
233223
}
234224
}
225+
}
235226

236-
if len(response.String()) > 0 {
237-
return fmt.Errorf("%s", response.String())
227+
func (a Aspell) checkCommitMessages(commitsFullData []string, identifierWords []string, junitSuite junit.Interface, response *strings.Builder) {
228+
for _, msg := range commitsFullData {
229+
parts := strings.SplitN(msg, "\n\n", 2)
230+
if err := a.checkSingle(parts[0], []string{}); err != nil {
231+
junitSuite.AddMessageFailed("commit message", "aspell check failed", err.Error())
232+
log.Println("commit message", err.Error())
233+
_, _ = fmt.Fprintf(response, "%s\n", err)
234+
}
235+
if len(parts) > 1 {
236+
if err := a.checkSingle(parts[1], identifierWords); err != nil {
237+
junitSuite.AddMessageFailed("commit message", "aspell check failed", err.Error())
238+
log.Println("commit message body", err.Error())
239+
_, _ = fmt.Fprintf(response, "%s\n", err)
240+
}
241+
}
238242
}
239-
return nil
240243
}
241244

242245
var hexStringRe = regexp.MustCompile(`[0-9a-fA-F]{7,40}`)

aspell/dictionaries.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package aspell
2+
3+
import (
4+
"embed"
5+
"io/fs"
6+
"strings"
7+
)
8+
9+
//go:embed dictionaries/*.txt
10+
var embeddedDictionaries embed.FS
11+
12+
func init() {
13+
entries, err := fs.ReadDir(embeddedDictionaries, "dictionaries")
14+
if err != nil {
15+
return
16+
}
17+
for _, entry := range entries {
18+
if entry.IsDir() {
19+
continue
20+
}
21+
data, err := embeddedDictionaries.ReadFile("dictionaries/" + entry.Name())
22+
if err != nil {
23+
continue
24+
}
25+
for line := range strings.SplitSeq(string(data), "\n") {
26+
word := strings.TrimSpace(line)
27+
if word == "" || strings.HasPrefix(word, "#") {
28+
continue
29+
}
30+
acceptableWordsGlobal[strings.ToLower(word)] = struct{}{}
31+
}
32+
}
33+
}

0 commit comments

Comments
 (0)