Skip to content

Commit bd74fac

Browse files
kalbasit and claude committed
fix: use word-boundary-aware regex for acronym replacement
Replace strings.ReplaceAll with a regexp that matches an acronym only when it is preceded by a lowercase letter and followed by a capital letter or the end of the string. This prevents corrupting words like "Identifier" (which would become "IDentifier" with a plain ReplaceAll). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6b51e0d commit bd74fac

1 file changed

Lines changed: 39 additions & 28 deletions

File tree

generator/helpers.go

Lines changed: 39 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ import (
55
"log"
66
"os"
77
"path/filepath"
8+
"regexp"
89
"strings"
910
"unicode"
1011

@@ -52,41 +53,51 @@ func extractBulkFor(comment string) string {
5253

5354
// toSingular returns the result of passing s to inflection.Singular.
func toSingular(s string) string {
	singular := inflection.Singular(s)
	return singular
}
5455

55-
// fixAcronyms corrects common Go acronym casing issues.
56+
// fixAcronyms corrects common Go acronym casing issues using word-boundary-aware
57+
// regex replacements to avoid corrupting words that contain acronyms as substrings.
5658
// For example: Id -> ID, Api -> API, Sql -> SQL, Url -> URL.
5759
func fixAcronyms(content []byte) []byte {
58-
// Common Go acronyms that should be all caps.
59-
acronyms := []string{
60-
"Api", "API",
61-
"Id", "ID",
62-
"Sql", "SQL",
63-
"Url", "URL",
64-
"Html", "HTML",
65-
"Xml", "XML",
66-
"Json", "JSON",
67-
"Jwt", "JWT",
68-
"Cpu", "CPU",
69-
"Io", "IO",
70-
"Ip", "IP",
71-
"Tcp", "TCP",
72-
"Udp", "UDP",
73-
"Ssh", "SSH",
74-
"TLS", "TLS", // already correct
75-
"Acl", "ACL",
76-
"S3", "S3", // already correct
77-
"Ec2", "EC2",
78-
"Ebs", "EBS",
60+
// Common Go acronyms that should be all caps, with their correct form.
61+
acronymReplacements := []struct {
62+
pattern string
63+
replacement string
64+
}{
65+
{"Acl", "ACL"},
66+
{"Api", "API"},
67+
{"Cpu", "CPU"},
68+
{"Ec2", "EC2"},
69+
{"Ebs", "EBS"},
70+
{"Html", "HTML"},
71+
{"Id", "ID"},
72+
{"Io", "IO"},
73+
{"Ip", "IP"},
74+
{"Json", "JSON"},
75+
{"Jwt", "JWT"},
76+
{"S3", "S3"}, // already correct, included for completeness
77+
{"Sql", "SQL"},
78+
{"Ssh", "SSH"},
79+
{"Tcp", "TCP"},
80+
{"Tls", "TLS"},
81+
{"Udp", "UDP"},
82+
{"Url", "URL"},
83+
{"Xml", "XML"},
7984
}
8085

8186
result := string(content)
8287

83-
for i := 0; i < len(acronyms)-1; i += 2 {
84-
wrong := acronyms[i]
85-
right := acronyms[i+1]
86-
// Only replace if not already correct (avoid infinite loops).
87-
if wrong != right {
88-
result = strings.ReplaceAll(result, wrong, right)
88+
for _, r := range acronymReplacements {
89+
// Only process if the pattern differs from replacement (skip already-correct cases)
90+
if r.pattern == r.replacement {
91+
continue
8992
}
93+
94+
// Match acronym when preceded by a lowercase letter and followed by
95+
// a capital letter or end of string. This prevents replacing "Id" in
96+
// "Identifier" (where it should stay as "Id") but correctly handles
97+
// "userId" -> "userID" and "myId" -> "myID".
98+
regex := regexp.MustCompile(`([a-z])(` + r.pattern + `)([A-Z]|$)`)
99+
repl := "$1" + r.replacement + "$3"
100+
result = regex.ReplaceAllString(result, repl)
90101
}
91102

92103
return []byte(result)

0 commit comments

Comments
 (0)