Skip to content

Commit e6b2a16

Browse files
kalbasit and claude committed
fix: handle initialisms at start of words (Xml, Html)
Add patterns to fix acronyms when they appear at the start of a word:

- `XmlParser` -> `XMLParser`
- `HtmlDocument` -> `HTMLDocument`

These are initialisms (XML, HTML) that should be all caps in Go exports.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6f783f5 commit e6b2a16

2 files changed

Lines changed: 20 additions & 12 deletions

File tree

generator/helpers.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func toSingular(s string) string { return inflection.Singular(s) }
5555

5656
// FixAcronyms corrects common Go acronym casing issues using word-boundary-aware
5757
// regex replacements to avoid corrupting words that contain acronyms as substrings.
58-
// For example: Id -> ID, Api -> API, Sql -> SQL, Url -> URL.
58+
// For example: Id -> ID, Api -> API, Sql -> SQL, Url -> URL, Xml -> XML.
5959
func FixAcronyms(content []byte) []byte {
6060
// Common Go acronyms that should be all caps, with their correct form.
6161
acronyms := []struct {
@@ -86,19 +86,27 @@ func FixAcronyms(content []byte) []byte {
8686

8787
for _, a := range acronyms {
8888
// Pre-compile regexes once per acronym (not inside inner loop).
89-
// Use three patterns to handle acronyms in different positions:
90-
// 1. Mid: `([a-z])(Acronym)([A-Z])` - acronym in middle of camelCase.
91-
// 2. End: `([a-z])(Acronym)$` - acronym at end of identifier.
92-
// 3. NonLetter: `([a-z])(Acronym)([^A-Za-z])` - acronym followed by non-letter.
89+
// Use patterns to handle acronyms in different positions:
90+
// 1. Start: `^(Acronym)([A-Z])` - acronym at start followed by uppercase, e.g., Xml in XmlParser.
91+
// 2. AfterUpper: `([A-Z])(Acronym)([A-Z])` - acronym between uppercase letters, e.g., Html in XHtmlDoc.
92+
// 3. Mid: `([a-z])(Acronym)([A-Z])` - acronym in middle of camelCase, e.g., Id in userIdMore.
93+
// 4. End: `([a-z])(Acronym)$` - acronym at end of identifier, e.g., Id in userId.
94+
// 5. NonLetter: `([a-z])(Acronym)([^A-Za-z])` - acronym followed by non-letter.
95+
regexStart := regexp.MustCompile(`^(` + a.pattern + `)([A-Z])`)
96+
regexAfterUpper := regexp.MustCompile(`([A-Z])(` + a.pattern + `)([A-Z])`)
9397
regexMid := regexp.MustCompile(`([a-z])(` + a.pattern + `)([A-Z])`)
9498
regexEnd := regexp.MustCompile(`([a-z])(` + a.pattern + `)$`)
9599
regexNonLetter := regexp.MustCompile(`([a-z])(` + a.pattern + `)([^A-Za-z])`)
96100

97-
// For middle case: preserve the following uppercase letter via ${3}.
101+
// Start case: replace with replacement followed by ${2} (the uppercase after).
102+
result = regexStart.ReplaceAllString(result, a.replacement+"${2}")
103+
// After uppercase case: preserve surrounding uppercase via ${1} and ${3}.
104+
result = regexAfterUpper.ReplaceAllString(result, "${1}"+a.replacement+"${3}")
105+
// Middle case: preserve the following uppercase letter via ${3}.
98106
result = regexMid.ReplaceAllString(result, "${1}"+a.replacement+"${3}")
99-
// For non-letter case: preserve the following character via ${3}.
107+
// Non-letter case: preserve the following character via ${3}.
100108
result = regexNonLetter.ReplaceAllString(result, "${1}"+a.replacement+"${3}")
101-
// For end case: no ${3} since there's no following letter.
109+
// End case: no ${3} since there's no following letter.
102110
result = regexEnd.ReplaceAllString(result, "${1}"+a.replacement)
103111
}
104112

generator/helpers_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@ func TestFixAcronyms(t *testing.T) {
5050
expected: "Curling",
5151
},
5252
{
53-
name: "XmlParser should not be corrupted",
53+
name: "XmlParser should become XMLParser",
5454
input: "XmlParser",
55-
expected: "XmlParser",
55+
expected: "XMLParser",
5656
},
5757
{
58-
name: "HtmlDocument should not be corrupted",
58+
name: "HtmlDocument should become HTMLDocument",
5959
input: "HtmlDocument",
60-
expected: "HtmlDocument",
60+
expected: "HTMLDocument",
6161
},
6262
{
6363
name: "Multiple acronyms in one string",

0 commit comments

Comments
 (0)