fix: handle initialisms at start of words (Xml, Html)

kalbasit · claude · kalbasit · commit ebe6f89d74d8 · 2026-03-22T20:50:41.000-07:00
Add patterns to fix acronyms when they appear at the start of a word:
- `XmlParser` -> `XMLParser`
- `HtmlDocument` -> `HTMLDocument`

These are initialisms (XML, HTML) that should be all caps in Go exports.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/generator/helpers.go b/generator/helpers.go
@@ -55,7 +55,7 @@ func toSingular(s string) string { return inflection.Singular(s) }
 
 // FixAcronyms corrects common Go acronym casing issues using word-boundary-aware
 // regex replacements to avoid corrupting words that contain acronyms as substrings.
-// For example: Id -> ID, Api -> API, Sql -> SQL, Url -> URL.
+// For example: Id -> ID, Api -> API, Sql -> SQL, Url -> URL, Xml -> XML.
 func FixAcronyms(content []byte) []byte {
 	// Common Go acronyms that should be all caps, with their correct form.
 	acronyms := []struct {
@@ -86,19 +86,27 @@ func FixAcronyms(content []byte) []byte {
 
 	for _, a := range acronyms {
 		// Pre-compile regexes once per acronym (not inside inner loop).
-		// Use three patterns to handle acronyms in different positions:
-		// 1. Mid: `([a-z])(Acronym)([A-Z])` - acronym in middle of camelCase.
-		// 2. End: `([a-z])(Acronym)$` - acronym at end of identifier.
-		// 3. NonLetter: `([a-z])(Acronym)([^A-Za-z])` - acronym followed by non-letter.
+		// Use patterns to handle acronyms in different positions:
+		// 1. Start: `^(Acronym)([A-Z])` - acronym at start followed by uppercase, e.g., Xml in XmlParser -> XMLParser.
+		// 2. AfterUpper: `([A-Z])(Acronym)([A-Z])` - acronym between uppercase letters, e.g., Html in XHtmlDoc -> XHTMLDoc.
+		// 3. Mid: `([a-z])(Acronym)([A-Z])` - acronym in middle of camelCase, e.g., Id in userIdMore.
+		// 4. End: `([a-z])(Acronym)$` - acronym at end of identifier, e.g., Id in userId.
+		// 5. NonLetter: `([a-z])(Acronym)([^A-Za-z])` - acronym followed by non-letter.
+		regexStart := regexp.MustCompile(`^(` + a.pattern + `)([A-Z])`)
+		regexAfterUpper := regexp.MustCompile(`([A-Z])(` + a.pattern + `)([A-Z])`)
 		regexMid := regexp.MustCompile(`([a-z])(` + a.pattern + `)([A-Z])`)
 		regexEnd := regexp.MustCompile(`([a-z])(` + a.pattern + `)$`)
 		regexNonLetter := regexp.MustCompile(`([a-z])(` + a.pattern + `)([^A-Za-z])`)
 
-		// For middle case: preserve the following uppercase letter via ${3}.
+		// Start case: replace with replacement followed by ${2} (the uppercase after).
+		result = regexStart.ReplaceAllString(result, a.replacement+"${2}")
+		// After uppercase case: preserve surrounding uppercase via ${1} and ${3}.
+		result = regexAfterUpper.ReplaceAllString(result, "${1}"+a.replacement+"${3}")
+		// Middle case: preserve the following uppercase letter via ${3}.
 		result = regexMid.ReplaceAllString(result, "${1}"+a.replacement+"${3}")
-		// For non-letter case: preserve the following character via ${3}.
+		// Non-letter case: preserve the following character via ${3}.
 		result = regexNonLetter.ReplaceAllString(result, "${1}"+a.replacement+"${3}")
-		// For end case: no ${3} since there's no following letter.
+		// End case: no ${3} since there's no following letter.
 		result = regexEnd.ReplaceAllString(result, "${1}"+a.replacement)
 	}
 
diff --git a/generator/helpers_test.go b/generator/helpers_test.go
@@ -50,14 +50,14 @@ func TestFixAcronyms(t *testing.T) {
 			expected: "Curling",
 		},
 		{
-			name:     "XmlParser should not be corrupted",
+			name:     "XmlParser should become XMLParser",
 			input:    "XmlParser",
-			expected: "XmlParser",
+			expected: "XMLParser",
 		},
 		{
-			name:     "HtmlDocument should not be corrupted",
+			name:     "HtmlDocument should become HTMLDocument",
 			input:    "HtmlDocument",
-			expected: "HtmlDocument",
+			expected: "HTMLDocument",
 		},
 		{
 			name:     "Multiple acronyms in one string",

Original file line number	Diff line number	Diff line change
`@@ -50,14 +50,14 @@ func TestFixAcronyms(t *testing.T) {`
`50`	`50`	`expected: "Curling",`
`51`	`51`	`},`
`52`	`52`	`{`
`53`		`- name: "XmlParser should not be corrupted",`
	`53`	`+ name: "XmlParser should become XMLParser",`
`54`	`54`	`input: "XmlParser",`
`55`		`- expected: "XmlParser",`
	`55`	`+ expected: "XMLParser",`
`56`	`56`	`},`
`57`	`57`	`{`
`58`		`- name: "HtmlDocument should not be corrupted",`
	`58`	`+ name: "HtmlDocument should become HTMLDocument",`
`59`	`59`	`input: "HtmlDocument",`
`60`		`- expected: "HtmlDocument",`
	`60`	`+ expected: "HTMLDocument",`
`61`	`61`	`},`
`62`	`62`	`{`
`63`	`63`	`name: "Multiple acronyms in one string",`