diff --git a/benchmark_test.go b/benchmark_test.go
index 9d0ee5c..6f3e8ab 100644
--- a/benchmark_test.go
+++ b/benchmark_test.go
@@ -2,6 +2,7 @@ package strings2
 
 import (
 	"testing"
+	"unicode"
 )
 
 func BenchmarkUpperCaseFirst_ASCII_Short(b *testing.B) {
@@ -181,3 +182,19 @@ func BenchmarkSplitMixCase(b *testing.B) {
 		_, _ = WordsToFormattedCase(words, OptionMixCaseSupport(), OptionDelimiter("-"))
 	}
 }
+
+func BenchmarkPerformCaseFirst(b *testing.B) {
+	s := "test"
+	fn := unicode.ToUpper
+	for i := 0; i < b.N; i++ {
+		performCaseFirst(s, fn)
+	}
+}
+
+func BenchmarkPerformCaseFirst_Long(b *testing.B) {
+	s := "teststringwithmorecharacters"
+	fn := unicode.ToUpper
+	for i := 0; i < b.N; i++ {
+		performCaseFirst(s, fn)
+	}
+}
diff --git a/parts_num_test.go b/parts_num_test.go
deleted file mode 100644
index 81b9993..0000000
--- a/parts_num_test.go
+++ /dev/null
@@ -1,51 +0,0 @@
-package strings2
-
-import (
-	"reflect"
-	"testing"
-)
-
-func TestNumberMode(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    string
-		mode     NumberMode
-		expected []string
-	}{
-		// None
-		{"None_User123ID", "User123ID", NumberModeNone, []string{"User123ID"}},
-		{"None_UPPER123", "UPPER123", NumberModeNone, []string{"UPPER123"}},
-		{"None_123test", "123test", NumberModeNone, []string{"123test"}},
-
-		// SplitAlways
-		{"SplitAlways_User123ID", "User123ID", NumberModeSplitAlways, []string{"User", "123", "ID"}},
-		{"SplitAlways_UPPER123", "UPPER123", NumberModeSplitAlways, []string{"UPPER", "123"}},
-		{"SplitAlways_123test", "123test", NumberModeSplitAlways, []string{"123", "test"}},
-
-		// MergeWithWord
-		{"MergeWithWord_User123ID", "User123ID", NumberModeMergeWithWord, []string{"User123", "ID"}},
-		{"MergeWithWord_UPPER123", "UPPER123", NumberModeMergeWithWord, []string{"UPPER123"}},
-		{"MergeWithWord_123test", "123test", NumberModeMergeWithWord, []string{"123test"}},
-
-		// TreatAsLowercase
-		{"TreatAsLowercase_User123ID", "User123ID", NumberModeTreatAsLowercase, []string{"User123", "ID"}},
-		{"TreatAsLowercase_UPPER123", "UPPER123", NumberModeTreatAsLowercase, []string{"UPPE", "R123"}},
-		{"TreatAsLowercase_123test", "123test", NumberModeTreatAsLowercase, []string{"123test"}},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			words, err := Parse(tt.input, WithNumberMode(tt.mode))
-			if err != nil {
-				t.Fatalf("Parse failed: %v", err)
-			}
-			var got []string
-			for _, w := range words {
-				got = append(got, w.String())
-			}
-			if !reflect.DeepEqual(got, tt.expected) {
-				t.Errorf("Parse(%q) with mode %v = %v; want %v", tt.input, tt.mode, got, tt.expected)
-			}
-		})
-	}
-}
diff --git a/perform_case_first_bench_test.go b/perform_case_first_bench_test.go
deleted file mode 100644
index fdd7c83..0000000
--- a/perform_case_first_bench_test.go
+++ /dev/null
@@ -1,22 +0,0 @@
-package strings2
-
-import (
-	"testing"
-	"unicode"
-)
-
-func BenchmarkPerformCaseFirst(b *testing.B) {
-	s := "test"
-	fn := unicode.ToUpper
-	for i := 0; i < b.N; i++ {
-		performCaseFirst(s, fn)
-	}
-}
-
-func BenchmarkPerformCaseFirst_Long(b *testing.B) {
-	s := "teststringwithmorecharacters"
-	fn := unicode.ToUpper
-	for i := 0; i < b.N; i++ {
-		performCaseFirst(s, fn)
-	}
-}
diff --git a/types.go b/types.go
index 1766082..d537a5f 100644
--- a/types.go
+++ b/types.go
@@ -187,6 +187,32 @@ func upperCaseFirstLower(s string, mode UTF8Mode) (string, error) {
 func (w ExactCaseWord) String() string {
 	return string(w)
 }
+// WordLength reports len of the concrete value stored in word, read directly
+// from that value instead of going through word.String().
+//
+// An error naming the dynamic type is returned for any type not listed below;
+// a nil Word also takes that path and is reported as <nil>.
+func WordLength(word Word) (int, error) {
+	// Each type needs its own case: in a multi-type case w would keep the
+	// interface type and len(w) would not compile.
+	switch w := word.(type) {
+	case SingleCaseWord:
+		return len(w), nil
+	case FirstUpperCaseWord:
+		return len(w), nil
+	case ExactCaseWord:
+		return len(w), nil
+	case AcronymWord:
+		return len(w), nil
+	case UpperCaseWord:
+		return len(w), nil
+	case SeparatorWord:
+		return len(w), nil
+	default:
+		return 0, fmt.Errorf("unknown word type: %T", word)
+	}
+}
+
 // Options
 type Option func(*caseConfig)
 
@@ -318,8 +344,30 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
 		cfg.firstUpper = true
 	}
 
-	result := make([]string, 0, len(words))
+	// Estimate the output size so the builder can reserve its buffer up front.
+	size := 0
 	for _, word := range words {
+		// The length only feeds the capacity hint, so a Word that WordLength
+		// does not recognise is skipped here rather than failing the call;
+		// how such a word is handled is left entirely to the formatting
+		// loop below.
+		if l, err := WordLength(word); err == nil {
+			// heuristic: add 5 to allow for transformations like splitMixCase
+			size += l + 5
+		}
+	}
+	if len(words) > 1 {
+		size += len(cfg.delimiter) * (len(words) - 1)
+	}
+
+	var b strings.Builder
+	b.Grow(size)
+
+	for i, word := range words {
+		if i > 0 {
+			b.WriteString(cfg.delimiter)
+		}
+
 		var w string
 		switch word := word.(type) {
 		case SingleCaseWord:
@@ -393,10 +441,10 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
 			w = word.String()
 		}
 
-		result = append(result, w)
+		b.WriteString(w)
 	}
 
-	final := strings.Join(result, cfg.delimiter)
+	final := b.String()
 
 	if cfg.firstUpper {
 		final = UpperCaseFirst(final)
diff --git a/types_internal_test.go b/types_internal_test.go
deleted file mode 100644
index a8e423d..0000000
--- a/types_internal_test.go
+++ /dev/null
@@ -1,197 +0,0 @@
-package strings2
-
-import (
-	"errors"
-	"testing"
-)
-
-func TestUpperCaseFirstLower_Correctness(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    string
-		expected string
-	}{
-		{
-			name:     "Empty String",
-			input:    "",
-			expected: "",
-		},
-		{
-			name:     "ASCII Lower",
-			input:    "test",
-			expected: "Test",
-		},
-		{
-			name:     "ASCII Mixed",
-			input:    "tEsT",
-			expected: "Test",
-		},
-		{
-			name:     "ASCII Upper",
-			input:    "TEST",
-			expected: "Test",
-		},
-		{
-			name:     "Already Correct",
-			input:    "Test",
-			expected: "Test",
-		},
-		{
-			name:     "Unicode Lower",
-			input:    "äpfel",
-			expected: "Äpfel",
-		},
-		{
-			name:     "Unicode Upper",
-			input:    "ÄPFEL",
-			expected: "Äpfel",
-		},
-		{
-			name:     "Unicode Mixed",
-			input:    "äPfEl",
-			expected: "Äpfel",
-		},
-		{
-			name:     "Special Char Start",
-			input:    "!test",
-			expected: "!test",
-		},
-		{
-			name:     "Number Start",
-			input:    "1test",
-			expected: "1test",
-		},
-		{
-			name:     "Invalid UTF-8",
-			input:    "\xff\xfe\xfd",
-			expected: "\uFFFD\uFFFD\uFFFD",
-		},
-		{
-			name:     "Partial Invalid UTF-8",
-			input:    "test\xff",
-			expected: "Test\uFFFD",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := upperCaseFirstLower(tt.input, UTF8Replace)
-			if err != nil {
-				t.Errorf("upperCaseFirstLower(%q, UTF8Replace) returned unexpected error: %v", tt.input, err)
-			}
-			if got != tt.expected {
-				t.Errorf("upperCaseFirstLower(%q) = %q, want %q", tt.input, got, tt.expected)
-			}
-		})
-	}
-}
-
-func TestUpperCaseFirstLower_Strict(t *testing.T) {
-	tests := []struct {
-		name      string
-		input     string
-		expectErr bool
-	}{
-		{
-			name:      "Valid ASCII",
-			input:     "test",
-			expectErr: false,
-		},
-		{
-			name:      "Valid Unicode",
-			input:     "äpfel",
-			expectErr: false,
-		},
-		{
-			name:      "Invalid UTF-8 Start",
-			input:     "\xfftest",
-			expectErr: true,
-		},
-		{
-			name:      "Invalid UTF-8 Middle",
-			input:     "te\xffst",
-			expectErr: true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			_, err := upperCaseFirstLower(tt.input, UTF8Strict)
-			if tt.expectErr {
-				if err == nil {
-					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) expected error, got nil", tt.input)
-				}
-				if !errors.Is(err, ErrRune) {
-					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) expected ErrRune, got %v", tt.input, err)
-				}
-			} else {
-				if err != nil {
-					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) unexpected error: %v", tt.input, err)
-				}
-			}
-		})
-	}
-}
-
-func TestUpperCaseFirstLower_Loose(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    string
-		expected string
-	}{
-		{
-			name:     "Invalid UTF-8 Start",
-			input:    "\xfftest",
-			expected: "\xfftest", // Preserves invalid byte
-		},
-		{
-			name:     "Invalid UTF-8 Middle",
-			input:    "te\xffst",
-			expected: "Te\xffst", // Preserves invalid byte, title cases valid parts
-		},
-		{
-			name:     "Mixed Invalid",
-			input:    "\xffT\xff",
-			expected: "\xfft\xff", // Start invalid kept, 'T' -> 't', 't' lowercased? No wait.
-			// upperCaseFirstLower Logic:
-			// 1. Decode first rune. If invalid: write byte.
-			// 2. Loop rest. If invalid: write byte. Else toLower.
-			// Input: \xff T \xff
-			// 1. First: \xff. Invalid. Write \xff.
-			// 2. Rest: "T\xff".
-			// - 'T': ToLower -> 't'.
-			// - \xff: Invalid. Write \xff.
-			// Result: "\xfft\xff".
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := upperCaseFirstLower(tt.input, UTF8Ignore)
-			if err != nil {
-				t.Errorf("upperCaseFirstLower(%q, UTF8Ignore) returned unexpected error: %v", tt.input, err)
-			}
-			if got != tt.expected {
-				t.Errorf("upperCaseFirstLower(%q, UTF8Ignore) = %q (bytes: %x), want %q (bytes: %x)", tt.input, got, []byte(got), tt.expected, []byte(tt.expected))
-			}
-		})
-	}
-}
-
-func TestUpperCaseFirstLower_Allocations(t *testing.T) {
-	// Tests that no allocation occurs if the string is already correct
-	input := "Test"
-	if testing.AllocsPerRun(10, func() {
-		_, _ = upperCaseFirstLower(input, UTF8Replace)
-	}) > 0 {
-		t.Errorf("upperCaseFirstLower(%q) allocated memory when no change was needed", input)
-	}
-
-	// Test that allocation occurs when change IS needed
-	input2 := "test"
-	if testing.AllocsPerRun(10, func() {
-		_, _ = upperCaseFirstLower(input2, UTF8Replace)
-	}) == 0 {
-		t.Errorf("upperCaseFirstLower(%q) did not allocate memory when change was needed", input2)
-	}
-}
diff --git a/types_test.go b/types_test.go
index 714c698..52f5d1c 100644
--- a/types_test.go
+++ b/types_test.go
@@ -555,3 +555,189 @@ func TestUpperCaseWord_Verbatim_Bug(t *testing.T) {
 		t.Errorf("UpperCaseWord (SmartAcronyms=false) did not preserve case. Got %q, want %q", res2, expected)
 	}
 }
+
+func TestUpperCaseFirstLower_Correctness(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "Empty String",
+			input:    "",
+			expected: "",
+		},
+		{
+			name:     "ASCII Lower",
+			input:    "test",
+			expected: "Test",
+		},
+		{
+			name:     "ASCII Mixed",
+			input:    "tEsT",
+			expected: "Test",
+		},
+		{
+			name:     "ASCII Upper",
+			input:    "TEST",
+			expected: "Test",
+		},
+		{
+			name:     "Already Correct",
+			input:    "Test",
+			expected: "Test",
+		},
+		{
+			name:     "Unicode Lower",
+			input:    "äpfel",
+			expected: "Äpfel",
+		},
+		{
+			name:     "Unicode Upper",
+			input:    "ÄPFEL",
+			expected: "Äpfel",
+		},
+		{
+			name:     "Unicode Mixed",
+			input:    "äPfEl",
+			expected: "Äpfel",
+		},
+		{
+			name:     "Special Char Start",
+			input:    "!test",
+			expected: "!test",
+		},
+		{
+			name:     "Number Start",
+			input:    "1test",
+			expected: "1test",
+		},
+		{
+			name:     "Invalid UTF-8",
+			input:    "\xff\xfe\xfd",
+			expected: "\uFFFD\uFFFD\uFFFD",
+		},
+		{
+			name:     "Partial Invalid UTF-8",
+			input:    "test\xff",
+			expected: "Test\uFFFD",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := upperCaseFirstLower(tt.input, UTF8Replace)
+			if err != nil {
+				t.Errorf("upperCaseFirstLower(%q, UTF8Replace) returned unexpected error: %v", tt.input, err)
+			}
+			if got != tt.expected {
+				t.Errorf("upperCaseFirstLower(%q) = %q, want %q", tt.input, got, tt.expected)
+			}
+		})
+	}
+}
+
+func TestUpperCaseFirstLower_Strict(t *testing.T) {
+	tests := []struct {
+		name      string
+		input     string
+		expectErr bool
+	}{
+		{
+			name:      "Valid ASCII",
+			input:     "test",
+			expectErr: false,
+		},
+		{
+			name:      "Valid Unicode",
+			input:     "äpfel",
+			expectErr: false,
+		},
+		{
+			name:      "Invalid UTF-8 Start",
+			input:     "\xfftest",
+			expectErr: true,
+		},
+		{
+			name:      "Invalid UTF-8 Middle",
+			input:     "te\xffst",
+			expectErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			_, err := upperCaseFirstLower(tt.input, UTF8Strict)
+			if tt.expectErr {
+				if err == nil {
+					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) expected error, got nil", tt.input)
+				}
+				if !errors.Is(err, ErrRune) {
+					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) expected ErrRune, got %v", tt.input, err)
+				}
+			} else {
+				if err != nil {
+					t.Errorf("upperCaseFirstLower(%q, UTF8Strict) unexpected error: %v", tt.input, err)
+				}
+			}
+		})
+	}
+}
+
+func TestUpperCaseFirstLower_Loose(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "Invalid UTF-8 Start",
+			input:    "\xfftest",
+			expected: "\xfftest", // Preserves invalid byte
+		},
+		{
+			name:     "Invalid UTF-8 Middle",
+			input:    "te\xffst",
+			expected: "Te\xffst", // Preserves invalid byte, title cases valid parts
+		},
+		{
+			name:  "Mixed Invalid",
+			input: "\xffT\xff",
+			// The first decoded rune is the invalid byte \xff, which is written
+			// through unchanged. It still fills the "first rune" slot, so the
+			// following 'T' belongs to the rest of the string and is lowercased
+			// to 't'. The trailing \xff is passed through as well.
+			expected: "\xfft\xff",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := upperCaseFirstLower(tt.input, UTF8Ignore)
+			if err != nil {
+				t.Errorf("upperCaseFirstLower(%q, UTF8Ignore) returned unexpected error: %v", tt.input, err)
+			}
+			if got != tt.expected {
+				t.Errorf("upperCaseFirstLower(%q, UTF8Ignore) = %q (bytes: %x), want %q (bytes: %x)", tt.input, got, []byte(got), tt.expected, []byte(tt.expected))
+			}
+		})
+	}
+}
+
+func TestUpperCaseFirstLower_Allocations(t *testing.T) {
+	// Tests that no allocation occurs if the string is already correct
+	input := "Test"
+	if testing.AllocsPerRun(10, func() {
+		_, _ = upperCaseFirstLower(input, UTF8Replace)
+	}) > 0 {
+		t.Errorf("upperCaseFirstLower(%q) allocated memory when no change was needed", input)
+	}
+
+	// Test that allocation occurs when change IS needed
+	input2 := "test"
+	if testing.AllocsPerRun(10, func() {
+		_, _ = upperCaseFirstLower(input2, UTF8Replace)
+	}) == 0 {
+		t.Errorf("upperCaseFirstLower(%q) did not allocate memory when change was needed", input2)
+	}
+}