@@ -237,3 +237,343 @@ func splitChars(s string) []string {
237237
238238 return chars
239239}
240+
241+ // TestSequenceMatcherCaching tests that GetMatchingBlocks and GetOpCodes
242+ // return cached results when called multiple times.
243+ func TestSequenceMatcherCaching (t * testing.T ) {
244+ a := splitChars ("abc" )
245+ b := splitChars ("abd" )
246+
247+ sm := NewMatcher (a , b )
248+
249+ // Call GetMatchingBlocks twice - second call should use cache
250+ blocks1 := sm .GetMatchingBlocks ()
251+ blocks2 := sm .GetMatchingBlocks ()
252+ assertEqual (t , blocks1 , blocks2 )
253+
254+ // Call GetOpCodes twice - second call should use cache
255+ codes1 := sm .GetOpCodes ()
256+ codes2 := sm .GetOpCodes ()
257+ assertEqual (t , codes1 , codes2 )
258+ }
259+
260+ // TestSetSeqSamePointer tests that SetSeq1 and SetSeq2 do NOT reset caches
261+ // when the same slice pointer is passed (early return optimization).
262+ func TestSetSeqSamePointer (t * testing.T ) {
263+ a := []string {"a" , "b" , "c" }
264+ b := []string {"x" , "y" , "z" }
265+
266+ sm := NewMatcher (a , b )
267+
268+ // Get initial blocks
269+ blocks1 := sm .GetMatchingBlocks ()
270+
271+ // Set the same sequences again using SetSeqs
272+ // Since we pass the same slice pointers, the caches should NOT be reset
273+ // (the implementation checks pointer equality for early return)
274+ sm .SetSeq1 (a )
275+ sm .SetSeq2 (b )
276+
277+ // Blocks should remain cached (not nil) after setting the same sequences
278+ // so GetMatchingBlocks returns the cached result
279+ blocks2 := sm .GetMatchingBlocks ()
280+ assertEqual (t , blocks1 , blocks2 )
281+ }
282+
283+ // TestSequenceMatcherWithIsJunk tests the junk filtering functionality.
284+ func TestSequenceMatcherWithIsJunk (t * testing.T ) {
285+ // Test with a simple IsJunk function that marks whitespace as junk
286+ a := []string {"a" , " " , "b" , " " , "c" }
287+ b := []string {"a" , "b" , "c" }
288+
289+ sm := NewMatcher (nil , nil )
290+ sm .IsJunk = func (s string ) bool {
291+ return s == " "
292+ }
293+ sm .SetSeqs (a , b )
294+
295+ // The matcher should still find matches but handle junk elements
296+ blocks := sm .GetMatchingBlocks ()
297+ if len (blocks ) == 0 {
298+ t .Error ("expected some matching blocks with junk filter" )
299+ }
300+ }
301+
302+ // TestAutoJunkWithLargeSequence tests the autoJunk feature with sequences >= 200 elements.
303+ func TestAutoJunkWithLargeSequence (t * testing.T ) {
304+ // Create a sequence with more than 200 elements where one element appears
305+ // more than 1% of the time (which makes it "popular" and gets filtered)
306+ a := make ([]string , 250 )
307+ b := make ([]string , 250 )
308+
309+ // Fill with unique elements
310+ for i := range 250 {
311+ a [i ] = fmt .Sprintf ("a%d" , i )
312+ b [i ] = fmt .Sprintf ("a%d" , i )
313+ }
314+
315+ // Make element "common" appear more than 1% (3+ times out of 250)
316+ for i := range 10 {
317+ b [i ] = "common"
318+ }
319+
320+ sm := NewMatcher (a , b )
321+ // The popular element "common" should be filtered
322+ if len (sm .bPopular ) == 0 {
323+ t .Log ("bPopular might be empty if 'common' doesn't exceed threshold, which is expected" )
324+ }
325+
326+ // The matcher should still work
327+ blocks := sm .GetMatchingBlocks ()
328+ if blocks == nil {
329+ t .Error ("expected matching blocks" )
330+ }
331+ }
332+
333+ // TestFindLongestMatchWithJunk tests finding longest match with junk elements.
334+ func TestFindLongestMatchWithJunk (t * testing.T ) {
335+ // Create sequences where junk elements are adjacent to interesting matches
336+ a := []string {"x" , "a" , "b" , "c" , "y" }
337+ b := []string {"a" , "b" , "c" }
338+
339+ sm := NewMatcher (nil , nil )
340+ // Mark x and y as junk
341+ sm .IsJunk = func (s string ) bool {
342+ return s == "x" || s == "y"
343+ }
344+ sm .SetSeqs (a , b )
345+
346+ blocks := sm .GetMatchingBlocks ()
347+ // Should find the "a", "b", "c" match
348+ found := false
349+ for _ , block := range blocks {
350+ if block .Size == 3 {
351+ found = true
352+ break
353+ }
354+ }
355+ if ! found {
356+ t .Error ("expected to find a match of size 3" )
357+ }
358+ }
359+
360+ // TestFindLongestMatchExtension tests the extension of matches past popular elements.
361+ func TestFindLongestMatchExtension (t * testing.T ) {
362+ // Test cases that exercise the match extension loops in findLongestMatch
363+ a := []string {"a" , "b" , "c" , "d" , "e" }
364+ b := []string {"x" , "b" , "c" , "d" , "y" }
365+
366+ sm := NewMatcher (a , b )
367+ blocks := sm .GetMatchingBlocks ()
368+
369+ // Should find the "b", "c", "d" match
370+ found := false
371+ for _ , block := range blocks {
372+ if block .Size >= 3 {
373+ found = true
374+ break
375+ }
376+ }
377+ if ! found {
378+ t .Error ("expected to find a match of size >= 3" )
379+ }
380+ }
381+
382+ // TestJunkFilteringInChainB tests the IsJunk function in chainB.
383+ func TestJunkFilteringInChainB (t * testing.T ) {
384+ // Create a matcher with junk filtering
385+ a := []string {"line1" , "junk" , "line2" , "junk" , "line3" }
386+ b := []string {"line1" , "junk" , "line2" , "junk" , "line3" , "junk" }
387+
388+ sm := NewMatcher (nil , nil )
389+ sm .IsJunk = func (s string ) bool {
390+ return s == "junk"
391+ }
392+ sm .SetSeqs (a , b )
393+
394+ // Verify junk is correctly identified
395+ if ! sm .isBJunk ("junk" ) {
396+ t .Error ("expected 'junk' to be identified as junk" )
397+ }
398+
399+ // Non-junk should not be identified as junk
400+ if sm .isBJunk ("line1" ) {
401+ t .Error ("expected 'line1' to not be junk" )
402+ }
403+
404+ // Should still be able to find matches
405+ blocks := sm .GetMatchingBlocks ()
406+ if len (blocks ) == 0 {
407+ t .Error ("expected some matching blocks" )
408+ }
409+ }
410+
411+ // TestMatchExtensionWithJunkOnBothSides tests junk matching extension.
412+ func TestMatchExtensionWithJunkOnBothSides (t * testing.T ) {
413+ // Create sequences where junk elements surround interesting matches
414+ // to exercise the junk extension loops in findLongestMatch
415+ a := []string {"junk1" , "junk2" , "a" , "b" , "c" , "junk3" , "junk4" }
416+ b := []string {"junk1" , "junk2" , "a" , "b" , "c" , "junk3" , "junk4" }
417+
418+ sm := NewMatcher (nil , nil )
419+ sm .IsJunk = func (s string ) bool {
420+ return strings .HasPrefix (s , "junk" )
421+ }
422+ sm .SetSeqs (a , b )
423+
424+ blocks := sm .GetMatchingBlocks ()
425+ // Should find matches including junk elements that are identical
426+ totalSize := 0
427+ for _ , block := range blocks {
428+ totalSize += block .Size
429+ }
430+ if totalSize < 3 {
431+ t .Errorf ("expected total match size >= 3, got %d" , totalSize )
432+ }
433+ }
434+
435+ // TestFindLongestMatchBreakCondition tests the j >= bhi break condition.
436+ func TestFindLongestMatchBreakCondition (t * testing.T ) {
437+ // Create sequences that will trigger the j >= bhi condition
438+ // This happens when b2j has indices that exceed the search range
439+ a := []string {"x" , "y" , "z" }
440+ b := []string {"a" , "b" , "x" , "y" , "z" }
441+
442+ sm := NewMatcher (a , b )
443+ blocks := sm .GetMatchingBlocks ()
444+
445+ // Should find the "x", "y", "z" match
446+ found := false
447+ for _ , block := range blocks {
448+ if block .Size == 3 {
449+ found = true
450+ break
451+ }
452+ }
453+ if ! found {
454+ t .Error ("expected to find a match of size 3" )
455+ }
456+ }
457+
458+ // TestAutoJunkPopularElements tests the autoJunk filtering of popular elements.
459+ func TestAutoJunkPopularElements (t * testing.T ) {
460+ // Create a sequence with > 200 elements where one element appears
461+ // more than 1% of the time
462+ n := 250
463+ a := make ([]string , n )
464+ b := make ([]string , n )
465+
466+ // Fill with mostly unique elements
467+ for i := range n {
468+ a [i ] = fmt .Sprintf ("line%d" , i )
469+ b [i ] = fmt .Sprintf ("line%d" , i )
470+ }
471+
472+ // Make "popular" appear more than 1% (more than 2-3 times)
473+ // We need it to appear > n/100 + 1 times = 3+ times
474+ for i := range 10 {
475+ b [i * 25 ] = "popular"
476+ }
477+
478+ sm := NewMatcher (a , b )
479+
480+ // The element "popular" should be filtered as popular
481+ if len (sm .bPopular ) == 0 {
482+ t .Log ("bPopular might be empty if threshold not exceeded" )
483+ }
484+
485+ // Matcher should still produce valid results
486+ blocks := sm .GetMatchingBlocks ()
487+ if blocks == nil {
488+ t .Error ("expected non-nil matching blocks" )
489+ }
490+ }
491+
492+ // TestFindLongestMatchWithJunkExtension tests the junk extension loops
493+ // at the end of findLongestMatch function.
494+ func TestFindLongestMatchWithJunkExtension (t * testing.T ) {
495+ // Create sequences where junk elements are adjacent to matches
496+ // This should trigger the junk extension loops
497+ a := []string {"junk" , "a" , "b" , "c" , "junk" }
498+ b := []string {"junk" , "a" , "b" , "c" , "junk" }
499+
500+ sm := NewMatcher (nil , nil )
501+ sm .IsJunk = func (s string ) bool {
502+ return s == "junk"
503+ }
504+ sm .SetSeqs (a , b )
505+
506+ blocks := sm .GetMatchingBlocks ()
507+ // Should find matches including junk extension
508+ totalSize := 0
509+ for _ , block := range blocks {
510+ totalSize += block .Size
511+ }
512+ // The non-junk elements (a, b, c) should definitely match.
513+ // Junk elements may or may not be included depending on extension behavior.
514+ if totalSize < 3 {
515+ t .Errorf ("expected total match size >= 3, got %d" , totalSize )
516+ }
517+ }
518+
519+ // TestFindLongestMatchEdgeCases tests edge cases in findLongestMatch.
520+ func TestFindLongestMatchEdgeCases (t * testing.T ) {
521+ // Test case where matches are found at the end of sequences
522+ a := []string {"unique1" , "unique2" , "match" }
523+ b := []string {"other1" , "other2" , "match" }
524+
525+ sm := NewMatcher (a , b )
526+ blocks := sm .GetMatchingBlocks ()
527+
528+ // Should find the "match" element
529+ found := false
530+ for _ , block := range blocks {
531+ if block .Size == 1 && block .A == 2 && block .B == 2 {
532+ found = true
533+ break
534+ }
535+ }
536+ if ! found {
537+ t .Error ("expected to find a match at the end" )
538+ }
539+ }
540+
541+ // TestMatcherWithBothSequencesSame tests the matcher with identical sequences.
542+ func TestMatcherWithBothSequencesSame (t * testing.T ) {
543+ a := []string {"line1" , "line2" , "line3" }
544+ b := []string {"line1" , "line2" , "line3" }
545+
546+ sm := NewMatcher (a , b )
547+ blocks := sm .GetMatchingBlocks ()
548+
549+ // Should find all lines match
550+ if len (blocks ) < 1 {
551+ t .Error ("expected at least one matching block" )
552+ }
553+
554+ // The last block is always a sentinel with size 0
555+ for _ , block := range blocks [:len (blocks )- 1 ] {
556+ if block .Size != 3 {
557+ t .Errorf ("expected matching block of size 3, got %d" , block .Size )
558+ }
559+ }
560+ }
561+
562+ // TestWriteUnifiedDiffWithDefaultEol tests that default EOL is applied.
563+ func TestWriteUnifiedDiffWithDefaultEol (t * testing.T ) {
564+ // Test that when Eol is empty, it defaults to "\n"
565+ diff := UnifiedDiff {
566+ A : splitChars ("abc" ),
567+ B : splitChars ("abd" ),
568+ FromFile : "file1" ,
569+ ToFile : "file2" ,
570+ // Eol not set - should default to "\n"
571+ }
572+ result , err := GetUnifiedDiffString (diff )
573+ if err != nil {
574+ t .Fatalf ("unexpected error: %v" , err )
575+ }
576+ if ! strings .Contains (result , "\n " ) {
577+ t .Error ("expected newlines in output" )
578+ }
579+ }
0 commit comments