Skip to content

Commit 6ed8e73

Browse files
Copilot and fredbi authored
Add unit tests for internal/difflib package (#52)
* Initial plan * Add unit tests to improve test coverage in internal/difflib package Co-authored-by: fredbi <14262513+fredbi@users.noreply.github.com> * Address code review feedback for difflib tests Co-authored-by: fredbi <14262513+fredbi@users.noreply.github.com> * Fix CI linting: use range-based for loops Co-authored-by: fredbi <14262513+fredbi@users.noreply.github.com> * Fix loop iteration in difflib_test.go addressed linting issue Signed-off-by: fredbi <fredbi@yahoo.com> --------- Signed-off-by: fredbi <fredbi@yahoo.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: fredbi <14262513+fredbi@users.noreply.github.com> Co-authored-by: fredbi <fredbi@yahoo.com>
1 parent b33472c commit 6ed8e73

File tree

4 files changed

+471
-0
lines changed

4 files changed

+471
-0
lines changed

internal/difflib/difflib_test.go

Lines changed: 340 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,3 +237,343 @@ func splitChars(s string) []string {
237237

238238
return chars
239239
}
240+
241+
// TestSequenceMatcherCaching tests that GetMatchingBlocks and GetOpCodes
242+
// return cached results when called multiple times.
243+
func TestSequenceMatcherCaching(t *testing.T) {
244+
a := splitChars("abc")
245+
b := splitChars("abd")
246+
247+
sm := NewMatcher(a, b)
248+
249+
// Call GetMatchingBlocks twice - second call should use cache
250+
blocks1 := sm.GetMatchingBlocks()
251+
blocks2 := sm.GetMatchingBlocks()
252+
assertEqual(t, blocks1, blocks2)
253+
254+
// Call GetOpCodes twice - second call should use cache
255+
codes1 := sm.GetOpCodes()
256+
codes2 := sm.GetOpCodes()
257+
assertEqual(t, codes1, codes2)
258+
}
259+
260+
// TestSetSeqSamePointer tests that SetSeq1 and SetSeq2 do NOT reset caches
261+
// when the same slice pointer is passed (early return optimization).
262+
func TestSetSeqSamePointer(t *testing.T) {
263+
a := []string{"a", "b", "c"}
264+
b := []string{"x", "y", "z"}
265+
266+
sm := NewMatcher(a, b)
267+
268+
// Get initial blocks
269+
blocks1 := sm.GetMatchingBlocks()
270+
271+
// Set the same sequences again using SetSeqs
272+
// Since we pass the same slice pointers, the caches should NOT be reset
273+
// (the implementation checks pointer equality for early return)
274+
sm.SetSeq1(a)
275+
sm.SetSeq2(b)
276+
277+
// Blocks should remain cached (not nil) after setting the same sequences
278+
// so GetMatchingBlocks returns the cached result
279+
blocks2 := sm.GetMatchingBlocks()
280+
assertEqual(t, blocks1, blocks2)
281+
}
282+
283+
// TestSequenceMatcherWithIsJunk tests the junk filtering functionality.
284+
func TestSequenceMatcherWithIsJunk(t *testing.T) {
285+
// Test with a simple IsJunk function that marks whitespace as junk
286+
a := []string{"a", " ", "b", " ", "c"}
287+
b := []string{"a", "b", "c"}
288+
289+
sm := NewMatcher(nil, nil)
290+
sm.IsJunk = func(s string) bool {
291+
return s == " "
292+
}
293+
sm.SetSeqs(a, b)
294+
295+
// The matcher should still find matches but handle junk elements
296+
blocks := sm.GetMatchingBlocks()
297+
if len(blocks) == 0 {
298+
t.Error("expected some matching blocks with junk filter")
299+
}
300+
}
301+
302+
// TestAutoJunkWithLargeSequence tests the autoJunk feature with sequences >= 200 elements.
303+
func TestAutoJunkWithLargeSequence(t *testing.T) {
304+
// Create a sequence with more than 200 elements where one element appears
305+
// more than 1% of the time (which makes it "popular" and gets filtered)
306+
a := make([]string, 250)
307+
b := make([]string, 250)
308+
309+
// Fill with unique elements
310+
for i := range 250 {
311+
a[i] = fmt.Sprintf("a%d", i)
312+
b[i] = fmt.Sprintf("a%d", i)
313+
}
314+
315+
// Make element "common" appear more than 1% (3+ times out of 250)
316+
for i := range 10 {
317+
b[i] = "common"
318+
}
319+
320+
sm := NewMatcher(a, b)
321+
// The popular element "common" should be filtered
322+
if len(sm.bPopular) == 0 {
323+
t.Log("bPopular might be empty if 'common' doesn't exceed threshold, which is expected")
324+
}
325+
326+
// The matcher should still work
327+
blocks := sm.GetMatchingBlocks()
328+
if blocks == nil {
329+
t.Error("expected matching blocks")
330+
}
331+
}
332+
333+
// TestFindLongestMatchWithJunk tests finding longest match with junk elements.
334+
func TestFindLongestMatchWithJunk(t *testing.T) {
335+
// Create sequences where junk elements are adjacent to interesting matches
336+
a := []string{"x", "a", "b", "c", "y"}
337+
b := []string{"a", "b", "c"}
338+
339+
sm := NewMatcher(nil, nil)
340+
// Mark x and y as junk
341+
sm.IsJunk = func(s string) bool {
342+
return s == "x" || s == "y"
343+
}
344+
sm.SetSeqs(a, b)
345+
346+
blocks := sm.GetMatchingBlocks()
347+
// Should find the "a", "b", "c" match
348+
found := false
349+
for _, block := range blocks {
350+
if block.Size == 3 {
351+
found = true
352+
break
353+
}
354+
}
355+
if !found {
356+
t.Error("expected to find a match of size 3")
357+
}
358+
}
359+
360+
// TestFindLongestMatchExtension tests the extension of matches past popular elements.
361+
func TestFindLongestMatchExtension(t *testing.T) {
362+
// Test cases that exercise the match extension loops in findLongestMatch
363+
a := []string{"a", "b", "c", "d", "e"}
364+
b := []string{"x", "b", "c", "d", "y"}
365+
366+
sm := NewMatcher(a, b)
367+
blocks := sm.GetMatchingBlocks()
368+
369+
// Should find the "b", "c", "d" match
370+
found := false
371+
for _, block := range blocks {
372+
if block.Size >= 3 {
373+
found = true
374+
break
375+
}
376+
}
377+
if !found {
378+
t.Error("expected to find a match of size >= 3")
379+
}
380+
}
381+
382+
// TestJunkFilteringInChainB tests the IsJunk function in chainB.
383+
func TestJunkFilteringInChainB(t *testing.T) {
384+
// Create a matcher with junk filtering
385+
a := []string{"line1", "junk", "line2", "junk", "line3"}
386+
b := []string{"line1", "junk", "line2", "junk", "line3", "junk"}
387+
388+
sm := NewMatcher(nil, nil)
389+
sm.IsJunk = func(s string) bool {
390+
return s == "junk"
391+
}
392+
sm.SetSeqs(a, b)
393+
394+
// Verify junk is correctly identified
395+
if !sm.isBJunk("junk") {
396+
t.Error("expected 'junk' to be identified as junk")
397+
}
398+
399+
// Non-junk should not be identified as junk
400+
if sm.isBJunk("line1") {
401+
t.Error("expected 'line1' to not be junk")
402+
}
403+
404+
// Should still be able to find matches
405+
blocks := sm.GetMatchingBlocks()
406+
if len(blocks) == 0 {
407+
t.Error("expected some matching blocks")
408+
}
409+
}
410+
411+
// TestMatchExtensionWithJunkOnBothSides tests junk matching extension.
412+
func TestMatchExtensionWithJunkOnBothSides(t *testing.T) {
413+
// Create sequences where junk elements surround interesting matches
414+
// to exercise the junk extension loops in findLongestMatch
415+
a := []string{"junk1", "junk2", "a", "b", "c", "junk3", "junk4"}
416+
b := []string{"junk1", "junk2", "a", "b", "c", "junk3", "junk4"}
417+
418+
sm := NewMatcher(nil, nil)
419+
sm.IsJunk = func(s string) bool {
420+
return strings.HasPrefix(s, "junk")
421+
}
422+
sm.SetSeqs(a, b)
423+
424+
blocks := sm.GetMatchingBlocks()
425+
// Should find matches including junk elements that are identical
426+
totalSize := 0
427+
for _, block := range blocks {
428+
totalSize += block.Size
429+
}
430+
if totalSize < 3 {
431+
t.Errorf("expected total match size >= 3, got %d", totalSize)
432+
}
433+
}
434+
435+
// TestFindLongestMatchBreakCondition tests the j >= bhi break condition.
436+
func TestFindLongestMatchBreakCondition(t *testing.T) {
437+
// Create sequences that will trigger the j >= bhi condition
438+
// This happens when b2j has indices that exceed the search range
439+
a := []string{"x", "y", "z"}
440+
b := []string{"a", "b", "x", "y", "z"}
441+
442+
sm := NewMatcher(a, b)
443+
blocks := sm.GetMatchingBlocks()
444+
445+
// Should find the "x", "y", "z" match
446+
found := false
447+
for _, block := range blocks {
448+
if block.Size == 3 {
449+
found = true
450+
break
451+
}
452+
}
453+
if !found {
454+
t.Error("expected to find a match of size 3")
455+
}
456+
}
457+
458+
// TestAutoJunkPopularElements tests the autoJunk filtering of popular elements.
459+
func TestAutoJunkPopularElements(t *testing.T) {
460+
// Create a sequence with > 200 elements where one element appears
461+
// more than 1% of the time
462+
n := 250
463+
a := make([]string, n)
464+
b := make([]string, n)
465+
466+
// Fill with mostly unique elements
467+
for i := range n {
468+
a[i] = fmt.Sprintf("line%d", i)
469+
b[i] = fmt.Sprintf("line%d", i)
470+
}
471+
472+
// Make "popular" appear more than 1% (more than 2-3 times)
473+
// We need it to appear > n/100 + 1 times = 3+ times
474+
for i := range 10 {
475+
b[i*25] = "popular"
476+
}
477+
478+
sm := NewMatcher(a, b)
479+
480+
// The element "popular" should be filtered as popular
481+
if len(sm.bPopular) == 0 {
482+
t.Log("bPopular might be empty if threshold not exceeded")
483+
}
484+
485+
// Matcher should still produce valid results
486+
blocks := sm.GetMatchingBlocks()
487+
if blocks == nil {
488+
t.Error("expected non-nil matching blocks")
489+
}
490+
}
491+
492+
// TestFindLongestMatchWithJunkExtension tests the junk extension loops
493+
// at the end of findLongestMatch function.
494+
func TestFindLongestMatchWithJunkExtension(t *testing.T) {
495+
// Create sequences where junk elements are adjacent to matches
496+
// This should trigger the junk extension loops
497+
a := []string{"junk", "a", "b", "c", "junk"}
498+
b := []string{"junk", "a", "b", "c", "junk"}
499+
500+
sm := NewMatcher(nil, nil)
501+
sm.IsJunk = func(s string) bool {
502+
return s == "junk"
503+
}
504+
sm.SetSeqs(a, b)
505+
506+
blocks := sm.GetMatchingBlocks()
507+
// Should find matches including junk extension
508+
totalSize := 0
509+
for _, block := range blocks {
510+
totalSize += block.Size
511+
}
512+
// The non-junk elements (a, b, c) should definitely match.
513+
// Junk elements may or may not be included depending on extension behavior.
514+
if totalSize < 3 {
515+
t.Errorf("expected total match size >= 3, got %d", totalSize)
516+
}
517+
}
518+
519+
// TestFindLongestMatchEdgeCases tests edge cases in findLongestMatch.
520+
func TestFindLongestMatchEdgeCases(t *testing.T) {
521+
// Test case where matches are found at the end of sequences
522+
a := []string{"unique1", "unique2", "match"}
523+
b := []string{"other1", "other2", "match"}
524+
525+
sm := NewMatcher(a, b)
526+
blocks := sm.GetMatchingBlocks()
527+
528+
// Should find the "match" element
529+
found := false
530+
for _, block := range blocks {
531+
if block.Size == 1 && block.A == 2 && block.B == 2 {
532+
found = true
533+
break
534+
}
535+
}
536+
if !found {
537+
t.Error("expected to find a match at the end")
538+
}
539+
}
540+
541+
// TestMatcherWithBothSequencesSame tests the matcher with identical sequences.
542+
func TestMatcherWithBothSequencesSame(t *testing.T) {
543+
a := []string{"line1", "line2", "line3"}
544+
b := []string{"line1", "line2", "line3"}
545+
546+
sm := NewMatcher(a, b)
547+
blocks := sm.GetMatchingBlocks()
548+
549+
// Should find all lines match
550+
if len(blocks) < 1 {
551+
t.Error("expected at least one matching block")
552+
}
553+
554+
// The last block is always a sentinel with size 0
555+
for _, block := range blocks[:len(blocks)-1] {
556+
if block.Size != 3 {
557+
t.Errorf("expected matching block of size 3, got %d", block.Size)
558+
}
559+
}
560+
}
561+
562+
// TestWriteUnifiedDiffWithDefaultEol tests that default EOL is applied.
563+
func TestWriteUnifiedDiffWithDefaultEol(t *testing.T) {
564+
// Test that when Eol is empty, it defaults to "\n"
565+
diff := UnifiedDiff{
566+
A: splitChars("abc"),
567+
B: splitChars("abd"),
568+
FromFile: "file1",
569+
ToFile: "file2",
570+
// Eol not set - should default to "\n"
571+
}
572+
result, err := GetUnifiedDiffString(diff)
573+
if err != nil {
574+
t.Fatalf("unexpected error: %v", err)
575+
}
576+
if !strings.Contains(result, "\n") {
577+
t.Error("expected newlines in output")
578+
}
579+
}

0 commit comments

Comments
 (0)