|
| 1 | +import { describe, it, expect } from 'vitest'; |
| 2 | +import { tokenizeWords, computeWordDiff, getWordChanges } from '../../editor/word-diff'; |
| 3 | + |
// Unit tests for tokenizeWords.
//
// Every expected token has the shape { text, offset }, where `offset` is the
// index in the input string at which `text` begins. The expectations below
// treat a run of consecutive whitespace as a single token of its own, and
// keep punctuation attached to the neighbouring word.
describe('tokenizeWords', () => {
  it('should tokenize a basic sentence', () => {
    const tokens = tokenizeWords('The quick fox');
    expect(tokens).toEqual([
      { text: 'The', offset: 0 },
      { text: ' ', offset: 3 },
      { text: 'quick', offset: 4 },
      { text: ' ', offset: 9 },
      { text: 'fox', offset: 10 },
    ]);
  });

  it('should handle multiple spaces between words', () => {
    // The separator must be TWO spaces: it starts at offset 5 and the next
    // token starts at offset 7. A single-space input cannot satisfy both
    // offsets, so the input and the expected token text are kept in step.
    const tokens = tokenizeWords('hello  world');
    expect(tokens).toEqual([
      { text: 'hello', offset: 0 },
      { text: '  ', offset: 5 },
      { text: 'world', offset: 7 },
    ]);
  });

  it('should handle leading and trailing whitespace', () => {
    // The leading run is two spaces wide because 'hello' is expected at
    // offset 2. NOTE(review): the width of the trailing run is not pinned by
    // any offset; two spaces is assumed here to mirror the leading run.
    const tokens = tokenizeWords('  hello  ');
    expect(tokens).toEqual([
      { text: '  ', offset: 0 },
      { text: 'hello', offset: 2 },
      { text: '  ', offset: 7 },
    ]);
  });

  it('should return empty array for empty string', () => {
    expect(tokenizeWords('')).toEqual([]);
  });

  it('should handle a single word', () => {
    expect(tokenizeWords('hello')).toEqual([{ text: 'hello', offset: 0 }]);
  });

  it('should handle punctuation attached to words', () => {
    // The comma and exclamation mark are expected to stay inside the word
    // tokens rather than being split out.
    const tokens = tokenizeWords('Hello, world!');
    expect(tokens).toEqual([
      { text: 'Hello,', offset: 0 },
      { text: ' ', offset: 6 },
      { text: 'world!', offset: 7 },
    ]);
  });
});
| 51 | + |
// Unit tests for the word-level diff helpers (computeWordDiff and
// getWordChanges).
//
// In the expected operations, `oldFrom`/`oldTo` are character positions in
// the OLD text with an exclusive end (e.g. 'quick' in 'The quick fox' is
// 4..9), and `insertAt` is the position at which new text is added.
describe('computeWordDiff', () => {
  // Builds the expected shape of a 'replace' operation.
  const replaceOp = (oldFrom, oldTo, newText) => ({ type: 'replace', oldFrom, oldTo, newText });

  it('should return empty array for identical strings', () => {
    const unchanged = 'hello world';
    expect(computeWordDiff(unchanged, unchanged)).toEqual([]);
  });

  it('should detect a single word replacement', () => {
    const result = getWordChanges('The quick fox', 'The fast fox');
    expect(result).toEqual([replaceOp(4, 9, 'fast')]);
  });

  it('should detect multiple word replacements', () => {
    const before = 'The quick brown fox jumps over the lazy dog';
    const after = 'The fast brown fox leaps over the lazy cat';
    const expected = [
      replaceOp(4, 9, 'fast'),
      replaceOp(20, 25, 'leaps'),
      replaceOp(40, 43, 'cat'),
    ];
    expect(getWordChanges(before, after)).toEqual(expected);
  });

  it('should detect word insertion', () => {
    const result = getWordChanges('The fox', 'The quick fox');
    expect(result).toHaveLength(1);
    const insertion = result[0];
    expect(insertion.type).toBe('insert');
    // The inserted text is the new word together with the space after it.
    expect(insertion).toHaveProperty('newText', 'quick ');
  });

  it('should detect word deletion', () => {
    const result = getWordChanges('The quick fox', 'The fox');
    expect(result).toHaveLength(1);
    const deletion = result[0];
    expect(deletion.type).toBe('delete');
    // The removed span 4..10 covers the word and the space that follows it.
    expect(deletion).toHaveProperty('oldFrom', 4);
    expect(deletion).toHaveProperty('oldTo', 10);
  });

  it('should handle complete rewrite', () => {
    const result = getWordChanges('hello world', 'goodbye earth');
    // At least one change is required, and none may be anything other than
    // a replacement.
    expect(result.length).toBeGreaterThanOrEqual(1);
    const hasNonReplace = result.some((op) => op.type !== 'replace');
    expect(hasNonReplace).toBe(false);
  });

  it('should handle empty old text', () => {
    const expected = [{ type: 'insert', insertAt: 0, newText: 'hello' }];
    expect(computeWordDiff('', 'hello')).toEqual(expected);
  });

  it('should handle empty new text', () => {
    const expected = [{ type: 'delete', oldFrom: 0, oldTo: 5 }];
    expect(computeWordDiff('hello', '')).toEqual(expected);
  });

  it('should handle both empty', () => {
    const result = computeWordDiff('', '');
    expect(result).toEqual([]);
  });

  it('should preserve whitespace tokens as equal', () => {
    // Drop any 'equal' operations so that only real changes are compared.
    const nonEqualOps = computeWordDiff('a b c', 'a x c').filter(({ type }) => type !== 'equal');
    expect(nonEqualOps).toEqual([replaceOp(2, 3, 'x')]);
  });

  it('should handle sentence with punctuation changes', () => {
    const before = 'The company shall provide services.';
    const after = 'The company must provide services.';
    expect(getWordChanges(before, after)).toEqual([replaceOp(12, 17, 'must')]);
  });
});
0 commit comments