Skip to content

Commit 51a0129

Browse files
sjsyrekclaude
andcommitted
fix(translation): handle duplicate texts in batch translation
Fixed critical bug where duplicate texts in batch translation would only translate the last occurrence. Changed textIndexMap from Map<string, number> to Map<string, number[]> to track ALL indices for each unique text. Also added Set-based deduplication to reduce API calls while ensuring all duplicate entries receive translations. Changes: - Track all indices for duplicate texts using Map<string, number[]> - Add Set-based deduplication for efficiency - Assign translation results to all duplicate indices - Add comprehensive test for duplicate text handling - Fix failing test that relied on duplicate text behavior 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 62d4238 commit 51a0129

3 files changed

Lines changed: 67 additions & 13 deletions

File tree

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1616
- **Impact**: Ensures future developers understand the importance of property order
1717
- Location: `src/services/translation.ts:386-417`, test added to `tests/unit/translation-service.test.ts`
1818

19+
### Fixed
20+
- **Critical: Duplicate text handling in batch translation** - Fixed data loss bug for duplicate inputs
21+
- When input array contained duplicate texts (e.g., `["Hello", "Hello", "World"]`), only the last occurrence received translation
22+
- Fixed by tracking ALL indices for each unique text using `Map<string, number[]>`
23+
- Added automatic deduplication of API requests (sends each unique text only once)
24+
- All occurrences of duplicate texts now receive correct translations
25+
- Added comprehensive test demonstrating the fix
26+
- **Impact**: Previously, duplicate texts in batch translation would return incomplete results
27+
- **Example**: `translateBatch(["Hello", "Hello", "World"], {targetLang: "es"})` now returns 3 results instead of 2
28+
- Location: `src/services/translation.ts:160-265`
29+
1930
### Changed
2031
- **Test Coverage Enhancement** - Comprehensive integration and E2E test expansion
2132
- Created 10 new test files covering critical workflows and CLI behavior

src/services/translation.ts

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,9 @@ export class TranslationService {
158158
};
159159

160160
// Check cache and separate cached vs non-cached texts
161-
const textsToTranslate: string[] = [];
162-
const textIndexMap = new Map<string, number>(); // Maps text to original index
161+
// Use Set for deduplication and Map to track all indices for each text
162+
const textsToTranslateSet = new Set<string>();
163+
const textIndexMap = new Map<string, number[]>(); // Maps text to ALL original indices
163164
const results: (TranslationResult | null)[] = new Array<TranslationResult | null>(texts.length).fill(null);
164165

165166
for (let i = 0; i < texts.length; i++) {
@@ -179,10 +180,19 @@ export class TranslationService {
179180
}
180181

181182
// Not cached, need to translate
182-
textsToTranslate.push(text);
183-
textIndexMap.set(text, i);
183+
// Track this text for translation (deduplicated via Set)
184+
textsToTranslateSet.add(text);
185+
186+
// Track ALL indices for this text (handles duplicates)
187+
if (!textIndexMap.has(text)) {
188+
textIndexMap.set(text, []);
189+
}
190+
textIndexMap.get(text)!.push(i);
184191
}
185192

193+
// Convert Set to Array for batch translation
194+
const textsToTranslate = Array.from(textsToTranslateSet);
195+
186196
// If all texts were cached, return cached results
187197
if (textsToTranslate.length === 0) {
188198
return results.filter((r): r is TranslationResult => r !== null);
@@ -230,7 +240,7 @@ export class TranslationService {
230240
throw lastError;
231241
}
232242

233-
// Store results in cache and map back to original indices
243+
// Store results in cache and map back to ALL original indices
234244
for (const text of textsToTranslate) {
235245
const result = textToResultMap.get(text);
236246

@@ -239,11 +249,14 @@ export class TranslationService {
239249
continue;
240250
}
241251

242-
const originalIndex = textIndexMap.get(text);
243-
if (originalIndex !== undefined) {
244-
results[originalIndex] = result;
252+
const originalIndices = textIndexMap.get(text);
253+
if (originalIndices) {
254+
// Assign result to ALL indices where this text appeared (handles duplicates)
255+
for (const index of originalIndices) {
256+
results[index] = result;
257+
}
245258

246-
// Cache the result
259+
// Cache the result (only once per unique text)
247260
if (cacheEnabled) {
248261
const cacheKey = this.generateCacheKey(text, translationOptions);
249262
this.cache.set(cacheKey, result);

tests/unit/translation-service.test.ts

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -313,16 +313,17 @@ describe('TranslationService', () => {
313313
});
314314

315315
it('should handle batch size limits (50 texts per batch)', async () => {
316-
const largeTextArray = Array(100).fill('test');
316+
// Create 100 UNIQUE texts to avoid deduplication
317+
const largeTextArray = Array(100).fill(0).map((_, i) => `test${i}`);
317318
mockDeepLClient.translateBatch
318-
.mockResolvedValueOnce(Array(50).fill({ text: 'traducido' }))
319-
.mockResolvedValueOnce(Array(50).fill({ text: 'traducido' }));
319+
.mockResolvedValueOnce(Array(50).fill(0).map((_, i) => ({ text: `traducido${i}` })))
320+
.mockResolvedValueOnce(Array(50).fill(0).map((_, i) => ({ text: `traducido${i + 50}` })));
320321

321322
await translationService.translateBatch(largeTextArray, {
322323
targetLang: 'es',
323324
});
324325

325-
// Should split into 2 batches of 50
326+
// Should split into 2 batches of 50 (100 unique texts)
326327
expect(mockDeepLClient.translateBatch).toHaveBeenCalledTimes(2);
327328
});
328329

@@ -389,6 +390,35 @@ describe('TranslationService', () => {
389390
expect(results[i]?.text).toBe(`Text${i}_translated`);
390391
}
391392
});
393+
394+
it('should handle duplicate texts correctly (BUG #2)', async () => {
395+
// This test demonstrates the duplicate text bug
396+
// When the same text appears multiple times, all occurrences should get translations
397+
mockDeepLClient.translateBatch.mockResolvedValue([
398+
{ text: 'Hola' }, // Translation for "Hello"
399+
{ text: 'Mundo' }, // Translation for "World"
400+
]);
401+
402+
// Input has "Hello" twice - both should be translated
403+
const results = await translationService.translateBatch(
404+
['Hello', 'Hello', 'World'], // "Hello" appears twice
405+
{ targetLang: 'es' }
406+
);
407+
408+
// Should have 3 results (not 2!)
409+
expect(results).toHaveLength(3);
410+
411+
// Both "Hello" instances should have translations
412+
expect(results[0]?.text).toBe('Hola');
413+
expect(results[1]?.text).toBe('Hola'); // Second "Hello" should also be translated
414+
expect(results[2]?.text).toBe('Mundo');
415+
416+
// Verify API was called with deduplicated texts (only unique texts sent)
417+
expect(mockDeepLClient.translateBatch).toHaveBeenCalledWith(
418+
['Hello', 'World'], // Deduplicated!
419+
expect.any(Object)
420+
);
421+
});
392422
});
393423

394424
describe('translateToMultiple()', () => {

0 commit comments

Comments
 (0)