From 49d0dba3497e643fed6e2696b6cca0b14524e3c3 Mon Sep 17 00:00:00 2001 From: Jens Becker Date: Sat, 6 Jun 2026 22:28:04 +0200 Subject: [PATCH 1/5] feat(content-translator): add incremental richText translation --- content-translator/CHANGELOG.md | 1 + content-translator/README.md | 21 ++ content-translator/dev/src/seed.ts | 54 ++++- content-translator/package.json | 3 + content-translator/pnpm-lock.yaml | 9 + .../TranslatorModal/TranslatorModal.tsx | 13 +- .../Translator/TranslatorProvider.tsx | 17 +- .../client/providers/Translator/context.ts | 6 +- content-translator/src/i18n/translations.ts | 4 + content-translator/src/translate/endpoint.ts | 4 +- content-translator/src/translate/operation.ts | 17 +- .../src/translate/richtext/hashNode.ts | Bin 0 -> 964 bytes .../src/translate/richtext/nodeState.ts | 52 +++++ .../richtext/reconcileIncremental.ts | 109 ++++++++++ .../src/translate/traverseFields.ts | 108 ++++++++-- .../src/translate/traverseRichText.ts | 14 +- content-translator/src/translate/types.ts | 26 ++- .../test/incrementalRichText.test.ts | 203 ++++++++++++++++++ .../test/traverseFields.test.ts | 4 +- 19 files changed, 622 insertions(+), 43 deletions(-) create mode 100644 content-translator/src/translate/richtext/hashNode.ts create mode 100644 content-translator/src/translate/richtext/nodeState.ts create mode 100644 content-translator/src/translate/richtext/reconcileIncremental.ts create mode 100644 content-translator/test/incrementalRichText.test.ts diff --git a/content-translator/CHANGELOG.md b/content-translator/CHANGELOG.md index 3b8c3f19..0b26d5b2 100644 --- a/content-translator/CHANGELOG.md +++ b/content-translator/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- feat: add incremental richText translation ("Translate new & changed content") that translates only new or changed paragraphs, preserves existing translations and manual edits, and reports how many paragraphs need review when a source paragraph changed under a hand-edited translation - fix: 
skip fields and tabs named `__proto__`, `constructor`, or `prototype` during traversal to avoid prototype-polluting writes when a user-supplied Payload config contains such a name ## 0.2.0 diff --git a/content-translator/README.md b/content-translator/README.md index f42dbca5..c1bba4eb 100644 --- a/content-translator/README.md +++ b/content-translator/README.md @@ -42,6 +42,27 @@ export default buildConfig({ }) ``` +## Translation modes + +The translator modal offers three actions: + +- **Translate all fields** — retranslates every field, discarding existing target content. +- **Translate new & changed content** — incremental mode (see below). +- **Translate only empty fields** — fills target fields that have no value yet, leaving the rest untouched. + +### Incremental mode + +Incremental mode translates only what actually changed and preserves existing translations, which matters most for `richText`. For a lexical field it diffs the source against the existing translation at the **paragraph / block level**: + +- a paragraph whose source text is unchanged keeps its current translation (including any manual edits) and is not retranslated; +- a new or edited source paragraph is translated and placed in source order, so inserts and reorders land in the right position; +- a paragraph removed from the source is removed from the translation; +- if a source paragraph changed **and** its translation had been hand-edited, the human's version is left in place and counted — the success toast reports how many paragraphs need review, so machine accuracy never silently overwrites manual work. + +Other field types behave like "translate only empty fields" in incremental mode. 
+ +Paragraph identity is content-addressed: a hash of the source text and a hash of the machine output are stored inline on the translated node using Lexical's [NodeState](https://lexical.dev/docs/concepts/node-state) slot (`$`), under a single namespaced key — `"$": { "translator-plugin": { "srcHash": …, "outHash": … } }`. These pass through Payload saves and admin-editor edits untouched (covered by a regression test). Because identity comes from content rather than position, the diff survives inserts, deletes and reorders. The first incremental run on a field translated by an older version (no stored hashes) retranslates it once and then stamps the hashes; subsequent runs are incremental. If a future lexical/Payload release ever stopped preserving the `$` slot, the same merge can fall back to a sidecar field keyed by field path — the algorithm is identical, only the read/write of the hash changes. + ## Configuration ### Plugin Options diff --git a/content-translator/dev/src/seed.ts b/content-translator/dev/src/seed.ts index 85eec5ad..99deb930 100644 --- a/content-translator/dev/src/seed.ts +++ b/content-translator/dev/src/seed.ts @@ -8,10 +8,33 @@ interface AuthorSeedData { } interface PageSeedData { + content?: string[] slug: string title: string } +/** Build a minimal lexical richText value from plain-text paragraphs. */ +const lexical = (paragraphs: string[]) => ({ + root: { + type: 'root', + children: paragraphs.map((text) => ({ + type: 'paragraph', + children: [ + { type: 'text', detail: 0, format: 0, mode: 'normal', style: '', text, version: 1 }, + ], + direction: 'ltr', + format: '', + indent: 0, + textFormat: 0, + version: 1, + })), + direction: 'ltr', + format: '', + indent: 0, + version: 1, + }, +}) + interface PostSeedData { slug: string title: string @@ -72,6 +95,18 @@ export const seed = async (payload: Payload) => { const pages: PageSeedData[] = [ { + // Source content for trying incremental richText translation. 
To see each + // case of the classification table: + // 1. open this page, switch to the German locale, "Translate all fields" + // 2. switch back to English, add / edit / reorder / delete a paragraph + // 3. switch to German, "Translate new & changed content" — only the + // changed paragraph is translated; the rest (incl. any manual edits + // you made to the German text) are preserved + content: [ + 'Welcome to our company. We build software that helps teams move faster.', + 'Our mission is to make complex workflows feel simple and reliable.', + 'Get in touch to learn how we can help your organisation.', + ], slug: 'home', title: 'Welcome to Our Website', }, @@ -90,15 +125,30 @@ export const seed = async (payload: Payload) => { ] for (const pageData of pages) { - const { totalDocs: existingPage } = await payload.count({ + const { docs } = await payload.find({ collection: 'pages' as CollectionSlug, + depth: 0, + limit: 1, where: { slug: { equals: pageData.slug } }, }) + const existingPage = docs[0] as { content?: unknown; id: number | string } | undefined + if (!existingPage) { await payload.create({ collection: 'pages' as CollectionSlug, - data: pageData, + data: { + slug: pageData.slug, + title: pageData.title, + ...(pageData.content ? { content: lexical(pageData.content) } : {}), + } as Record, + }) + } else if (pageData.content && !existingPage.content) { + // Backfill demo content onto a page seeded before it had any. 
+ await payload.update({ + collection: 'pages' as CollectionSlug, + id: existingPage.id, + data: { content: lexical(pageData.content) } as Record, }) } } diff --git a/content-translator/package.json b/content-translator/package.json index 20ecc3d4..08bd88cd 100644 --- a/content-translator/package.json +++ b/content-translator/package.json @@ -43,7 +43,9 @@ "react-dom": "19.2.6" }, "devDependencies": { + "@lexical/headless": "^0.41.0", "@payloadcms/eslint-config": "^3.28.0", + "@payloadcms/richtext-lexical": "^3.84.1", "@swc/cli": "^0.8.1", "@swc/core": "^1.15.33", "@types/he": "^1.2.3", @@ -51,6 +53,7 @@ "@types/react-dom": "^19.2.3", "copyfiles": "^2.4.1", "eslint": "^9.39.4", + "lexical": "^0.41.0", "prettier": "^3.8.3", "rimraf": "^6.1.3", "tsx": "^4.21.0", diff --git a/content-translator/pnpm-lock.yaml b/content-translator/pnpm-lock.yaml index 9bb37a66..e7ec7894 100644 --- a/content-translator/pnpm-lock.yaml +++ b/content-translator/pnpm-lock.yaml @@ -54,9 +54,15 @@ importers: specifier: 19.2.6 version: 19.2.6(react@19.2.6) devDependencies: + '@lexical/headless': + specifier: ^0.41.0 + version: 0.41.0 '@payloadcms/eslint-config': specifier: ^3.28.0 version: 3.28.0(@typescript-eslint/eslint-plugin@8.59.3(@typescript-eslint/parser@8.59.3(eslint@9.39.4)(typescript@6.0.3))(eslint@9.39.4)(typescript@6.0.3))(ts-api-utils@2.5.0(typescript@6.0.3)) + '@payloadcms/richtext-lexical': + specifier: ^3.84.1 + version: 
3.84.1(@faceless-ui/modal@3.0.0(react-dom@19.2.6(react@19.2.6))(react@19.2.6))(@faceless-ui/scroll-info@2.0.0(react-dom@19.2.6(react@19.2.6))(react@19.2.6))(@payloadcms/next@3.84.1(@types/react@19.2.14)(graphql@16.12.0)(monaco-editor@0.55.1)(next@16.2.6(react-dom@19.2.6(react@19.2.6))(react@19.2.6)(sass@1.77.4))(payload@3.84.1(graphql@16.12.0)(typescript@6.0.3))(react-dom@19.2.6(react@19.2.6))(react@19.2.6)(typescript@6.0.3))(@types/react@19.2.14)(monaco-editor@0.55.1)(next@16.2.6(react-dom@19.2.6(react@19.2.6))(react@19.2.6)(sass@1.77.4))(payload@3.84.1(graphql@16.12.0)(typescript@6.0.3))(react-dom@19.2.6(react@19.2.6))(react@19.2.6)(typescript@6.0.3)(yjs@13.6.27) '@swc/cli': specifier: ^0.8.1 version: 0.8.1(@swc/core@1.15.33) @@ -78,6 +84,9 @@ importers: eslint: specifier: ^9.39.4 version: 9.39.4 + lexical: + specifier: ^0.41.0 + version: 0.41.0 prettier: specifier: ^3.8.3 version: 3.8.3 diff --git a/content-translator/src/client/components/TranslatorModal/TranslatorModal.tsx b/content-translator/src/client/components/TranslatorModal/TranslatorModal.tsx index 53bb2aa1..a3b2193f 100644 --- a/content-translator/src/client/components/TranslatorModal/TranslatorModal.tsx +++ b/content-translator/src/client/components/TranslatorModal/TranslatorModal.tsx @@ -4,6 +4,8 @@ import { getTranslation } from '@payloadcms/translations' import { Button, LoadingOverlay, Modal, Popup, PopupList, useTranslation } from '@payloadcms/ui' import { useState } from 'react' +import type { TranslateMode } from '../../../translate/types.js' + import { useTranslator } from '../../providers/Translator/context.js' import { LocaleLabel } from '../LocaleLabel/LocaleLabel.js' @@ -25,9 +27,9 @@ export const TranslatorModal = () => { const [isTranslating, setIsTranslating] = useState(false) - async function onSubmit(emptyOnly: boolean) { + async function onSubmit(mode: TranslateMode) { setIsTranslating(true) - await submit({ emptyOnly }) + await submit({ mode }) setIsTranslating(false) } @@ -105,10 
+107,13 @@ export const TranslatorModal = () => { ) : ( <> - - + diff --git a/content-translator/src/client/providers/Translator/TranslatorProvider.tsx b/content-translator/src/client/providers/Translator/TranslatorProvider.tsx index 33e6261a..1d1fdaf5 100644 --- a/content-translator/src/client/providers/Translator/TranslatorProvider.tsx +++ b/content-translator/src/client/providers/Translator/TranslatorProvider.tsx @@ -12,7 +12,7 @@ import { import { reduceFieldsToValues } from 'payload/shared' import { type ReactNode, useEffect, useState } from 'react' -import type { TranslateArgs } from '../../../translate/types.js' +import type { TranslateArgs, TranslateMode } from '../../../translate/types.js' import type { TranslatorClientConfig } from '../../../types.js' import { createClient } from '../../api/index.js' @@ -74,7 +74,7 @@ export const TranslatorProvider = ({ children }: { children: ReactNode }) => { const closeTranslator = () => modal.closeModal(modalSlug) - const submit = async ({ emptyOnly }: { emptyOnly: boolean }) => { + const submit = async ({ mode }: { mode: TranslateMode }) => { if (!resolver) { return } @@ -83,10 +83,10 @@ export const TranslatorProvider = ({ children }: { children: ReactNode }) => { id: id === null ? undefined : id, collectionSlug, data: reduceFieldsToValues(data, true), - emptyOnly, globalSlug, locale: locale.code, localeFrom: localeToTranslateFrom, + mode, } const result = await apiClient.translate(args) @@ -120,7 +120,16 @@ export const TranslatorProvider = ({ children }: { children: ReactNode }) => { }) setModified(true) - toast.success(translatorT('successMessage')) + + const reviewCount = result.reviewCount ?? 0 + toast.success( + reviewCount > 0 + ? 
`${translatorT('successMessage')} ${translatorT('reviewNeeded').replace( + '{{count}}', + String(reviewCount), + )}` + : translatorT('successMessage'), + ) } } catch (e) { console.error(e) diff --git a/content-translator/src/client/providers/Translator/context.ts b/content-translator/src/client/providers/Translator/context.ts index 9d0fc949..a62e9c79 100644 --- a/content-translator/src/client/providers/Translator/context.ts +++ b/content-translator/src/client/providers/Translator/context.ts @@ -2,6 +2,8 @@ import type { Locale } from 'payload' import { createContext, useContext } from 'react' +import type { TranslateMode } from '../../../translate/types.js' + export type TranslationKey = | 'buttonLabel' | 'errorMessage' @@ -9,8 +11,10 @@ export type TranslationKey = | 'modalSourceLanguage' | 'modalTitle' | 'modalTranslating' + | 'reviewNeeded' | 'submitButtonLabelEmpty' | 'submitButtonLabelFull' + | 'submitButtonLabelIncremental' | 'successMessage' type TranslatorContextData = { @@ -21,7 +25,7 @@ type TranslatorContextData = { openTranslator: () => void resolver: { key: string } | null setLocaleToTranslateFrom: (code: string) => void - submit: (args: { emptyOnly: boolean }) => Promise + submit: (args: { mode: TranslateMode }) => Promise translatorT: (key: TranslationKey) => string } diff --git a/content-translator/src/i18n/translations.ts b/content-translator/src/i18n/translations.ts index b2421c13..ea54b7ee 100644 --- a/content-translator/src/i18n/translations.ts +++ b/content-translator/src/i18n/translations.ts @@ -8,8 +8,10 @@ export const translations = { modalSourceLanguage: 'Quellsprache', modalTitle: 'Felder aus anderer Sprache übersetzen', modalTranslating: 'Felder werden übersetzt...', + reviewNeeded: '{{count}} Absätze müssen überprüft werden.', submitButtonLabelEmpty: 'Nur leere Felder übersetzen', submitButtonLabelFull: 'Alle Felder übersetzen', + submitButtonLabelIncremental: 'Neue & geänderte Inhalte übersetzen', successMessage: 'Erfolgreich 
übersetzt. Drücken Sie "Speichern", um die Änderungen anzuwenden.', }, @@ -23,8 +25,10 @@ export const translations = { modalSourceLanguage: 'Source language', modalTitle: 'Translate fields from another language', modalTranslating: 'Fields are being translated...', + reviewNeeded: '{{count}} paragraphs need review.', submitButtonLabelEmpty: 'Translate only empty fields', submitButtonLabelFull: 'Translate all fields', + submitButtonLabelIncremental: 'Translate new & changed content', successMessage: 'Successfully translated. Press "Save" to apply the changes.', }, }, diff --git a/content-translator/src/translate/endpoint.ts b/content-translator/src/translate/endpoint.ts index 066ddb13..6ada0c87 100644 --- a/content-translator/src/translate/endpoint.ts +++ b/content-translator/src/translate/endpoint.ts @@ -21,16 +21,16 @@ export const translateEndpoint = const args: TranslateEndpointArgs = await req.json() - const { id, collectionSlug, data, emptyOnly, globalSlug, locale, localeFrom } = args + const { id, collectionSlug, data, globalSlug, locale, localeFrom, mode } = args const result = await translateOperation({ id, collectionSlug, data, - emptyOnly, globalSlug, locale, localeFrom, + mode, overrideAccess: false, req, update: false, diff --git a/content-translator/src/translate/operation.ts b/content-translator/src/translate/operation.ts index b1908292..d963b038 100644 --- a/content-translator/src/translate/operation.ts +++ b/content-translator/src/translate/operation.ts @@ -2,7 +2,12 @@ import he from 'he' import { APIError, type Payload, type PayloadRequest } from 'payload' import type { TranslatorCustomConfig } from '../types.js' -import type { TranslateArgs, TranslateResult, ValueToTranslate } from './types.js' +import type { + IncrementalAccumulator, + TranslateArgs, + TranslateResult, + ValueToTranslate, +} from './types.js' import { findEntityWithConfig } from './findEntityWithConfig.js' import { traverseFields } from './traverseFields.js' @@ -44,6 +49,7 @@ 
export const translateOperation = async (args: TranslateOperationArgs) => { } const valuesToTranslate: ValueToTranslate[] = [] + const incremental: IncrementalAccumulator = { conflictCount: 0, stamps: [] } let translatedData = args.data @@ -62,8 +68,9 @@ export const translateOperation = async (args: TranslateOperationArgs) => { traverseFields({ dataFrom, - emptyOnly: args.emptyOnly ?? false, fields: config.fields, + incremental, + mode: args.mode ?? 'all', payloadConfig: req.payload.config, translatedData, valuesToTranslate, @@ -89,6 +96,11 @@ export const translateOperation = async (args: TranslateOperationArgs) => { valuesToTranslate[index].onTranslate(formattedValue) }) + // Stamp content-addressed hashes now that the translated text is in place. + for (const stamp of incremental.stamps) { + stamp() + } + if (args.update) { await updateEntity({ id, @@ -103,6 +115,7 @@ export const translateOperation = async (args: TranslateOperationArgs) => { } result = { + reviewCount: incremental.conflictCount, success: true, translatedData, } diff --git a/content-translator/src/translate/richtext/hashNode.ts b/content-translator/src/translate/richtext/hashNode.ts new file mode 100644 index 0000000000000000000000000000000000000000..c7c313199ee02ebc43f176efdaf3f6734233e68f GIT binary patch literal 964 zcmaKrL5tfk6oh;Bub9Jv?KJVWh3+A(x0FKZp)95JRtl>~FKZD+GLqaTg#7nD$?GOr zmL7}|^2{5JKI?7oe8d6LlZf<1gbqinz1yO+PSH~OeIH#}6mj1ZzS2%hVeWVbExwa< zUcC>|Yx^+AU^mwNvA0EWJ)7Z^vyvSxKMK7O+9J{}!mS}-MjxaR5LQ7L104et1xZ#3 z8^atdQBDq9vMlq8e1q9_LAxATIvZj(_?FCnB<~hg4t<`pUSbT0_7;Z%JUfm_T9kY) zK(A4y({Zw-ZgR}F(&9^|C#E#p2`S@X>p*Z*<7-9HWt)Hp-K&Cxf-~sK(%V zFO`u;PG3TV^Xxx=P0GvR|4v+GVa+*GlE*Zj;^4=&3yycuSH~ATRi$k|#HpS0n%L-s zLsf=Oym@<5*3Hl>zRfD@>PG1Y3b86X+VNg6T2l3Dj@LJJ2B1J@fC-2Tp}2++7zC|& zt>Yf43W4bQg`wYME{IrW6f$tV8A4hmH!kiuq8fa5ek)>R2p7(pJ!B^=XG67eMlnWF zaJU*9q8_m$q!N;0h+<{v-FUC05`T%sm2q;j$m1uc@y$*R>z*K(-0sF7;{Wt + +/** + * Both hashes live in Lexical's NodeState slot (`$`) under a single namespaced + * key so 
they travel with the node through copy/paste, reorder, history and + * admin-editor saves. Kept short because this is stored inline on every + * top-level node: + * + * "$": { "translator-plugin": { "srcHash": <…>, "outHash": <…> } } + * + * - `srcHash` — hash of the source text this node was translated from (detects source changes) + * - `outHash` — hash of the machine output written here (detects later manual edits) + */ +const STATE_KEY = '$' +const NS_KEY = 'translator-plugin' +const SRC_KEY = 'srcHash' +const OUT_KEY = 'outHash' + +type NodeHashes = { + outHash?: string + srcHash?: string +} + +const readNamespace = (node: LexicalNode): Record | undefined => { + const state = node[STATE_KEY] + if (!state || typeof state !== 'object') { + return undefined + } + const ns = (state as Record)[NS_KEY] + return ns && typeof ns === 'object' ? (ns as Record) : undefined +} + +export const getNodeHashes = (node: LexicalNode): NodeHashes => { + const ns = readNamespace(node) + const srcHash = ns?.[SRC_KEY] + const outHash = ns?.[OUT_KEY] + + return { + outHash: typeof outHash === 'string' ? outHash : undefined, + srcHash: typeof srcHash === 'string' ? srcHash : undefined, + } +} + +export const setNodeHashes = (node: LexicalNode, srcHash: string, outHash: string): void => { + const state = + node[STATE_KEY] && typeof node[STATE_KEY] === 'object' + ? 
(node[STATE_KEY] as Record) + : {} + + state[NS_KEY] = { [OUT_KEY]: outHash, [SRC_KEY]: srcHash } + node[STATE_KEY] = state +} diff --git a/content-translator/src/translate/richtext/reconcileIncremental.ts b/content-translator/src/translate/richtext/reconcileIncremental.ts new file mode 100644 index 00000000..29f7f02b --- /dev/null +++ b/content-translator/src/translate/richtext/reconcileIncremental.ts @@ -0,0 +1,109 @@ +import { hashNode, hashText, nodePlainText } from './hashNode.js' +import { getNodeHashes, setNodeHashes } from './nodeState.js' + +type LexicalNode = Record + +export type ReconcileResult = { + /** The merged target children, in source order. */ + children: LexicalNode[] + /** Number of units left untouched because their source changed under a hand-edited translation. */ + conflictCount: number + /** Deferred hash stamps to run after the translation values have been applied. */ + stamps: Array<() => void> +} + +/** + * Merge a source lexical tree's top-level nodes into an existing target tree, + * translating only new or changed units and preserving everything else. + * + * Identity is content-addressed: each source unit hashes to `h`, and the target + * nodes are indexed by the `srcHash` they were last translated from. A match + * means the source is unchanged → the target node is reused as-is (manual edits + * preserved, no translation). A miss means the source is new or changed: + * + * - if the paired prior target still holds untouched machine output → retranslate + * - if it was hand-edited → leave it in place and count it as needing review + * + * Target nodes whose `srcHash` no longer appears in the source are deletions and + * are dropped. The result follows source order, so inserts and reorders land in + * the right place. + */ +export const reconcileIncremental = ({ + collectUnitTexts, + sourceChildren, + targetChildren, +}: { + /** Push the unit node's translatable text into valuesToTranslate (translated in place). 
*/ + collectUnitTexts: (unitNode: LexicalNode) => void + sourceChildren: LexicalNode[] + targetChildren: LexicalNode[] +}): ReconcileResult => { + // Content-addressed index: stored srcHash -> queue of target nodes (queued so + // duplicate-text units are consumed in order rather than colliding). + const targetsBySrcHash = new Map() + for (const targetNode of targetChildren) { + const { srcHash } = getNodeHashes(targetNode) + if (srcHash) { + const queue = targetsBySrcHash.get(srcHash) ?? [] + queue.push(targetNode) + targetsBySrcHash.set(srcHash, queue) + } + } + + // Pass 1: reuse content-matched units, set aside the rest as work. + const consumed = new Set() + const plan: Array< + { node: LexicalNode; type: 'reuse' } | { sourceNode: LexicalNode; type: 'work' } + > = [] + + for (const sourceNode of sourceChildren) { + const match = targetsBySrcHash.get(hashNode(sourceNode))?.shift() + if (match) { + consumed.add(match) + plan.push({ type: 'reuse', node: match }) + } else { + plan.push({ type: 'work', sourceNode }) + } + } + + // Unconsumed targets, in original order — paired positionally with changed + // source units to decide retranslate vs. review-conflict. + const leftoverTargets = targetChildren.filter((node) => !consumed.has(node)) + let leftoverIndex = 0 + + const stamps: Array<() => void> = [] + let conflictCount = 0 + const children: LexicalNode[] = [] + + for (const step of plan) { + if (step.type === 'reuse') { + children.push(step.node) + continue + } + + const prior = + leftoverIndex < leftoverTargets.length ? leftoverTargets[leftoverIndex++] : undefined + + if (prior) { + const { outHash } = getNodeHashes(prior) + const priorEdited = outHash !== undefined && outHash !== hashText(nodePlainText(prior)) + + if (priorEdited) { + // Source moved under a hand-tuned translation: keep the human's version. 
+ conflictCount += 1 + children.push(prior) + continue + } + } + + // New unit, or changed source over untouched machine output: translate a + // fresh clone of the source so its text is overwritten and re-stamped. + const clone = structuredClone(step.sourceNode) + const srcHash = hashNode(clone) + collectUnitTexts(clone) + stamps.push(() => setNodeHashes(clone, srcHash, hashText(nodePlainText(clone)))) + children.push(clone) + } + + return { children, conflictCount, stamps } +} diff --git a/content-translator/src/translate/traverseFields.ts b/content-translator/src/translate/traverseFields.ts index 29a4a58f..c346da82 100644 --- a/content-translator/src/translate/traverseFields.ts +++ b/content-translator/src/translate/traverseFields.ts @@ -5,9 +5,12 @@ import { tabHasName } from 'payload/shared' const ObjectID = typeof ObjectIDModule === 'function' ? ObjectIDModule : ObjectIDModule.default -import type { ValueToTranslate } from './types.js' +import type { IncrementalAccumulator, TranslateMode, ValueToTranslate } from './types.js' import { isEmpty } from '../utils/isEmpty.js' +import { hashNode, hashText, nodePlainText } from './richtext/hashNode.js' +import { setNodeHashes } from './richtext/nodeState.js' +import { reconcileIncremental } from './richtext/reconcileIncremental.js' import { traverseRichText } from './traverseRichText.js' const isUnsafeKey = (key: string): boolean => @@ -15,9 +18,10 @@ const isUnsafeKey = (key: string): boolean => export const traverseFields = ({ dataFrom, - emptyOnly, fields, + incremental, localizedParent, + mode, payloadConfig, siblingDataFrom, siblingDataTranslated, @@ -25,9 +29,10 @@ export const traverseFields = ({ valuesToTranslate, }: { dataFrom: Record - emptyOnly: boolean fields: Field[] + incremental?: IncrementalAccumulator localizedParent?: boolean + mode: TranslateMode payloadConfig: SanitizedConfig siblingDataFrom?: Record siblingDataTranslated?: Record @@ -36,6 +41,11 @@ export const traverseFields = ({ }) => { 
siblingDataFrom = siblingDataFrom ?? dataFrom siblingDataTranslated = siblingDataTranslated ?? translatedData + incremental = incremental ?? { conflictCount: 0, stamps: [] } + + // `incremental` only changes richText behavior; everything else fills empty + // targets only, so existing (possibly hand-edited) translations are preserved. + const fillEmptyOnly = mode !== 'all' for (const field of fields) { if ('virtual' in field && field.virtual) { @@ -60,7 +70,7 @@ export const traverseFields = ({ (siblingDataTranslated[field.name] as { id: number | string }[] | undefined) ?? [] if (field.localized || localizedParent) { - if (arrayDataTranslated.length > 0 && emptyOnly) { + if (arrayDataTranslated.length > 0 && fillEmptyOnly) { break } @@ -72,9 +82,10 @@ export const traverseFields = ({ arrayDataTranslated.forEach((item, index) => { traverseFields({ dataFrom, - emptyOnly, fields: field.fields, + incremental, localizedParent: localizedParent ?? field.localized, + mode, payloadConfig, siblingDataFrom: arrayDataFrom[index], siblingDataTranslated: item, @@ -104,7 +115,7 @@ export const traverseFields = ({ | undefined) ?? [] if (field.localized || localizedParent) { - if (blocksDataTranslated.length > 0 && emptyOnly) { + if (blocksDataTranslated.length > 0 && fillEmptyOnly) { break } @@ -140,9 +151,10 @@ export const traverseFields = ({ traverseFields({ dataFrom, - emptyOnly, fields: blockConfig.fields, + incremental, localizedParent: localizedParent ?? 
field.localized, + mode, payloadConfig, siblingDataFrom: blocksDataFrom[index], siblingDataTranslated: item, @@ -174,9 +186,10 @@ export const traverseFields = ({ case 'row': traverseFields({ dataFrom, - emptyOnly, fields: field.fields, + incremental, localizedParent, + mode, payloadConfig, siblingDataFrom, siblingDataTranslated, @@ -201,9 +214,10 @@ export const traverseFields = ({ traverseFields({ dataFrom, - emptyOnly, fields: field.fields, + incremental, localizedParent: field.localized, + mode, payloadConfig, siblingDataFrom: groupDataFrom, siblingDataTranslated: groupDataTranslated, @@ -224,19 +238,65 @@ export const traverseFields = ({ break } - if (emptyOnly && !isEmpty(siblingDataTranslated[field.name])) { + const richTextDataFrom = siblingDataFrom[field.name] as Record + + if (!richTextDataFrom) { break } - const richTextDataFrom = siblingDataFrom[field.name] as object + const isLexical = 'root' in richTextDataFrom + const existingTarget = siblingDataTranslated[field.name] + + // Incremental: diff source against the existing translation at the + // paragraph/block level instead of skipping or wholesale-replacing. + if (mode === 'incremental' && isLexical && !isEmpty(existingTarget)) { + const sourceRoot = richTextDataFrom.root as Record + const targetRoot = (existingTarget as Record).root as Record< + string, + unknown + > + + const { children, conflictCount, stamps } = reconcileIncremental({ + collectUnitTexts: (unitNode) => { + traverseRichText({ + incremental, + mode: 'all', + onText: (siblingData, key) => { + valuesToTranslate.push({ + onTranslate: (translated: string) => { + siblingData[key] = translated + }, + value: siblingData[key], + }) + }, + payloadConfig, + root: unitNode, + translatedData, + valuesToTranslate, + }) + }, + sourceChildren: (sourceRoot?.children as Record[]) ?? [], + targetChildren: (targetRoot?.children as Record[]) ?? 
[], + }) - siblingDataTranslated[field.name] = richTextDataFrom + siblingDataTranslated[field.name] = { + ...richTextDataFrom, + root: { ...sourceRoot, children }, + } + incremental.stamps.push(...stamps) + incremental.conflictCount += conflictCount - if (!richTextDataFrom) { break } - const isLexical = 'root' in richTextDataFrom + // empty: leave an already-translated field untouched. + if (fillEmptyOnly && !isEmpty(existingTarget)) { + break + } + + // all (and incremental over an empty target, or non-lexical): copy the + // source tree and translate every text node. + siblingDataTranslated[field.name] = richTextDataFrom if (!isLexical) { break @@ -249,7 +309,8 @@ export const traverseFields = ({ if (root) { traverseRichText({ - emptyOnly, + incremental, + mode, onText: (siblingData, key) => { valuesToTranslate.push({ onTranslate: (translated: string) => { @@ -263,6 +324,18 @@ export const traverseFields = ({ translatedData, valuesToTranslate, }) + + // Stamp every top-level node so a later incremental run has the + // content-addressed hashes to join on. Capture srcHash now (before the + // deferred onTranslate mutates the text) and outHash after. 
+ if (Array.isArray(root.children)) { + for (const child of root.children as Record[]) { + const srcHash = hashNode(child) + incremental.stamps.push(() => + setNodeHashes(child, srcHash, hashText(nodePlainText(child))), + ) + } + } } break @@ -290,9 +363,10 @@ export const traverseFields = ({ traverseFields({ dataFrom, - emptyOnly, fields: tab.fields, + incremental, localizedParent: tab.localized, + mode, payloadConfig, siblingDataFrom: tabDataFrom, siblingDataTranslated: tabDataTranslated, @@ -315,7 +389,7 @@ export const traverseFields = ({ if (!(field.localized || localizedParent) || isEmpty(siblingDataFrom[field.name])) { break } - if (emptyOnly && siblingDataTranslated[field.name]) { + if (fillEmptyOnly && siblingDataTranslated[field.name]) { break } diff --git a/content-translator/src/translate/traverseRichText.ts b/content-translator/src/translate/traverseRichText.ts index a1346361..e038dfc3 100644 --- a/content-translator/src/translate/traverseRichText.ts +++ b/content-translator/src/translate/traverseRichText.ts @@ -1,11 +1,12 @@ import type { FlattenedBlock, SanitizedConfig } from 'payload' -import type { ValueToTranslate } from './types.js' +import type { IncrementalAccumulator, TranslateMode, ValueToTranslate } from './types.js' import { traverseFields } from './traverseFields.js' export const traverseRichText = ({ - emptyOnly, + incremental, + mode, onText, payloadConfig, root, @@ -13,7 +14,8 @@ export const traverseRichText = ({ translatedData, valuesToTranslate, }: { - emptyOnly: boolean + incremental?: IncrementalAccumulator + mode: TranslateMode onText: (siblingData: Record, key: string) => void payloadConfig: SanitizedConfig root: Record @@ -45,9 +47,10 @@ export const traverseRichText = ({ // Traverse the fields of the block traverseFields({ dataFrom: root, - emptyOnly, fields: blockConfig.fields, + incremental, localizedParent: false, + mode, payloadConfig, siblingDataFrom: blockData, siblingDataTranslated: blockData, @@ -61,7 +64,8 @@ export 
const traverseRichText = ({ } else if (Array.isArray(siblingData?.children)) { for (const child of siblingData.children) { traverseRichText({ - emptyOnly, + incremental, + mode, onText, payloadConfig, root, diff --git a/content-translator/src/translate/types.ts b/content-translator/src/translate/types.ts index 58c551ef..48a088fa 100644 --- a/content-translator/src/translate/types.ts +++ b/content-translator/src/translate/types.ts @@ -3,26 +3,44 @@ export type ValueToTranslate = { value: any } +/** + * - `all` — retranslate every field, discarding existing target content. + * - `empty` — only fill fields that have no target value yet. + * - `incremental` — for richText, translate only new or changed nodes and keep + * existing translations; other field types behave like `empty`. + */ +export type TranslateMode = 'all' | 'empty' | 'incremental' + +/** Mutable accumulator threaded through traverseFields for incremental richText. */ +export type IncrementalAccumulator = { + /** Units left untouched because their source changed under a hand-edited translation. */ + conflictCount: number + /** Deferred hash stamps, run after the translation values have been applied. */ + stamps: Array<() => void> +} + export type TranslateArgs = { collectionSlug?: string data?: Record - emptyOnly?: boolean globalSlug?: string id?: number | string /** active locale */ locale: string localeFrom: string + mode?: TranslateMode overrideAccess?: boolean update?: boolean } export type TranslateResult = | { - success: false - } - | { + /** Number of richText paragraphs flagged for review (incremental mode). 
*/ + reviewCount?: number success: true translatedData: Record } + | { + success: false + } export type TranslateEndpointArgs = Omit diff --git a/content-translator/test/incrementalRichText.test.ts b/content-translator/test/incrementalRichText.test.ts new file mode 100644 index 00000000..a6c16ed8 --- /dev/null +++ b/content-translator/test/incrementalRichText.test.ts @@ -0,0 +1,203 @@ +import type { Field, SanitizedConfig } from 'payload' + +import { createHeadlessEditor } from '@lexical/headless' +import { + defaultEditorConfig, + getEnabledNodes, + sanitizeServerEditorConfig, +} from '@payloadcms/richtext-lexical' +import assert from 'node:assert/strict' +import { describe, test } from 'node:test' + +import type { IncrementalAccumulator, ValueToTranslate } from '../src/translate/types.ts' + +import { traverseFields } from '../src/translate/traverseFields.ts' + +const payloadConfig = {} as SanitizedConfig + +const contentFields: Field[] = [{ name: 'content', type: 'richText', localized: true }] + +type LexNode = Record + +const lex = (children: LexNode[]) => ({ + root: { type: 'root', children, direction: 'ltr', format: '', indent: 0, version: 1 }, +}) + +const para = (text: null | string, extra: LexNode = {}): LexNode => ({ + type: 'paragraph', + children: + text === null + ? [] + : [{ type: 'text', detail: 0, format: 0, mode: 'normal', style: '', text, version: 1 }], + direction: 'ltr', + format: '', + indent: 0, + version: 1, + ...extra, +}) + +const paraText = (node: LexNode): string => + Array.isArray(node?.children) + ? node.children.map((c: LexNode) => (typeof c.text === 'string' ? c.text : '')).join('') + : '' + +/** + * Run a traverse pass, apply the mock translation to every collected value, + * then run the deferred hash stamps — exactly as the operation does. 
+ */ +const runPass = ( + mode: 'all' | 'empty' | 'incremental', + dataFrom: Record, + translatedData: Record, +) => { + const valuesToTranslate: ValueToTranslate[] = [] + const incremental: IncrementalAccumulator = { conflictCount: 0, stamps: [] } + + traverseFields({ + dataFrom, + fields: contentFields, + incremental, + mode, + payloadConfig, + translatedData, + valuesToTranslate, + }) + + const translatedValues = valuesToTranslate.map((v) => v.value) + for (const v of valuesToTranslate) { + v.onTranslate(`TRANSLATED:${v.value}`) + } + for (const stamp of incremental.stamps) { + stamp() + } + + return { conflictCount: incremental.conflictCount, translatedData, translatedValues } +} + +/** Produce a fully translated + stamped target tree from a source tree (initial "all" run). */ +const initialTranslate = (sourceChildren: LexNode[]) => { + const dataFrom = { content: lex(sourceChildren) } + const translatedData: Record = {} + runPass('all', dataFrom, translatedData) + return translatedData as { content: ReturnType } +} + +const targetChildren = (translatedData: { content: ReturnType }): LexNode[] => + translatedData.content.root.children + +describe('incremental richText translation', () => { + test('appended source paragraph is translated and inserted; existing paragraphs are not retranslated', () => { + const target = initialTranslate([para('Alpha'), para('Beta')]) + const before = targetChildren(target).map(paraText) + + const result = runPass( + 'incremental', + { content: lex([para('Alpha'), para('Beta'), para('Gamma')]) }, + target, + ) + + assert.deepEqual(result.translatedValues, ['Gamma']) + const after = targetChildren(target).map(paraText) + assert.deepEqual(after, [before[0], before[1], 'TRANSLATED:Gamma']) + }) + + test('a paragraph inserted in the middle lands in the correct position, not appended at the end', () => { + const target = initialTranslate([para('One'), para('Three')]) + + const result = runPass( + 'incremental', + { content: 
lex([para('One'), para('Two'), para('Three')]) }, + target, + ) + + assert.deepEqual(result.translatedValues, ['Two']) + assert.deepEqual(targetChildren(target).map(paraText), [ + 'TRANSLATED:One', + 'TRANSLATED:Two', + 'TRANSLATED:Three', + ]) + }) + + test('editing a source paragraph retranslates only that paragraph', () => { + const target = initialTranslate([para('One'), para('Two'), para('Three')]) + + const result = runPass( + 'incremental', + { content: lex([para('One'), para('Two changed'), para('Three')]) }, + target, + ) + + assert.deepEqual(result.translatedValues, ['Two changed']) + assert.deepEqual(targetChildren(target).map(paraText), [ + 'TRANSLATED:One', + 'TRANSLATED:Two changed', + 'TRANSLATED:Three', + ]) + }) + + test('a manually edited translation is preserved when its source is unchanged', () => { + const target = initialTranslate([para('Keep me')]) + // human edits the translation in the admin panel + targetChildren(target)[0].children[0].text = 'Hand tuned translation' + + const result = runPass('incremental', { content: lex([para('Keep me')]) }, target) + + assert.deepEqual(result.translatedValues, []) + assert.equal(paraText(targetChildren(target)[0]), 'Hand tuned translation') + }) + + test('when source changes under a hand-edited translation, the translation is left in place and counted as needing review', () => { + const target = initialTranslate([para('Original source')]) + targetChildren(target)[0].children[0].text = 'Hand tuned translation' + + const result = runPass('incremental', { content: lex([para('Edited source')]) }, target) + + assert.deepEqual(result.translatedValues, []) + assert.equal(result.conflictCount, 1) + assert.equal(paraText(targetChildren(target)[0]), 'Hand tuned translation') + }) + + test('a source paragraph deleted in the source is removed from the translation', () => { + const target = initialTranslate([para('Stay'), para('Go away')]) + + runPass('incremental', { content: lex([para('Stay')]) }, target) + + 
assert.deepEqual(targetChildren(target).map(paraText), ['TRANSLATED:Stay']) + }) + + test('incremental from an empty target translates everything', () => { + const target: Record = {} + + const result = runPass('incremental', { content: lex([para('First'), para('Second')]) }, target) + + assert.deepEqual(result.translatedValues, ['First', 'Second']) + assert.deepEqual(targetChildren(target as any).map(paraText), [ + 'TRANSLATED:First', + 'TRANSLATED:Second', + ]) + }) +}) + +describe('incremental richText storage', () => { + // This is a regression guard on a third-party assumption: the incremental + // merge stores its hashes in Lexical's NodeState ($) slot, which only works + // because Payload's default lexical config round-trips unknown $ keys + // untouched. If a future @payloadcms/richtext-lexical drops them, this fails + // and the inline-storage strategy must be reconsidered (see the sidecar + // fallback in the README). + test('NodeState hashes survive a headless-editor round-trip with the default Payload lexical config', async () => { + const sanitized = await sanitizeServerEditorConfig(defaultEditorConfig, { + collections: [], + i18n: {}, + } as unknown as SanitizedConfig) + const nodes = getEnabledNodes({ editorConfig: sanitized as any }) + const editor = createHeadlessEditor({ nodes }) + + const nodeState = { 'translator-plugin': { outHash: 'def456', srcHash: 'abc123' } } + + const editorState = editor.parseEditorState(lex([para('Hallo Welt', { $: nodeState })]) as any) + const roundTripped: any = editorState.toJSON() + + assert.deepEqual(roundTripped.root.children[0].$, nodeState) + }) +}) diff --git a/content-translator/test/traverseFields.test.ts b/content-translator/test/traverseFields.test.ts index 389936d7..22e13be8 100644 --- a/content-translator/test/traverseFields.test.ts +++ b/content-translator/test/traverseFields.test.ts @@ -15,8 +15,8 @@ const runTraverse = (fields: Field[], dataFrom: Record, emptyOn traverseFields({ dataFrom, - emptyOnly, 
fields, + mode: emptyOnly ? 'empty' : 'all', payloadConfig, translatedData, valuesToTranslate, @@ -207,8 +207,8 @@ describe('traverseFields - emptyOnly with missing target sub-objects (#137)', () traverseFields({ dataFrom: { meta: { title: 'Hello', description: 'World' } }, - emptyOnly: true, fields, + mode: 'empty', payloadConfig, translatedData, valuesToTranslate, From 3a5650aca53b8767aac7ed5febf54654eee6fe4d Mon Sep 17 00:00:00 2001 From: Jens Becker Date: Sat, 6 Jun 2026 22:31:56 +0200 Subject: [PATCH 2/5] docs(content-translator): document incremental change-detection limitation --- content-translator/README.md | 2 +- content-translator/src/translate/traverseFields.ts | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/content-translator/README.md b/content-translator/README.md index c1bba4eb..c68b4737 100644 --- a/content-translator/README.md +++ b/content-translator/README.md @@ -59,7 +59,7 @@ Incremental mode translates only what actually changed and preserves existing tr - a paragraph removed from the source is removed from the translation; - if a source paragraph changed **and** its translation had been hand-edited, the human's version is left in place and counted — the success toast reports how many paragraphs need review, so machine accuracy never silently overwrites manual work. -Other field types behave like "translate only empty fields" in incremental mode. +**Limitation:** change detection currently applies to lexical `richText` only. Every other field type (`text`, `textarea`, `number`, `array`, `blocks`, non-lexical `richText`) behaves like "translate only empty fields" in incremental mode — an empty target is filled, but a field whose source changed _after_ it was already translated is **not** retranslated. Detecting edits on those would require storing a source hash per field (plain fields have no inline NodeState slot like lexical nodes do). 
Paragraph identity is content-addressed: a hash of the source text and a hash of the machine output are stored inline on the translated node using Lexical's [NodeState](https://lexical.dev/docs/concepts/node-state) slot (`$`), under a single namespaced key — `"$": { "translator-plugin": { "srcHash": …, "outHash": … } }`. These pass through Payload saves and admin-editor edits untouched (covered by a regression test). Because identity comes from content rather than position, the diff survives inserts, deletes and reorders. The first incremental run on a field translated by an older version (no stored hashes) retranslates it once and then stamps the hashes; subsequent runs are incremental. If a future lexical/Payload release ever stopped preserving the `$` slot, the same merge can fall back to a sidecar field keyed by field path — the algorithm is identical, only the read/write of the hash changes. diff --git a/content-translator/src/translate/traverseFields.ts b/content-translator/src/translate/traverseFields.ts index c346da82..3b412862 100644 --- a/content-translator/src/translate/traverseFields.ts +++ b/content-translator/src/translate/traverseFields.ts @@ -43,8 +43,16 @@ export const traverseFields = ({ siblingDataTranslated = siblingDataTranslated ?? translatedData incremental = incremental ?? { conflictCount: 0, stamps: [] } - // `incremental` only changes richText behavior; everything else fills empty - // targets only, so existing (possibly hand-edited) translations are preserved. + // LIMITATION: change detection only works for lexical richText. `incremental` + // does node-level diffing of lexical paragraphs/blocks (see the richText case + // below); for every other field type it falls back to empty-only here. So a + // text/textarea/number/array/blocks value whose SOURCE changed after it was + // already translated is NOT retranslated in incremental mode — only fields + // that are still empty get filled. 
Catching edits on those would need a hash + // of the source stored per field (plain fields have no NodeState slot to carry + // it inline, unlike lexical nodes), i.e. the sidecar approach — out of scope + // here. Despite the "new & changed" label, "changed" currently means lexical + // content only. const fillEmptyOnly = mode !== 'all' for (const field of fields) { From f3836681106ada5c7ab43a92f5b71eaeecb1d030 Mon Sep 17 00:00:00 2001 From: Jens Becker Date: Sat, 6 Jun 2026 22:48:59 +0200 Subject: [PATCH 3/5] fix(content-translator): scope richText srcHash per source locale and guard dynamic field writes --- content-translator/README.md | 6 +- content-translator/src/translate/operation.ts | 1 + .../src/translate/richtext/nodeState.ts | 82 +++++++++++++++++-- .../richtext/reconcileIncremental.ts | 31 ++++--- .../src/translate/traverseFields.ts | 44 +++++++--- .../src/translate/traverseRichText.ts | 4 + .../test/incrementalRichText.test.ts | 22 ++++- .../test/traverseFields.test.ts | 2 + 8 files changed, 159 insertions(+), 33 deletions(-) diff --git a/content-translator/README.md b/content-translator/README.md index c68b4737..f3307a45 100644 --- a/content-translator/README.md +++ b/content-translator/README.md @@ -61,7 +61,11 @@ Incremental mode translates only what actually changed and preserves existing tr **Limitation:** change detection currently applies to lexical `richText` only. Every other field type (`text`, `textarea`, `number`, `array`, `blocks`, non-lexical `richText`) behaves like "translate only empty fields" in incremental mode — an empty target is filled, but a field whose source changed _after_ it was already translated is **not** retranslated. Detecting edits on those would require storing a source hash per field (plain fields have no inline NodeState slot like lexical nodes do). 
-Paragraph identity is content-addressed: a hash of the source text and a hash of the machine output are stored inline on the translated node using Lexical's [NodeState](https://lexical.dev/docs/concepts/node-state) slot (`$`), under a single namespaced key — `"$": { "translator-plugin": { "srcHash": …, "outHash": … } }`. These pass through Payload saves and admin-editor edits untouched (covered by a regression test). Because identity comes from content rather than position, the diff survives inserts, deletes and reorders. The first incremental run on a field translated by an older version (no stored hashes) retranslates it once and then stamps the hashes; subsequent runs are incremental. If a future lexical/Payload release ever stopped preserving the `$` slot, the same merge can fall back to a sidecar field keyed by field path — the algorithm is identical, only the read/write of the hash changes. +Paragraph identity is content-addressed: a hash of the source text and a hash of the machine output are stored inline on the translated node using Lexical's [NodeState](https://lexical.dev/docs/concepts/node-state) slot (`$`), under a single namespaced key — `"$": { "translator-plugin": { "srcHash": { "<locale>": … }, "outHash": … } }`. These pass through Payload saves and admin-editor edits untouched (covered by a regression test). Because identity comes from content rather than position, the diff survives inserts, deletes and reorders. + +The source language is whatever the editor selects in the modal (it defaults to your `defaultLocale`), so `srcHash` is keyed **by source locale**: translations of a target from EN and from DE are tracked independently, and a paragraph translated from one source isn't mistaken for content from another. The `outHash` is a single value — it hashes the target's own text, independent of which source produced it.
+The first incremental run on a field translated by an older version (no stored hashes) retranslates it once and then stamps the hashes; subsequent runs are incremental. If a future lexical/Payload release ever stopped preserving the `$` slot, the same merge can fall back to a sidecar field keyed by field path — the algorithm is identical, only the read/write of the hash changes. ## Configuration diff --git a/content-translator/src/translate/operation.ts b/content-translator/src/translate/operation.ts index d963b038..19aae6c9 100644 --- a/content-translator/src/translate/operation.ts +++ b/content-translator/src/translate/operation.ts @@ -70,6 +70,7 @@ export const translateOperation = async (args: TranslateOperationArgs) => { dataFrom, fields: config.fields, incremental, + localeFrom: args.localeFrom, mode: args.mode ?? 'all', payloadConfig: req.payload.config, translatedData, diff --git a/content-translator/src/translate/richtext/nodeState.ts b/content-translator/src/translate/richtext/nodeState.ts index d6ad1d04..f38e88d4 100644 --- a/content-translator/src/translate/richtext/nodeState.ts +++ b/content-translator/src/translate/richtext/nodeState.ts @@ -3,13 +3,18 @@ type LexicalNode = Record /** * Both hashes live in Lexical's NodeState slot (`$`) under a single namespaced * key so they travel with the node through copy/paste, reorder, history and - * admin-editor saves. Kept short because this is stored inline on every - * top-level node: + * admin-editor saves. Stored inline on every top-level node: * - * "$": { "translator-plugin": { "srcHash": <…>, "outHash": <…> } } + * "$": { "translator-plugin": { "srcHash": { "<locale>": <hash> }, "outHash": <hash> } } * - * - `srcHash` — hash of the source text this node was translated from (detects source changes) - * - `outHash` — hash of the machine output written here (detects later manual edits) + * - `srcHash` — per source locale, the hash of the source text this node was + * translated from.
Keyed by locale because the source language is chosen per + * run: translating the same target from EN vs. DE produces different source + * text, so each gets its own entry and switching sources does not falsely + * invalidate the other. + * - `outHash` — hash of the machine output written here (detects later manual + * edits). Single value: it hashes the target's own text, independent of which + * source produced it. */ const STATE_KEY = '$' const NS_KEY = 'translator-plugin' @@ -30,9 +35,14 @@ const readNamespace = (node: LexicalNode): Record | undefined = return ns && typeof ns === 'object' ? (ns as Record) : undefined } -export const getNodeHashes = (node: LexicalNode): NodeHashes => { +/** Read the hashes relevant to a run translating from `sourceLocale`. */ +export const getNodeHashes = (node: LexicalNode, sourceLocale: string): NodeHashes => { const ns = readNamespace(node) - const srcHash = ns?.[SRC_KEY] + const srcMap = ns?.[SRC_KEY] + const srcHash = + srcMap && typeof srcMap === 'object' + ? (srcMap as Record)[sourceLocale] + : undefined const outHash = ns?.[OUT_KEY] return { @@ -41,12 +51,66 @@ export const getNodeHashes = (node: LexicalNode): NodeHashes => { } } -export const setNodeHashes = (node: LexicalNode, srcHash: string, outHash: string): void => { +/** + * Copy the per-locale `srcHash` map from one node onto another (without the + * `outHash`, which describes the target text and is set fresh on translate). + * Used when a paragraph is retranslated from a new source locale: the fresh + * clone inherits the source hashes of the locales it was previously translated + * from, so a later run from one of those locales can still reuse it instead of + * retranslating. + */ +export const inheritSrcHashes = (target: LexicalNode, source: LexicalNode): void => { + const srcMap = readNamespace(source)?.[SRC_KEY] + if (!srcMap || typeof srcMap !== 'object') { + return + } + + const state = + target[STATE_KEY] && typeof target[STATE_KEY] === 'object' + ? 
(target[STATE_KEY] as Record) + : {} + const existingNs = state[NS_KEY] + const ns = + existingNs && typeof existingNs === 'object' ? (existingNs as Record) : {} + + // Target's own per-locale hashes win over inherited ones. + const merged = { ...srcMap } + const existingMap = ns[SRC_KEY] + if (existingMap && typeof existingMap === 'object') { + Object.assign(merged, existingMap) + } + + ns[SRC_KEY] = merged + state[NS_KEY] = ns + target[STATE_KEY] = state +} + +export const setNodeHashes = ( + node: LexicalNode, + sourceLocale: string, + srcHash: string, + outHash: string, +): void => { const state = node[STATE_KEY] && typeof node[STATE_KEY] === 'object' ? (node[STATE_KEY] as Record) : {} - state[NS_KEY] = { [OUT_KEY]: outHash, [SRC_KEY]: srcHash } + const existingNs = state[NS_KEY] + const ns = + existingNs && typeof existingNs === 'object' ? (existingNs as Record) : {} + + const existingSrcMap = ns[SRC_KEY] + // Keep other locales' source hashes so a later run from a different source + // can still reuse an unchanged paragraph instead of retranslating it. + const srcMap = + existingSrcMap && typeof existingSrcMap === 'object' + ? 
(existingSrcMap as Record) + : {} + srcMap[sourceLocale] = srcHash + + ns[SRC_KEY] = srcMap + ns[OUT_KEY] = outHash + state[NS_KEY] = ns node[STATE_KEY] = state } diff --git a/content-translator/src/translate/richtext/reconcileIncremental.ts b/content-translator/src/translate/richtext/reconcileIncremental.ts index 29f7f02b..1966d01a 100644 --- a/content-translator/src/translate/richtext/reconcileIncremental.ts +++ b/content-translator/src/translate/richtext/reconcileIncremental.ts @@ -1,5 +1,5 @@ import { hashNode, hashText, nodePlainText } from './hashNode.js' -import { getNodeHashes, setNodeHashes } from './nodeState.js' +import { getNodeHashes, inheritSrcHashes, setNodeHashes } from './nodeState.js' type LexicalNode = Record @@ -17,9 +17,9 @@ export type ReconcileResult = { * translating only new or changed units and preserving everything else. * * Identity is content-addressed: each source unit hashes to `h`, and the target - * nodes are indexed by the `srcHash` they were last translated from. A match - * means the source is unchanged → the target node is reused as-is (manual edits - * preserved, no translation). A miss means the source is new or changed: + * nodes are indexed by the `srcHash` they were translated from for this source + * locale. A match means the source is unchanged → the target node is reused + * as-is (manual edits preserved, no translation). A miss means new or changed: * * - if the paired prior target still holds untouched machine output → retranslate * - if it was hand-edited → leave it in place and count it as needing review @@ -30,19 +30,23 @@ export type ReconcileResult = { */ export const reconcileIncremental = ({ collectUnitTexts, + localeFrom, sourceChildren, targetChildren, }: { /** Push the unit node's translatable text into valuesToTranslate (translated in place). */ collectUnitTexts: (unitNode: LexicalNode) => void + /** Source locale of this run; selects which per-locale srcHash to join on. 
*/ + localeFrom: string sourceChildren: LexicalNode[] targetChildren: LexicalNode[] }): ReconcileResult => { - // Content-addressed index: stored srcHash -> queue of target nodes (queued so - // duplicate-text units are consumed in order rather than colliding). + // Content-addressed index: stored srcHash (for this source locale) -> queue of + // target nodes (queued so duplicate-text units are consumed in order rather + // than colliding). const targetsBySrcHash = new Map() for (const targetNode of targetChildren) { - const { srcHash } = getNodeHashes(targetNode) + const { srcHash } = getNodeHashes(targetNode, localeFrom) if (srcHash) { const queue = targetsBySrcHash.get(srcHash) ?? [] queue.push(targetNode) @@ -85,7 +89,7 @@ export const reconcileIncremental = ({ leftoverIndex < leftoverTargets.length ? leftoverTargets[leftoverIndex++] : undefined if (prior) { - const { outHash } = getNodeHashes(prior) + const { outHash } = getNodeHashes(prior, localeFrom) const priorEdited = outHash !== undefined && outHash !== hashText(nodePlainText(prior)) if (priorEdited) { @@ -97,11 +101,18 @@ export const reconcileIncremental = ({ } // New unit, or changed source over untouched machine output: translate a - // fresh clone of the source so its text is overwritten and re-stamped. + // fresh clone of the source so its text is overwritten and re-stamped. If it + // replaces a prior translation, inherit that node's per-locale source hashes + // so a later run from a different source locale can still reuse it. 
const clone = structuredClone(step.sourceNode) const srcHash = hashNode(clone) collectUnitTexts(clone) - stamps.push(() => setNodeHashes(clone, srcHash, hashText(nodePlainText(clone)))) + stamps.push(() => { + if (prior) { + inheritSrcHashes(clone, prior) + } + setNodeHashes(clone, localeFrom, srcHash, hashText(nodePlainText(clone))) + }) children.push(clone) } diff --git a/content-translator/src/translate/traverseFields.ts b/content-translator/src/translate/traverseFields.ts index 3b412862..859a2093 100644 --- a/content-translator/src/translate/traverseFields.ts +++ b/content-translator/src/translate/traverseFields.ts @@ -16,10 +16,24 @@ import { traverseRichText } from './traverseRichText.js' const isUnsafeKey = (key: string): boolean => key === '__proto__' || key === 'constructor' || key === 'prototype' +/** + * Write to a dynamic, config-derived key while refusing prototype-polluting + * keys. The loop already skips unsafe field names up front, so this is + * defense-in-depth that also keeps the assignment provably safe at each call + * site (and quiet to static analysis). + */ +const assignSafely = (target: Record, key: string, value: unknown): void => { + if (isUnsafeKey(key)) { + return + } + target[key] = value +} + export const traverseFields = ({ dataFrom, fields, incremental, + localeFrom, localizedParent, mode, payloadConfig, @@ -31,6 +45,8 @@ export const traverseFields = ({ dataFrom: Record fields: Field[] incremental?: IncrementalAccumulator + /** Source locale of this run; selects which per-locale srcHash to read/write. */ + localeFrom: string localizedParent?: boolean mode: TranslateMode payloadConfig: SanitizedConfig @@ -92,6 +108,7 @@ export const traverseFields = ({ dataFrom, fields: field.fields, incremental, + localeFrom, localizedParent: localizedParent ?? 
field.localized, mode, payloadConfig, @@ -102,7 +119,7 @@ export const traverseFields = ({ }) }) - siblingDataTranslated[field.name] = arrayDataTranslated + assignSafely(siblingDataTranslated, field.name, arrayDataTranslated) break } @@ -161,6 +178,7 @@ export const traverseFields = ({ dataFrom, fields: blockConfig.fields, incremental, + localeFrom, localizedParent: localizedParent ?? field.localized, mode, payloadConfig, @@ -171,7 +189,7 @@ export const traverseFields = ({ }) }) - siblingDataTranslated[field.name] = blocksDataTranslated + assignSafely(siblingDataTranslated, field.name, blocksDataTranslated) break } @@ -187,7 +205,7 @@ export const traverseFields = ({ case 'relationship': case 'select': case 'upload': - siblingDataTranslated[field.name] = siblingDataFrom[field.name] + assignSafely(siblingDataTranslated, field.name, siblingDataFrom[field.name]) break case 'collapsible': @@ -196,6 +214,7 @@ export const traverseFields = ({ dataFrom, fields: field.fields, incremental, + localeFrom, localizedParent, mode, payloadConfig, @@ -224,6 +243,7 @@ export const traverseFields = ({ dataFrom, fields: field.fields, incremental, + localeFrom, localizedParent: field.localized, mode, payloadConfig, @@ -233,7 +253,7 @@ export const traverseFields = ({ valuesToTranslate, }) - siblingDataTranslated[field.name] = groupDataTranslated + assignSafely(siblingDataTranslated, field.name, groupDataTranslated) break } @@ -268,6 +288,7 @@ export const traverseFields = ({ collectUnitTexts: (unitNode) => { traverseRichText({ incremental, + localeFrom, mode: 'all', onText: (siblingData, key) => { valuesToTranslate.push({ @@ -283,14 +304,15 @@ export const traverseFields = ({ valuesToTranslate, }) }, + localeFrom, sourceChildren: (sourceRoot?.children as Record[]) ?? [], targetChildren: (targetRoot?.children as Record[]) ?? 
[], }) - siblingDataTranslated[field.name] = { + assignSafely(siblingDataTranslated, field.name, { ...richTextDataFrom, root: { ...sourceRoot, children }, - } + }) incremental.stamps.push(...stamps) incremental.conflictCount += conflictCount @@ -304,7 +326,7 @@ export const traverseFields = ({ // all (and incremental over an empty target, or non-lexical): copy the // source tree and translate every text node. - siblingDataTranslated[field.name] = richTextDataFrom + assignSafely(siblingDataTranslated, field.name, richTextDataFrom) if (!isLexical) { break @@ -318,6 +340,7 @@ export const traverseFields = ({ if (root) { traverseRichText({ incremental, + localeFrom, mode, onText: (siblingData, key) => { valuesToTranslate.push({ @@ -340,7 +363,7 @@ export const traverseFields = ({ for (const child of root.children as Record[]) { const srcHash = hashNode(child) incremental.stamps.push(() => - setNodeHashes(child, srcHash, hashText(nodePlainText(child))), + setNodeHashes(child, localeFrom, srcHash, hashText(nodePlainText(child))), ) } } @@ -373,6 +396,7 @@ export const traverseFields = ({ dataFrom, fields: tab.fields, incremental, + localeFrom, localizedParent: tab.localized, mode, payloadConfig, @@ -383,7 +407,7 @@ export const traverseFields = ({ }) if (hasName) { - siblingDataTranslated[tab.name] = tabDataTranslated + assignSafely(siblingDataTranslated, tab.name, tabDataTranslated) } } @@ -408,7 +432,7 @@ export const traverseFields = ({ valuesToTranslate.push({ onTranslate: (translated: string) => { - siblingDataTranslated[field.name] = translated + assignSafely(siblingDataTranslated, field.name, translated) }, value: siblingDataFrom[field.name], }) diff --git a/content-translator/src/translate/traverseRichText.ts b/content-translator/src/translate/traverseRichText.ts index e038dfc3..936ac2c1 100644 --- a/content-translator/src/translate/traverseRichText.ts +++ b/content-translator/src/translate/traverseRichText.ts @@ -6,6 +6,7 @@ import { traverseFields } from 
'./traverseFields.js' export const traverseRichText = ({ incremental, + localeFrom, mode, onText, payloadConfig, @@ -15,6 +16,7 @@ export const traverseRichText = ({ valuesToTranslate, }: { incremental?: IncrementalAccumulator + localeFrom: string mode: TranslateMode onText: (siblingData: Record, key: string) => void payloadConfig: SanitizedConfig @@ -49,6 +51,7 @@ export const traverseRichText = ({ dataFrom: root, fields: blockConfig.fields, incremental, + localeFrom, localizedParent: false, mode, payloadConfig, @@ -65,6 +68,7 @@ export const traverseRichText = ({ for (const child of siblingData.children) { traverseRichText({ incremental, + localeFrom, mode, onText, payloadConfig, diff --git a/content-translator/test/incrementalRichText.test.ts b/content-translator/test/incrementalRichText.test.ts index a6c16ed8..5fd5f90e 100644 --- a/content-translator/test/incrementalRichText.test.ts +++ b/content-translator/test/incrementalRichText.test.ts @@ -49,6 +49,7 @@ const runPass = ( mode: 'all' | 'empty' | 'incremental', dataFrom: Record, translatedData: Record, + localeFrom = 'en', ) => { const valuesToTranslate: ValueToTranslate[] = [] const incremental: IncrementalAccumulator = { conflictCount: 0, stamps: [] } @@ -57,6 +58,7 @@ const runPass = ( dataFrom, fields: contentFields, incremental, + localeFrom, mode, payloadConfig, translatedData, @@ -75,10 +77,10 @@ const runPass = ( } /** Produce a fully translated + stamped target tree from a source tree (initial "all" run). 
*/ -const initialTranslate = (sourceChildren: LexNode[]) => { +const initialTranslate = (sourceChildren: LexNode[], localeFrom = 'en') => { const dataFrom = { content: lex(sourceChildren) } const translatedData: Record = {} - runPass('all', dataFrom, translatedData) + runPass('all', dataFrom, translatedData, localeFrom) return translatedData as { content: ReturnType } } @@ -165,6 +167,20 @@ describe('incremental richText translation', () => { assert.deepEqual(targetChildren(target).map(paraText), ['TRANSLATED:Stay']) }) + test('a paragraph unchanged in one source locale is still reused after translating from a different source locale', () => { + // Target first translated from EN, stamping a per-EN source hash. + const target = initialTranslate([para('Hello world')], 'en') + + // Then translated from DE (different source text) — retranslates and stamps a per-DE hash. + runPass('incremental', { content: lex([para('Hallo Welt')]) }, target, 'de') + + // Re-running from EN with the EN source unchanged must reuse, not retranslate, + // because the per-locale srcHash for EN survived the DE run. 
+ const result = runPass('incremental', { content: lex([para('Hello world')]) }, target, 'en') + + assert.deepEqual(result.translatedValues, []) + }) + test('incremental from an empty target translates everything', () => { const target: Record = {} @@ -193,7 +209,7 @@ describe('incremental richText storage', () => { const nodes = getEnabledNodes({ editorConfig: sanitized as any }) const editor = createHeadlessEditor({ nodes }) - const nodeState = { 'translator-plugin': { outHash: 'def456', srcHash: 'abc123' } } + const nodeState = { 'translator-plugin': { outHash: 'def456', srcHash: { en: 'abc123' } } } const editorState = editor.parseEditorState(lex([para('Hallo Welt', { $: nodeState })]) as any) const roundTripped: any = editorState.toJSON() diff --git a/content-translator/test/traverseFields.test.ts b/content-translator/test/traverseFields.test.ts index 22e13be8..2f58b209 100644 --- a/content-translator/test/traverseFields.test.ts +++ b/content-translator/test/traverseFields.test.ts @@ -16,6 +16,7 @@ const runTraverse = (fields: Field[], dataFrom: Record, emptyOn traverseFields({ dataFrom, fields, + localeFrom: 'en', mode: emptyOnly ? 
'empty' : 'all', payloadConfig, translatedData, @@ -208,6 +209,7 @@ describe('traverseFields - emptyOnly with missing target sub-objects (#137)', () traverseFields({ dataFrom: { meta: { title: 'Hello', description: 'World' } }, fields, + localeFrom: 'en', mode: 'empty', payloadConfig, translatedData, From 7dc3544a4ee14c0c64a0302c585be12851ce3e22 Mon Sep 17 00:00:00 2001 From: Jens Becker Date: Sat, 6 Jun 2026 22:50:45 +0200 Subject: [PATCH 4/5] chore(content-translator): enable versions on dev collections --- content-translator/dev/src/collections/authors.ts | 1 + content-translator/dev/src/collections/media.ts | 1 + content-translator/dev/src/collections/pages.ts | 1 + content-translator/dev/src/collections/posts.ts | 1 + 4 files changed, 4 insertions(+) diff --git a/content-translator/dev/src/collections/authors.ts b/content-translator/dev/src/collections/authors.ts index fbadab97..3d1ab29c 100644 --- a/content-translator/dev/src/collections/authors.ts +++ b/content-translator/dev/src/collections/authors.ts @@ -5,6 +5,7 @@ export const authorsSchema: CollectionConfig = { admin: { useAsTitle: 'name', }, + versions: true, fields: [ { name: 'name', diff --git a/content-translator/dev/src/collections/media.ts b/content-translator/dev/src/collections/media.ts index fa8973a8..ece4c773 100644 --- a/content-translator/dev/src/collections/media.ts +++ b/content-translator/dev/src/collections/media.ts @@ -7,6 +7,7 @@ const dirname = path.dirname(filename) export const mediaSchema: CollectionConfig = { slug: 'media', + versions: true, fields: [], upload: { staticDir: path.resolve(dirname, '../media'), diff --git a/content-translator/dev/src/collections/pages.ts b/content-translator/dev/src/collections/pages.ts index 49d56965..95d7c167 100644 --- a/content-translator/dev/src/collections/pages.ts +++ b/content-translator/dev/src/collections/pages.ts @@ -2,6 +2,7 @@ import type { CollectionConfig } from 'payload' export const pagesSchema: CollectionConfig = { slug: 'pages', + 
versions: true, fields: [ { name: 'title', diff --git a/content-translator/dev/src/collections/posts.ts b/content-translator/dev/src/collections/posts.ts index bfa6b295..7d49fe4d 100644 --- a/content-translator/dev/src/collections/posts.ts +++ b/content-translator/dev/src/collections/posts.ts @@ -2,6 +2,7 @@ import type { CollectionConfig, CollectionSlug } from 'payload' export const postsSchema: CollectionConfig = { slug: 'posts', + versions: true, fields: [ { name: 'title', From 0b4020b3fbdf81739a2f73e71bded2290b1dfeaa Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 25 Jun 2026 05:31:35 +0000 Subject: [PATCH 5/5] style(cloudinary): apply prettier formatting to normalizeFolder Repo-wide format:all check on the PR flagged pre-existing drift in this file; expand the single-line while-loop bodies to match prettier. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01L4xMrsaXkGmviWmb8MK1hb --- cloudinary/src/getGenerateSignature.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cloudinary/src/getGenerateSignature.ts b/cloudinary/src/getGenerateSignature.ts index 6a305783..72975763 100644 --- a/cloudinary/src/getGenerateSignature.ts +++ b/cloudinary/src/getGenerateSignature.ts @@ -46,8 +46,12 @@ const allowedParams = new Set(['folder', 'public_id', 'timestamp']) const normalizeFolder = (value: string): string => { let start = 0 let end = value.length - while (start < end && value[start] === '/') {start++} - while (end > start && value[end - 1] === '/') {end--} + while (start < end && value[start] === '/') { + start++ + } + while (end > start && value[end - 1] === '/') { + end-- + } return value.slice(start, end) }