Skip to content

Commit 3c67f05

Browse files
committed
Localization: AI translation primitives
Reusable, unit-tested Ruby primitives for the AI translation tier of the localization pipeline — the service behind the `human ?? AI ?? English` floor whose AI stub was left open in #25688. Pure prompt-building and validation with the Anthropic SDK call injected, so the logic is testable without the gem or the network. Not wired into any lane yet.

- TranslationValidator: format-specifier safety gate — a translation must preserve the source's placeholders (count and type; positional reordering allowed), or it is rejected and falls back to English.
- Glossary: brand do-not-translate list plus per-locale terms and register.
- AITranslator: single-string, per-key plural form-set (one consistent stem across CLDR forms), and batched string translation, with structured-output (output_config) enforcement.
- AnthropicBatch: Message Batches submit/await/results/collect for bulk backfill.

50 unit tests, rubocop clean.
1 parent 5fbce59 commit 3c67f05

10 files changed

Lines changed: 1146 additions & 0 deletions

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
source 'https://rubygems.org'
44

5+
# Official Anthropic SDK — backs the AI translation tier of the localization pipeline (fastlane/lanes/ai_translator.rb).
6+
gem 'anthropic', '~> 1.50'
57
gem 'danger-dangermattic', '~> 1.3'
68
gem 'dotenv'
79
# 2.223.1 includes a fix for an ASC-interfacing issue

Gemfile.lock

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ GEM
55
abbrev (0.1.2)
66
addressable (2.9.0)
77
public_suffix (>= 2.0.2, < 8.0)
8+
anthropic (1.50.0)
9+
cgi
10+
connection_pool
11+
standardwebhooks
812
artifactory (3.0.17)
913
ast (2.4.3)
1014
atomos (0.1.3)
@@ -33,6 +37,7 @@ GEM
3337
bigdecimal (4.1.2)
3438
buildkit (1.6.1)
3539
sawyer (>= 0.6)
40+
cgi (0.5.2)
3641
chroma (0.2.0)
3742
claide (1.1.0)
3843
claide-plugins (0.9.2)
@@ -43,6 +48,7 @@ GEM
4348
colored2 (3.1.2)
4449
commander (4.6.0)
4550
highline (~> 2.0.0)
51+
connection_pool (3.0.2)
4652
cork (0.3.0)
4753
colored2 (~> 3.1)
4854
csv (3.3.5)
@@ -348,6 +354,7 @@ GEM
348354
CFPropertyList
349355
naturally
350356
singleton (0.3.0)
357+
standardwebhooks (1.0.1)
351358
terminal-notifier (2.0.0)
352359
terminal-table (3.0.2)
353360
unicode-display_width (>= 1.1.1, < 3)
@@ -376,6 +383,7 @@ PLATFORMS
376383
ruby
377384

378385
DEPENDENCIES
386+
anthropic (~> 1.50)
379387
danger-dangermattic (~> 1.3)
380388
dotenv
381389
fastlane (~> 2.236)

fastlane/lanes/ai_translator.rb

Lines changed: 390 additions & 0 deletions
Large diffs are not rendered by default.
fastlane/lanes/ai_translator_test.rb

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
# frozen_string_literal: true
2+
3+
# Pure-Ruby unit suite for AITranslator. Run directly: `ruby fastlane/lanes/ai_translator_test.rb`.
4+
# Uses a canned-reply lambda for `complete:`, so it exercises all of the prompt-building / validation logic
5+
# without the `anthropic` gem or the network.
6+
require 'minitest/autorun'
7+
require_relative 'ai_translator'
8+
9+
# Unit scenarios for AITranslator: prompt construction plus the placeholder-validation gate. Each test drives the
# translator through an injected `complete:` lambda that returns a canned reply, so no gem or network is involved.
class AITranslatorTest < Minitest::Test # rubocop:disable Metrics/ClassLength -- exhaustive scenario coverage
  # Returns an AITranslator wired to a stub that always answers `reply`. When a `prompts` Hash is supplied, every
  # call overwrites its contents with the system / user / schema arguments the stub received.
  def translator(reply:, prompts: nil)
    AITranslator.new(
      complete: lambda do |system:, user:, schema: nil|
        prompts&.replace({ system: system, user: user, schema: schema })
        reply
      end
    )
  end

  # English plural forms shared by the plural-form scenarios; a fresh Hash is built on every call.
  def word_forms
    { 'one' => '%1$ld word', 'other' => '%1$ld words' }
  end

  def test_returns_cleaned_translation
    canned = %("Réglages"\n) # the reply arrives wrapped in quotes and with a trailing newline
    assert_equal 'Réglages', translator(reply: canned).translate(source: 'Settings', locale: 'fr')
  end

  def test_accepts_a_reply_that_preserves_placeholders
    reordered = '%2$@ wurde von %1$@ eingeladen'
    result = translator(reply: reordered).translate(source: '%1$@ invited %2$@', locale: 'de')
    assert_equal reordered, result
  end

  def test_rejects_a_reply_that_breaks_placeholders
    # The reply swaps the object placeholder for an integer one, so it must be discarded.
    breaker = translator(reply: '%1$d Beiträge')
    assert_nil breaker.translate(source: '%1$@ posts', locale: 'de')
  end

  def test_blank_source_makes_no_model_call
    called = false
    spy = lambda do |**|
      called = true
      'x'
    end
    assert_nil AITranslator.new(complete: spy).translate(source: " \n", locale: 'fr')
    refute called
  end

  def test_blank_reply_returns_nil
    silent = translator(reply: " \n")
    assert_nil silent.translate(source: 'Settings', locale: 'fr')
  end

  def test_prompt_carries_language_brands_and_context
    seen = {}
    translator(reply: 'Publier', prompts: seen)
      .translate(source: 'Publish', locale: 'fr', context: 'Button to publish a post')

    %w[French WordPress].each { |needle| assert_includes seen[:system], needle }
    ['Button to publish a post', 'Publish'].each { |needle| assert_includes seen[:user], needle }
  end

  def test_for_plural_adapter_maps_arguments_and_cues_category
    seen = {}
    request = {
      id: 'blogging.reminders.weeklyCount|==|plural.other',
      source: '%1$d times a week',
      category: 'other',
      note: 'Number of blogging reminders per week.',
      locale: 'de'
    }
    result = translator(reply: '%1$d Beiträge pro Woche', prompts: seen).for_plural(**request)

    assert_equal '%1$d Beiträge pro Woche', result
    assert_includes seen[:user], 'Number of blogging reminders per week.'
    assert_includes seen[:user], 'other' # the CLDR-category cue reaches the prompt
  end

  def test_translate_plural_returns_all_requested_forms
    reply = '{"one":"%1$ld słowo","few":"%1$ld słowa","many":"%1$ld słów","other":"%1$ld słowa"}'
    forms = translator(reply: reply).translate_plural(
      english_forms: word_forms, categories: %w[one few many other], locale: 'pl', note: 'Number of words.'
    )
    expected = {
      'one' => '%1$ld słowo', 'few' => '%1$ld słowa',
      'many' => '%1$ld słów', 'other' => '%1$ld słowa'
    }
    assert_equal expected, forms
  end

  def test_translate_plural_drops_a_form_that_breaks_placeholders
    # Only 'few' changes %1$ld to %1$d (a length change), so only it is dropped; the other forms survive.
    reply = '{"one":"%1$ld słowo","few":"%1$d słowa","other":"%1$ld słowa"}'
    forms = translator(reply: reply).translate_plural(
      english_forms: word_forms, categories: %w[one few other], locale: 'pl'
    )
    assert_equal %w[one other], forms.keys.sort
    refute forms.key?('few')
  end

  def test_translate_plural_excludes_anchors_and_passes_them_as_context
    seen = {}
    anchored = { 'one' => '%1$ld słowo' }
    reply = '{"few":"%1$ld słowa","many":"%1$ld słów","other":"%1$ld słowa"}'
    forms = translator(reply: reply, prompts: seen).translate_plural(
      english_forms: word_forms, categories: %w[one few many other], locale: 'pl', anchors: anchored
    )
    refute forms.key?('one') # human-anchored, so it is not produced
    assert_equal %w[few many other], forms.keys.sort
    assert_includes seen[:user], '%1$ld słowo' # the anchor is shown to the model as fixed context
  end

  def test_translate_plural_falls_back_to_empty_on_bad_json
    chatty = translator(reply: 'sorry — here are your forms!')
    assert_empty chatty.translate_plural(english_forms: word_forms, categories: %w[one other], locale: 'pl')
  end

  def test_translate_plural_tolerates_json_code_fences
    fenced = "```json\n{\"one\":\"%1$ld słowo\",\"other\":\"%1$ld słowa\"}\n```"
    forms = translator(reply: fenced).translate_plural(
      english_forms: word_forms, categories: %w[one other], locale: 'pl'
    )
    assert_equal({ 'one' => '%1$ld słowo', 'other' => '%1$ld słowa' }, forms)
  end

  def test_translate_plural_validates_fallback_category_against_other
    # English has no 'many' form of its own, so that form is validated against the English 'other' (%1$ld words).
    forms = translator(reply: '{"many":"%1$ld słów"}').translate_plural(
      english_forms: word_forms, categories: %w[many], locale: 'pl'
    )
    assert_equal({ 'many' => '%1$ld słów' }, forms)
  end

  def test_translate_all_maps_keys_and_validates
    strings = [
      { key: 'settings.title', source: 'Settings', comment: 'Screen title' },
      { key: 'posts.count', source: '%1$@ posts', comment: 'Count' }
    ]
    reply = '{"1":"Réglages","2":"%1$@ articles"}'
    result = translator(reply: reply).translate_all(strings, locale: 'fr')
    assert_equal({ 'settings.title' => 'Réglages', 'posts.count' => '%1$@ articles' }, result)
  end

  def test_translate_all_drops_a_placeholder_breaker
    strings = [{ key: 'settings.title', source: 'Settings' }, { key: 'posts.count', source: '%1$@ posts' }]
    reply = '{"1":"Réglages","2":"%1$d articles"}' # item 2 rewrites %1$@ as %1$d
    result = translator(reply: reply).translate_all(strings, locale: 'fr')
    assert_equal({ 'settings.title' => 'Réglages' }, result)
    refute result.key?('posts.count')
  end

  def test_translate_all_skips_blank_sources
    strings = [{ key: 'settings.title', source: 'Settings' }, { key: 'blank', source: ' ' }]
    result = translator(reply: '{"1":"Réglages"}').translate_all(strings, locale: 'fr')
    assert_equal({ 'settings.title' => 'Réglages' }, result)
  end

  def test_translate_all_chunks_and_merges
    calls = 0
    counting = lambda do |**|
      calls += 1
      '{"1":"x","2":"y"}'
    end
    strings = [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }, { key: 'c', source: 'Three' }]
    result = AITranslator.new(complete: counting).translate_all(strings, locale: 'fr', batch_size: 2)
    assert_equal 2, calls # three items at two per batch means two requests
    assert_equal({ 'a' => 'x', 'b' => 'y', 'c' => 'x' }, result)
  end

  def test_translate_all_bad_json_batch_falls_back
    strings = [{ key: 'a', source: 'One' }]
    assert_empty translator(reply: 'not json at all').translate_all(strings, locale: 'fr')
  end

  def test_translate_all_empty_input_makes_no_call
    called = false
    spy = lambda do |**|
      called = true
      '{}'
    end
    assert_empty AITranslator.new(complete: spy).translate_all([], locale: 'fr')
    refute called
  end

  def test_translate_all_prompt_carries_key_context_and_language
    seen = {}
    strings = [{ key: 'editor.publish', source: 'Publish', comment: 'Publish button' }]
    translator(reply: '{"1":"Publier"}', prompts: seen).translate_all(strings, locale: 'fr')
    assert_includes seen[:system], 'French'
    ['editor.publish', 'Publish button', 'Publish'].each { |needle| assert_includes seen[:user], needle }
  end

  def test_translate_plural_passes_a_schema_of_its_categories
    seen = {}
    translator(reply: '{"one":"%1$ld słowo","other":"%1$ld słowa"}', prompts: seen).translate_plural(
      english_forms: word_forms, categories: %w[one other], locale: 'pl'
    )
    schema = seen[:schema]
    assert_equal %w[one other], schema['required'].sort
    assert_equal false, schema['additionalProperties']
  end

  def test_translate_all_passes_a_numbered_schema
    seen = {}
    strings = [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }]
    translator(reply: '{"1":"a","2":"b"}', prompts: seen).translate_all(strings, locale: 'fr')
    assert_equal %w[1 2], seen[:schema]['required'].sort
  end

  def test_single_translate_passes_no_schema
    seen = {}
    translator(reply: 'Publier', prompts: seen).translate(source: 'Publish', locale: 'fr')
    assert_nil seen[:schema]
  end

  def test_glossary_terms_and_register_reach_the_prompt
    seen = {}
    recorder = lambda do |system:, user:, schema: nil|
      seen.replace({ system: system, user: user, schema: schema })
      'Publier'
    end
    glossary = Glossary.new(terms: { 'fr' => { 'post' => 'article' } }, register: { 'fr' => 'Use formal vous.' })
    AITranslator.new(complete: recorder, glossary: glossary).translate(source: 'Publish', locale: 'fr')
    assert_includes seen[:system], 'post -> article'
    assert_includes seen[:system], 'Register: Use formal vous.'
  end

  def test_prepare_batch_chunks_each_locale_into_jobs
    by_locale = {
      'fr' => [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }, { key: 'c', source: 'Three' }],
      'de' => [{ key: 'a', source: 'One' }]
    }
    jobs = translator(reply: '{}').prepare_batch(by_locale, batch_size: 2)[:jobs]
    assert_equal(%w[fr_0 fr_1 de_0], jobs.map { |job| job[:custom_id] })
    assert_equal %w[1 2], jobs.first[:schema]['required'].sort
  end

  def test_prepare_batch_manifest_maps_custom_id_to_locale_and_strings
    by_locale = { 'fr' => [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }] }
    entry = translator(reply: '{}').prepare_batch(by_locale, batch_size: 25)[:manifest]['fr_0']
    assert_equal 'fr', entry[:locale]
    assert_equal(%w[a b], entry[:numbered].values.map { |string| string[:key] })
  end

  def test_prepare_batch_custom_ids_match_the_api_pattern
    # The Batch API requires custom_id =~ ^[a-zA-Z0-9_-]{1,64}$, so hyphenated locales like pt-BR must still pass.
    by_locale = { 'pt-BR' => [{ key: 'a', source: 'One' }] }
    jobs = translator(reply: '{}').prepare_batch(by_locale, batch_size: 25)[:jobs]
    jobs.each { |job| assert_match(/\A[a-zA-Z0-9_-]{1,64}\z/, job[:custom_id]) }
  end

  def test_collect_batch_validates_and_groups_by_locale
    engine = translator(reply: '{}')
    strings = [{ key: 'settings', source: 'Settings' }, { key: 'count', source: '%1$@ items' }]
    manifest = engine.prepare_batch({ 'fr' => strings }, batch_size: 25)[:manifest]
    replies = { 'fr_0' => '{"1":"Réglages","2":"%1$@ éléments"}' }
    expected = { 'fr' => { 'settings' => 'Réglages', 'count' => '%1$@ éléments' } }
    assert_equal expected, engine.collect_batch(replies, manifest)
  end

  def test_collect_batch_drops_invalid_and_missing
    engine = translator(reply: '{}')
    strings = [{ key: 'settings', source: 'Settings' }, { key: 'count', source: '%1$@ items' }]
    manifest = engine.prepare_batch({ 'fr' => strings }, batch_size: 25)[:manifest]
    replies = { 'fr_0' => '{"1":"Réglages","2":"%1$d éléments"}' } # item 2 breaks the placeholder
    assert_equal({ 'fr' => { 'settings' => 'Réglages' } }, engine.collect_batch(replies, manifest))
  end

  def test_collect_batch_handles_a_missing_batch_reply
    engine = translator(reply: '{}')
    manifest = engine.prepare_batch({ 'fr' => [{ key: 'a', source: 'One' }] }, batch_size: 25)[:manifest]
    assert_equal({ 'fr' => {} }, engine.collect_batch({}, manifest))
  end
end

0 commit comments

Comments
 (0)