From 3c67f058dedd5a84049c394f929af1681da6cf22 Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Thu, 25 Jun 2026 22:47:53 -0600 Subject: [PATCH 1/2] Localization: AI translation primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reusable, unit-tested Ruby primitives for the AI translation tier of the localization pipeline — the service behind the `human ?? AI ?? English` floor whose AI stub was left open in #25688. Pure prompt-building and validation with the Anthropic SDK call injected, so the logic is testable without the gem or the network. Not wired into any lane yet. - TranslationValidator: format-specifier safety gate — a translation must preserve the source's placeholders (count and type; positional reordering allowed), or it is rejected and falls back to English. - Glossary: brand do-not-translate list plus per-locale terms and register. - AITranslator: single-string, per-key plural form-set (one consistent stem across CLDR forms), and batched string translation, with structured-output (output_config) enforcement. - AnthropicBatch: Message Batches submit/await/results/collect for bulk backfill. 50 unit tests, rubocop clean. 
--- Gemfile | 2 + Gemfile.lock | 8 + fastlane/lanes/ai_translator.rb | 390 +++++++++++++++++++ fastlane/lanes/ai_translator_test.rb | 289 ++++++++++++++ fastlane/lanes/anthropic_batch.rb | 98 +++++ fastlane/lanes/anthropic_batch_test.rb | 104 +++++ fastlane/lanes/translation_glossary.rb | 51 +++ fastlane/lanes/translation_glossary_test.rb | 37 ++ fastlane/lanes/translation_validator.rb | 108 +++++ fastlane/lanes/translation_validator_test.rb | 59 +++ 10 files changed, 1146 insertions(+) create mode 100644 fastlane/lanes/ai_translator.rb create mode 100644 fastlane/lanes/ai_translator_test.rb create mode 100644 fastlane/lanes/anthropic_batch.rb create mode 100644 fastlane/lanes/anthropic_batch_test.rb create mode 100644 fastlane/lanes/translation_glossary.rb create mode 100644 fastlane/lanes/translation_glossary_test.rb create mode 100644 fastlane/lanes/translation_validator.rb create mode 100644 fastlane/lanes/translation_validator_test.rb diff --git a/Gemfile b/Gemfile index fa6bc472ba4c..9e74f95cc238 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,8 @@ source 'https://rubygems.org' +# Official Anthropic SDK — backs the AI translation tier of the localization pipeline (fastlane/lanes/ai_translator.rb). 
+gem 'anthropic', '~> 1.50' gem 'danger-dangermattic', '~> 1.3' gem 'dotenv' # 2.223.1 includes a fix for an ASC-interfacing issue diff --git a/Gemfile.lock b/Gemfile.lock index 8325e2b30df2..0b69002c4dd6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -5,6 +5,10 @@ GEM abbrev (0.1.2) addressable (2.9.0) public_suffix (>= 2.0.2, < 8.0) + anthropic (1.50.0) + cgi + connection_pool + standardwebhooks artifactory (3.0.17) ast (2.4.3) atomos (0.1.3) @@ -33,6 +37,7 @@ GEM bigdecimal (4.1.2) buildkit (1.6.1) sawyer (>= 0.6) + cgi (0.5.2) chroma (0.2.0) claide (1.1.0) claide-plugins (0.9.2) @@ -43,6 +48,7 @@ GEM colored2 (3.1.2) commander (4.6.0) highline (~> 2.0.0) + connection_pool (3.0.2) cork (0.3.0) colored2 (~> 3.1) csv (3.3.5) @@ -348,6 +354,7 @@ GEM CFPropertyList naturally singleton (0.3.0) + standardwebhooks (1.0.1) terminal-notifier (2.0.0) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) @@ -376,6 +383,7 @@ PLATFORMS ruby DEPENDENCIES + anthropic (~> 1.50) danger-dangermattic (~> 1.3) dotenv fastlane (~> 2.236) diff --git a/fastlane/lanes/ai_translator.rb b/fastlane/lanes/ai_translator.rb new file mode 100644 index 000000000000..c0228fa9a4a6 --- /dev/null +++ b/fastlane/lanes/ai_translator.rb @@ -0,0 +1,390 @@ +# frozen_string_literal: true + +require 'json' +require_relative 'anthropic_batch' +require_relative 'translation_glossary' +require_relative 'translation_validator' + +# AI translation tier for the localization pipeline — the service behind the `human ?? AI ?? English` floor. +# +# `localization_plurals.rb` currently stubs `ai_translate_plural(...)` to return nil; this is what replaces +# it. Given an English source string, a target locale, and the developer context, it asks Claude for a +# translation, then runs the result through `TranslationValidator` before returning it. 
Anything that fails +# the format-specifier gate (or comes back empty / refused) returns nil — the documented "no machine +# translation" signal the fold treats as English-fallback (flagged needs_review). It never returns a +# placeholder-broken string. +# +# The model call is INJECTED as a `complete` callable, not hard-wired, so the prompt-building and validation +# logic stays pure and unit-testable without the SDK or the network. `AITranslator.with_anthropic` builds the +# live, Claude-backed instance; the unit tests build one around a canned-reply lambda. +class AITranslator # rubocop:disable Metrics/ClassLength -- mostly static localization config (33-locale name map + prompt templates) + DEFAULT_MODEL = 'claude-opus-4-8' + + # lproj code → human language name for the prompt. Covers the current ship locales; an unmapped code falls + # back to itself (the model still does something reasonable, but add the name here for best results). + LANGUAGE_NAMES = { + 'ar' => 'Arabic', 'bg' => 'Bulgarian', 'cs' => 'Czech', 'cy' => 'Welsh', 'da' => 'Danish', + 'de' => 'German', 'en-AU' => 'English (Australia)', 'en-CA' => 'English (Canada)', + 'en-GB' => 'English (United Kingdom)', 'es' => 'Spanish', 'fr' => 'French', 'he' => 'Hebrew', + 'hr' => 'Croatian', 'hu' => 'Hungarian', 'id' => 'Indonesian', 'is' => 'Icelandic', 'it' => 'Italian', + 'ja' => 'Japanese', 'ko' => 'Korean', 'nb' => 'Norwegian Bokmål', 'nl' => 'Dutch', 'pl' => 'Polish', + 'pt' => 'Portuguese', 'pt-BR' => 'Portuguese (Brazil)', 'ro' => 'Romanian', 'ru' => 'Russian', + 'sk' => 'Slovak', 'sq' => 'Albanian', 'sv' => 'Swedish', 'th' => 'Thai', 'tr' => 'Turkish', + 'zh-Hans' => 'Chinese (Simplified)', 'zh-Hant' => 'Chinese (Traditional)' + }.freeze + + # `{{language}}` / `{{brands}}` are substituted by literal gsub (NOT `format`/`%`, which would choke on the + # literal `%@` / `%1$@` examples below). Shared by the single-string and plural prompts. 
+ TRANSLATION_RULES = <<~PROMPT + You are an expert software localizer translating user-facing UI strings for the WordPress and Jetpack iOS apps into {{language}}. + + Rules: + - Translate into natural, concise {{language}} suitable for a mobile app UI. Screen space is limited, so prefer the shorter faithful phrasing. + - Keep these names EXACTLY as written, untranslated: {{brands}}. + - Preserve every format specifier (e.g. %@, %1$@, %d, %lld, %1$d) EXACTLY — same count and type. You may reorder positional specifiers such as %1$@ and %2$d to suit the target grammar, but each must appear exactly once and keep its number. + - Preserve any HTML tags, markup, and leading/trailing whitespace exactly as in the source. + - Do not translate URLs, email addresses, file paths, or code. + - Follow the tone and terminology conventions of the WordPress.org {{language}} translation community, including its formal/informal form-of-address convention. + PROMPT + + # Output instruction for a single string. + SINGLE_OUTPUT = 'Output ONLY the translated string — no quotation marks, no explanation, no notes, nothing else.' + + # Output instruction for a plural form-set. The consistency rule is the whole reason to translate the forms + # together (one request) rather than per category: it stops the model drifting between synonyms across forms + # (e.g. Polish słowo -> wyrazy -> słów), which a per-cell call structurally cannot prevent. + PLURAL_OUTPUT = <<~PROMPT + You are translating the plural forms of ONE UI string. Use a single consistent word and stem across every form — only the grammatical inflection (ending) changes between forms; never switch to a synonym between forms. + + Return ONLY a JSON object mapping each requested CLDR plural category to its translation, e.g. {"one": "...", "other": "..."}. No markdown fences, no commentary — just the JSON object. 
+ PROMPT + + # Brief, locale-agnostic cue per CLDR category (the model knows the language's actual rules; this just + # disambiguates which form we're asking for). + CLDR_CUES = { + 'zero' => 'the zero form', + 'one' => 'singular (n = 1)', + 'two' => 'the dual form (n = 2)', + 'few' => 'the "few" form (e.g. 2-4 in many Slavic languages)', + 'many' => 'the "many" form (e.g. 5+ in many Slavic languages)', + 'other' => 'the general / catch-all form (also used for fractions)' + }.freeze + + # Default number of strings per batched request. Small enough to keep each JSON reply parseable and bound the + # blast radius if one reply is malformed (only that batch falls back to English); large enough to amortize the + # cached system prompt across many strings. + DEFAULT_BATCH_SIZE = 25 + + # Output instruction for a batch of independent strings (keyed by item number, not the long reverse-DNS key, + # so the model can't garble the mapping). + BATCH_OUTPUT = <<~PROMPT + You are translating a batch of independent UI strings. Translate each on its own; the items are unrelated unless a context note says otherwise. + + Return ONLY a JSON object mapping each item's number (as a string) to its translation, e.g. {"1": "...", "2": "..."}. Include every number you are given, and translate nothing else. No markdown fences, no commentary — just the JSON object. + PROMPT + + # @param complete [#call] callable invoked as `complete.call(system:, user:, schema: nil)` returning the + # model's raw text reply. Injected so the translator is testable without the SDK. + # @param glossary [Glossary] brand do-not-translate list + per-locale terms/register (translation_glossary.rb). + # @param language_names [Hash{String=>String}] lproj code → language name. 
+ def initialize(complete:, glossary: Glossary.default, language_names: LANGUAGE_NAMES) + @complete = complete + @glossary = glossary + @language_names = language_names + end + + # Validated translation of `source` into `locale`, or nil if one can't be produced SAFELY: blank source, a + # blank/garbled reply, or — critically — a reply that breaks the format-specifier contract. + # + # @param source [String] the English source string. + # @param locale [String] target lproj code (e.g. "fr", "pt-BR", "zh-Hans"). + # @param context [String, nil] developer comment / context for the string (the `comment:` field). Feeding + # this is the single biggest quality lever, so pass it whenever available. + def translate(source:, locale:, context: nil) + source = source.to_s + return nil if source.strip.empty? + + candidate = clean(@complete.call(system: system_prompt(locale), user: user_prompt(source, context)).to_s) + return nil if candidate.empty? + return nil unless TranslationValidator.placeholders_match?(source, candidate) + + candidate + end + + # Adapter matching the `ai_translate_plural(id:, source:, category:, note:, locale:)` contract in + # `localization_plurals.rb`, so wiring the live tier is a one-line swap of the `ai_translator:` argument: + # translator = AITranslator.with_anthropic + # PluralStrings.fold_translations!(catalog, ..., ai_translator: translator.method(:for_plural)) + # rubocop:disable Lint/UnusedMethodArgument -- keyword names are the documented call contract + def for_plural(id:, source:, category:, note:, locale:) + translate(source: source, locale: locale, context: plural_context(note, category)) + end + # rubocop:enable Lint/UnusedMethodArgument + + # Translates a whole plural form-set for one key in a SINGLE request, so the model keeps one consistent + # word/stem across the forms (the fix for per-cell lemma drift). 
Returns { category => translation } for the + # requested categories, each placeholder-validated against its English source; forms that fail the gate or + # are absent from the reply are omitted, so the caller falls back to English (needs_review) for those. + # + # @param english_forms [Hash{String=>String}] English plural forms by CLDR category (must include "other"; + # a requested category with no English form of its own falls back to the "other" English value). + # @param categories [Array<String, Symbol>] the CLDR categories to produce (the ones the target locale needs). + # @param locale [String] target lproj code. + # @param note [String, nil] developer context / comment for the string. + # @param anchors [Hash{String=>String}] already-finalized (e.g. human-translated) forms — shown to the model + # as fixed context to stay consistent with, and excluded from what it is asked to produce. + def translate_plural(english_forms:, categories:, locale:, note: nil, anchors: {}) + english_forms = to_string_keys(english_forms) + anchors = to_string_keys(anchors) + return {} if english_forms['other'].to_s.strip.empty? + + needed = categories.map(&:to_s) - anchors.keys + return {} if needed.empty? + + reply = @complete.call( + system: plural_system_prompt(locale), + user: plural_user_prompt(english_forms, needed, note, anchors), + schema: object_schema(needed) + ) + validated_forms(parse_forms(reply), needed, english_forms) + end + + # Translates many independent strings in batched requests (default DEFAULT_BATCH_SIZE per request), returning + # { key => translation } for those that pass the placeholder gate. Strings absent from the result (gate + # failure, blank source, or a malformed batch reply) fall back to human/English at the call site. Pass the + # strings already sorted by key so each batch naturally groups one feature (reader.*, editor.*) — better + # terminology consistency within a batch. 
+ # + # @param strings [Array<Hash>] each { key:, source:, comment: } (string or symbol keys both accepted). + # @param locale [String] target lproj code. + # @param batch_size [Integer] strings per request. + def translate_all(strings, locale:, batch_size: DEFAULT_BATCH_SIZE) + items = batchable_items(strings) + return {} if items.empty? + + items.each_slice(batch_size).with_object({}) do |chunk, out| + out.merge!(translate_batch(chunk, locale)) + end + end + + # Builds Message Batch jobs for many strings across many locales (the async / cheaper bulk path). Returns + # { jobs:, manifest: }: `jobs` ({ custom_id:, system:, user:, schema: }) go to `AnthropicBatch.submit`; + # `manifest` (custom_id => { locale:, numbered: }) is handed back to `collect_batch` with the batch results. + # Pure — no model or SDK here; `AnthropicBatch.submit` adds the model when it builds the requests. + # + # @param strings_by_locale [Hash{String=>Array<Hash>}] locale => array of { key:, source:, comment: }. + def prepare_batch(strings_by_locale, batch_size: DEFAULT_BATCH_SIZE) + jobs = [] + manifest = {} + strings_by_locale.each do |locale, strings| + batchable_items(strings).each_slice(batch_size).with_index do |chunk, index| + numbered = number_chunk(chunk) + custom_id = "#{locale}_#{index}" # must match ^[a-zA-Z0-9_-]{1,64}$; locale codes have hyphens, not underscores, so this stays unique + jobs << batch_job(custom_id, locale, numbered) + manifest[custom_id] = { locale: locale, numbered: numbered } + end + end + { jobs: jobs, manifest: manifest } + end + + # Validates the batch replies and assembles { locale => { key => translation } }. `texts_by_custom_id` comes + # from `AnthropicBatch.results`; `manifest` from `prepare_batch`. A custom_id with no reply (errored batch + # request) or a per-string gate failure simply doesn't appear → the caller falls back to human/English. Pure. 
+ def collect_batch(texts_by_custom_id, manifest) + manifest.each_with_object({}) do |(custom_id, entry), result| + bucket = (result[entry[:locale]] ||= {}) + text = texts_by_custom_id[custom_id] + next if text.nil? + + bucket.merge!(validated_batch(parse_forms(text), entry[:numbered])) + end + end + + # Builds a translator backed by the Anthropic Ruby SDK (`gem 'anthropic'`, in the Gemfile) — needs + # ANTHROPIC_API_KEY in the env. This `complete` lambda is the only part of the file the unit tests don't + # exercise, by design: everything the tests cover stays on the pure side of the injection boundary. + def self.with_anthropic(api_key: ENV.fetch('ANTHROPIC_API_KEY', nil), model: DEFAULT_MODEL, **) + client = AnthropicBatch.client(api_key: api_key) + complete = lambda do |system:, user:, schema: nil| + AnthropicBatch.text_of(client.messages.create(**AnthropicBatch.message_params(model: model, system: system, user: user, schema: schema))) + end + new(complete: complete, **) + rescue LoadError + raise LoadError, "The `anthropic` gem (in the Gemfile) isn't installed — run `bundle install` (or `gem install anthropic`)." + end + + private + + # Shared rule block (brands, format specifiers) with {{language}}/{{brands}} filled in, plus the glossary's + # per-locale terms + register note appended when present. + def render_rules(locale) + language = @language_names.fetch(locale, locale) + rules = TRANSLATION_RULES.gsub('{{language}}') { language }.gsub('{{brands}}') { @glossary.do_not_translate.join(', ') } + guidance = @glossary.guidance(locale) + guidance.empty? ? rules : "#{rules}\n#{guidance}" + end + + def system_prompt(locale) + "#{render_rules(locale)}\n#{SINGLE_OUTPUT}" + end + + def plural_system_prompt(locale) + "#{render_rules(locale)}\n#{PLURAL_OUTPUT}" + end + + def user_prompt(source, context) + parts = [] + parts << "Context: #{context}" if context && !context.to_s.strip.empty? 
+ parts << "English source string:\n#{source}" + parts.join("\n\n") + end + + def plural_user_prompt(english_forms, needed, note, anchors) + sections = [] + sections << "Context: #{note}" if note && !note.to_s.strip.empty? + sections << "English source forms:\n#{format_forms(english_forms)}" + sections << "Already-finalized forms — match their exact word choice and stem, and do not re-output them:\n#{format_forms(anchors)}" unless anchors.empty? + catalog = needed.map { |category| " #{category} - #{CLDR_CUES.fetch(category, category)}" }.join("\n") + sections << "Translate these CLDR plural categories, returning a JSON object keyed exactly by these category names:\n#{catalog}" + sections.join("\n\n") + end + + def format_forms(forms) + forms.map { |category, value| " #{category} = #{value}" }.join("\n") + end + + # Keep only the parsed forms whose placeholders match their English source (the form's own English, or the + # "other" value for categories English doesn't distinguish). Failed/empty forms are dropped → English fallback. + def validated_forms(parsed, needed, english_forms) + other = english_forms['other'] + needed.each_with_object({}) do |category, out| + candidate = clean(parsed[category].to_s) + next if candidate.empty? + + source = english_forms[category] || other + out[category] = candidate if TranslationValidator.placeholders_match?(source, candidate) + end + end + + # JSON Schema for a flat object whose values are all required strings — passed as `output_config.format` to + # make the model emit exactly this shape (structured outputs). additionalProperties must be false; that's the + # only form structured outputs support, and it also stops the model inventing extra keys. 
+ def object_schema(keys) + { + 'type' => 'object', + 'properties' => keys.to_h { |key| [key, { 'type' => 'string' }] }, + 'required' => keys, + 'additionalProperties' => false + } + end + + # Parse the model's JSON reply into { key => value }; tolerate ```json fences; {} on any parse failure + # (every entry then falls back to English — safe, though structured outputs make a failure very unlikely). + def parse_forms(reply) + text = reply.to_s.strip.sub(/\A```(?:json)?\s*/i, '').sub(/```\s*\z/, '').strip + data = JSON.parse(text) + data.is_a?(Hash) ? data : {} + rescue JSON::ParserError + {} + end + + def to_string_keys(hash) + (hash || {}).each_with_object({}) { |(key, value), acc| acc[key.to_s] = value } + end + + # One batched request: number the chunk, ask for a JSON {number => translation}, keep the validated ones. + def translate_batch(chunk, locale) + numbered = number_chunk(chunk) + reply = @complete.call( + system: batch_system_prompt(locale), + user: batch_user_prompt(numbered), + schema: object_schema(numbered.keys.map(&:to_s)) + ) + validated_batch(parse_forms(reply), numbered) + end + + # Map each numbered item to its validated translation by key; drop empty/placeholder-breaking ones. + def validated_batch(parsed, numbered) + numbered.each_with_object({}) do |(index, string), out| + candidate = clean(parsed[index.to_s].to_s) + next if candidate.empty? + + out[string[:key]] = candidate if TranslationValidator.placeholders_match?(string[:source], candidate) + end + end + + def batch_system_prompt(locale) + "#{render_rules(locale)}\n#{BATCH_OUTPUT}" + end + + def batch_user_prompt(numbered) + items = numbered.map { |index, string| batch_item_line(index, string) } + "Translate each numbered UI string below into the target language.\n\n#{items.join("\n")}" + end + + # One prompt line per string: number, the reverse-DNS key (UI-role context), the English, and the dev note. 
+ def batch_item_line(index, string) + line = "[#{index}] " + line << "(#{string[:key]}) " unless string[:key].to_s.empty? + line << string[:source].to_s + line << " — #{string[:comment]}" unless string[:comment].to_s.strip.empty? + line + end + + def normalize_string(string) + { key: field(string, :key), source: field(string, :source), comment: field(string, :comment) } + end + + def field(hash, name) + hash[name] || hash[name.to_s] + end + + # Normalize to { key:, source:, comment: } hashes and drop entries with a blank source (nothing to translate). + def batchable_items(strings) + strings.map { |string| normalize_string(string) }.reject { |string| string[:source].to_s.strip.empty? } + end + + # Number a chunk 1..N → { 1 => string, … } (the index the model maps its JSON reply by). + def number_chunk(chunk) + chunk.each_with_index.to_h { |string, index| [index + 1, string] } + end + + def batch_job(custom_id, locale, numbered) + { + custom_id: custom_id, + system: batch_system_prompt(locale), + user: batch_user_prompt(numbered), + schema: object_schema(numbered.keys.map(&:to_s)) + } + end + + # Models occasionally wrap the answer in quotation marks or add a trailing newline despite the + # "only the translation" instruction; strip those cosmetic wrappers. Anything more substantial (a prose + # explanation that slipped through) almost always breaks the placeholder gate and is discarded there. + def clean(text) + stripped = text.strip + if stripped.length >= 2 && + ((stripped.start_with?('"') && stripped.end_with?('"')) || + (stripped.start_with?('“') && stripped.end_with?('”'))) + stripped = stripped[1...-1].strip + end + stripped + end + + # The dev note plus an explicit CLDR-category cue, so the model produces the correct grammatical plural + # form (e.g. the Polish `few` form) rather than guessing from the English source alone. + def plural_context(note, category) + [note, "Plural category: #{category}. 
Render the grammatically correct plural form for this category."] + .compact.reject(&:empty?).join(' ') + end +end + +# Tiny CLI to eyeball quality against the real model (needs the `anthropic` gem + ANTHROPIC_API_KEY): +# ruby fastlane/lanes/ai_translator.rb fr "You have %1$d new posts" "Notification text. %1$d is the count." +if __FILE__ == $PROGRAM_NAME + locale, source, context = ARGV + abort("usage: ruby #{File.basename(__FILE__)} \"\" [\"\"]") unless locale && source + + result = AITranslator.with_anthropic.translate(source: source, locale: locale, context: context) + puts result.nil? ? '(no safe translation — placeholder check failed or empty reply)' : result +end diff --git a/fastlane/lanes/ai_translator_test.rb b/fastlane/lanes/ai_translator_test.rb new file mode 100644 index 000000000000..1f0e77953547 --- /dev/null +++ b/fastlane/lanes/ai_translator_test.rb @@ -0,0 +1,289 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for AITranslator. Run directly: `ruby fastlane/lanes/ai_translator_test.rb`. +# Uses a canned-reply lambda for `complete:`, so it exercises all of the prompt-building / validation logic +# without the `anthropic` gem or the network. +require 'minitest/autorun' +require_relative 'ai_translator' + +# Exercises prompt-building and the validator gate via a canned-reply `complete:` lambda (no gem / network). +class AITranslatorTest < Minitest::Test # rubocop:disable Metrics/ClassLength -- exhaustive scenario coverage + # Builds a translator whose model "reply" is fixed, optionally recording the prompts it was called with. 
+ def translator(reply:, prompts: nil) + complete = lambda do |system:, user:, schema: nil| + prompts&.replace({ system: system, user: user, schema: schema }) + reply + end + AITranslator.new(complete: complete) + end + + def test_returns_cleaned_translation + t = translator(reply: %("Réglages"\n)) # wrapped in quotes + trailing newline + assert_equal 'Réglages', t.translate(source: 'Settings', locale: 'fr') + end + + def test_accepts_a_reply_that_preserves_placeholders + t = translator(reply: '%2$@ wurde von %1$@ eingeladen') + assert_equal '%2$@ wurde von %1$@ eingeladen', + t.translate(source: '%1$@ invited %2$@', locale: 'de') + end + + def test_rejects_a_reply_that_breaks_placeholders + t = translator(reply: '%1$d Beiträge') # object → int: must be discarded + assert_nil t.translate(source: '%1$@ posts', locale: 'de') + end + + def test_blank_source_makes_no_model_call + called = false + complete = lambda do |**| + called = true + 'x' + end + t = AITranslator.new(complete: complete) + assert_nil t.translate(source: " \n", locale: 'fr') + refute called + end + + def test_blank_reply_returns_nil + assert_nil translator(reply: " \n").translate(source: 'Settings', locale: 'fr') + end + + def test_prompt_carries_language_brands_and_context + prompts = {} + t = translator(reply: 'Publier', prompts: prompts) + t.translate(source: 'Publish', locale: 'fr', context: 'Button to publish a post') + + assert_includes prompts[:system], 'French' + assert_includes prompts[:system], 'WordPress' + assert_includes prompts[:user], 'Button to publish a post' + assert_includes prompts[:user], 'Publish' + end + + def test_for_plural_adapter_maps_arguments_and_cues_category + prompts = {} + t = translator(reply: '%1$d Beiträge pro Woche', prompts: prompts) + out = t.for_plural( + id: 'blogging.reminders.weeklyCount|==|plural.other', + source: '%1$d times a week', + category: 'other', + note: 'Number of blogging reminders per week.', + locale: 'de' + ) + + assert_equal '%1$d Beiträge 
pro Woche', out + assert_includes prompts[:user], 'Number of blogging reminders per week.' + assert_includes prompts[:user], 'other' # the CLDR-category cue reaches the prompt + end + + def test_translate_plural_returns_all_requested_forms + reply = '{"one":"%1$ld słowo","few":"%1$ld słowa","many":"%1$ld słów","other":"%1$ld słowa"}' + out = translator(reply: reply).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one few many other], locale: 'pl', note: 'Number of words.' + ) + assert_equal( + { 'one' => '%1$ld słowo', 'few' => '%1$ld słowa', 'many' => '%1$ld słów', 'other' => '%1$ld słowa' }, out + ) + end + + def test_translate_plural_drops_a_form_that_breaks_placeholders + # 'few' switched %1$ld -> %1$d (length change) — drop it; the rest survive. + reply = '{"one":"%1$ld słowo","few":"%1$d słowa","other":"%1$ld słowa"}' + out = translator(reply: reply).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one few other], locale: 'pl' + ) + assert_equal %w[one other], out.keys.sort + refute out.key?('few') + end + + def test_translate_plural_excludes_anchors_and_passes_them_as_context + prompts = {} + reply = '{"few":"%1$ld słowa","many":"%1$ld słów","other":"%1$ld słowa"}' + out = translator(reply: reply, prompts: prompts).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one few many other], locale: 'pl', anchors: { 'one' => '%1$ld słowo' } + ) + refute out.key?('one') # human-anchored — not produced + assert_equal %w[few many other], out.keys.sort + assert_includes prompts[:user], '%1$ld słowo' # anchor shown to the model as fixed context + end + + def test_translate_plural_falls_back_to_empty_on_bad_json + out = translator(reply: 'sorry — here are your forms!').translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one other], locale: 'pl' + ) + 
assert_empty out + end + + def test_translate_plural_tolerates_json_code_fences + reply = "```json\n{\"one\":\"%1$ld słowo\",\"other\":\"%1$ld słowa\"}\n```" + out = translator(reply: reply).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[one other], locale: 'pl' + ) + assert_equal({ 'one' => '%1$ld słowo', 'other' => '%1$ld słowa' }, out) + end + + def test_translate_plural_validates_fallback_category_against_other + # 'many' has no English form of its own → validated against the English 'other' (%1$ld words). + out = translator(reply: '{"many":"%1$ld słów"}').translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, + categories: %w[many], locale: 'pl' + ) + assert_equal({ 'many' => '%1$ld słów' }, out) + end + + def test_translate_all_maps_keys_and_validates + reply = '{"1":"Réglages","2":"%1$@ articles"}' + out = translator(reply: reply).translate_all( + [{ key: 'settings.title', source: 'Settings', comment: 'Screen title' }, + { key: 'posts.count', source: '%1$@ posts', comment: 'Count' }], + locale: 'fr' + ) + assert_equal({ 'settings.title' => 'Réglages', 'posts.count' => '%1$@ articles' }, out) + end + + def test_translate_all_drops_a_placeholder_breaker + reply = '{"1":"Réglages","2":"%1$d articles"}' # item 2 changed %1$@ -> %1$d + out = translator(reply: reply).translate_all( + [{ key: 'settings.title', source: 'Settings' }, { key: 'posts.count', source: '%1$@ posts' }], + locale: 'fr' + ) + assert_equal({ 'settings.title' => 'Réglages' }, out) + refute out.key?('posts.count') + end + + def test_translate_all_skips_blank_sources + out = translator(reply: '{"1":"Réglages"}').translate_all( + [{ key: 'settings.title', source: 'Settings' }, { key: 'blank', source: ' ' }], + locale: 'fr' + ) + assert_equal({ 'settings.title' => 'Réglages' }, out) + end + + def test_translate_all_chunks_and_merges + calls = 0 + complete = lambda do |**| + calls += 1 + '{"1":"x","2":"y"}' + 
end + out = AITranslator.new(complete: complete).translate_all( + [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }, { key: 'c', source: 'Three' }], + locale: 'fr', batch_size: 2 + ) + assert_equal 2, calls # 3 items / batch 2 = 2 requests + assert_equal({ 'a' => 'x', 'b' => 'y', 'c' => 'x' }, out) + end + + def test_translate_all_bad_json_batch_falls_back + out = translator(reply: 'not json at all').translate_all([{ key: 'a', source: 'One' }], locale: 'fr') + assert_empty out + end + + def test_translate_all_empty_input_makes_no_call + called = false + complete = lambda do |**| + called = true + '{}' + end + assert_empty AITranslator.new(complete: complete).translate_all([], locale: 'fr') + refute called + end + + def test_translate_all_prompt_carries_key_context_and_language + prompts = {} + translator(reply: '{"1":"Publier"}', prompts: prompts).translate_all( + [{ key: 'editor.publish', source: 'Publish', comment: 'Publish button' }], locale: 'fr' + ) + assert_includes prompts[:system], 'French' + assert_includes prompts[:user], 'editor.publish' + assert_includes prompts[:user], 'Publish button' + assert_includes prompts[:user], 'Publish' + end + + def test_translate_plural_passes_a_schema_of_its_categories + prompts = {} + translator(reply: '{"one":"%1$ld słowo","other":"%1$ld słowa"}', prompts: prompts).translate_plural( + english_forms: { 'one' => '%1$ld word', 'other' => '%1$ld words' }, categories: %w[one other], locale: 'pl' + ) + assert_equal %w[one other], prompts[:schema]['required'].sort + assert_equal false, prompts[:schema]['additionalProperties'] + end + + def test_translate_all_passes_a_numbered_schema + prompts = {} + translator(reply: '{"1":"a","2":"b"}', prompts: prompts).translate_all( + [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }], locale: 'fr' + ) + assert_equal %w[1 2], prompts[:schema]['required'].sort + end + + def test_single_translate_passes_no_schema + prompts = {} + translator(reply: 'Publier', prompts: 
prompts).translate(source: 'Publish', locale: 'fr') + assert_nil prompts[:schema] + end + + def test_glossary_terms_and_register_reach_the_prompt + prompts = {} + glossary = Glossary.new(terms: { 'fr' => { 'post' => 'article' } }, register: { 'fr' => 'Use formal vous.' }) + complete = lambda do |system:, user:, schema: nil| + prompts.replace({ system: system, user: user, schema: schema }) + 'Publier' + end + AITranslator.new(complete: complete, glossary: glossary).translate(source: 'Publish', locale: 'fr') + assert_includes prompts[:system], 'post -> article' + assert_includes prompts[:system], 'Register: Use formal vous.' + end + + def test_prepare_batch_chunks_each_locale_into_jobs + prep = translator(reply: '{}').prepare_batch( + { 'fr' => [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }, { key: 'c', source: 'Three' }], + 'de' => [{ key: 'a', source: 'One' }] }, + batch_size: 2 + ) + assert_equal(%w[fr_0 fr_1 de_0], prep[:jobs].map { |job| job[:custom_id] }) + assert_equal %w[1 2], prep[:jobs].first[:schema]['required'].sort + end + + def test_prepare_batch_manifest_maps_custom_id_to_locale_and_strings + prep = translator(reply: '{}').prepare_batch( + { 'fr' => [{ key: 'a', source: 'One' }, { key: 'b', source: 'Two' }] }, batch_size: 25 + ) + assert_equal 'fr', prep[:manifest]['fr_0'][:locale] + assert_equal(%w[a b], prep[:manifest]['fr_0'][:numbered].values.map { |string| string[:key] }) + end + + def test_prepare_batch_custom_ids_match_the_api_pattern + # The Batch API requires custom_id =~ ^[a-zA-Z0-9_-]{1,64}$ — hyphenated locales like pt-BR must still pass. 
+ prep = translator(reply: '{}').prepare_batch({ 'pt-BR' => [{ key: 'a', source: 'One' }] }, batch_size: 25) + prep[:jobs].each { |job| assert_match(/\A[a-zA-Z0-9_-]{1,64}\z/, job[:custom_id]) } + end + + def test_collect_batch_validates_and_groups_by_locale + t = translator(reply: '{}') + prep = t.prepare_batch( + { 'fr' => [{ key: 'settings', source: 'Settings' }, { key: 'count', source: '%1$@ items' }] }, batch_size: 25 + ) + texts = { 'fr_0' => '{"1":"Réglages","2":"%1$@ éléments"}' } + assert_equal({ 'fr' => { 'settings' => 'Réglages', 'count' => '%1$@ éléments' } }, + t.collect_batch(texts, prep[:manifest])) + end + + def test_collect_batch_drops_invalid_and_missing + t = translator(reply: '{}') + prep = t.prepare_batch( + { 'fr' => [{ key: 'settings', source: 'Settings' }, { key: 'count', source: '%1$@ items' }] }, batch_size: 25 + ) + texts = { 'fr_0' => '{"1":"Réglages","2":"%1$d éléments"}' } # item 2 breaks the placeholder + assert_equal({ 'fr' => { 'settings' => 'Réglages' } }, t.collect_batch(texts, prep[:manifest])) + end + + def test_collect_batch_handles_a_missing_batch_reply + t = translator(reply: '{}') + prep = t.prepare_batch({ 'fr' => [{ key: 'a', source: 'One' }] }, batch_size: 25) + assert_equal({ 'fr' => {} }, t.collect_batch({}, prep[:manifest])) + end +end diff --git a/fastlane/lanes/anthropic_batch.rb b/fastlane/lanes/anthropic_batch.rb new file mode 100644 index 000000000000..89ec02dd62e2 --- /dev/null +++ b/fastlane/lanes/anthropic_batch.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +require 'json' + +# SDK glue for the Anthropic Ruby client: the message create-params shape, response-text extraction, and the +# Message Batches submit/poll/collect cycle. 
Isolated here so `AITranslator` stays pure prompt-building + +# validation, and all knowledge of the SDK's request/response shape lives in ONE place — the synchronous path +# (`AITranslator.with_anthropic`) and the async batch path share `message_params` / `text_of`, so the request +# shape can't drift between them. +# +# The batch path is the cost/throughput lever for a full backfill: one async job covering many (locale, chunk) +# requests at ~50% the per-token price. Flow: `AITranslator#prepare_batch` → `submit` → poll `ready?` → +# `results` → `AITranslator#collect_batch`. +module AnthropicBatch + MAX_TOKENS = 8192 # generous so a batch's JSON object can't truncate (a truncated reply fails the JSON parse) + + module_function + + # `messages.create` params for one request; adds output_config (structured outputs) when a schema is given. + def message_params(model:, system:, user:, schema: nil) + params = { + model: model.to_sym, + max_tokens: MAX_TOKENS, + system_: [{ type: 'text', text: system, cache_control: { type: 'ephemeral' } }], + messages: [{ role: 'user', content: user }] + } + params[:output_config] = { format: { type: :json_schema, schema: schema } } unless schema.nil? + params + end + + # Concatenate the text blocks of a Message response. + def text_of(message) + message.content.select { |block| block.type == :text }.map(&:text).join("\n") + end + + # Submit jobs ({ custom_id:, system:, user:, schema: }) as one Message Batch; returns the batch id. + def submit(jobs, client:, model:) + requests = jobs.map do |job| + { custom_id: job[:custom_id], params: message_params(model: model, system: job[:system], user: job[:user], schema: job[:schema]) } + end + client.messages.batches.create(requests: requests).id + end + + # True once the batch has finished processing (results are available to stream). 
+ def ready?(batch_id, client:) + client.messages.batches.retrieve(batch_id).processing_status.to_s == 'ended' + end + + # { custom_id => reply text } for the succeeded requests. `results_streaming` yields raw JSONL lines (one per + # request) — the SDK's lenient coercion passes the line through as a String — so each is parsed here. + # Errored/expired/canceled entries (and any unparseable line) are skipped, so the strings they covered fall + # back to human/English at collect time. + def results(batch_id, client:) + client.messages.batches.results_streaming(batch_id).each_with_object({}) do |line, out| + record = parse_line(line) + result = record['result'] || {} + out[record['custom_id']] = content_text(result.dig('message', 'content')) if result['type'] == 'succeeded' + end + end + + # Parse a JSONL result line into a Hash; {} on anything unparseable. Tolerates a Hash (already parsed). + def parse_line(line) + line.is_a?(String) ? JSON.parse(line) : line + rescue JSON::ParserError + {} + end + + # Join the text blocks of a parsed message-content array (Hash blocks, not the typed objects `text_of` takes). + def content_text(content) + Array(content).select { |block| block['type'] == 'text' }.map { |block| block['text'] }.join("\n") + end + + # Poll until the batch finishes, then return its results (same shape as `results`); returns nil if it hasn't + # finished within `timeout`. `interval`/`timeout` are seconds; `sleeper` is injected so tests run instantly. + # Yields elapsed seconds after each not-ready check (progress reporting). Timeout is approximate (summed + # intervals, not wall clock). + # + # This is the simple synchronous "submit and wait" mechanism. For a huge backfill that may run for a long + # time, prefer submitting, persisting the batch id, and collecting in a later step over blocking on this — + # `submit` returns the id immediately, and `ready?` / `results` let a separate step pick it up. 
+ def await(batch_id, client:, interval: 30, timeout: 3600, sleeper: ->(seconds) { sleep(seconds) }) + waited = 0 + loop do + return results(batch_id, client: client) if ready?(batch_id, client: client) + return nil if waited >= timeout + + yield waited if block_given? + sleeper.call(interval) + waited += interval + end + end + + # A raw Anthropic client for the batch calls (needs the `anthropic` gem + ANTHROPIC_API_KEY). + def client(api_key: ENV.fetch('ANTHROPIC_API_KEY', nil)) + require 'anthropic' + Anthropic::Client.new(api_key: api_key) + end +end diff --git a/fastlane/lanes/anthropic_batch_test.rb b/fastlane/lanes/anthropic_batch_test.rb new file mode 100644 index 000000000000..978aa104203f --- /dev/null +++ b/fastlane/lanes/anthropic_batch_test.rb @@ -0,0 +1,104 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for AnthropicBatch. Run: `ruby fastlane/lanes/anthropic_batch_test.rb`. +# Drives the submit / poll / results glue against a fake client that mimics the SDK's shape (no gem, no network). +require 'minitest/autorun' +require 'json' +require_relative 'anthropic_batch' + +# Exercises the submit / poll / results glue via a fake client that mimics the SDK shape. `create`/`retrieve` +# return typed-ish objects (a Batch struct); `results_streaming` yields raw JSONL strings, as the real SDK does. +class AnthropicBatchTest < Minitest::Test + Batch = Struct.new(:id, :processing_status) + + # Mimics client.messages.batches.{create,retrieve,results_streaming}. 
+ class FakeBatches + attr_reader :created_requests + + def initialize(status:, entries:, ready_after: nil) + @status = status + @entries = entries + @ready_after = ready_after # report :ended only once `retrieve` has been called this many times + @retrieve_calls = 0 + end + + def create(requests:) + @created_requests = requests + Batch.new('batch_1', :in_progress) + end + + def retrieve(_id) + @retrieve_calls += 1 + Batch.new('batch_1', effective_status) + end + + def results_streaming(_id) + @entries + end + + private + + def effective_status + return @status if @ready_after.nil? + + @retrieve_calls >= @ready_after ? :ended : :in_progress + end + end + + def fake_client(status: :ended, entries: [], ready_after: nil) + batches = FakeBatches.new(status: status, entries: entries, ready_after: ready_after) + Struct.new(:messages).new(Struct.new(:batches).new(batches)) + end + + # Build a raw JSONL result line, the way results_streaming yields them. + def succeeded_line(custom_id, json) + JSON.generate('custom_id' => custom_id, + 'result' => { 'type' => 'succeeded', 'message' => { 'content' => [{ 'type' => 'text', 'text' => json }] } }) + end + + def errored_line(custom_id) + JSON.generate('custom_id' => custom_id, 'result' => { 'type' => 'errored' }) + end + + def test_message_params_adds_output_config_only_with_a_schema + bare = AnthropicBatch.message_params(model: 'claude-opus-4-8', system: 's', user: 'u') + refute bare.key?(:output_config) + assert_equal :'claude-opus-4-8', bare[:model] + + with_schema = AnthropicBatch.message_params(model: 'claude-opus-4-8', system: 's', user: 'u', schema: { 'type' => 'object' }) + assert_equal({ format: { type: :json_schema, schema: { 'type' => 'object' } } }, with_schema[:output_config]) + end + + def test_submit_builds_requests_and_returns_the_id + client = fake_client + jobs = [{ custom_id: 'fr_0', system: 'sys', user: 'usr', schema: { 'type' => 'object' } }] + id = AnthropicBatch.submit(jobs, client: client, model: 
'claude-opus-4-8') + + assert_equal 'batch_1', id + request = client.messages.batches.created_requests.first + assert_equal 'fr_0', request[:custom_id] + assert_equal :'claude-opus-4-8', request[:params][:model] + end + + def test_ready_reflects_processing_status + refute AnthropicBatch.ready?('b', client: fake_client(status: :in_progress)) + assert AnthropicBatch.ready?('b', client: fake_client(status: :ended)) + end + + def test_results_returns_text_for_succeeded_requests_only + entries = [succeeded_line('fr_0', '{"1":"Bonjour"}'), errored_line('fr_1')] + out = AnthropicBatch.results('b', client: fake_client(entries: entries)) + assert_equal({ 'fr_0' => '{"1":"Bonjour"}' }, out) + end + + def test_await_polls_until_ready_then_returns_results + client = fake_client(ready_after: 3, entries: [succeeded_line('fr_0', '{"1":"Bonjour"}')]) + out = AnthropicBatch.await('b', client: client, interval: 1, sleeper: ->(_seconds) {}) + assert_equal({ 'fr_0' => '{"1":"Bonjour"}' }, out) + end + + def test_await_returns_nil_on_timeout + client = fake_client(status: :in_progress) + assert_nil AnthropicBatch.await('b', client: client, interval: 30, timeout: 60, sleeper: ->(_seconds) {}) + end +end diff --git a/fastlane/lanes/translation_glossary.rb b/fastlane/lanes/translation_glossary.rb new file mode 100644 index 000000000000..c7ed0d6a5c2e --- /dev/null +++ b/fastlane/lanes/translation_glossary.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +# Terminology configuration for the translator: brand/product names kept verbatim, plus per-locale glossary +# terms (the preferred translation for an English term) and a register/style note. A pure value object — +# SOURCING this data (the WordPress.org per-locale glossaries + style guides, a committed YAML, …) is +# pre-processing done elsewhere and handed in here, so this stays I/O-free and unit-testable. +class Glossary + # Brand / product proper nouns kept verbatim in every locale. 
Deliberately tight to unambiguous proper nouns + # — feature words locales legitimately translate ("Reader", "Stats") are intentionally NOT here. + DEFAULT_DO_NOT_TRANSLATE = [ + 'WordPress', 'WordPress.com', 'Jetpack', 'WooCommerce', 'Woo', + 'Akismet', 'Gravatar', 'Gutenberg', 'Tumblr', 'Simplenote', 'Crowdsignal' + ].freeze + + attr_reader :do_not_translate + + # @param do_not_translate [Array] brand/product names kept verbatim. + # @param terms [Hash{String=>Hash{String=>String}}] locale => { english term => preferred translation }. + # @param register [Hash{String=>String}] locale => style/register note (e.g. "Use the informal 'du' form."). + def initialize(do_not_translate: DEFAULT_DO_NOT_TRANSLATE, terms: {}, register: {}) + @do_not_translate = do_not_translate + @terms = terms + @register = register + end + + # The default brand-only glossary (no per-locale terms or register). + def self.default + new + end + + # Prompt fragment with this locale's preferred terms + register note (or '' if neither applies). Appended to + # the shared rules so the model uses the community's terminology and tone. + def guidance(locale) + [term_guidance(locale), register_note(locale)].reject(&:empty?).join("\n") + end + + private + + def term_guidance(locale) + pairs = @terms[locale] + return '' if pairs.nil? || pairs.empty? + + lines = pairs.map { |english, translation| " #{english} -> #{translation}" } + "Use these exact translations for these terms, consistently:\n#{lines.join("\n")}" + end + + def register_note(locale) + note = @register[locale].to_s.strip + note.empty? ? '' : "Register: #{note}" + end +end diff --git a/fastlane/lanes/translation_glossary_test.rb b/fastlane/lanes/translation_glossary_test.rb new file mode 100644 index 000000000000..4bc29817f3e2 --- /dev/null +++ b/fastlane/lanes/translation_glossary_test.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for Glossary. 
Run directly: `ruby fastlane/lanes/translation_glossary_test.rb`. +require 'minitest/autorun' +require_relative 'translation_glossary' + +# Covers the brand list, per-locale term guidance, register note, the combination, and empty cases. +class GlossaryTest < Minitest::Test + def test_default_is_brands_only + glossary = Glossary.default + assert_includes glossary.do_not_translate, 'WordPress' + assert_equal '', glossary.guidance('fr') + end + + def test_term_guidance_is_per_locale + glossary = Glossary.new(terms: { 'fr' => { 'post' => 'article', 'tag' => 'étiquette' } }) + assert_includes glossary.guidance('fr'), 'post -> article' + assert_includes glossary.guidance('fr'), 'tag -> étiquette' + assert_equal '', glossary.guidance('de') # no terms for de + end + + def test_register_note + glossary = Glossary.new(register: { 'de' => "Use the informal 'du' form." }) + assert_includes glossary.guidance('de'), "Register: Use the informal 'du' form." + end + + def test_terms_and_register_combined + glossary = Glossary.new(terms: { 'fr' => { 'post' => 'article' } }, register: { 'fr' => 'Use formal vous.' }) + guidance = glossary.guidance('fr') + assert_includes guidance, 'post -> article' + assert_includes guidance, 'Register: Use formal vous.' + end + + def test_custom_do_not_translate + assert_equal %w[Foo Bar], Glossary.new(do_not_translate: %w[Foo Bar]).do_not_translate + end +end diff --git a/fastlane/lanes/translation_validator.rb b/fastlane/lanes/translation_validator.rb new file mode 100644 index 000000000000..29b9e9c887fa --- /dev/null +++ b/fastlane/lanes/translation_validator.rb @@ -0,0 +1,108 @@ +# frozen_string_literal: true + +# Format-specifier safety gate for machine-translated strings. +# +# The one correctness invariant for a translated `.strings` / `.xcstrings` value: it must preserve the +# source's printf / NSString format ARGUMENTS exactly — same count, same types, and (for positional +# `%1$@` specifiers) the same index→type mapping. 
The surrounding prose is free to change; the argument +# contract is not. Break it and the app reads the wrong vararg off the stack — a crash or garbage at +# runtime, in a locale the author can't read and CI can't catch. +# +# This is deliberately plain Ruby with no dependencies, so it can gate EVERY machine translation before it +# is written and be unit-tested directly. It's the floor under the `human ?? AI ?? English` resolution in +# `PluralStrings.fold_cell`: an AI cell that fails this check is discarded (the caller falls through to the +# English source, flagged needs_review) rather than shipped. +module TranslationValidator + module_function + + # printf / NSString format specifier: optional positional `N$`, flags, width, precision, length modifier, + # conversion. The space flag (`% d`) is deliberately EXCLUDED — exactly as `CatalogHelper::FORMAT_SPECIFIER` + # excludes it — because `% ` matches inside ordinary prose ("100% done" → "% d"), which would make + # the validator hallucinate an argument in plain text and reject a perfectly good translation. + FORMAT_SPECIFIER = / + % # leading percent + (?:(?\d+)\$)? # optional positional index: 1$, 2$, … + [\#0\-+']* # flags (NOT space — see note above) + (?:\d+|\*)? # field width + (?:\.(?:\d+|\*))? # precision + (?hh|h|ll|l|L|q|z|t|j)? # length modifier + (?[@dDiuUxXoOfFeEgGaAcCsSpn%]) # conversion + /x + + # Conversion char → coarse argument type-class. We compare by class, not by exact letter, so cosmetic + # swaps that don't change the consumed argument (`%x`↔`%X`, `%d`↔`%i`) pass, while a real type change + # that WOULD crash (`%@`→`%d`: object vs integer) is caught. The length modifier is kept separately in the + # signature, because `%d`↔`%ld` is a genuine ABI difference (int vs long) that can crash on mismatch. 
+ TYPE_CLASS = { + '@' => :object, + 'd' => :int, 'D' => :int, 'i' => :int, 'u' => :int, 'U' => :int, + 'x' => :int, 'X' => :int, 'o' => :int, 'O' => :int, + 'f' => :float, 'F' => :float, 'e' => :float, 'E' => :float, + 'g' => :float, 'G' => :float, 'a' => :float, 'A' => :float, + 's' => :cstring, 'S' => :cstring, 'c' => :char, 'C' => :char, 'p' => :pointer + }.freeze + private_constant :TYPE_CLASS + + # Two parallel views of a string's format arguments: + # positional — { index => "length:type-class" }; order-INDEPENDENT (reordering `%1$@`/`%2$@` to suit + # target grammar is the whole point of positional specifiers). + # sequential — [ "length:type-class", … ]; order-DEPENDENT (a non-positional specifier's argument is + # bound by appearance order, so `%@ %d` and `%d %@` are NOT interchangeable). + # `%%` (a literal percent) consumes no argument and is excluded from both. + Signature = Struct.new(:positional, :sequential) + private_constant :Signature + + # True when `candidate` preserves `source`'s format-argument contract. + def placeholders_match?(source, candidate) + mismatch_reason(source, candidate).nil? + end + + # nil when the contract is preserved; otherwise a short human-readable reason (for logging which AI cells + # were rejected and why). + def mismatch_reason(source, candidate) + src = signature(source) + cand = signature(candidate) + + if src.positional != cand.positional + "positional placeholders differ (source: #{describe_positional(src.positional)}; " \ + "translation: #{describe_positional(cand.positional)})" + elsif src.sequential != cand.sequential + "sequential placeholders differ (source: #{src.sequential.inspect}; translation: #{cand.sequential.inspect})" + end + end + + # Parsed argument signature of `str` (see the Signature struct above). 
+ def signature(str) + positional = {} + sequential = [] + each_specifier(str.to_s) do |match| + next if match[:conv] == '%' # literal %% — not an argument + + token = "#{match[:length]}:#{TYPE_CLASS.fetch(match[:conv], match[:conv])}" + if match[:position] + positional[match[:position].to_i] = token + else + sequential << token + end + end + Signature.new(positional, sequential) + end + + # Yields each format-specifier MatchData in appearance order. Scans forward from the end of each match, so + # adjacent specifiers (`%d%@`) and specifiers embedded in text are all found. + def each_specifier(str) + pos = 0 + while (match = FORMAT_SPECIFIER.match(str, pos)) + yield match + pos = match.end(0) + end + end + private_class_method :each_specifier + + def describe_positional(positional) + return 'none' if positional.empty? + + positional.sort.map { |index, token| "%#{index}$(#{token})" }.join(', ') + end + private_class_method :describe_positional +end diff --git a/fastlane/lanes/translation_validator_test.rb b/fastlane/lanes/translation_validator_test.rb new file mode 100644 index 000000000000..8cbd91a2f38f --- /dev/null +++ b/fastlane/lanes/translation_validator_test.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +# Pure-Ruby unit suite for TranslationValidator. Run directly: `ruby fastlane/lanes/translation_validator_test.rb`. +require 'minitest/autorun' +require_relative 'translation_validator' + +# Exercises the format-specifier contract: positional reordering allowed, type/length/count changes rejected. +class TranslationValidatorTest < Minitest::Test + V = TranslationValidator + + def test_no_specifiers_anything_matches + assert V.placeholders_match?('Settings', 'Réglages') + assert V.placeholders_match?('', '') + end + + def test_positional_reordering_is_allowed + # Reordering %1$@ / %2$@ to suit target grammar is the whole point of positional specifiers. 
+ assert V.placeholders_match?('%1$@ invited %2$@', '%2$@ wurde von %1$@ eingeladen') + end + + def test_positional_type_change_is_rejected + # %1$@ (object) → %1$d (int) would read the wrong vararg — a crash vector. + refute V.placeholders_match?('%1$@ posts', '%1$d posts') + end + + def test_sequential_order_must_be_preserved + refute V.placeholders_match?('%@: %d', '%d : %@') # flipped non-positional args + assert V.placeholders_match?('%@: %d', 'Total %@: %d') # same order, prose changed + end + + def test_count_mismatch_is_rejected + refute V.placeholders_match?('Hello %@', 'Bonjour') # dropped an argument + refute V.placeholders_match?('Hello %@', 'Bonjour %@ %@') # added an argument + end + + def test_literal_percent_is_ignored + assert V.placeholders_match?('100% done', '100% terminé') # no real specifier (space after %) + assert V.placeholders_match?('%d%% complete', '%d%% terminé') # %% literal, %d preserved + refute V.placeholders_match?('%d%% complete', '%% terminé') # dropped the %d argument + end + + def test_length_modifier_change_is_rejected + # %ld (long) → %d (int) is a genuine ABI difference that can crash on mismatch. 
+ refute V.placeholders_match?('%1$ld words', '%1$d words') + assert V.placeholders_match?('%1$ld words', '%1$ld mots') + end + + def test_case_only_conversion_change_is_allowed + assert V.placeholders_match?('%x', '%X') # cosmetic; same integer type-class + end + + def test_mismatch_reason_is_descriptive + reason = V.mismatch_reason('%1$@ posts', '%1$d posts') + refute_nil reason + assert_includes reason, 'positional' + + assert_nil V.mismatch_reason('%1$@ invited %2$@', '%2$@ a invité %1$@') + end +end From 74128501416124f3961c99d3c6756afd54be5fcd Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Thu, 25 Jun 2026 22:55:21 -0600 Subject: [PATCH 2/2] Localization: run the AI translation tooling unit tests in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pure-Ruby unit suites (TranslationValidator, Glossary, AnthropicBatch, AITranslator) weren't executed by any pipeline step — the "Unit Tests" jobs are the Xcode/XCTest suites, and rubocop (via Danger) only lints them. Add a lightweight Buildkite step that runs each fastlane/lanes/*_test.rb with plain ruby (stdlib minitest — no Xcode, no app build, no bundle). Runs unconditionally rather than behind should-skip-job.sh --job-type validation, which skips on tooling-only changes — i.e. exactly the PRs that touch these files. --- .../commands/test-localization-tooling.sh | 22 +++++++++++++++++++ .buildkite/pipeline.yml | 7 ++++++ 2 files changed, 29 insertions(+) create mode 100755 .buildkite/commands/test-localization-tooling.sh diff --git a/.buildkite/commands/test-localization-tooling.sh b/.buildkite/commands/test-localization-tooling.sh new file mode 100755 index 000000000000..2e129cd95470 --- /dev/null +++ b/.buildkite/commands/test-localization-tooling.sh @@ -0,0 +1,22 @@ +#!/bin/bash -eu + +# Runs the localization tooling's pure-Ruby unit suites (stdlib minitest — no Xcode, no app build, no bundle). 
+# Intentionally always runs (no should-skip-job guard): these guard the fastlane localization helpers, and the +# `validation` skip rule skips on tooling-only changes — exactly when these tests matter most. + +echo "--- :test_tube: Localization tooling unit tests" + +shopt -s nullglob +tests=(fastlane/lanes/*_test.rb) +if [[ ${#tests[@]} -eq 0 ]]; then + echo "No *_test.rb files found under fastlane/lanes/." + exit 0 +fi + +status=0 +for test in "${tests[@]}"; do + echo "+++ :ruby: ${test}" + ruby "${test}" || status=1 +done + +exit "${status}" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6cb0910aa3fe..9ef4cd1b7e58 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -163,6 +163,13 @@ steps: - github_commit_status: context: "Verify String Catalog Coverage" + - label: ":test_tube: Localization Tooling Unit Tests" + command: .buildkite/commands/test-localization-tooling.sh + plugins: [$CI_TOOLKIT_PLUGIN] + notify: + - github_commit_status: + context: "Localization Tooling Unit Tests" + ################# # Claude Build Analysis - dynamically uploaded so Build result conditions evaluate at runtime after the wait #################