11import os
2- import sys
32import json
43import logging
54from anthropic import Anthropic
6- from translation import Translation
75from iso639 import Lang
86from iso639 .exceptions import InvalidLanguageValue
97
@@ -28,78 +26,78 @@ def narrow_down_source_and_target(source_content, target_content, missing_keys):
2826 """
2927 # Find common keys between source and target
3028 common_keys = [k for k in source_content if k in target_content ]
31-
29+
3230 # Find the maximum number of parts in any missing key
3331 max_parts = 0
3432 for key in missing_keys :
3533 parts = key .split ('.' )
3634 max_parts = max (max_parts , len (parts ))
37-
35+
3836 # Start with an empty set of related keys
3937 related_keys = set ()
40-
38+
4139 # Iterate from max_parts-1 down to 0
4240 for num_matching_parts in range (max_parts - 1 , - 1 , - 1 ):
4341 # For each common key, check if it matches any missing key with at least num_matching_parts
4442 for common_key in common_keys :
4543 # Skip keys we've already added
4644 if common_key in related_keys :
4745 continue
48-
46+
4947 common_parts = common_key .split ('.' )
50-
48+
5149 # Check against each missing key
5250 for missing_key in missing_keys :
5351 missing_parts = missing_key .split ('.' )
54-
52+
5553 # Check if we have at least num_matching_parts matching parts
5654 matches = 0
5755 for i in range (min (len (common_parts ), len (missing_parts ))):
5856 if common_parts [i ] == missing_parts [i ]:
5957 matches += 1
6058 else :
6159 break
62-
60+
6361 # If we have enough matching parts, add this key to our context
6462 if matches >= num_matching_parts :
6563 related_keys .add (common_key )
6664 break
67-
65+
6866 # If we have enough context keys, we can stop
6967 if len (related_keys ) >= 5 :
7068 break
71-
69+
7270 # Convert to list and limit to 5 keys if needed
7371 related_keys = list (related_keys )
7472 if len (related_keys ) > 5 :
7573 related_keys = related_keys [:5 ]
76-
74+
7775 # Create narrowed down dictionaries with only the keys to keep
7876 narrowed_source = {k : source_content [k ] for k in related_keys if k in source_content }
7977 narrowed_target = {k : target_content [k ] for k in related_keys if k in target_content }
80-
78+
8179 # Add missing keys to the source dictionary
8280 for key in missing_keys :
8381 if key in source_content :
8482 narrowed_source [key ] = source_content [key ]
85-
83+
8684 return narrowed_source , narrowed_target , missing_keys
8785
8886def fill_up_gaps_in_content (source_content , target_content , missing_keys_list , language_code ):
8987 # Get API key from environment if not provided
9088 api_key = os .getenv ('ANTHROPIC_API_KEY' )
9189 if not api_key :
9290 raise ValueError ("API key must be set in ANTHROPIC_API_KEY environment variable" )
93-
91+
9492 # Create client
9593 client = Anthropic (api_key = api_key )
96-
94+
9795 # Get the full language name
9896 language_name = get_language_name (language_code )
99-
97+
10098 # Check if target has any existing translations
10199 has_existing_translations = len (target_content ) > 0
102-
100+
103101 if has_existing_translations :
104102 # Original prompt when we have existing translations to learn from
105103 prompt = f"""Here are two translation files:
@@ -140,7 +138,7 @@ def fill_up_gaps_in_content(source_content, target_content, missing_keys_list, l
140138 schema_properties = {}
141139 for key in missing_keys_list :
142140 schema_properties [key ] = {"type" : "string" }
143-
141+
144142 # Build the required keys list
145143 required_keys = missing_keys_list
146144
@@ -163,61 +161,61 @@ def fill_up_gaps_in_content(source_content, target_content, missing_keys_list, l
163161 }
164162 )
165163 logger .debug (message .content )
166-
164+
167165 return json .loads (message .content [0 ].text .strip ())
168166
def fill_up_translation(source_translation, target_translation, language_code, output_path, batch_size=10):
    """Fill in every key that is missing from the target translation, batch by batch.

    The keys present in ``source_translation.entries`` but absent from
    ``target_translation.entries`` (as reported by ``find_missing_keys``) are
    processed in slices of ``batch_size``. For each slice the source/target
    content is narrowed with ``narrow_down_source_and_target``, the missing
    values are requested through ``fill_up_gaps_in_content``, and the results
    are written into ``target_translation``. The target is saved to
    ``output_path`` after every batch, so finished batches are already on disk
    if a later batch fails.

    Args:
        source_translation: Object exposing the source entries via ``.entries``.
        target_translation: Object exposing ``.entries``, ``.set(key, value)``
            and ``.save_as_json(path)``; it is updated in place.
        language_code: Code of the target language, passed on to
            ``get_language_name`` and ``fill_up_gaps_in_content``.
        output_path: Destination handed to ``target_translation.save_as_json``.
        batch_size: Maximum number of missing keys sent per request. Must be
            at least 1.

    Returns:
        The same ``target_translation`` object that was passed in.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Previously 0 caused a
            ZeroDivisionError and a negative value made the batching loop run
            forever, issuing requests with empty batches.
    """
    # Validate before touching anything else: an invalid batch size can never
    # produce a terminating batching loop.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")

    source_content = source_translation.entries
    target_content = target_translation.entries

    all_missing_keys = find_missing_keys(source_content, target_content)

    if not all_missing_keys:
        logger.debug("No missing keys found. Translation is complete.")
        return target_translation

    total_missing = len(all_missing_keys)
    language_name = get_language_name(language_code)
    logger.info(f"Found { total_missing } missing keys for { language_name } ({ language_code } ). Processing in batches of { batch_size } .")

    # Ceiling division; constant for the whole run, so computed once.
    total_batches = (total_missing + batch_size - 1) // batch_size

    for batch_num, batch_start in enumerate(range(0, total_missing, batch_size), start=1):
        # Slicing clamps at the end of the list, so the last batch may be shorter.
        batch_missing_keys = all_missing_keys[batch_start:batch_start + batch_size]
        logger.info(f"Processing batch { batch_num } /{ total_batches } ({ len (batch_missing_keys )} keys)")

        # Narrow down source and target content for this batch
        narrowed_source, narrowed_target, _ = narrow_down_source_and_target(
            source_content, target_content, batch_missing_keys
        )

        # Fill up translations using Claude for this batch
        partial_json = fill_up_gaps_in_content(narrowed_source, narrowed_target, batch_missing_keys, language_code)

        # Only accept keys that were actually requested in this batch; anything
        # else in the response is ignored.
        requested_keys = set(batch_missing_keys)
        for key, value in partial_json.items():
            if key not in requested_keys:
                continue
            logger.info(f"Filling up key: { key } ")
            target_translation.set(key, value)
            # Update target_content so subsequent batches see this translation
            target_content[key] = value

        # Save to disk after each batch so completed work survives a later failure
        target_translation.save_as_json(output_path)
        logger.info(f"Batch { batch_num } /{ total_batches } completed and saved to { output_path } ")

    # Final summary: re-check against the updated target to detect keys the
    # responses did not cover.
    remaining_missing = find_missing_keys(source_content, target_translation.entries)
    if remaining_missing:
        logger.warning(f"Translation completed with { len (remaining_missing )} keys still missing")
    else:
        logger.info("All translations completed successfully!")

    return target_translation
0 commit comments