This repository was archived by the owner on May 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslator.py
More file actions
30 lines (26 loc) · 1.46 KB
/
translator.py
File metadata and controls
30 lines (26 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import argparse
import tensorflow as tf
from transformers import MarianTokenizer, TFAutoModelForSeq2SeqLM
def main(args):
    """Run an interactive translation prompt until the user quits.

    Loads the ``Helsinki-NLP/opus-mt-<source>-<target>`` tokenizer and model
    once, then repeatedly reads a line from stdin, translates it with
    ``translate_text`` and prints the result.

    The loop ends when the user types ``exit`` (case-insensitive, surrounding
    whitespace ignored), when stdin reaches end-of-file, or on Ctrl-C.

    Args:
        args: Parsed command-line namespace exposing ``source_lang`` and
            ``target_lang`` attributes; both are interpolated into the
            model name.
    """
    # Load the pre-trained model and tokenizer for translation.
    model_name = f"Helsinki-NLP/opus-mt-{args.source_lang}-{args.target_lang}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

    # The prompt never changes, so build it once outside the loop.
    prompt = f"Enter text in {args.source_lang} (or 'exit' to quit): "
    while True:
        try:
            input_text = input(prompt)
        except (EOFError, KeyboardInterrupt):
            # Piped/closed stdin or Ctrl-C: finish the prompt line and leave
            # quietly instead of crashing with a traceback.
            print()
            break

        input_text = input_text.strip()
        if not input_text:
            # Nothing to translate; ask again rather than feeding the model
            # an empty sequence.
            continue
        if input_text.lower() == "exit":
            break

        translation = translate_text(input_text, model, tokenizer)
        print(f"Translation: {translation}")
def translate_text(input_text, model, tokenizer, *, max_input_length=512, max_output_length=100):
    """Translate a single piece of text and return the decoded string.

    The text is passed to the tokenizer with its original casing: lowercasing
    it would discard information (proper nouns, acronyms, sentence starts)
    before the model ever sees it.

    Args:
        input_text: Source-language text to translate.
        model: Object exposing ``generate(**encoded, max_length=...,
            num_return_sequences=...)``.
        tokenizer: Callable that encodes text into a mapping of model inputs
            and exposes ``batch_decode``.
        max_input_length: Inputs longer than this many tokens are truncated.
        max_output_length: Upper bound passed to ``generate`` as
            ``max_length``.

    Returns:
        The first decoded sequence with special tokens removed, or an empty
        string when ``input_text`` is empty or whitespace-only.
    """
    if not input_text.strip():
        # Nothing to translate; skip the model call entirely.
        return ""

    # Calling the tokenizer (rather than ``encode``) yields every model input
    # it produces, e.g. the attention mask, which is forwarded to ``generate``.
    encoded = tokenizer(
        input_text,
        return_tensors="tf",
        padding=True,
        max_length=max_input_length,
        truncation=True,
    )
    generated = model.generate(
        **encoded,
        max_length=max_output_length,
        num_return_sequences=1,
    )
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0]
if __name__ == "__main__":
    # Command-line entry point: collect the language pair, then hand the
    # parsed namespace to main().
    cli = argparse.ArgumentParser(description="Simple Transformer Translator")
    cli.add_argument(
        "--source_lang",
        type=str,
        default="en",
        help="Source language code (e.g. 'en' for English)",
    )
    cli.add_argument(
        "--target_lang",
        type=str,
        default="fr",
        help="Target language code (e.g. 'fr' for French)",
    )
    main(cli.parse_args())