"""Gradio web demo for English <-> Khmer translation with a seq2seq model."""

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, NllbTokenizerFast

# Load model
model_id = "ClaudBarbara/Open_Access_Khmer"
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
tokenizer = NllbTokenizerFast.from_pretrained(model_id)

# Token budget applied both to the (truncated) input and to the generated output.
MAX_LENGTH = 512
# Beam width used by ``model.generate``.
NUM_BEAMS = 4

# UI labels for the two directions; shared by the Radio widget and the
# lookup table so the two cannot drift apart.
EN_TO_KM = "English to Khmer"
KM_TO_EN = "Khmer to English"

# Direction label -> (source language code, target language code).
_LANG_PAIRS = {
    EN_TO_KM: ("eng_Latn", "khm_Khmr"),
    KM_TO_EN: ("khm_Khmr", "eng_Latn"),
}


def translate(text: str, direction: str) -> str:
    """Translate *text* between English and Khmer.

    Args:
        text: The text to translate. Input longer than ``MAX_LENGTH`` tokens
            is truncated by the tokenizer before translation.
        direction: ``"English to Khmer"`` selects English -> Khmer; any other
            value is treated as Khmer -> English.

    Returns:
        The decoded translation with special tokens removed, or an empty
        string when *text* is ``None``, empty, or only whitespace.
    """
    # Nothing to translate: skip tokenization and generation entirely.
    if text is None or not text.strip():
        return ""

    # Unknown directions fall back to Khmer -> English.
    src_lang, tgt_lang = _LANG_PAIRS.get(direction, _LANG_PAIRS[KM_TO_EN])

    # NOTE(review): this mutates the shared module-level tokenizer; if requests
    # are ever served concurrently with different directions this could race --
    # confirm against the deployment's Gradio queue settings.
    tokenizer.src_lang = src_lang
    inputs = tokenizer(
        text,
        return_tensors="pt",
        max_length=MAX_LENGTH,
        truncation=True,
    )

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # Start decoding with the target-language token.
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
            max_length=MAX_LENGTH,
            num_beams=NUM_BEAMS,
        )

    # Only the first returned sequence is decoded.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


demo = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Input Text", lines=5),
        gr.Radio([EN_TO_KM, KM_TO_EN], label="Direction", value=EN_TO_KM),
    ],
    outputs=gr.Textbox(label="Translation", lines=5),
    title="Khmer Legal Bridge",
    description="English-Khmer Legal Translation",
)

demo.launch()