Update app.py
app.py
CHANGED
@@ -44,7 +44,7 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Lego-MT/Lego-MT", "HPLT", "HPLT-O
     "t5-small", "t5-base", "t5-large",
     "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
     "google/madlad400-3b-mt", "Heng666/madlad400-3b-mt-ct2", "Heng666/madlad400-3b-mt-ct2-int8", "Heng666/madlad400-7b-mt-ct2-int8",
-    "BSC-LT/salamandraTA-2b-instruct", "BSC-LT/salamandraTA-7b-instruct",
+    "BSC-LT/salamandraTA-2b-instruct", "BSC-LT/salamandraTA-7b-instruct",
     "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
     "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
     "HuggingFaceTB/SmolLM3-3B",
@@ -214,35 +214,6 @@ class Translators:
     pipe = pipeline("text-generation", model=self.model_name)
     messages = [{"role": "user", "content": f"Translate the following text from {self.sl} into {self.tl}.\n{self.sl}: {self.input_text} \n{self.tl}:"}]
     return pipe(messages, max_new_tokens=512, early_stopping=True, num_beams=5)[0]["generated_text"][1]["content"]
-
-    def salamandrata(self):
-        text = f"Translate the following text from {self.sl} into {self.tl}.\n{self.sl}: {self.input_text} \n{self.tl}:"
-        tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-        model = AutoModelForCausalLM.from_pretrained(
-            self.model_name,
-            device_map="auto",
-            dtype=torch.bfloat16
-        )
-        message = [{"role": "user", "content": text}]
-        from datetime import datetime
-        date_string = datetime.today().strftime('%Y-%m-%d')
-        prompt = tokenizer.apply_chat_template(
-            message,
-            tokenize=False,
-            add_generation_prompt=True,
-            date_string=date_string
-        )
-        inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
-        attention_mask = inputs["attention_mask"]
-        input_length = inputs.shape[1]
-        outputs = model.generate(input_ids=inputs.to(model.device),
-                                 max_new_tokens=512,
-                                 early_stopping=True,
-                                 num_beams=5,
-                                 attention_mask=attention_mask,
-                                 pad_token_id=tokenizer.eos_token_id,
-                                 eos_token_id=tokenizer.eos_token_id)
-        return tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)

     def HelsinkiNLP_mulroa(self):
         try:
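
Note on the removal above: as written, salamandrata() could never complete. tokenizer.encode(..., return_tensors="pt") returns a bare tensor, so the following inputs["attention_mask"] lookup raises a TypeError before generate() is ever reached. The retained salamandratapipe() path (the context lines at the top of this hunk) lets pipeline(...) handle tokenization and masking instead. For reference, a minimal corrected sketch of the standalone path, assuming one of the SalamandraTA checkpoints from the models list and illustrative language strings (not the app's actual code):

# Hedged sketch only: the removed method's prompt flow, but tokenizing via
# tokenizer(...) so a dict with both input_ids and attention_mask comes back.
import torch
from datetime import datetime
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "BSC-LT/salamandraTA-2b-instruct"           # from the models list above
sl, tl, input_text = "English", "German", "Good morning."  # illustrative values

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)

message = [{"role": "user", "content": f"Translate the following text from {sl} into {tl}.\n{sl}: {input_text} \n{tl}:"}]
prompt = tokenizer.apply_chat_template(
    message,
    tokenize=False,
    add_generation_prompt=True,
    date_string=datetime.today().strftime('%Y-%m-%d'),  # the removed code passed a date to the chat template
)
inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,                    # input_ids and attention_mask together
    max_new_tokens=512,
    early_stopping=True,
    num_beams=5,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
)
input_length = inputs["input_ids"].shape[1]
print(tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True))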
@@ -621,9 +592,6 @@ def translate_text(model_name: str, s_language: str, t_language: str, input_text
     elif model_name == 'Google':
         translated_text = Translators(model_name, sl, tl, input_text).google()

-    elif "academic" in model_name.lower():
-        translated_text = Translators(model_name, s_language, t_language, input_text).salamandrata()
-
     elif "salamandra" in model_name.lower():
         translated_text = Translators(model_name, s_language, t_language, input_text).salamandratapipe()

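With the "academic" branch gone, any model whose name contains "salamandra" now routes through salamandratapipe(). A minimal usage sketch of that surviving path, with illustrative inputs and assuming a transformers version whose text-generation pipeline accepts chat-format messages (the pipeline returns the full message history, so the reply is the second entry):

# Hedged sketch mirroring the retained salamandratapipe() body.
from transformers import pipeline

pipe = pipeline("text-generation", model="BSC-LT/salamandraTA-2b-instruct")
messages = [{"role": "user", "content": "Translate the following text from English into Spanish.\nEnglish: Good morning. \nSpanish:"}]
result = pipe(messages, max_new_tokens=512, early_stopping=True, num_beams=5)
# generated_text holds the chat history: index 0 is the user turn, index 1 the reply.
print(result[0]["generated_text"][1]["content"])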