Update app.py
Browse files
app.py
CHANGED
|
@@ -1,14 +1,24 @@
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, logging, AutoModelForCausalLM
|
| 3 |
import torch
|
| 4 |
import os
|
| 5 |
import httpx
|
| 6 |
|
| 7 |
-
logging.set_verbosity_error()
|
| 8 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 9 |
# Language options and mappings
|
| 10 |
options = ["German", "Romanian", "English", "French", "Spanish", "Italian"]
|
| 11 |
langs = {"English": "en", "Romanian": "ro", "German": "de", "French": "fr", "Spanish": "es", "Italian": "it"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
models = ["Helsinki-NLP", "Argos", "t5-base", "t5-small", "t5-large", "Unbabel/Tower-Plus-2B",
|
| 13 |
"Unbabel/TowerInstruct-Mistral-7B-v0.2", "winninghealth/WiNGPT-Babel-2", "Google"]
|
| 14 |
allmodels = ["Helsinki-NLP",
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import polars as pl
|
| 3 |
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, logging, AutoModelForCausalLM
|
| 4 |
import torch
|
| 5 |
import os
|
| 6 |
import httpx
|
| 7 |
|
|
|
|
| 8 |
# Select the compute device: CUDA when torch reports it as available, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
| 9 |
# Language options and mappings
|
| 10 |
# Language display names; each one is a key of the `langs` name -> code mapping defined next.
options = ["German", "Romanian", "English", "French", "Spanish", "Italian"]
|
| 11 |
# Language display name -> two-letter code.
# NOTE(review): `langs` is rebound to a plain list of language names further down
# (the `langs = list(favourite_langs.keys())` assignment), so this dict is discarded
# before anything visible here reads it — confirm whether it is still needed.
langs = {"English": "en", "Romanian": "ro", "German": "de", "French": "fr", "Spanish": "es", "Italian": "it"}
|
| 12 |
+
# Languages placed at the front of the language list, in this order.
# NOTE(review): the "-----" entry presumably serves as a visual separator between the
# favourites and the full list — confirm that selecting it is handled downstream.
favourite_langs = {"German": "de", "Romanian": "ro", "English": "en", "-----": "-----"}
|
| 13 |
+
# Load the ISO language table. The path is relative, so it is resolved against the
# process's current working directory rather than this file's location.
df = pl.read_parquet("isolanguages.parquet")
|
| 14 |
+
# Skip the first row, keep only rows whose "ISO639-1" value is not the empty string,
# and materialise the result as a list of row tuples.
# NOTE(review): presumably the first row is a header/placeholder entry — confirm.
non_empty_isos = (
    df.slice(1)
    .filter(pl.col("ISO639-1") != "")
    .rows()
)
|
| 15 |
+
# all_langs = languagecodes.iso_languages_byname
|
| 16 |
+
# Language name -> tuple of its three codes, e.g. {'Romanian': ('ro', 'rum', 'ron')}
all_langs = {row[0]: (row[1], row[2], row[3]) for row in non_empty_isos}
|
| 17 |
+
# iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
|
| 18 |
+
# Reverse lookup from the first code column to the language name,
# e.g. {'ro': 'Romanian', 'de': 'German'}
iso1_to_name = {row[1]: row[0] for row in non_empty_isos}
|
| 19 |
+
# Start the language list with the favourites (including their "-----" entry).
# Iterating a dict yields its keys, so no explicit .keys() call is needed.
# NOTE(review): this rebinds `langs`, previously a name -> code dict, to a list of
# names — confirm no later code still indexes `langs` by language name.
langs = list(favourite_langs)
|
| 20 |
+
# Append every language name from the ISO table after the favourites. extend()
# accepts any iterable and a dict iterates over its keys, so no intermediate list
# is built. Names that are also favourites appear a second time here, because
# nothing de-duplicates the two sources.
langs.extend(all_langs)
|
| 21 |
+
|
| 22 |
models = ["Helsinki-NLP", "Argos", "t5-base", "t5-small", "t5-large", "Unbabel/Tower-Plus-2B",
|
| 23 |
"Unbabel/TowerInstruct-Mistral-7B-v0.2", "winninghealth/WiNGPT-Babel-2", "Google"]
|
| 24 |
allmodels = ["Helsinki-NLP",
|