Spaces:
Running
Running
map_new_names (#10)
Browse files- handle_new_names (7e8e535c0cf34b319162b659dd1adb17474e54b5)
- data/dataset_handler.py +18 -7
data/dataset_handler.py
CHANGED
|
@@ -34,8 +34,12 @@ DEPRECATED_VIDORE_2_DATASETS_KEYWORDS = [
|
|
| 34 |
"rse_restaurant",
|
| 35 |
"mit_biomedical",
|
| 36 |
"economics_macro",
|
|
|
|
|
|
|
|
|
|
| 37 |
]
|
| 38 |
|
|
|
|
| 39 |
def get_datasets_nickname(dataset_name) -> str:
|
| 40 |
if dataset_name == "VidoreArxivQARetrieval":
|
| 41 |
return "ArxivQA"
|
|
@@ -70,6 +74,7 @@ def get_datasets_nickname(dataset_name) -> str:
|
|
| 70 |
else:
|
| 71 |
raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")
|
| 72 |
|
|
|
|
| 73 |
def deprecated_get_datasets_nickname(dataset_name) -> str:
|
| 74 |
if "arxivqa" in dataset_name:
|
| 75 |
return "ArxivQA"
|
|
@@ -99,25 +104,31 @@ def deprecated_get_datasets_nickname(dataset_name) -> str:
|
|
| 99 |
elif "healthcare_industry" in dataset_name:
|
| 100 |
return "Healthcare Industry"
|
| 101 |
|
| 102 |
-
elif "restaurant_esg" in dataset_name:
|
| 103 |
return "ESG Restaurant Human"
|
| 104 |
|
| 105 |
-
elif "rse_restaurant" in dataset_name and "multilingual" in dataset_name
|
|
|
|
|
|
|
| 106 |
return "ESG Restaurant Synthetic Multilingual"
|
| 107 |
|
| 108 |
-
elif "rse_restaurant" in dataset_name:
|
| 109 |
return "ESG Restaurant Synthetic"
|
| 110 |
|
| 111 |
-
elif "mit_biomedical" in dataset_name and "multilingual" in dataset_name
|
|
|
|
|
|
|
| 112 |
return "MIT Biomedical Multilingual"
|
| 113 |
|
| 114 |
-
elif "mit_biomedical" in dataset_name:
|
| 115 |
return "MIT Biomedical"
|
| 116 |
|
| 117 |
-
elif "economics_macro" in dataset_name and "multilingual" in dataset_name
|
|
|
|
|
|
|
| 118 |
return "Economics Macro Multilingual"
|
| 119 |
|
| 120 |
-
elif "economics_macro" in dataset_name:
|
| 121 |
return "Economics Macro"
|
| 122 |
|
| 123 |
else:
|
|
|
|
| 34 |
"rse_restaurant",
|
| 35 |
"mit_biomedical",
|
| 36 |
"economics_macro",
|
| 37 |
+
"biomedical_lectures",
|
| 38 |
+
"esg_reports",
|
| 39 |
+
"economics_reports",
|
| 40 |
]
|
| 41 |
|
| 42 |
+
|
| 43 |
def get_datasets_nickname(dataset_name) -> str:
|
| 44 |
if dataset_name == "VidoreArxivQARetrieval":
|
| 45 |
return "ArxivQA"
|
|
|
|
| 74 |
else:
|
| 75 |
raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")
|
| 76 |
|
| 77 |
+
|
| 78 |
def deprecated_get_datasets_nickname(dataset_name) -> str:
|
| 79 |
if "arxivqa" in dataset_name:
|
| 80 |
return "ArxivQA"
|
|
|
|
| 104 |
elif "healthcare_industry" in dataset_name:
|
| 105 |
return "Healthcare Industry"
|
| 106 |
|
| 107 |
+
elif ("restaurant_esg" in dataset_name) or ("esg_reports_human" in dataset_name):
|
| 108 |
return "ESG Restaurant Human"
|
| 109 |
|
| 110 |
+
elif ("rse_restaurant" in dataset_name and "multilingual" in dataset_name) or (
|
| 111 |
+
"esg_reports" in dataset_name and not "_eng_" in dataset_name
|
| 112 |
+
):
|
| 113 |
return "ESG Restaurant Synthetic Multilingual"
|
| 114 |
|
| 115 |
+
elif ("rse_restaurant" in dataset_name) or ("esg_reports" in dataset_name and "_eng_" in dataset_name):
|
| 116 |
return "ESG Restaurant Synthetic"
|
| 117 |
|
| 118 |
+
elif ("mit_biomedical" in dataset_name and "multilingual" in dataset_name) or (
|
| 119 |
+
"biomedical_lectures" in dataset_name and not "_eng_" in dataset_name
|
| 120 |
+
):
|
| 121 |
return "MIT Biomedical Multilingual"
|
| 122 |
|
| 123 |
+
elif ("mit_biomedical" in dataset_name) or ("biomedical_lectures" in dataset_name and "_eng_" in dataset_name):
|
| 124 |
return "MIT Biomedical"
|
| 125 |
|
| 126 |
+
elif ("economics_macro" in dataset_name and "multilingual" in dataset_name) or (
|
| 127 |
+
"economics_reports" in dataset_name and not "_eng_" in dataset_name
|
| 128 |
+
):
|
| 129 |
return "Economics Macro Multilingual"
|
| 130 |
|
| 131 |
+
elif ("economics_macro" in dataset_name) or ("economics_reports" in dataset_name and "_eng_" in dataset_name):
|
| 132 |
return "Economics Macro"
|
| 133 |
|
| 134 |
else:
|